mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-07-15 19:18:49 +02:00
refactor: clean up unused code and comments in BGL scraper
This commit is contained in:
@ -25,9 +25,6 @@ def fetch_all_properties():
|
||||
|
||||
url = f"{base_url}?{url_params}"
|
||||
|
||||
|
||||
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
@ -51,9 +48,7 @@ def fetch_all_properties():
|
||||
for prop in bgl_page_properties:
|
||||
# find the <a> element in prop with class "btn btn-primary details z-0 me-2 mb-2 stretched-link_" and extract only its href
|
||||
prop_url = prop.find("a", {"class": "btn btn-primary details z-0 me-2 mb-2 stretched-link_"})["href"]
|
||||
print(prop_url)
|
||||
response = requests.request("GET", bgl_url + prop_url, headers=headers)
|
||||
|
||||
prop_soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
# get h3 with class adresse and extract the text
|
||||
@ -74,9 +69,6 @@ def fetch_all_properties():
|
||||
category = li.find("span").text.strip()
|
||||
facts.append((category, value))
|
||||
|
||||
|
||||
|
||||
|
||||
# get the element from facts that has the category "Zimmer"
|
||||
room_count = get_element_from_facts(facts, "Zimmer")
|
||||
size = get_element_from_facts(facts, "Wohnfläche")
|
||||
|
Reference in New Issue
Block a user