refactor: clean up unused code and comments in BGL scraper

This commit is contained in:
Elmar Kresse
2025-02-09 19:46:50 +01:00
parent 17078d10f6
commit f16116040d

View File

@@ -25,9 +25,6 @@ def fetch_all_properties():
url = f"{base_url}?{url_params}" url = f"{base_url}?{url_params}"
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -51,9 +48,7 @@ def fetch_all_properties():
for prop in bgl_page_properties: for prop in bgl_page_properties:
# get a from prop with class "btn btn-primary details z-0 me-2 mb-2 stretched-link_" and only extract the href # get a from prop with class "btn btn-primary details z-0 me-2 mb-2 stretched-link_" and only extract the href
prop_url = prop.find("a", {"class": "btn btn-primary details z-0 me-2 mb-2 stretched-link_"})["href"] prop_url = prop.find("a", {"class": "btn btn-primary details z-0 me-2 mb-2 stretched-link_"})["href"]
print(prop_url)
response = requests.request("GET", bgl_url + prop_url, headers=headers) response = requests.request("GET", bgl_url + prop_url, headers=headers)
prop_soup = BeautifulSoup(response.text, "html.parser") prop_soup = BeautifulSoup(response.text, "html.parser")
# get h3 with class adresse and extract the text # get h3 with class adresse and extract the text
@@ -74,9 +69,6 @@ def fetch_all_properties():
category = li.find("span").text.strip() category = li.find("span").text.strip()
facts.append((category, value)) facts.append((category, value))
# get elements from facts that has the category "Zimmer" # get elements from facts that has the category "Zimmer"
room_count = get_element_from_facts(facts, "Zimmer") room_count = get_element_from_facts(facts, "Zimmer")
size = get_element_from_facts(facts, "Wohnfläche") size = get_element_from_facts(facts, "Wohnfläche")