diff --git a/main.py b/main.py
index a0ad3e6..06785d4 100644
--- a/main.py
+++ b/main.py
@@ -6,6 +6,7 @@ import src.lwb.scraper as lwb_scraper
 import src.discord.webhook as localwebhook
 import src.lipsia.lipsia as lipsia_scraper
 import src.bgl.bgl as bgl_scraper
+import src.vlw.scraper as vlw_scraper
 
 # URL of the website to scrape
 TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"
@@ -44,6 +45,10 @@ def main():
     print("Scraped " + str(len(properties_bgl)) + " properties from BGL")
     properties += properties_bgl
 
+    properties_vlw = vlw_scraper.scrape_vlw()
+    print("Scraped " + str(len(properties_vlw)) + " properties from VLW")
+    properties += properties_vlw
+
 
     for prop in properties:
         if prop["id"] not in known_properties:
diff --git a/src/vlw/scraper.py b/src/vlw/scraper.py
new file mode 100644
--- /dev/null
+++ b/src/vlw/scraper.py
@@ -0,0 +1,111 @@
+"""Scraper for rental listings published by VLW (vlw-eg.de)."""
+
+import hashlib
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+# Search results page that lists the rental offers.
+SEARCH_URL = "https://vlw-eg.de/suchergebnisse"
+
+# Seconds to wait for the server so a stalled request cannot hang the run.
+REQUEST_TIMEOUT = 30
+
+
+def _text(estate, tag, css_class):
+    """Return the stripped text of the first matching child, or ""."""
+    node = estate.find(tag, class_=css_class)
+    return node.text.strip() if node is not None else ""
+
+
+def _image_url(estate):
+    """Return the URL quoted in the image wrapper's style, or ""."""
+    wrapper = estate.find("div", class_="image-wrapper")
+    if wrapper is None:
+        return ""
+    # The URL is the first single-quoted token of the style attribute.
+    pieces = wrapper.get("style", "").split("'")
+    return pieces[1] if len(pieces) > 1 else ""
+
+
+def _property_id(href, fingerprint):
+    """Return the id found in *href*, else a SHA-256 of *fingerprint*."""
+    # The id is whatever sits between the first and second "=" of the link.
+    parts = href.split("=")
+    if len(parts) > 1 and parts[1]:
+        return parts[1]
+    # No usable id in the link: derive a stable one from the listing data.
+    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
+
+
+def scrape_vlw():
+    """Scrape the rental offers from the VLW search results page.
+
+    Returns:
+        A list with one dict per listing, using the keys ``id``,
+        ``title``, ``subtitle``, ``rooms``, ``size``, ``rent``, ``link``,
+        ``abstract``, ``warm_rent``, ``availability`` and ``image_url``.
+        The list is empty when the page has no result container.
+
+    Raises:
+        requests.RequestException: If the request fails, times out or
+            the server answers with an HTTP error status.
+    """
+    # Unfiltered search, equivalent to
+    # https://vlw-eg.de/suchergebnisse?search-price-min=&search-price-max=&search-size-from=&search-size-to=&search-room-min=&search-room-max=&send=suchen
+    params = {
+        "search-price-min": "",
+        "search-price-max": "",
+        "search-size-from": "",
+        "search-size-to": "",
+        "search-room-min": "",
+        "search-room-max": "",
+        # The site's own search URL names this field "send".
+        "send": "suchen",
+    }
+
+    response = requests.get(SEARCH_URL, params=params, timeout=REQUEST_TIMEOUT)
+    # Fail loudly on HTTP errors instead of parsing an error page.
+    response.raise_for_status()
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    result_list = soup.find("div", class_="estate-result-list")
+    if result_list is None:
+        return []
+
+    properties = []
+    for estate in result_list.find_all("div", class_="estate-item no-border"):
+        # Headline, e.g. "3-Raumwohnung sucht Nachmieter – Großartiger
+        # Weitblick inklusive!!"
+        title = _text(estate, "h4", "heading_h4")
+        # NOTE(review): this reuses the "size" selector, so the subtitle
+        # equals the size instead of the address (e.g. "Teichstr. 14").
+        # The address element's class is not known here; confirm and fix.
+        subtitle = _text(estate, "p", "size")
+        size = _text(estate, "p", "size")  # e.g. "61 m²"
+        rooms = _text(estate, "p", "rooms")  # e.g. "3 Zimmer"
+        warm_rent = _text(estate, "p", "price")  # e.g. "682 € warm"
+        availability = _text(estate, "p", "date")  # e.g. "01.03.2025"
+
+        # "Details und Kontakt" anchor; its href carries the listing id.
+        anchor = estate.find("a", class_="link link-typ-2")
+        href = anchor.get("href", "") if anchor is not None else ""
+        fingerprint = f"{title}{subtitle}{rooms}{size}{warm_rent}{availability}"
+
+        properties.append({
+            "id": _property_id(href, fingerprint),
+            "title": "VLW - " + title,
+            "subtitle": subtitle,
+            "rooms": rooms,
+            "size": size,
+            "rent": "",
+            # Resolve relative hrefs; an empty href yields the search page.
+            "link": urljoin(SEARCH_URL, href),
+            "abstract": "",
+            "warm_rent": warm_rent,
+            "availability": availability,
+            "image_url": _image_url(estate),
+        })
+
+    return properties