feat: add VLW property scraping functionality to main process

Elmar Kresse
2025-02-16 19:20:27 +01:00
parent 92bb5b2e85
commit 395434da1c
2 changed files with 82 additions and 0 deletions


@@ -6,6 +6,7 @@ import src.lwb.scraper as lwb_scraper
import src.discord.webhook as localwebhook
import src.lipsia.lipsia as lipsia_scraper
import src.bgl.bgl as bgl_scraper
import src.vlw.scraper as vlw_scraper

# URL of the website to scrape
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"
@@ -44,6 +45,10 @@ def main():
    print("Scraped " + str(len(properties_bgl)) + " properties from BGL")
    properties += properties_bgl

    properties_vlw = vlw_scraper.scrape_vlw()
    print("Scraped " + str(len(properties_vlw)) + " properties from VLW")
    properties += properties_vlw

    for prop in properties:
        if prop["id"] not in known_properties:

77  src/vlw/scraper.py  Normal file

@@ -0,0 +1,77 @@
import requests
from bs4 import BeautifulSoup
import hashlib


def scrape_vlw():
    # Scrape the VLW search results page; empty filter values return all current listings.
    # https://vlw-eg.de/suchergebnisse?search-price-min=&search-price-max=&search-size-from=&search-size-to=&search-room-min=&search-room-max=&send=suchen
    url = "https://vlw-eg.de/suchergebnisse"
    parameter = {
        "search-price-min": "",
        "search-price-max": "",
        "search-size-from": "",
        "search-size-to": "",
        "search-room-min": "",
        "search-room-max": "",
        "senden": "suchen",
    }
    response = requests.get(url=url, params=parameter)
    soup = BeautifulSoup(response.content, "html.parser")
    properties = []

    # The listings live in a div with class "estate-result-list",
    # each listing in a child div with class "estate-item no-border".
    estate_result_list = soup.find("div", class_="estate-result-list")
    if estate_result_list is None:
        # Page layout changed or the request returned something unexpected.
        return properties

    estate_items = estate_result_list.find_all("div", class_="estate-item no-border")
    for estate in estate_items:
        # image, e.g. <div class="image-wrapper" style="background-image: url(' income/actual/new/42da0fdb1bcaed578d2256f1a0599bf6.jpg ');">
        image_url = estate.find("div", class_="image-wrapper")["style"].split("'")[1].strip()
        # title, e.g. <h4 class="heading_h4">3-Raumwohnung sucht Nachmieter Großartiger Weitblick inklusive!!</h4>
        title = estate.find("h4", class_="heading_h4").text
        # address and size both use <p class="size">:
        #   <p class="size"><i class="fa fa-map-marker"></i>&nbsp;&nbsp;Teichstr.&nbsp;14</p>
        #   <p class="size"><i class="fa fa-signal"></i> 61 m²</p>
        size_paragraphs = estate.find_all("p", class_="size")
        subtitle = size_paragraphs[0].text
        size = size_paragraphs[1].text if len(size_paragraphs) > 1 else ""
        # rooms, e.g. <p class="rooms"><i class="fa fa-home"></i> 3 Zimmer</p>
        rooms = estate.find("p", class_="rooms").text
        # warm rent, e.g. <p class="price">682 € warm</p>
        warm_rent = estate.find("p", class_="price").text
        # availability, e.g. <p class="date"><i class="fa fa-calendar"></i> 01.03.2025</p>
        availability = estate.find("p", class_="date").text
        # link and id, e.g. <a class="link link-typ-2" href="https://vlw-eg.de/suchergebnisse?objekt_id=333111" title="Details und Kontakt">Details und Kontakt</a>
        link = estate.find("a", class_="link link-typ-2")["href"]
        property_id = link.split("=")[1]
        # content-based hash as a stable fingerprint of the listing
        # (currently unused; property_id from the link is what gets stored below)
        hashID = f"{title}{subtitle}{rooms}{size}{warm_rent}{availability}"
        content_hash = hashlib.sha256(hashID.encode("utf-8")).hexdigest()
        properties.append({
            "id": property_id,
            "title": "VLW - " + title,
            "subtitle": subtitle,
            "rooms": rooms,
            "size": size,
            "rent": "",
            "link": link,
            "abstract": "",
            "warm_rent": warm_rent,
            "availability": availability,
            "image_url": image_url,
        })

    return properties
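For a quick manual check of the new scraper in isolation, a hypothetical snippet such as the following (not part of this commit) could be appended to src/vlw/scraper.py:

# Hypothetical manual test, not part of this commit: run the module directly
# and print one line per parsed listing.
if __name__ == "__main__":
    for prop in scrape_vlw():
        print(prop["id"], prop["title"], prop["rooms"], prop["warm_rent"])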