Files
flatscraper/src/vlw/scraper.py

77 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
from bs4 import BeautifulSoup
import hashlib
def scrape_vlw():
    """Scrape the rental listings shown on the VLW search-result page.

    Requests https://vlw-eg.de/suchergebnisse with every search filter left
    empty and builds one dictionary per ``div`` with the class attribute
    ``estate-item no-border`` inside the ``estate-result-list`` container.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``subtitle``,
        ``rooms``, ``size``, ``rent``, ``link``, ``abstract``, ``warm_rent``,
        ``availability`` and ``image_url``. ``rent`` and ``abstract`` are
        always empty strings. Text values are taken from the markup as-is
        (no whitespace stripping). The list is empty when the page has no
        ``estate-result-list`` container.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    url = "https://vlw-eg.de/suchergebnisse"
    # Query string as in the reference URL:
    # https://vlw-eg.de/suchergebnisse?search-price-min=&search-price-max=&search-size-from=&search-size-to=&search-room-min=&search-room-max=&send=suchen
    parameter = {
        "search-price-min": "",
        "search-price-max": "",
        "search-size-from": "",
        "search-size-to": "",
        "search-room-min": "",
        "search-room-max": "",
        "send": "suchen",
    }
    # Without an explicit timeout the call could block forever on a stalled
    # connection.
    response = requests.get(url=url, params=parameter, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    estate_result_list = soup.find("div", class_="estate-result-list")
    if estate_result_list is None:
        # No result container on the page: nothing to report.
        return []

    properties = []
    estate_items = estate_result_list.find_all(
        "div", class_="estate-item no-border"
    )
    for estate in estate_items:
        # <div class="image-wrapper" style="background-image: url('...jpg');">
        # The URL is the text between the first pair of single quotes.
        style = estate.find("div", class_="image-wrapper")["style"]
        image_url = style.split("'")[1]

        # <h4 class="heading_h4">3-Raumwohnung sucht Nachmieter ...</h4>
        title = estate.find("h4", class_="heading_h4").text

        # Address and living area share the CSS class "size":
        #   <p class="size"><i class="fa fa-map-marker"></i>  Teichstr. 14</p>
        #   <p class="size"><i class="fa fa-signal"></i> 61 m²</p>
        # A single find() would return the address for both fields, so
        # collect every match and pick them by position.
        size_tags = estate.find_all("p", class_="size")
        subtitle = size_tags[0].text
        size = size_tags[1].text if len(size_tags) > 1 else ""

        # <p class="rooms"><i class="fa fa-home"></i> 3 Zimmer</p>
        rooms = estate.find("p", class_="rooms").text
        # <p class="price">682 € warm</p>
        warm_rent = estate.find("p", class_="price").text
        # <p class="date"><i class="fa fa-calendar"></i> 01.03.2025</p>
        availability = estate.find("p", class_="date").text

        # <a class="link link-typ-2" href="https://vlw-eg.de/suchergebnisse?objekt_id=333111" ...>
        # The listing id is whatever follows the first "=" in the href.
        link = estate.find("a", class_="link link-typ-2")["href"]
        property_id = link.split("=")[1]

        properties.append({
            "id": property_id,
            # NOTE(review): the prefix says "Wogetra" although this scraper
            # targets vlw-eg.de - looks like a copy-paste leftover. Kept
            # unchanged because consumers may depend on it; confirm and fix.
            "title": "Wogetra - " + title,
            "subtitle": subtitle,
            "rooms": rooms,
            "size": size,
            "rent": "",
            "link": link,
            "abstract": "",
            "warm_rent": warm_rent,
            "availability": availability,
            "image_url": image_url,
        })
    return properties