mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-07-16 11:38:49 +02:00
77 lines
2.8 KiB
Python
77 lines
2.8 KiB
Python
import requests
|
||
from bs4 import BeautifulSoup
|
||
import hashlib
|
||
|
||
def scrape_vlw():
    """Scrape the currently listed flats from the VLW website.

    Requests the search-result page of https://vlw-eg.de with every filter
    left empty and parses each ``div`` with class ``estate-item no-border``
    found inside the ``div`` with class ``estate-result-list``.

    Returns:
        list[dict]: One dict per listing with the keys ``id``, ``title``,
        ``subtitle``, ``rooms``, ``size``, ``rent``, ``link``, ``abstract``,
        ``warm_rent``, ``availability`` and ``image_url``. All values are
        strings taken verbatim from the page; ``rent`` and ``abstract`` are
        always empty. An empty list is returned when the result container is
        not present in the page.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    from urllib.parse import parse_qs, urlparse

    def _text(parent, tag_name, css_class):
        """Return the text of the first matching descendant, or "" if absent."""
        node = parent.find(tag_name, class_=css_class)
        return node.text if node is not None else ""

    def _address_and_size(estate):
        """Return ``(address, size)`` from the two ``<p class="size">`` nodes.

        Both paragraphs share the CSS class ``size``; they are told apart by
        the Font Awesome icon they contain (``fa-map-marker`` for the address,
        ``fa-signal`` for the living space). If an icon is missing, fall back
        to document order: address first, size second.
        """
        paragraphs = estate.find_all("p", class_="size")
        text_by_icon = {}
        for paragraph in paragraphs:
            icon = paragraph.find("i")
            icon_classes = (icon.get("class") or []) if icon is not None else []
            for icon_class in icon_classes:
                text_by_icon.setdefault(icon_class, paragraph.text)

        address = text_by_icon.get("fa-map-marker")
        if address is None:
            address = paragraphs[0].text if paragraphs else ""
        size = text_by_icon.get("fa-signal")
        if size is None:
            size = paragraphs[1].text if len(paragraphs) > 1 else ""
        return address, size

    # Equivalent browser URL:
    # https://vlw-eg.de/suchergebnisse?search-price-min=&search-price-max=&search-size-from=&search-size-to=&search-room-min=&search-room-max=&send=suchen
    url = "https://vlw-eg.de/suchergebnisse"
    parameter = {
        "search-price-min": "",
        "search-price-max": "",
        "search-size-from": "",
        "search-size-to": "",
        "search-room-min": "",
        "search-room-max": "",
        # The site's own search form submits "send=suchen" (see URL above).
        "send": "suchen",
    }

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url=url, params=parameter, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    properties = []

    # Container holding all result entries; missing when the page has no
    # results or its layout changed.
    estate_result_list = soup.find("div", class_="estate-result-list")
    if estate_result_list is None:
        return properties

    estate_items = estate_result_list.find_all("div", class_="estate-item no-border")

    for estate in estate_items:
        # link and id:
        # <a class="link link-typ-2" href="https://vlw-eg.de/suchergebnisse?objekt_id=333111" ...>
        anchor = estate.find("a", class_="link link-typ-2")
        link = anchor.get("href") if anchor is not None else None
        if not link:
            # Without a detail link there is no id to report; skip the entry.
            continue
        object_ids = parse_qs(urlparse(link).query).get("objekt_id")
        if object_ids:
            property_id = object_ids[0]
        else:
            # Fall back to everything after the first "=", or the link itself.
            property_id = link.partition("=")[2] or link

        # <div class="image-wrapper" style="background-image: url(' income/actual/new/....jpg ');">
        image_wrapper = estate.find("div", class_="image-wrapper")
        style = image_wrapper.get("style", "") if image_wrapper is not None else ""
        style_parts = style.split("'")
        image_url = style_parts[1] if len(style_parts) > 1 else ""

        # title: <h4 class="heading_h4">...</h4>
        title = _text(estate, "h4", "heading_h4")

        # address: <p class="size"><i class="fa fa-map-marker"></i> Teichstr. 14</p>
        # size:    <p class="size"><i class="fa fa-signal"></i> 61 m²</p>
        subtitle, size = _address_and_size(estate)

        # rooms: <p class="rooms"><i class="fa fa-home"></i> 3 Zimmer</p>
        rooms = _text(estate, "p", "rooms")

        # warm rent: <p class="price">682 € warm</p>
        warm_rent = _text(estate, "p", "price")

        # availability: <p class="date"><i class="fa fa-calendar"></i> 01.03.2025</p>
        availability = _text(estate, "p", "date")

        properties.append({
            "id": property_id,
            # NOTE(review): the prefix says "Wogetra" although this scraper
            # targets VLW - looks like a copy-paste leftover; confirm with
            # the consumers of this field before changing it.
            "title": "Wogetra - " + title,
            "subtitle": subtitle,
            "rooms": rooms,
            "size": size,
            "rent": "",
            "link": link,
            "abstract": "",
            "warm_rent": warm_rent,
            "availability": availability,
            "image_url": image_url,
        })

    return properties