add scraping functionality for Lipsia and improve error handling for image downloads

This commit is contained in:
Elmar Kresse
2025-01-13 11:57:20 +01:00
parent 1093f49438
commit b2ef4eb5f2
13 changed files with 58 additions and 5 deletions

5
docker-compose.yml Normal file
View File

@ -0,0 +1,5 @@
# Compose definition with a single service that runs the flatscraper image.
services:
  python:
    # Image reference: registry host gitlab.dit.htwk-leipzig.de:5050, tag "main".
    image: gitlab.dit.htwk-leipzig.de:5050/ekresse/flatscraper:main
    # Fixed container name instead of a Compose-generated one.
    container_name: flatscraper

View File

@ -1 +1 @@
["803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", "id-193-1-13", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "B4095706-A65C-F421-B02E-1D227B684B62", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "id-154-2-71", "id-105-12-78", "id-88-5-45"]
["9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "BAAEC20A-F667-FE22-6693-E4B4CA366889", "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "id-105-12-78", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", 51624, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", 51628, 51632, "id-88-5-45", "E7B71D28-C557-CFE4-805D-42C2793E9248", "B57516F9-E364-7E54-A211-527ED54388E6", "21C20126-380D-9B0F-73F8-C4279897F189", "B4095706-A65C-F421-B02E-1D227B684B62", "id-154-2-71", "A979EBAE-EF87-FB51-152A-5453CD7DC794", "892BD779-F186-9BD1-A97A-5783EFB6F56D", "665243A8-FD34-86F8-322F-FE9B0B392083", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-193-1-13", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "3B73B720-13F2-62A4-8829-557676725A95"]

View File

@ -4,7 +4,7 @@ import time
import src.wogetra.scraper as wogetra_scraper
import src.lwb.scraper as lwb_scraper
import src.discord.webhook as localwebhook
import src.lipsia.lipsia as lipsia_scraper
# URL of the website to scrape
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"
@ -30,6 +30,8 @@ def main():
properties = wogetra_scraper.scrape_wogetra()
print("Scraping properties from LWB...")
properties += lwb_scraper.scrape_easysquare()
print("Scraping properties from Lipsia...")
properties += lipsia_scraper.scrape_lipsia()
for prop in properties:
if prop["id"] not in known_properties:

View File

@ -33,6 +33,13 @@ def send_to_discord(property_data):
# Download the image
image_response = scrape_image.scrape_image(property_data["image_url"])
# Check if the image was downloaded successfully
if image_response == b"":
print("Fehler beim Herunterladen des Bildes: Leere Antwort")
payload = {"content": message}
response = requests.post(WEBHOOK_URL, data=json.dumps(payload), headers=headers)
return
# Send the message with an image attachment
files = {"file": ("image.jpg", image_response)}
payload = {"content": message}

Binary file not shown.

38
src/lipsia/lipsia.py Normal file
View File

@ -0,0 +1,38 @@
import requests
from datetime import datetime
def scrape_lipsia():
    """Fetch rental listings from the WG Lipsia search API.

    Sends a GET request to the Lipsia ``admin-ajax.php`` list endpoint and
    converts every entry of the response's ``list`` array into a property
    dict.

    Returns:
        list[dict]: One dict per listing with the keys ``id``, ``title``,
        ``subtitle``, ``rooms``, ``size``, ``rent``, ``link``, ``abstract``,
        ``warm_rent``, ``availability`` and ``image_url``. An empty list is
        returned (after printing a message) when the request fails, the HTTP
        status is not 200, the body is not valid JSON, or the payload's
        ``status`` field is not ``"success"``.
    """
    url = "https://wg-lipsia.de/wp-admin/admin-ajax.php?action=emk_immosearch_api&endpoint=list"

    # A timeout keeps a stalled connection from blocking the caller forever;
    # network errors are reported the same way as a bad HTTP status.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return []

    # requests raises a ValueError subclass when the body is not JSON.
    try:
        data = response.json()
    except ValueError:
        print("Failed to fetch properties: Response is not valid JSON")
        return []

    # Use .get() so an unexpected payload shape is reported, not a KeyError.
    if not isinstance(data, dict) or data.get("status") != "success":
        print("Failed to fetch properties: Invalid response status")
        return []

    properties = []
    for item in data.get("list") or []:
        lat = item.get("lat", "")
        lon = item.get("lon", "")
        # The "link" field is a Google Maps search for the coordinates.
        google_maps_link = f"https://www.google.com/maps/search/?api=1&query={lat},{lon}"

        # `or ""` guards against explicit nulls in the API response, which
        # would otherwise break string concatenation.
        headline = item.get("headline") or ""
        street = item.get("adresse_strasse") or ""
        postcode_city = item.get("adresse_plz_ort") or ""

        properties.append({
            "id": item.get("id"),
            "title": f"Lipsia - {headline}",
            "subtitle": f"{street} {postcode_city}",
            "rooms": item.get("zimmer_anzahl", 0),
            "size": str(item.get("wohnflaeche", 0)),
            "rent": "Kalt: " + str(item.get("miete_kalt_euro", 0)),
            "link": google_maps_link,
            "abstract": item.get("highlight_1", ""),
            "warm_rent": "",  # Placeholder as warm rent is not provided
            "availability": item.get("highlight_3", ""),
            "image_url": item.get("image", ""),
        })

    return properties

View File

@ -27,7 +27,8 @@ def scrape_image(url):
if response.status_code != 200:
print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
return []
# return empty image
return b''
# get image from response

View File

@ -101,7 +101,7 @@ def scrape_easysquare():
properties.append({
"id": id,
"title": prop_title,
"title": "LWB - " + prop_title,
"subtitle": subtitle,
"rooms": format.format_room(rooms),
"size": format.format_roomSize(size),

View File

@ -35,7 +35,7 @@ def scrape_wogetra():
# Add property to list
properties.append({
"id": property_id,
"title": title,
"title": "Wogetra - "+ title,
"subtitle": subtitle,
"rooms": rooms,
"size": size,