mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-07-16 11:38:49 +02:00
add scraping functionality for Lipsia and improve error handling for image downloads
This commit is contained in:
5
docker-compose.yml
Normal file
5
docker-compose.yml
Normal file
@ -0,0 +1,5 @@
|
||||
services:
|
||||
python:
|
||||
image: gitlab.dit.htwk-leipzig.de:5050/ekresse/flatscraper:main
|
||||
container_name: flatscraper
|
||||
|
@ -1 +1 @@
|
||||
["803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", "id-193-1-13", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "B4095706-A65C-F421-B02E-1D227B684B62", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "id-154-2-71", "id-105-12-78", "id-88-5-45"]
|
||||
["9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "BAAEC20A-F667-FE22-6693-E4B4CA366889", "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "id-105-12-78", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", 51624, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", 51628, 51632, "id-88-5-45", "E7B71D28-C557-CFE4-805D-42C2793E9248", "B57516F9-E364-7E54-A211-527ED54388E6", "21C20126-380D-9B0F-73F8-C4279897F189", "B4095706-A65C-F421-B02E-1D227B684B62", "id-154-2-71", "A979EBAE-EF87-FB51-152A-5453CD7DC794", "892BD779-F186-9BD1-A97A-5783EFB6F56D", "665243A8-FD34-86F8-322F-FE9B0B392083", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-193-1-13", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "3B73B720-13F2-62A4-8829-557676725A95"]
|
4
main.py
4
main.py
@ -4,7 +4,7 @@ import time
|
||||
import src.wogetra.scraper as wogetra_scraper
|
||||
import src.lwb.scraper as lwb_scraper
|
||||
import src.discord.webhook as localwebhook
|
||||
|
||||
import src.lipsia.lipsia as lipsia_scraper
|
||||
|
||||
# URL of the website to scrape
|
||||
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"
|
||||
@ -30,6 +30,8 @@ def main():
|
||||
properties = wogetra_scraper.scrape_wogetra()
|
||||
print("Scraping properties from LWB...")
|
||||
properties += lwb_scraper.scrape_easysquare()
|
||||
print("Scraping properties from Lipsia...")
|
||||
properties += lipsia_scraper.scrape_lipsia()
|
||||
|
||||
for prop in properties:
|
||||
if prop["id"] not in known_properties:
|
||||
|
Binary file not shown.
@ -33,6 +33,13 @@ def send_to_discord(property_data):
|
||||
# Download the image
|
||||
image_response = scrape_image.scrape_image(property_data["image_url"])
|
||||
|
||||
# Check if the image was downloaded successfully
|
||||
if image_response == b"":
|
||||
print("Fehler beim Herunterladen des Bildes: Leere Antwort")
|
||||
payload = {"content": message}
|
||||
response = requests.post(WEBHOOK_URL, data=json.dumps(payload), headers=headers)
|
||||
return
|
||||
|
||||
# Send the message with an image attachment
|
||||
files = {"file": ("image.jpg", image_response)}
|
||||
payload = {"content": message}
|
||||
|
BIN
src/lipsia/__pycache__/lipsia.cpython-310.pyc
Normal file
BIN
src/lipsia/__pycache__/lipsia.cpython-310.pyc
Normal file
Binary file not shown.
38
src/lipsia/lipsia.py
Normal file
38
src/lipsia/lipsia.py
Normal file
@ -0,0 +1,38 @@
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
def scrape_lipsia():
    """Fetch the current rental listings from the WG Lipsia search API.

    Returns:
        list[dict]: One dict per listing with the keys ``id``, ``title``,
        ``subtitle``, ``rooms``, ``size``, ``rent``, ``link``, ``abstract``,
        ``warm_rent``, ``availability`` and ``image_url``. An empty list is
        returned when the request fails, the server answers with a non-200
        status, the body is not valid JSON, or the payload does not report
        ``"status": "success"``.
    """
    url = "https://wg-lipsia.de/wp-admin/admin-ajax.php?action=emk_immosearch_api&endpoint=list"

    try:
        # Without a timeout a stalled server would block the whole scrape run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Network problems must not take down the other scrapers.
        print(f"Failed to fetch data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print("Failed to fetch properties: Response is not valid JSON")
        return []

    # Use .get() so an unexpected payload shape is reported instead of raising.
    if not isinstance(data, dict) or data.get("status") != "success":
        print("Failed to fetch properties: Invalid response status")
        return []

    properties = []
    for item in data.get("list") or []:
        if not isinstance(item, dict):
            # Skip malformed entries rather than aborting the whole list.
            continue

        def text(key, _item=item):
            # JSON null arrives as None; .get(key, "") would not replace it,
            # and concatenating None with a str raises TypeError.
            value = _item.get(key)
            return "" if value is None else str(value)

        lat = item.get("lat", "")
        lon = item.get("lon", "")
        google_maps_link = f"https://www.google.com/maps/search/?api=1&query={lat},{lon}"

        properties.append({
            "id": item.get("id"),
            "title": "Lipsia - " + text("headline"),
            "subtitle": text("adresse_strasse") + " " + text("adresse_plz_ort"),
            "rooms": item.get("zimmer_anzahl", 0),
            "size": str(item.get("wohnflaeche", 0)) + " m²",
            "rent": "Kalt: " + str(item.get("miete_kalt_euro", 0)) + " €",
            "link": google_maps_link,
            "abstract": item.get("highlight_1", ""),
            "warm_rent": "",  # Placeholder: no warm rent is read from the API response
            "availability": item.get("highlight_3", ""),
            "image_url": item.get("image", ""),
        })

    return properties
|
Binary file not shown.
Binary file not shown.
@ -27,7 +27,8 @@ def scrape_image(url):
|
||||
|
||||
if response.status_code != 200:
|
||||
print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
|
||||
return []
|
||||
# return empty image
|
||||
return b''
|
||||
|
||||
# get image from response
|
||||
|
||||
|
@ -101,7 +101,7 @@ def scrape_easysquare():
|
||||
|
||||
properties.append({
|
||||
"id": id,
|
||||
"title": prop_title,
|
||||
"title": "LWB - " + prop_title,
|
||||
"subtitle": subtitle,
|
||||
"rooms": format.format_room(rooms),
|
||||
"size": format.format_roomSize(size),
|
||||
|
Binary file not shown.
@ -35,7 +35,7 @@ def scrape_wogetra():
|
||||
# Add property to list
|
||||
properties.append({
|
||||
"id": property_id,
|
||||
"title": title,
|
||||
"title": "Wogetra - "+ title,
|
||||
"subtitle": subtitle,
|
||||
"rooms": rooms,
|
||||
"size": size,
|
||||
|
Reference in New Issue
Block a user