diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..7c78c19 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,5 @@ +services: + python: + image: gitlab.dit.htwk-leipzig.de:5050/ekresse/flatscraper:main + container_name: flatscraper + \ No newline at end of file diff --git a/known_properties.json b/known_properties.json index c865079..1aa7521 100644 --- a/known_properties.json +++ b/known_properties.json @@ -1 +1 @@ -["803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", "id-193-1-13", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "B4095706-A65C-F421-B02E-1D227B684B62", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "id-154-2-71", "id-105-12-78", "id-88-5-45"] \ No newline at end of file +["9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "BAAEC20A-F667-FE22-6693-E4B4CA366889", "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "id-105-12-78", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", 51624, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", 51628, 51632, "id-88-5-45", "E7B71D28-C557-CFE4-805D-42C2793E9248", "B57516F9-E364-7E54-A211-527ED54388E6", "21C20126-380D-9B0F-73F8-C4279897F189", "B4095706-A65C-F421-B02E-1D227B684B62", "id-154-2-71", "A979EBAE-EF87-FB51-152A-5453CD7DC794", "892BD779-F186-9BD1-A97A-5783EFB6F56D", "665243A8-FD34-86F8-322F-FE9B0B392083", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-193-1-13", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "3B73B720-13F2-62A4-8829-557676725A95"] \ No newline at end of file diff --git a/main.py b/main.py index 40cae0b..da472e5 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import time import src.wogetra.scraper as wogetra_scraper import src.lwb.scraper as lwb_scraper import src.discord.webhook as localwebhook - +import src.lipsia.lipsia as lipsia_scraper # URL of the website to scrape TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/" @@ -30,6 +30,8 @@ def main(): properties = 
def _field(item, key, default):
    """Return ``item[key]``, or *default* when the key is missing or null."""
    value = item.get(key)
    return default if value is None else value


def _lipsia_property(item):
    """Map one raw Lipsia listing (a dict) to the property dict of this scraper."""
    lat = _field(item, "lat", "")
    lon = _field(item, "lon", "")
    street = _field(item, "adresse_strasse", "")
    city = _field(item, "adresse_plz_ort", "")

    return {
        "id": item.get("id"),
        # f-strings instead of "+" so a null or non-string field cannot raise
        # TypeError and abort the whole scrape.
        "title": f"Lipsia - {_field(item, 'headline', '')}",
        "subtitle": f"{street} {city}",
        "rooms": _field(item, "zimmer_anzahl", 0),
        "size": f"{_field(item, 'wohnflaeche', 0)} m²",
        "rent": f"Kalt: {_field(item, 'miete_kalt_euro', 0)} €",
        # The listing link is a Google Maps search for the coordinates.
        "link": f"https://www.google.com/maps/search/?api=1&query={lat},{lon}",
        "abstract": _field(item, "highlight_1", ""),
        "warm_rent": "",  # Placeholder as warm rent is not provided
        "availability": _field(item, "highlight_3", ""),
        "image_url": _field(item, "image", ""),
    }


def scrape_lipsia(*, session=None, timeout=10):
    """Fetch the rental listings published by the WG Lipsia search endpoint.

    Every failure mode (network error, non-200 status, non-JSON body,
    payload without ``status == "success"``) is reported on stdout and
    answered with an empty list, so one unreachable site does not abort the
    caller.

    Args:
        session: Optional object exposing ``get(url, timeout=...)``, e.g. a
            ``requests.Session``. Defaults to the ``requests`` module.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of property dicts with the keys ``id``, ``title``,
        ``subtitle``, ``rooms``, ``size``, ``rent``, ``link``, ``abstract``,
        ``warm_rent``, ``availability`` and ``image_url``.
    """
    url = "https://wg-lipsia.de/wp-admin/admin-ajax.php?action=emk_immosearch_api&endpoint=list"

    if session is None:
        # Imported lazily so an injected session works without `requests`.
        import requests

        session = requests

    try:
        # Without an explicit timeout, requests waits forever on a stalled server.
        response = session.get(url, timeout=timeout)
    except OSError as error:
        # requests.RequestException derives from IOError, i.e. OSError.
        print(f"Failed to fetch data: {error}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError:
        # Raised for bodies that are not JSON (e.g. an HTML error page).
        print("Failed to fetch properties: Response is not valid JSON")
        return []

    if not isinstance(data, dict) or data.get("status") != "success":
        print("Failed to fetch properties: Invalid response status")
        return []

    listings = data.get("list") or []
    return [_lipsia_property(item) for item in listings if isinstance(item, dict)]
"title": prop_title, + "title": "LWB - " + prop_title, "subtitle": subtitle, "rooms": format.format_room(rooms), "size": format.format_roomSize(size), diff --git a/src/wogetra/__pycache__/scraper.cpython-310.pyc b/src/wogetra/__pycache__/scraper.cpython-310.pyc index 2c24036..c774dd2 100644 Binary files a/src/wogetra/__pycache__/scraper.cpython-310.pyc and b/src/wogetra/__pycache__/scraper.cpython-310.pyc differ diff --git a/src/wogetra/scraper.py b/src/wogetra/scraper.py index 9f1fcdf..e296389 100644 --- a/src/wogetra/scraper.py +++ b/src/wogetra/scraper.py @@ -35,7 +35,7 @@ def scrape_wogetra(): # Add property to list properties.append({ "id": property_id, - "title": title, + "title": "Wogetra - "+ title, "subtitle": subtitle, "rooms": rooms, "size": size,