"""Periodically scrape rental listings and notify a Discord webhook.

Polls the Wogetra and LWB (Easysquare) sources, sends one Discord
message per previously unseen listing, and persists the set of known
listing IDs between runs so restarts do not re-notify.
"""
import json
import time

import src.wogetra.scraper as wogetra_scraper
import src.lwb.scraper as lwb_scraper
import src.discord.webhook as localwebhook

# URL of the website to scrape (kept for reference; each scraper module
# holds its own URL constants).
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"

# File in which the IDs of already-notified listings are persisted.
KNOWN_PROPERTIES_FILE = "known_properties.json"

# Seconds to wait between polling rounds (5 minutes).
CHECK_INTERVAL = 300

# Store known property IDs to avoid duplicate notifications.
known_properties = set()


def main():
    """Run the scrape/notify loop forever."""
    global known_properties

    # Load known properties from a previous run.  A missing or corrupt
    # file simply means we start with an empty set instead of crashing.
    try:
        with open(KNOWN_PROPERTIES_FILE, "r", encoding="utf-8") as file:
            known_properties = set(json.load(file))
    except FileNotFoundError:
        print("No known properties file found. Starting fresh.")
    except json.JSONDecodeError:
        print("Known properties file is corrupt. Starting fresh.")

    while True:
        print("Scraping properties...")
        print("Scraping properties from Wogetra...")
        properties = wogetra_scraper.scrape_wogetra()
        print("Scraping properties from LWB...")
        properties += lwb_scraper.scrape_easysquare()

        for prop in properties:
            if prop["id"] not in known_properties:
                # Notify Discord and mark as known.
                localwebhook.send_to_discord(prop)
                known_properties.add(prop["id"])

        # Persist the known IDs after every round.
        with open(KNOWN_PROPERTIES_FILE, "w", encoding="utf-8") as file:
            json.dump(list(known_properties), file)

        # Wait before checking again.
        print("Waiting for the next check...")
        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main()
def send_to_discord(property_data):
    """Send a formatted listing notification to the Discord webhook.

    Args:
        property_data: dict with the keys title, subtitle, rooms, size,
            rent, warm_rent, availability, link, abstract and optionally
            image_url (produced by the scraper modules).

    NOTE(review): WEBHOOK_URL is a secret hardcoded in this module and
    committed to the repository — it should be rotated and moved to an
    environment variable or config file.
    """
    message = (
        f"**{property_data['title']}**\n"
        f"{property_data['subtitle']}\n"
        f"**Zimmer:** {property_data['rooms']}\n"
        f"**Wohnfläche:** {property_data['size']}\n"
        f"**Gesamtmiete:** {property_data['rent']}\n"
        f"**Warmmiete:** {property_data['warm_rent']}\n"
        f"**Verfügbar ab:** {property_data['availability']}\n"
        f"**Link:** {property_data['link']}\n"
        f"**Beschreibung:** {property_data['abstract']}"
    )

    # Set headers for the JSON (text-only) variant of the request.
    headers = {"Content-Type": "application/json"}
    payload = {"content": message}
    response = None

    # Try to attach the listing photo when an image URL is present.
    # scrape_image reports HTTP failures by returning a falsy value
    # rather than raising, so check the result before attaching it —
    # otherwise we would upload an empty "image.jpg".
    if property_data.get("image_url"):
        try:
            image_response = scrape_image.scrape_image(property_data["image_url"])
            if image_response:
                files = {"file": ("image.jpg", image_response)}
                response = requests.post(WEBHOOK_URL, data=payload, files=files)
        except requests.exceptions.RequestException as e:
            print(f"Fehler beim Herunterladen des Bildes: {e}")

    if response is None:
        # No image available (or the download failed): send text only.
        response = requests.post(WEBHOOK_URL, data=json.dumps(payload), headers=headers)

    # Any 2xx status means Discord accepted the message.
    if 200 <= response.status_code < 300:
        print(f"Benachrichtigung gesendet: {property_data['title']} - response: {response.status_code}")
    else:
        print(f"Fehler beim Senden der Benachrichtigung: {response.status_code}")
from datetime import datetime


def format_date(date):
    """Convert an Easysquare date field like "B20250115" to "15 January 2025".

    The leading "B" is a field-type prefix used by the Easysquare API.
    Non-numeric input (e.g. the scraper's "N/A" placeholder) is returned
    unchanged instead of raising.
    """
    date_part = date[1:]  # skip the "B" prefix
    if not date_part.isdigit():
        return date
    date_object = datetime.strptime(date_part, "%Y%m%d")
    return date_object.strftime("%d %B %Y")


def format_room(room):
    """Format an Easysquare room-count field, e.g. "B0000000200" -> "2 Zimmer".

    The count is encoded, zero-padded, in the last five characters.
    Non-numeric input is returned unchanged instead of raising.
    """
    digits = room[-5:].lstrip("0")
    if not digits[:1].isdigit():
        return room
    room_count = int(digits[:1])
    # "Zimmer" is both singular and plural in German, so no branching
    # is needed (the original if/else had two identical arms).
    return f"{room_count} Zimmer"


def format_money(money):
    """Format an Easysquare money field, e.g. "B000000079900" -> "799,00 €".

    The value is encoded in cents with a "B" prefix and zero padding.
    Non-numeric input is returned unchanged instead of raising.
    """
    amount_part = money[1:]  # skip the "B" prefix
    if not amount_part.isdigit():
        return money
    amount_part = amount_part.lstrip("0")
    # Guard: amounts below 1 € leave no euro digits after stripping.
    euros = amount_part[:-2] or "0"
    cents = amount_part[-2:].rjust(2, "0")
    return f"{euros},{cents} €"


def format_roomSize(room):
    """Format an Easysquare area field, e.g. "B000000502800" -> "50,28 m²".

    The value is encoded in 1/10000 m² with a "B" prefix and zero
    padding; only the first two fractional digits are displayed (the
    original kept all four, printing "50,2800 m²").
    Non-numeric input is returned unchanged instead of raising.
    """
    amount_part = room[1:]  # skip the "B" prefix
    if not amount_part.isdigit():
        return room
    amount_part = amount_part.lstrip("0")
    meters = amount_part[:-4] or "0"
    # Take only the first two of the four fractional digits.
    fraction = amount_part[-4:-2]
    return f"{meters},{fraction} m²"
import requests

# Request headers captured from a browser session against the
# Easysquare portal.
# NOTE(review): the Cookie value is a session cookie and will expire —
# confirm how/whether it is refreshed in production.
EASYSQUARE_HEADERS = {
    "DNT": "1",
    "Host": "portal1s.easysquare.com",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Cookie": "SAP_SESSIONID_PP0_581=zqFIhvNbEsOs_n3cgRTIO1V7ZaLQCxHvhYgKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "de,en-US;q=0.7,en;q=0.3",
    "Upgrade-Insecure-Requests": "1"
}
EASYSQUARE_PARAMS = {
    "application": "ESQ_IA_REOBJ",
    "sap-client": "581",
    "command": "action",
    "name": "boxlist",
    "api": "6.169",
    "head-oppc-version": "6.169.22",
    "_": "1736595414769"
}


def scrape_image(url):
    """Download a listing image from the Easysquare portal.

    Args:
        url: absolute image URL (see src/lwb/scraper.py for how it is built).

    Returns:
        The raw image bytes, or empty bytes when the request does not
        return HTTP 200.  (The original returned a list here, which is
        the wrong type; b"" is equally falsy and keeps the return type
        consistent for callers that write the result to a file.)
    """
    session = requests.Session()
    response = session.get(url, headers=EASYSQUARE_HEADERS, params=EASYSQUARE_PARAMS)

    if response.status_code != 200:
        print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
        return b""

    return response.content
# The request mirrors the portal's own AJAX call (boxlist action of the
# ESQ_IA_REOBJ application); headers/params are defined above.
def scrape_easysquare():
    """Scrape rental listings from the Easysquare (LWB) XML API.

    Returns:
        A list of property dicts with the keys expected by
        src.discord.webhook.send_to_discord; an empty list when the
        request does not return HTTP 200.
    """
    session = requests.Session()
    response = session.get(EASYSQUARE_URL, headers=EASYSQUARE_HEADERS,
                           params=EASYSQUARE_PARAMS)

    if response.status_code != 200:
        print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
        return []

    # Parse the XML payload; every listing lives under a <head> element.
    root = ET.fromstring(response.content)
    namespace = {"ns": "http://www.openpromos.com/OPPC/XMLForms"}

    properties = []
    for head in root.findall(".//ns:head", namespace):
        # Guard every node lookup: a listing missing one of these
        # elements previously crashed the whole scrape with an
        # AttributeError on .text / .get.
        title_node = head.find("ns:title", namespace)
        prop_title = title_node.text if title_node is not None else ""

        subtitle_node = head.find("ns:subtitle", namespace)
        subtitle = subtitle_node.text if subtitle_node is not None else ""

        abstract_node = head.find("ns:abstract", namespace)
        abstract = (abstract_node.text.strip()
                    if abstract_node is not None and abstract_node.text else "")

        # Geo coordinates, used below to build a Google-Maps link.
        address = head.find("ns:address", namespace)
        lat = address.get("lat") if address is not None else ""
        lon = address.get("lon") if address is not None else ""

        image = head.find("ns:image", namespace)
        image_resource_id = image.get("resourceId") if image is not None else ""

        id_node = head.find("ns:id", namespace)
        prop_id = id_node.text if id_node is not None else ""

        # Extract the listing criteria (room count, area, rent, ...).
        rooms = "N/A"
        size = "N/A"
        rent = "N/A"
        availability = "N/A"

        for criterion in head.findall(".//ns:criterion", namespace):
            criterion_title = criterion.get("title")
            value = criterion.text.strip() if criterion.text else "N/A"
            if criterion_title == "Zimmer":
                rooms = value
            elif criterion_title == "Fläche":
                size = value
            elif criterion_title == "Gesamtmiete":
                rent = value
            elif criterion_title == "Verfügbar ab":
                availability = value

        # Google-Maps link from the listing's coordinates.
        link = f"https://www.google.com/maps/search/?api=1&query={lat},{lon}"

        # Image download URL, e.g.
        # .../image.jpg?application=ESQ_IA_REOBJ&command=action&id=<resourceId>&name=get
        image_url = f"https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id={image_resource_id}&name=get"

        properties.append({
            "id": prop_id,
            "title": prop_title,
            "subtitle": subtitle,
            "rooms": format.format_room(rooms),
            "size": format.format_roomSize(size),
            "rent": format.format_money(rent),
            "link": link,
            "abstract": abstract,
            "warm_rent": "",
            "availability": format.format_date(availability),
            "image_url": image_url,
        })

    return properties
WOGETRA_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"


def _detail_text(details, class_name):
    """Return the "dd" text of one property-data field, or "N/A" if absent."""
    if not details:
        return "N/A"
    field = details.find("div", class_=class_name)
    if not field:
        return "N/A"
    value = field.find("div", class_="dd")
    return value.text.strip() if value else "N/A"


def scrape_wogetra():
    """Scrape rental listings from the Wogetra website.

    Returns:
        A list of property dicts with the keys expected by
        src.discord.webhook.send_to_discord.

    The original chained ``details.find(X).find("div", class_="dd").text``
    for every field, which raised AttributeError as soon as a single
    detail div was missing; _detail_text guards each lookup instead.
    """
    response = requests.get(WOGETRA_URL)
    soup = BeautifulSoup(response.content, "html.parser")

    # Each listing is rendered as one property-container div.
    property_elements = soup.find_all("div", class_="property-container")
    properties = []

    for prop in property_elements:
        title_element = prop.find("h3", class_="property-title")
        subtitle_element = prop.find("div", class_="property-subtitle")
        link_element = title_element.find("a") if title_element else None
        details = prop.find("div", class_="property-data")

        properties.append({
            "id": prop.get("id", ""),
            "title": title_element.text.strip() if title_element else "No Title",
            "subtitle": subtitle_element.text.strip() if subtitle_element else "No Subtitle",
            "rooms": _detail_text(details, "data-anzahl_zimmer"),
            "size": _detail_text(details, "data-wohnflaeche"),
            "rent": _detail_text(details, "data-nettokaltmiete"),
            "link": link_element["href"] if link_element else "#",
            "abstract": "",
            "warm_rent": _detail_text(details, "data-warmmiete"),
            "availability": _detail_text(details, "data-verfuegbar_ab"),
            "image_url": "",
        })

    return properties
"""Manual smoke test: download one Easysquare listing image to disk."""
import src.lwb.scrape_image as scrape_image

# Sample encoded area value for the filename: B000000502800 -> 50,28 m²
room = "B000000502800"

# Download a known listing image from the Easysquare portal.
image = scrape_image.scrape_image("https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id=1EC8D4E6-191A-A827-47FF-72D8C5379070&name=get")

# Save the image for manual inspection — but only if the download
# actually returned data (the original wrote an empty file on failure).
if image:
    with open(f"image_{room}.jpg", "wb") as file:
        file.write(image)
else:
    print("Image download failed; nothing written.")