add initial scraping functionality and related utilities

2026-01-17 05:52:26 +01:00 · 2025-01-13 10:44:01 +01:00
parent 9a0a72f640
commit b337b7c2f8
15 changed files with 376 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,49 @@
+from bs4 import BeautifulSoup
+import json
+import time
+import src.wogetra.scraper as wogetra_scraper
+import src.lwb.scraper as lwb_scraper
+import src.discord.webhook as localwebhook
+
+
+# URL of the Wogetra rental listings page ("miete" = rent).
+# NOTE(review): not referenced anywhere in the code visible in this file; the
+# scrapers below are called without arguments. Confirm whether it is still needed.
+TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"
+
+# IDs of properties that have already been sent to Discord. main() replaces
+# this with the contents of known_properties.json on startup and writes it
+# back after every scrape, so each listing is only announced once.
+known_properties = set()
+
+# Main loop to periodically check for new listings
+def main():
+    global known_properties
+
+    # Load known properties from file
+    try:
+        with open("known_properties.json", "r") as file:
+            known_properties = set(json.load(file))
+    except FileNotFoundError:
+        print("No known properties file found. Starting fresh.")
+
+    while True:
+        print("Scraping properties...")
+        print("Scraping properties from Wogetra...")
+        properties = wogetra_scraper.scrape_wogetra()
+        print("Scraping properties from LWB...")
+        properties += lwb_scraper.scrape_easysquare()
+
+        for prop in properties:
+            if prop["id"] not in known_properties:
+                # Notify Discord and mark as known
+                localwebhook.send_to_discord(prop)
+                known_properties.add(prop["id"])
+
+
+        # save known properties to file
+        with open("known_properties.json", "w") as file:
+            json.dump(list(known_properties), file)
+
+        # Wait before checking again
+        print("Waiting for the next check...")
+        time.sleep(300)  # Check every 5 minutes
+
+# Start the polling loop only when this file is executed as a script, not
+# when it is imported as a module.
+if __name__ == "__main__":
+    main()