# Mirror of https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
# (synced 2025-07-15 11:08:48 +02:00; 70 lines, 2.4 KiB, Python)
from bs4 import BeautifulSoup
|
|
import json
|
|
import time
|
|
import src.wogetra.scraper as wogetra_scraper
|
|
import src.lwb.scraper as lwb_scraper
|
|
import src.discord.webhook as localwebhook
|
|
import src.lipsia.lipsia as lipsia_scraper
|
|
import src.bgl.bgl as bgl_scraper
|
|
import src.vlw.scraper as vlw_scraper
|
|
|
|
# Rental listings page on wogetra.de.
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably kept for reference or used by other code -- confirm before removing.
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"

# IDs of listings that have already been announced, kept so that the same
# listing does not trigger a second notification. main() replaces this set
# with the contents of known_properties.json when that file exists.
known_properties = set()
|
|
|
|
# Main loop to periodically check for new listings
|
|
def main():
    """Poll all housing providers forever and announce unseen listings.

    On start-up the set of already-announced listing IDs is loaded from
    ``known_properties.json`` (a missing file just means a fresh start).
    Each cycle then:

    1. calls every provider scraper and merges the results into one list,
    2. sends each listing whose ``"id"`` is not yet known to Discord and
       records the ID,
    3. writes the known IDs back to ``known_properties.json``,
    4. sleeps for 300 seconds.

    The function never returns; it only ends through an exception or when
    the process is terminated.
    """
    # Rebinding (not just mutating) the module-level set below requires this.
    global known_properties

    # Load known properties from file
    try:
        with open("known_properties.json", "r") as file:
            known_properties = set(json.load(file))
    except FileNotFoundError:
        print("No known properties file found. Starting fresh.")

    # (label used in the log line, scraper callable), in polling order.
    sources = (
        ("Wogetra", wogetra_scraper.scrape_wogetra),
        ("LWB", lwb_scraper.scrape_easysquare),
        ("Lipsia", lipsia_scraper.scrape_lipsia),
        ("BGL", bgl_scraper.fetch_all_properties),
        ("VLW", vlw_scraper.scrape_vlw),
    )

    while True:
        current_time = time.strftime("%H:%M:%S", time.localtime())
        print("Scraping properties at " + current_time)

        # Start from a fresh list every cycle so that no scraper's returned
        # list is mutated, and so that every provider's listings are kept.
        # (Previously the VLW result was assigned with "=" instead of "+=",
        # which threw away the listings of all other providers.)
        properties = []
        for label, scrape in sources:
            scraped = scrape()
            print("Scraped " + str(len(scraped)) + " properties from " + label)
            properties.extend(scraped)

        for prop in properties:
            if prop["id"] not in known_properties:
                # Notify Discord and mark as known
                localwebhook.send_to_discord(prop)
                known_properties.add(prop["id"])

        # Save known properties to file (a set is not JSON-serialisable,
        # hence the conversion to a list).
        with open("known_properties.json", "w") as file:
            json.dump(list(known_properties), file)

        # Wait before checking again
        print("Waiting for the next check...")
        time.sleep(300)  # Check every 5 minutes
|
|
|
|
# Start the polling loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|