Files
flatscraper/main.py

70 lines
2.4 KiB
Python

from bs4 import BeautifulSoup
import json
import time
import src.wogetra.scraper as wogetra_scraper
import src.lwb.scraper as lwb_scraper
import src.discord.webhook as localwebhook
import src.lipsia.lipsia as lipsia_scraper
import src.bgl.bgl as bgl_scraper
import src.vlw.scraper as vlw_scraper
# Rental listings page of the Wogetra website (not referenced in the code
# visible in this file).
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"

# IDs of properties that were already announced; used to suppress duplicate
# notifications.
known_properties: set = set()
# Main loop to periodically check for new listings
def main():
    """Poll every scraper forever and announce listings not seen before.

    Previously announced property IDs are loaded from
    ``known_properties.json``; a missing file simply means starting with
    whatever the module-level ``known_properties`` set already holds.
    Each cycle then:

    1. calls every scraper and merges all results into one list,
    2. passes each property whose ``"id"`` is not yet known to
       ``localwebhook.send_to_discord`` and records that ID,
    3. writes the known IDs back to ``known_properties.json``,
    4. sleeps for five minutes.

    The function never returns. It updates the module-level
    ``known_properties`` set.
    """
    global known_properties

    # (label used in the log line, callable returning that source's listings)
    scrapers = (
        ("Wogetra", wogetra_scraper.scrape_wogetra),
        ("LWB", lwb_scraper.scrape_easysquare),
        ("Lipsia", lipsia_scraper.scrape_lipsia),
        ("BGL", bgl_scraper.fetch_all_properties),
        ("VLW", vlw_scraper.scrape_vlw),
    )

    # Load known properties from file
    try:
        with open("known_properties.json", "r") as file:
            known_properties = set(json.load(file))
    except FileNotFoundError:
        print("No known properties file found. Starting fresh.")

    while True:
        current_time = time.strftime("%H:%M:%S", time.localtime())
        print("Scraping properties at " + current_time)

        # Accumulate into a fresh list. The previous code rebound
        # `properties` to the VLW result at the end, silently dropping the
        # listings of every other source, and it also extended the list
        # returned by the Wogetra scraper in place.
        properties = []
        for label, scrape in scrapers:
            scraped = scrape()
            print("Scraped " + str(len(scraped)) + " properties from " + label)
            properties.extend(scraped)

        for prop in properties:
            if prop["id"] not in known_properties:
                # Notify Discord and mark as known
                localwebhook.send_to_discord(prop)
                known_properties.add(prop["id"])

        # save known properties to file
        with open("known_properties.json", "w") as file:
            json.dump(list(known_properties), file)

        # Wait before checking again
        print("Waiting for the next check...")
        time.sleep(300)  # Check every 5 minutes
# Start the polling loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()