mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-07-16 11:38:49 +02:00
add initial scraping functionality and related utilities
This commit is contained in:
49
main.py
Normal file
49
main.py
Normal file
@ -0,0 +1,49 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import time
|
||||
import src.wogetra.scraper as wogetra_scraper
|
||||
import src.lwb.scraper as lwb_scraper
|
||||
import src.discord.webhook as localwebhook
|
||||
|
||||
|
||||
# Listing page on wogetra.de.
# NOTE(review): nothing in the visible part of this file reads TARGET_URL —
# presumably the scraper modules carry their own URLs; confirm before removing.
TARGET_URL = "https://www.wogetra.de/immobilien-vermarktungsart/miete/"

# IDs of listings that were already announced, so that each listing triggers
# at most one notification.  main() rebinds this from known_properties.json.
known_properties: set = set()
|
||||
|
||||
# Main loop to periodically check for new listings
|
||||
def main():
    """Poll both scrapers forever and announce listings not seen before.

    At startup the set of already-announced listing IDs is loaded from
    ``known_properties.json``; a missing file simply means an empty set.
    Every cycle then:

    1. collects listings from the Wogetra and LWB scraper modules,
    2. passes each listing whose ``"id"`` is not yet known to
       ``localwebhook.send_to_discord`` and records the ID,
    3. writes the ID set back to ``known_properties.json``,
    4. sleeps for 300 seconds.

    The function never returns normally; it ends only through an exception
    (including ``KeyboardInterrupt``).
    """
    global known_properties

    # Load known properties from file
    try:
        with open("known_properties.json", "r", encoding="utf-8") as file:
            known_properties = set(json.load(file))
    except FileNotFoundError:
        print("No known properties file found. Starting fresh.")

    while True:
        print("Scraping properties...")
        print("Scraping properties from Wogetra...")
        properties = wogetra_scraper.scrape_wogetra()
        print("Scraping properties from LWB...")
        properties += lwb_scraper.scrape_easysquare()

        try:
            for prop in properties:
                if prop["id"] not in known_properties:
                    # Notify Discord and mark as known
                    localwebhook.send_to_discord(prop)
                    known_properties.add(prop["id"])
        finally:
            # Save even when a notification raised part-way through the
            # batch: IDs announced before the failure must reach the file,
            # otherwise a restart would announce the same listings again.
            with open("known_properties.json", "w", encoding="utf-8") as file:
                json.dump(list(known_properties), file)

        # Wait before checking again
        print("Waiting for the next check...")
        time.sleep(300)  # Check every 5 minutes
|
||||
|
||||
# Start the polling loop only when this file is executed as a script, so that
# importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user