Mirror of https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git (synced 2026-01-17 22:12:26 +01:00)
feat: update Docker setup, add environment variables, and enhance property scraping logic
@@ -52,7 +52,10 @@ def fetch_all_properties():
     prop_soup = BeautifulSoup(response.text, "html.parser")
 
     # get h3 with class adresse and extract the text
-    prop_title = prop_soup.find("h3", {"class": "adresse"}).text.strip()
+    try:
+        prop_title = prop_soup.find("h3", {"class": "adresse"}).text.strip()
+    except:
+        prop_title = "N/A"
 
     # create a value entrie tuple list
     facts = []
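Note on the guard above: BeautifulSoup's find() returns None when no matching tag exists, so chaining .text onto it raises AttributeError for listings without an address heading, which is what the new try/except absorbs. A minimal equivalent sketch (not the repo's code; the helper name and the explicit None check are assumptions):

def extract_address(prop_soup):
    # find() yields None if <h3 class="adresse"> is missing; guard before touching .text
    heading = prop_soup.find("h3", {"class": "adresse"})
    if heading is None:
        return "N/A"
    return heading.text.strip()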
@@ -91,15 +94,23 @@ def fetch_all_properties():
     # image is in img tag with class "img-responsive"
     image_url = prop_soup.find("img", {"class": "img-responsive"})["src"]
 
+    # from prop_soup get the iframe with id "gmap_canvas" and extract the src
+    google_maps_link = prop_soup.find("iframe", {"id": "gmap_canvas"})
+    google_maps_link = google_maps_link["data-original-src"]
+    # remove the query parameter output=embed
+    google_maps_link = google_maps_link.replace("&output=embed", "")
+    # remove width and height
+    google_maps_link = google_maps_link.replace("width=300&height=220&", "")
+
     properties.append({
         "id": obj_id,
         "title": "BGL - " + prop_title,
-        "subtitle": "",
+        "subtitle": google_maps_link,
         "rooms": room_count,
         "size": size,
         "rent": cold_rent,
         "link": bgl_url + prop_url,
-        "abstract": "",
+        "abstract": "Andere Kosten: " + other_costs + " Heizkosten: " + heating_costs + " Etage:" + level,
         "warm_rent": rent,
         "availability": availability,
         "image_url": image_url,
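The two replace() calls above strip the embed-specific parts of the iframe src by exact string match. A hedged alternative sketch that drops the output/width/height query parameters regardless of their order, using only the standard library (the function name is hypothetical, not from the repo):

from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def clean_maps_link(raw_src):
    # Parse the iframe src and rebuild it without the embed-only parameters.
    parts = urlparse(raw_src)
    query = [(k, v) for k, v in parse_qsl(parts.query)
             if k not in ("output", "width", "height")]
    return urlunparse(parts._replace(query=urlencode(query)))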
@@ -1,11 +1,12 @@
 import json
 import requests
-
+from dotenv import load_dotenv
 import os
 import src.lwb.scrape_image as scrape_image
+load_dotenv()
 
 # Webhook URL from Discord
-WEBHOOK_URL = "https://discord.com/api/webhooks/1327600813367099462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzGCSru"
+WEBHOOK_URL = os.getenv("WEBHOOK_URL")
 
 # Funktion: Nachricht an Discord senden
 def send_to_discord(property_data):
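With the webhook now read from the environment, os.getenv() returns None if WEBHOOK_URL is unset, and a later requests.post(None, ...) only fails with a less obvious error. A small fail-fast sketch of the pattern this commit switches to (the RuntimeError guard is an assumption, not part of the repo):

import os
from dotenv import load_dotenv

load_dotenv()  # reads WEBHOOK_URL from a local .env file or the process environment

WEBHOOK_URL = os.getenv("WEBHOOK_URL")
if not WEBHOOK_URL:
    raise RuntimeError("WEBHOOK_URL is not set; define it in .env or the container environment")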
@@ -3,6 +3,7 @@ from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
 
 def scrape_image(url, owner):
     session = requests.Session()
+    response = None
 
     if owner == "BGL":
         response = session.get(url)
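For context, the added response = None matters because the variable is otherwise only bound inside the if owner == "BGL" branch; any later access raises UnboundLocalError for other owners. Simplified illustration (not the repo's code):

import requests

def scrape_image_without_init(url, owner):
    session = requests.Session()
    if owner == "BGL":
        response = session.get(url)
    # UnboundLocalError here whenever owner != "BGL",
    # because "response" was never assigned in that case.
    return response.content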
@@ -18,6 +19,12 @@ def scrape_image(url, owner):
         # return empty image
         return b''
 
-    # get image from response
+    # Handle other owners or fallback
+    if response is None:
+        response = session.get(url)
+        if response.status_code != 200:
+            print(f"Fehler beim Abrufen der Standardquelle: {response.status_code}")
+            return b''
+
 
     return response.content
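Putting the two hunks in this file together, the control flow after the commit is roughly the following sketch (simplified: the BGL-specific handling between the hunks is elided, and the exact nesting of the status check is inferred):

import requests

def scrape_image(url, owner):
    session = requests.Session()
    response = None

    if owner == "BGL":
        response = session.get(url)
        # (BGL-specific error handling elided; on failure it returns b'')

    # Fallback for other owners, or when the BGL branch did not produce a response
    if response is None:
        response = session.get(url)
        if response.status_code != 200:
            print(f"Fehler beim Abrufen der Standardquelle: {response.status_code}")
            return b''

    return response.content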
@@ -2,11 +2,15 @@ import requests
 import xml.etree.ElementTree as ET
 import src.lwb.format as format
 import hashlib
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
 
 SESSION_CREATE_URL = "https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
 
-SAP_SESSIONID = "iZ52JjFdvDRY0528vXt4y4tdOvzk1xHvhW4KELG5Agg%3d"
-COOKIE_SESSION = "678ADA670E24565B64423D923CC07C0B"
+SAP_SESSIONID = os.getenv("SAP_SESSIONID")
+COOKIE_SESSION = os.getenv("COOKIE_SESSION")
 
 EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"
 EASYSQUARE_HEADERS = {
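After this commit, three values come from the environment (WEBHOOK_URL, SAP_SESSIONID, COOKIE_SESSION). Since os.getenv() silently returns None for anything missing, a startup check like the following sketch can surface a bad .env or Docker configuration early (this guard is an assumption, not in the repo):

import os
from dotenv import load_dotenv

load_dotenv()

REQUIRED_VARS = ("WEBHOOK_URL", "SAP_SESSIONID", "COOKIE_SESSION")
missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
if missing:
    raise RuntimeError("Missing environment variables: " + ", ".join(missing))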