feat: update Docker setup, add environment variables, and enhance property scraping logic

This commit is contained in:
Elmar Kresse
2025-02-10 10:02:53 +01:00
parent f16116040d
commit 92bb5b2e85
11 changed files with 66 additions and 74 deletions

View File

@@ -52,7 +52,10 @@ def fetch_all_properties():
prop_soup = BeautifulSoup(response.text, "html.parser")
# get h3 with class adresse and extract the text
prop_title = prop_soup.find("h3", {"class": "adresse"}).text.strip()
try :
prop_title = prop_soup.find("h3", {"class": "adresse"}).text.strip()
except:
prop_title = "N/A"
# create a list of value-entry tuples
facts = []
@@ -91,15 +94,23 @@ def fetch_all_properties():
# image is in img tag with class "img-responsive"
image_url = prop_soup.find("img", {"class": "img-responsive"})["src"]
# from prop_soup get the iframe with id "gmap_canvas" and extract its data-original-src attribute
google_maps_link = prop_soup.find("iframe", {"id": "gmap_canvas"})
google_maps_link = google_maps_link["data-original-src"]
# remove the query parameter output=embed
google_maps_link = google_maps_link.replace("&output=embed", "")
# remove width and height
google_maps_link = google_maps_link.replace("width=300&height=220&", "")
properties.append({
"id": obj_id,
"title": "BGL - " + prop_title,
"subtitle": "",
"subtitle": google_maps_link,
"rooms": room_count,
"size": size,
"rent": cold_rent,
"link": bgl_url + prop_url,
"abstract": "",
"abstract": "Andere Kosten: " + other_costs + " Heizkosten: " + heating_costs + " Etage:" + level,
"warm_rent": rent,
"availability": availability,
"image_url": image_url,

View File

@@ -1,11 +1,12 @@
import json
import requests
from dotenv import load_dotenv
import os
import src.lwb.scrape_image as scrape_image
load_dotenv()
# Webhook URL from Discord
WEBHOOK_URL = "https://discord.com/api/webhooks/1327600813367099462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzGCSru"
WEBHOOK_URL = os.getenv("WEBHOOK_URL")
# Funktion: Nachricht an Discord senden
def send_to_discord(property_data):

View File

@@ -3,6 +3,7 @@ from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
def scrape_image(url, owner):
session = requests.Session()
response = None
if owner == "BGL":
response = session.get(url)
@@ -18,6 +19,12 @@ def scrape_image(url, owner):
# return empty image
return b''
# get image from response
# Handle other owners or fallback
if response is None:
response = session.get(url)
if response.status_code != 200:
print(f"Fehler beim Abrufen der Standardquelle: {response.status_code}")
return b''
return response.content

View File

@@ -2,11 +2,15 @@ import requests
import xml.etree.ElementTree as ET
import src.lwb.format as format
import hashlib
import os
from dotenv import load_dotenv
load_dotenv()
SESSION_CREATE_URL = "https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
SAP_SESSIONID = "iZ52JjFdvDRY0528vXt4y4tdOvzk1xHvhW4KELG5Agg%3d"
COOKIE_SESSION = "678ADA670E24565B64423D923CC07C0B"
SAP_SESSIONID = os.getenv("SAP_SESSIONID")
COOKIE_SESSION = os.getenv("COOKIE_SESSION")
EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"
EASYSQUARE_HEADERS = {