mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-08-29 17:43:50 +02:00
feat: automate session cookie retrieval and remove manual session ID setup
This commit is contained in:
@@ -37,8 +37,7 @@ You can run the bot natively on your machine or use a Docker image. The requirem
|
||||
### 1. Environment Setup
|
||||
|
||||
Ensure that the `.env` file is configured correctly. An example is available in the `sample.env` file. Copy it to `.env` and fill in the required values.
|
||||
The `SAP_SESSIONID` and `COOKIE_SESSION` are obtained after performing a search on the LWB website. Use your browser's developer tools to locate them in local storage.
|
||||
*Future versions will include automatic form processing to obtain a valid session ID.*
|
||||
You no longer need to set `SAP_SESSIONID` or `COOKIE_SESSION` manually. The scraper opens the LWB portal with a headless browser, clicks “Ich suche eine Wohnung”, opens “Immobiliensuche” → “MEHR ANZEIGEN”, sets the maximum results to 1000, clicks “Suchen”, and extracts session cookies automatically.
|
||||
|
||||
### 2. Python Environment
|
||||
|
||||
|
@@ -1,3 +1 @@
|
||||
SAP_SESSIONID=UrN6nRbjuCBe4dkLw7vkJLcpV5zniRHvhkwAAKG5Agg%3d
|
||||
COOKIE_SESSION=678ADA67ADF0813997206FE9F4132819
|
||||
WEBHOOK_URL=https://discord.com/api/webhooks/1327600813367432462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzLKAaa
|
@@ -2,20 +2,16 @@ import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
import src.lwb.format as format
|
||||
import hashlib
|
||||
import os
|
||||
import time
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
SESSION_CREATE_URL = (
|
||||
"https://portal1s.easysquare.com/meinelwb/index.html"
|
||||
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
|
||||
)
|
||||
|
||||
# Session tokens (from .env as fallback; will be refreshed dynamically)
|
||||
SAP_SESSIONID = os.getenv("SAP_SESSIONID")
|
||||
COOKIE_SESSION = os.getenv("COOKIE_SESSION")
|
||||
# Session tokens (resolved dynamically via session bootstrap; no .env needed)
|
||||
SAP_SESSIONID = None
|
||||
COOKIE_SESSION = None
|
||||
|
||||
EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"
|
||||
|
||||
|
@@ -11,6 +11,30 @@ SESSION_CREATE_URL = (
|
||||
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
|
||||
)
|
||||
|
||||
# UI string constants
|
||||
BTN_SEARCH_ENTRY = "Ich suche eine Wohnung"
|
||||
BTN_MORE = "MEHR ANZEIGEN"
|
||||
BTN_SEARCH = ("Suchen", "SUCHEN")
|
||||
LABEL_MAX_RESULTS = (
|
||||
"Maximale Trefferanzahl",
|
||||
"Maximale Trefferzahl",
|
||||
"Maximale\u00A0Trefferanzahl",
|
||||
"Maximale\u00A0Trefferzahl",
|
||||
)
|
||||
|
||||
|
||||
def _cookie_map(context) -> dict:
    """Return a name->cookie dict safely from Playwright context cookies.

    Args:
        context: Object exposing a ``cookies()`` method that returns an
            iterable of cookie records (dicts carrying a ``"name"`` key).

    Returns:
        Mapping of cookie name to the full cookie dict. Records that are not
        dicts, or that have no truthy ``"name"``, are skipped; when a name
        repeats, the last record wins. This function never raises: on any
        failure the cookies collected so far (possibly none) are returned.
    """
    import logging  # local import keeps this helper self-contained

    mapping = {}
    try:
        for cookie in context.cookies():  # type: ignore[attr-defined]
            if not isinstance(cookie, dict):
                continue
            name = cookie.get("name")
            if name:
                mapping[name] = cookie
    except Exception:
        # Best-effort by design: callers poll this in a retry loop, so a
        # failed read must not abort them. Log it instead of hiding it so a
        # permanently broken context can be told apart from "no cookies yet".
        logging.getLogger(__name__).debug(
            "Could not read cookies from browser context", exc_info=True
        )
    return mapping
|
||||
|
||||
|
||||
def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
|
||||
"""Create or update .env with given key/value pairs."""
|
||||
@@ -31,7 +55,7 @@ def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
|
||||
|
||||
def fetch_session(
|
||||
headless: bool = True,
|
||||
save_to_env: bool = True,
|
||||
save_to_env: bool = False,
|
||||
) -> Dict[str, Optional[str]]:
|
||||
"""Open the LWB portal in a headless browser and extract cookies.
|
||||
|
||||
@@ -47,7 +71,11 @@ def fetch_session(
|
||||
)
|
||||
raise
|
||||
|
||||
load_dotenv()
|
||||
# .env not required for cookie fetch; loading is harmless but optional
|
||||
try:
|
||||
load_dotenv()
|
||||
except Exception:
|
||||
pass
|
||||
result: Dict[str, Optional[str]] = {
|
||||
"COOKIE_SESSION": None,
|
||||
"SAP_SESSIONID": None,
|
||||
@@ -61,19 +89,13 @@ def fetch_session(
|
||||
|
||||
# Click the entry button: "Ich suche eine Wohnung"
|
||||
try:
|
||||
page.get_by_role(
|
||||
"button", name="Ich suche eine Wohnung"
|
||||
).click(timeout=3000)
|
||||
page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000)
|
||||
except Exception:
|
||||
try:
|
||||
page.get_by_role(
|
||||
"link", name="Ich suche eine Wohnung"
|
||||
).click(timeout=3000)
|
||||
page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000)
|
||||
except Exception:
|
||||
try:
|
||||
page.get_by_text(
|
||||
"Ich suche eine Wohnung", exact=True
|
||||
).click(timeout=3000)
|
||||
page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -87,11 +109,11 @@ def fetch_session(
|
||||
cookie_session = None
|
||||
sap_session = None
|
||||
for _ in range(10):
|
||||
cookies = {c["name"]: c for c in context.cookies()}
|
||||
cookies = _cookie_map(context)
|
||||
if not cookie_session and "cookiesession1" in cookies:
|
||||
cookie_session = cookies["cookiesession1"]["value"]
|
||||
cookie_session = cookies.get("cookiesession1", {}).get("value")
|
||||
if not sap_session and "SAP_SESSIONID_PP0_581" in cookies:
|
||||
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"]
|
||||
sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
|
||||
if cookie_session and sap_session:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
@@ -127,9 +149,9 @@ def fetch_session(
|
||||
|
||||
# Re-check cookies for SAP session
|
||||
for _ in range(10):
|
||||
cookies = {c["name"]: c for c in context.cookies()}
|
||||
cookies = _cookie_map(context)
|
||||
if "SAP_SESSIONID_PP0_581" in cookies:
|
||||
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"]
|
||||
sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
@@ -162,7 +184,7 @@ def fetch_session(
|
||||
|
||||
def apply_search_via_ui(
|
||||
headless: bool = True,
|
||||
save_to_env: bool = True,
|
||||
save_to_env: bool = False,
|
||||
) -> Dict[str, Optional[str]]:
|
||||
"""Drive the UI to initialize the search context.
|
||||
|
||||
@@ -182,7 +204,10 @@ def apply_search_via_ui(
|
||||
)
|
||||
raise
|
||||
|
||||
load_dotenv()
|
||||
try:
|
||||
load_dotenv()
|
||||
except Exception:
|
||||
pass
|
||||
result: Dict[str, Optional[str]] = {
|
||||
"COOKIE_SESSION": None,
|
||||
"SAP_SESSIONID": None,
|
||||
@@ -197,15 +222,9 @@ def apply_search_via_ui(
|
||||
# Click entry button (robust tries)
|
||||
clicked = False
|
||||
for sel in [
|
||||
lambda: page.get_by_role(
|
||||
"button", name="Ich suche eine Wohnung"
|
||||
).click(timeout=3000),
|
||||
lambda: page.get_by_role(
|
||||
"link", name="Ich suche eine Wohnung"
|
||||
).click(timeout=3000),
|
||||
lambda: page.get_by_text(
|
||||
"Ich suche eine Wohnung", exact=True
|
||||
).click(timeout=3000),
|
||||
lambda: page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000),
|
||||
lambda: page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000),
|
||||
lambda: page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000),
|
||||
]:
|
||||
try:
|
||||
sel()
|
||||
@@ -224,26 +243,17 @@ def apply_search_via_ui(
|
||||
container = page.locator(
|
||||
"div.easy-services-service-container"
|
||||
).filter(has_text="Immobiliensuche")
|
||||
container.get_by_role(
|
||||
"button", name="MEHR ANZEIGEN"
|
||||
).first.click(timeout=5000)
|
||||
container.get_by_role("button", name=BTN_MORE).first.click(timeout=5000)
|
||||
except Exception:
|
||||
try:
|
||||
page.get_by_role("button", name="MEHR ANZEIGEN").click(
|
||||
timeout=5000
|
||||
)
|
||||
page.get_by_role("button", name=BTN_MORE).click(timeout=5000)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fill "Maximale Trefferanzahl" to 1000
|
||||
def _fill_max_results():
|
||||
# Try common label variations
|
||||
for label in (
|
||||
"Maximale Trefferanzahl",
|
||||
"Maximale Trefferzahl",
|
||||
"Maximale\u00A0Trefferanzahl",
|
||||
"Maximale\u00A0Trefferzahl",
|
||||
):
|
||||
for label in LABEL_MAX_RESULTS:
|
||||
try:
|
||||
page.get_by_label(label).fill("1000", timeout=3000)
|
||||
return True
|
||||
@@ -266,7 +276,7 @@ def apply_search_via_ui(
|
||||
pass
|
||||
|
||||
# Click Suchen
|
||||
for name in ["Suchen", "SUCHEN"]:
|
||||
for name in BTN_SEARCH:
|
||||
try:
|
||||
page.get_by_role("button", name=name).click(timeout=4000)
|
||||
break
|
||||
@@ -283,15 +293,11 @@ def apply_search_via_ui(
|
||||
pass
|
||||
|
||||
# Collect cookies
|
||||
cookies = {c["name"]: c for c in context.cookies()}
|
||||
cookies = _cookie_map(context)
|
||||
if "cookiesession1" in cookies:
|
||||
result["COOKIE_SESSION"] = cookies["cookiesession1"].get(
|
||||
"value"
|
||||
)
|
||||
result["COOKIE_SESSION"] = cookies.get("cookiesession1", {}).get("value")
|
||||
if "SAP_SESSIONID_PP0_581" in cookies:
|
||||
result["SAP_SESSIONID"] = cookies["SAP_SESSIONID_PP0_581"].get(
|
||||
"value"
|
||||
)
|
||||
result["SAP_SESSIONID"] = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
|
||||
|
||||
# render next page and keep it open for 10 seconds
|
||||
page.wait_for_timeout(10000)
|
||||
|
Reference in New Issue
Block a user