feat: automate session cookie retrieval and remove manual session ID setup

This commit is contained in:
Elmar Kresse
2025-08-15 15:06:52 +02:00
parent 78152dafc0
commit 2c8cea7645
4 changed files with 57 additions and 58 deletions

View File

@@ -37,8 +37,7 @@ You can run the bot natively on your machine or use a Docker image. The requirem
### 1. Environment Setup
Ensure that the `.env` file is configured correctly. An example is available in the `sample.env` file. Copy it to `.env` and fill in the required values.
The `SAP_SESSIONID` and `COOKIE_SESSION` are obtained after performing a search on the LWB website. Use your browser's developer tools to locate them in local storage.
*Future versions will include automatic form processing to obtain a valid session ID.*
You no longer need to set `SAP_SESSIONID` or `COOKIE_SESSION` manually. The scraper opens the LWB portal with a headless browser, clicks “Ich suche eine Wohnung”, opens “Immobiliensuche” → “MEHR ANZEIGEN”, sets the maximum results to 1000, clicks “Suchen”, and extracts session cookies automatically.
### 2. Python Environment

View File

@@ -1,3 +1 @@
SAP_SESSIONID=UrN6nRbjuCBe4dkLw7vkJLcpV5zniRHvhkwAAKG5Agg%3d
COOKIE_SESSION=678ADA67ADF0813997206FE9F4132819
WEBHOOK_URL=https://discord.com/api/webhooks/1327600813367432462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzLKAaa

View File

@@ -2,20 +2,16 @@ import requests
import xml.etree.ElementTree as ET
import src.lwb.format as format
import hashlib
import os
import time
from dotenv import load_dotenv
load_dotenv()
SESSION_CREATE_URL = (
"https://portal1s.easysquare.com/meinelwb/index.html"
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
)
# Session tokens (from .env as fallback; will be refreshed dynamically)
SAP_SESSIONID = os.getenv("SAP_SESSIONID")
COOKIE_SESSION = os.getenv("COOKIE_SESSION")
# Session tokens (resolved dynamically via session bootstrap; no .env needed)
SAP_SESSIONID = None
COOKIE_SESSION = None
EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"

View File

@@ -11,6 +11,30 @@ SESSION_CREATE_URL = (
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
)
# Visible UI texts the browser automation looks for on the portal pages.
# Kept in one place so a wording change on the site needs a single edit.

# Entry tile; tried as a button, then as a link, then as exact text.
BTN_SEARCH_ENTRY = "Ich suche eine Wohnung"

# Button that expands the search form.
BTN_MORE = "MEHR ANZEIGEN"

# Accepted spellings of the submit button, tried in this order.
BTN_SEARCH = ("Suchen", "SUCHEN")

# Accepted labels of the "maximum results" field: each noun variant, first
# with a regular space and then with a no-break space (U+00A0) after
# "Maximale".
LABEL_MAX_RESULTS = tuple(
    f"Maximale{separator}{noun}"
    for separator in (" ", "\u00A0")
    for noun in ("Trefferanzahl", "Trefferzahl")
)
def _cookie_map(context) -> dict:
    """Return a ``name -> cookie`` dict built from ``context.cookies()``.

    The lookup is best-effort and never raises:

    * entries that are not dicts, or whose ``"name"`` is missing or empty,
      are skipped;
    * if reading the cookies fails, the failure is logged at debug level
      and whatever was collected up to that point (possibly an empty
      dict) is returned.

    Args:
        context: Object exposing a ``cookies()`` method that yields cookie
            dicts (the file passes a Playwright browser context).

    Returns:
        Mapping of cookie name to the full cookie dict. When two cookies
        share a name, the later one wins.
    """
    import logging  # function-scope import keeps this helper self-contained

    mapping = {}
    try:
        for cookie in context.cookies():  # type: ignore[attr-defined]
            if not isinstance(cookie, dict):
                continue
            name = cookie.get("name")
            if name:
                mapping[name] = cookie
    except Exception:
        # Deliberately non-fatal: callers poll this helper in a retry loop.
        # Log instead of passing silently so a broken or closed context
        # is diagnosable rather than looking like "cookie not set yet".
        logging.getLogger(__name__).debug(
            "Could not read cookies from browser context", exc_info=True
        )
    return mapping
def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
"""Create or update .env with given key/value pairs."""
@@ -31,7 +55,7 @@ def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
def fetch_session(
headless: bool = True,
save_to_env: bool = True,
save_to_env: bool = False,
) -> Dict[str, Optional[str]]:
"""Open the LWB portal in a headless browser and extract cookies.
@@ -47,7 +71,11 @@ def fetch_session(
)
raise
load_dotenv()
# .env not required for cookie fetch; loading is harmless but optional
try:
load_dotenv()
except Exception:
pass
result: Dict[str, Optional[str]] = {
"COOKIE_SESSION": None,
"SAP_SESSIONID": None,
@@ -61,19 +89,13 @@ def fetch_session(
# Click the entry button: "Ich suche eine Wohnung"
try:
page.get_by_role(
"button", name="Ich suche eine Wohnung"
).click(timeout=3000)
page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000)
except Exception:
try:
page.get_by_role(
"link", name="Ich suche eine Wohnung"
).click(timeout=3000)
page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000)
except Exception:
try:
page.get_by_text(
"Ich suche eine Wohnung", exact=True
).click(timeout=3000)
page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000)
except Exception:
pass
@@ -87,11 +109,11 @@ def fetch_session(
cookie_session = None
sap_session = None
for _ in range(10):
cookies = {c["name"]: c for c in context.cookies()}
cookies = _cookie_map(context)
if not cookie_session and "cookiesession1" in cookies:
cookie_session = cookies["cookiesession1"]["value"]
cookie_session = cookies.get("cookiesession1", {}).get("value")
if not sap_session and "SAP_SESSIONID_PP0_581" in cookies:
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"]
sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
if cookie_session and sap_session:
break
time.sleep(0.5)
@@ -127,9 +149,9 @@ def fetch_session(
# Re-check cookies for SAP session
for _ in range(10):
cookies = {c["name"]: c for c in context.cookies()}
cookies = _cookie_map(context)
if "SAP_SESSIONID_PP0_581" in cookies:
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"]
sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
break
time.sleep(0.5)
@@ -162,7 +184,7 @@ def fetch_session(
def apply_search_via_ui(
headless: bool = True,
save_to_env: bool = True,
save_to_env: bool = False,
) -> Dict[str, Optional[str]]:
"""Drive the UI to initialize the search context.
@@ -182,7 +204,10 @@ def apply_search_via_ui(
)
raise
load_dotenv()
try:
load_dotenv()
except Exception:
pass
result: Dict[str, Optional[str]] = {
"COOKIE_SESSION": None,
"SAP_SESSIONID": None,
@@ -197,15 +222,9 @@ def apply_search_via_ui(
# Click entry button (robust tries)
clicked = False
for sel in [
lambda: page.get_by_role(
"button", name="Ich suche eine Wohnung"
).click(timeout=3000),
lambda: page.get_by_role(
"link", name="Ich suche eine Wohnung"
).click(timeout=3000),
lambda: page.get_by_text(
"Ich suche eine Wohnung", exact=True
).click(timeout=3000),
lambda: page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000),
lambda: page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000),
lambda: page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000),
]:
try:
sel()
@@ -224,26 +243,17 @@ def apply_search_via_ui(
container = page.locator(
"div.easy-services-service-container"
).filter(has_text="Immobiliensuche")
container.get_by_role(
"button", name="MEHR ANZEIGEN"
).first.click(timeout=5000)
container.get_by_role("button", name=BTN_MORE).first.click(timeout=5000)
except Exception:
try:
page.get_by_role("button", name="MEHR ANZEIGEN").click(
timeout=5000
)
page.get_by_role("button", name=BTN_MORE).click(timeout=5000)
except Exception:
pass
# Fill "Maximale Trefferanzahl" to 1000
def _fill_max_results():
# Try common label variations
for label in (
"Maximale Trefferanzahl",
"Maximale Trefferzahl",
"Maximale\u00A0Trefferanzahl",
"Maximale\u00A0Trefferzahl",
):
for label in LABEL_MAX_RESULTS:
try:
page.get_by_label(label).fill("1000", timeout=3000)
return True
@@ -266,7 +276,7 @@ def apply_search_via_ui(
pass
# Click Suchen
for name in ["Suchen", "SUCHEN"]:
for name in BTN_SEARCH:
try:
page.get_by_role("button", name=name).click(timeout=4000)
break
@@ -283,15 +293,11 @@ def apply_search_via_ui(
pass
# Collect cookies
cookies = {c["name"]: c for c in context.cookies()}
cookies = _cookie_map(context)
if "cookiesession1" in cookies:
result["COOKIE_SESSION"] = cookies["cookiesession1"].get(
"value"
)
result["COOKIE_SESSION"] = cookies.get("cookiesession1", {}).get("value")
if "SAP_SESSIONID_PP0_581" in cookies:
result["SAP_SESSIONID"] = cookies["SAP_SESSIONID_PP0_581"].get(
"value"
)
result["SAP_SESSIONID"] = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
# render next page and keep it open for 10 seconds
page.wait_for_timeout(10000)