feat: automate session cookie retrieval and remove manual session ID setup

This commit is contained in:
Elmar Kresse
2025-08-15 15:06:52 +02:00
parent 78152dafc0
commit 2c8cea7645
4 changed files with 57 additions and 58 deletions

View File

@@ -37,8 +37,7 @@ You can run the bot natively on your machine or use a Docker image. The requirem
### 1. Environment Setup ### 1. Environment Setup
Ensure that the `.env` file is configured correctly. An example is available in the `sample.env` file. Copy it to `.env` and fill in the required values. Ensure that the `.env` file is configured correctly. An example is available in the `sample.env` file. Copy it to `.env` and fill in the required values.
The `SAP_SESSIONID` and `COOKIE_SESSSION` are obtained after performing a search on the LWB website. Use your browser's developer tools to locate them in local storage. You no longer need to set `SAP_SESSIONID` or `COOKIE_SESSION` manually. The scraper opens the LWB portal with a headless browser, clicks “Ich suche eine Wohnung”, opens “Immobiliensuche” → “MEHR ANZEIGEN”, sets the maximum results to 1000, clicks “Suchen”, and extracts session cookies automatically.
*Future versions will include automatic form processing to obtain a valid session ID.*
### 2. Python Environment ### 2. Python Environment

View File

@@ -1,3 +1 @@
SAP_SESSIONID=UrN6nRbjuCBe4dkLw7vkJLcpV5zniRHvhkwAAKG5Agg%3d
COOKIE_SESSION=678ADA67ADF0813997206FE9F4132819
WEBHOOK_URL=https://discord.com/api/webhooks/1327600813367432462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzLKAaa WEBHOOK_URL=https://discord.com/api/webhooks/1327600813367432462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzLKAaa

View File

@@ -2,20 +2,16 @@ import requests
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import src.lwb.format as format import src.lwb.format as format
import hashlib import hashlib
import os
import time import time
from dotenv import load_dotenv
load_dotenv()
SESSION_CREATE_URL = ( SESSION_CREATE_URL = (
"https://portal1s.easysquare.com/meinelwb/index.html" "https://portal1s.easysquare.com/meinelwb/index.html"
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL" "?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
) )
# Session tokens (from .env as fallback; will be refreshed dynamically) # Session tokens (resolved dynamically via session bootstrap; no .env needed)
SAP_SESSIONID = os.getenv("SAP_SESSIONID") SAP_SESSIONID = None
COOKIE_SESSION = os.getenv("COOKIE_SESSION") COOKIE_SESSION = None
EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms" EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"

View File

@@ -11,6 +11,30 @@ SESSION_CREATE_URL = (
"?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL" "?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
) )
# Visible UI texts of the LWB portal, kept in one place so the browser
# automation below does not repeat string literals.

# Entry tile on the landing page; looked up as a button, a link, or plain text.
BTN_SEARCH_ENTRY = "Ich suche eine Wohnung"

# Button that expands the "Immobiliensuche" service container.
BTN_MORE = "MEHR ANZEIGEN"

# Accepted captions of the search-submit button, tried in this order.
BTN_SEARCH = ("Suchen", "SUCHEN")

# Label variants of the "maximum results" field, tried in this order.
# The last two differ from the first two only by a non-breaking space.
LABEL_MAX_RESULTS = (
    "Maximale Trefferanzahl",
    "Maximale Trefferzahl",
    "Maximale\N{NO-BREAK SPACE}Trefferanzahl",
    "Maximale\N{NO-BREAK SPACE}Trefferzahl",
)
def _cookie_map(context) -> dict:
    """Index the cookies reported by ``context.cookies()`` by cookie name.

    Args:
        context: Object exposing a ``cookies()`` method that yields cookie
            records (the callers pass a Playwright browser context).

    Returns:
        A dict mapping each cookie's ``"name"`` to the full cookie record.
        Entries that are not dicts, or whose name is missing or empty, are
        left out. When the same name occurs more than once, the last
        record wins.

    This is best-effort and never raises: if ``cookies()`` or the iteration
    fails, whatever was collected up to that point (possibly nothing) is
    returned.
    """
    by_name = {}
    try:
        for cookie in context.cookies():  # type: ignore[attr-defined]
            # Only dict-shaped records can carry a "name" key.
            if not isinstance(cookie, dict):
                continue
            key = cookie.get("name")
            if not key:
                continue
            by_name[key] = cookie
    except Exception:
        # Deliberate best-effort: a failing browser call must not abort the
        # polling loops that call this helper.
        pass
    return by_name
def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None: def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
"""Create or update .env with given key/value pairs.""" """Create or update .env with given key/value pairs."""
@@ -31,7 +55,7 @@ def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None:
def fetch_session( def fetch_session(
headless: bool = True, headless: bool = True,
save_to_env: bool = True, save_to_env: bool = False,
) -> Dict[str, Optional[str]]: ) -> Dict[str, Optional[str]]:
"""Open the LWB portal in a headless browser and extract cookies. """Open the LWB portal in a headless browser and extract cookies.
@@ -47,7 +71,11 @@ def fetch_session(
) )
raise raise
load_dotenv() # .env not required for cookie fetch; loading is harmless but optional
try:
load_dotenv()
except Exception:
pass
result: Dict[str, Optional[str]] = { result: Dict[str, Optional[str]] = {
"COOKIE_SESSION": None, "COOKIE_SESSION": None,
"SAP_SESSIONID": None, "SAP_SESSIONID": None,
@@ -61,19 +89,13 @@ def fetch_session(
# Click the entry button: "Ich suche eine Wohnung" # Click the entry button: "Ich suche eine Wohnung"
try: try:
page.get_by_role( page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000)
"button", name="Ich suche eine Wohnung"
).click(timeout=3000)
except Exception: except Exception:
try: try:
page.get_by_role( page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000)
"link", name="Ich suche eine Wohnung"
).click(timeout=3000)
except Exception: except Exception:
try: try:
page.get_by_text( page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000)
"Ich suche eine Wohnung", exact=True
).click(timeout=3000)
except Exception: except Exception:
pass pass
@@ -87,11 +109,11 @@ def fetch_session(
cookie_session = None cookie_session = None
sap_session = None sap_session = None
for _ in range(10): for _ in range(10):
cookies = {c["name"]: c for c in context.cookies()} cookies = _cookie_map(context)
if not cookie_session and "cookiesession1" in cookies: if not cookie_session and "cookiesession1" in cookies:
cookie_session = cookies["cookiesession1"]["value"] cookie_session = cookies.get("cookiesession1", {}).get("value")
if not sap_session and "SAP_SESSIONID_PP0_581" in cookies: if not sap_session and "SAP_SESSIONID_PP0_581" in cookies:
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"] sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
if cookie_session and sap_session: if cookie_session and sap_session:
break break
time.sleep(0.5) time.sleep(0.5)
@@ -127,9 +149,9 @@ def fetch_session(
# Re-check cookies for SAP session # Re-check cookies for SAP session
for _ in range(10): for _ in range(10):
cookies = {c["name"]: c for c in context.cookies()} cookies = _cookie_map(context)
if "SAP_SESSIONID_PP0_581" in cookies: if "SAP_SESSIONID_PP0_581" in cookies:
sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"] sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
break break
time.sleep(0.5) time.sleep(0.5)
@@ -162,7 +184,7 @@ def fetch_session(
def apply_search_via_ui( def apply_search_via_ui(
headless: bool = True, headless: bool = True,
save_to_env: bool = True, save_to_env: bool = False,
) -> Dict[str, Optional[str]]: ) -> Dict[str, Optional[str]]:
"""Drive the UI to initialize the search context. """Drive the UI to initialize the search context.
@@ -182,7 +204,10 @@ def apply_search_via_ui(
) )
raise raise
load_dotenv() try:
load_dotenv()
except Exception:
pass
result: Dict[str, Optional[str]] = { result: Dict[str, Optional[str]] = {
"COOKIE_SESSION": None, "COOKIE_SESSION": None,
"SAP_SESSIONID": None, "SAP_SESSIONID": None,
@@ -197,15 +222,9 @@ def apply_search_via_ui(
# Click entry button (robust tries) # Click entry button (robust tries)
clicked = False clicked = False
for sel in [ for sel in [
lambda: page.get_by_role( lambda: page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000),
"button", name="Ich suche eine Wohnung" lambda: page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000),
).click(timeout=3000), lambda: page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000),
lambda: page.get_by_role(
"link", name="Ich suche eine Wohnung"
).click(timeout=3000),
lambda: page.get_by_text(
"Ich suche eine Wohnung", exact=True
).click(timeout=3000),
]: ]:
try: try:
sel() sel()
@@ -224,26 +243,17 @@ def apply_search_via_ui(
container = page.locator( container = page.locator(
"div.easy-services-service-container" "div.easy-services-service-container"
).filter(has_text="Immobiliensuche") ).filter(has_text="Immobiliensuche")
container.get_by_role( container.get_by_role("button", name=BTN_MORE).first.click(timeout=5000)
"button", name="MEHR ANZEIGEN"
).first.click(timeout=5000)
except Exception: except Exception:
try: try:
page.get_by_role("button", name="MEHR ANZEIGEN").click( page.get_by_role("button", name=BTN_MORE).click(timeout=5000)
timeout=5000
)
except Exception: except Exception:
pass pass
# Fill "Maximale Trefferanzahl" to 1000 # Fill "Maximale Trefferanzahl" to 1000
def _fill_max_results(): def _fill_max_results():
# Try common label variations # Try common label variations
for label in ( for label in LABEL_MAX_RESULTS:
"Maximale Trefferanzahl",
"Maximale Trefferzahl",
"Maximale\u00A0Trefferanzahl",
"Maximale\u00A0Trefferzahl",
):
try: try:
page.get_by_label(label).fill("1000", timeout=3000) page.get_by_label(label).fill("1000", timeout=3000)
return True return True
@@ -266,7 +276,7 @@ def apply_search_via_ui(
pass pass
# Click Suchen # Click Suchen
for name in ["Suchen", "SUCHEN"]: for name in BTN_SEARCH:
try: try:
page.get_by_role("button", name=name).click(timeout=4000) page.get_by_role("button", name=name).click(timeout=4000)
break break
@@ -283,15 +293,11 @@ def apply_search_via_ui(
pass pass
# Collect cookies # Collect cookies
cookies = {c["name"]: c for c in context.cookies()} cookies = _cookie_map(context)
if "cookiesession1" in cookies: if "cookiesession1" in cookies:
result["COOKIE_SESSION"] = cookies["cookiesession1"].get( result["COOKIE_SESSION"] = cookies.get("cookiesession1", {}).get("value")
"value"
)
if "SAP_SESSIONID_PP0_581" in cookies: if "SAP_SESSIONID_PP0_581" in cookies:
result["SAP_SESSIONID"] = cookies["SAP_SESSIONID_PP0_581"].get( result["SAP_SESSIONID"] = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value")
"value"
)
# render next page and keep it open for 10 seconds # render next page and keep it open for 10 seconds
page.wait_for_timeout(10000) page.wait_for_timeout(10000)