diff --git a/README.md b/README.md index fcbb1e5..526d231 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,7 @@ You can run the bot natively on your machine or use a Docker image. The requirem ### 1. Environment Setup Ensure that the `.env` file is configured correctly. An example is available in the `sample.env` file. Copy it to `.env` and fill in the required values. -The `SAP_SESSIONID` and `COOKIE_SESSSION` are obtained after performing a search on the LWB website. Use your browser's developer tools to locate them in local storage. -*Future versions will include automatic form processing to obtain a valid session ID.* +You no longer need to set `SAP_SESSIONID` or `COOKIE_SESSION` manually. The scraper opens the LWB portal with a headless browser, clicks “Ich suche eine Wohnung”, opens “Immobiliensuche” → “MEHR ANZEIGEN”, sets the maximum results to 1000, clicks “Suchen”, and extracts session cookies automatically. ### 2. Python Environment diff --git a/sample.env b/sample.env index 6d2ba58..85f5f9d 100644 --- a/sample.env +++ b/sample.env @@ -1,3 +1 @@ -SAP_SESSIONID=UrN6nRbjuCBe4dkLw7vkJLcpV5zniRHvhkwAAKG5Agg%3d -COOKIE_SESSION=678ADA67ADF0813997206FE9F4132819 WEBHOOK_URL=https://discord.com/api/webhooks/1327600813367432462/goqeWDyYwi13-6F0yopUzFkHVaZs01bCe-2SI8bPJLj3WNMhxLOlIYBRIGyTpSzLKAaa \ No newline at end of file diff --git a/src/lwb/scraper.py b/src/lwb/scraper.py index 5c8f3fc..4edca92 100644 --- a/src/lwb/scraper.py +++ b/src/lwb/scraper.py @@ -2,20 +2,16 @@ import requests import xml.etree.ElementTree as ET import src.lwb.format as format import hashlib -import os import time -from dotenv import load_dotenv - -load_dotenv() SESSION_CREATE_URL = ( "https://portal1s.easysquare.com/meinelwb/index.html" "?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL" ) -# Session tokens (from .env as fallback; will be refreshed dynamically) -SAP_SESSIONID = os.getenv("SAP_SESSIONID") -COOKIE_SESSION = os.getenv("COOKIE_SESSION") +# Session tokens (resolved dynamically via session bootstrap; no .env needed) +SAP_SESSIONID = None +COOKIE_SESSION = None EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms" diff --git a/src/lwb/session_bootstrap.py b/src/lwb/session_bootstrap.py index e56f403..101735d 100644 --- a/src/lwb/session_bootstrap.py +++ b/src/lwb/session_bootstrap.py @@ -11,6 +11,30 @@ SESSION_CREATE_URL = ( "?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL" ) +# UI string constants +BTN_SEARCH_ENTRY = "Ich suche eine Wohnung" +BTN_MORE = "MEHR ANZEIGEN" +BTN_SEARCH = ("Suchen", "SUCHEN") +LABEL_MAX_RESULTS = ( + "Maximale Trefferanzahl", + "Maximale Trefferzahl", + "Maximale\u00A0Trefferanzahl", + "Maximale\u00A0Trefferzahl", +) + + +def _cookie_map(context) -> dict: + """Return a name->cookie dict safely from Playwright context cookies.""" + mapping = {} + try: + for c in context.cookies(): # type: ignore[attr-defined] + name = c.get("name") if isinstance(c, dict) else None + if name: + mapping[name] = c + except Exception: + pass + return mapping + def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None: """Create or update .env with given key/value pairs.""" @@ -31,7 +55,7 @@ def _update_env_file(values: Dict[str, str], env_path: str = ".env") -> None: def fetch_session( headless: bool = True, - save_to_env: bool = True, + save_to_env: bool = False, ) -> Dict[str, Optional[str]]: """Open the LWB portal in a headless browser and extract cookies. @@ -47,7 +71,11 @@ def fetch_session( ) raise - load_dotenv() + # .env not required for cookie fetch; loading is harmless but optional + try: + load_dotenv() + except Exception: + pass result: Dict[str, Optional[str]] = { "COOKIE_SESSION": None, "SAP_SESSIONID": None, @@ -61,19 +89,13 @@ def fetch_session( # Click the entry button: "Ich suche eine Wohnung" try: - page.get_by_role( - "button", name="Ich suche eine Wohnung" - ).click(timeout=3000) + page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000) except Exception: try: - page.get_by_role( - "link", name="Ich suche eine Wohnung" - ).click(timeout=3000) + page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000) except Exception: try: - page.get_by_text( - "Ich suche eine Wohnung", exact=True - ).click(timeout=3000) + page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000) except Exception: pass @@ -87,11 +109,11 @@ def fetch_session( cookie_session = None sap_session = None for _ in range(10): - cookies = {c["name"]: c for c in context.cookies()} + cookies = _cookie_map(context) if not cookie_session and "cookiesession1" in cookies: - cookie_session = cookies["cookiesession1"]["value"] + cookie_session = cookies.get("cookiesession1", {}).get("value") if not sap_session and "SAP_SESSIONID_PP0_581" in cookies: - sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"] + sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value") if cookie_session and sap_session: break time.sleep(0.5) @@ -127,9 +149,9 @@ def fetch_session( # Re-check cookies for SAP session for _ in range(10): - cookies = {c["name"]: c for c in context.cookies()} + cookies = _cookie_map(context) if "SAP_SESSIONID_PP0_581" in cookies: - sap_session = cookies["SAP_SESSIONID_PP0_581"]["value"] + sap_session = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value") break time.sleep(0.5) @@ -162,7 +184,7 @@ def fetch_session( def apply_search_via_ui( headless: bool = True, - save_to_env: bool = True, + save_to_env: bool = False, ) -> Dict[str, Optional[str]]: """Drive the UI to initialize the search context. @@ -182,7 +204,10 @@ def apply_search_via_ui( ) raise - load_dotenv() + try: + load_dotenv() + except Exception: + pass result: Dict[str, Optional[str]] = { "COOKIE_SESSION": None, "SAP_SESSIONID": None, @@ -197,15 +222,9 @@ def apply_search_via_ui( # Click entry button (robust tries) clicked = False for sel in [ - lambda: page.get_by_role( - "button", name="Ich suche eine Wohnung" - ).click(timeout=3000), - lambda: page.get_by_role( - "link", name="Ich suche eine Wohnung" - ).click(timeout=3000), - lambda: page.get_by_text( - "Ich suche eine Wohnung", exact=True - ).click(timeout=3000), + lambda: page.get_by_role("button", name=BTN_SEARCH_ENTRY).click(timeout=3000), + lambda: page.get_by_role("link", name=BTN_SEARCH_ENTRY).click(timeout=3000), + lambda: page.get_by_text(BTN_SEARCH_ENTRY, exact=True).click(timeout=3000), ]: try: sel() @@ -224,26 +243,17 @@ def apply_search_via_ui( container = page.locator( "div.easy-services-service-container" ).filter(has_text="Immobiliensuche") - container.get_by_role( - "button", name="MEHR ANZEIGEN" - ).first.click(timeout=5000) + container.get_by_role("button", name=BTN_MORE).first.click(timeout=5000) except Exception: try: - page.get_by_role("button", name="MEHR ANZEIGEN").click( - timeout=5000 - ) + page.get_by_role("button", name=BTN_MORE).click(timeout=5000) except Exception: pass # Fill "Maximale Trefferanzahl" to 1000 def _fill_max_results(): # Try common label variations - for label in ( - "Maximale Trefferanzahl", - "Maximale Trefferzahl", - "Maximale\u00A0Trefferanzahl", - "Maximale\u00A0Trefferzahl", - ): + for label in LABEL_MAX_RESULTS: try: page.get_by_label(label).fill("1000", timeout=3000) return True @@ -266,7 +276,7 @@ def apply_search_via_ui( pass # Click Suchen - for name in ["Suchen", "SUCHEN"]: + for name in BTN_SEARCH: try: page.get_by_role("button", name=name).click(timeout=4000) break @@ -283,15 +293,11 @@ def apply_search_via_ui( pass # Collect cookies - cookies = {c["name"]: c for c in context.cookies()} + cookies = _cookie_map(context) if "cookiesession1" in cookies: - result["COOKIE_SESSION"] = cookies["cookiesession1"].get( - "value" - ) + result["COOKIE_SESSION"] = cookies.get("cookiesession1", {}).get("value") if "SAP_SESSIONID_PP0_581" in cookies: - result["SAP_SESSIONID"] = cookies["SAP_SESSIONID_PP0_581"].get( - "value" - ) + result["SAP_SESSIONID"] = cookies.get("SAP_SESSIONID_PP0_581", {}).get("value") # render next page and keep it open for 10 seconds page.wait_for_timeout(10000)