feat: enhance scraper with session management and query parameter setup

This commit is contained in:
Elmar Kresse
2025-02-05 01:17:33 +01:00
parent 9be18e4d06
commit a6c147a18d
5 changed files with 312 additions and 2 deletions

View File

@ -1 +1 @@
[51968, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", "B57516F9-E364-7E54-A211-527ED54388E6", "665243A8-FD34-86F8-322F-FE9B0B392083", "id-88-5-45", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", "3B73B720-13F2-62A4-8829-557676725A95", "B4095706-A65C-F421-B02E-1D227B684B62", "id-193-1-13", -4432880139075606878, 51624, 51628, "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", 3286076625991780524, 51632, "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "A979EBAE-EF87-FB51-152A-5453CD7DC794", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-202-1-11", "E7B71D28-C557-CFE4-805D-42C2793E9248", 51656, "id-32-1-41", 51660, 51665, "21C20126-380D-9B0F-73F8-C4279897F189", "id-154-2-71", "281A7F0B-FA5A-75AE-68DE-89AFD90EEF48", "892BD779-F186-9BD1-A97A-5783EFB6F56D", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "id-105-12-78", -9017525000097635974, 51963, "BAAEC20A-F667-FE22-6693-E4B4CA366889", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1"]
[51968, "9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "id-202-1-11", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "id-147-1-1", "E7B71D28-C557-CFE4-805D-42C2793E9248", "id-145-1-9", "21C20126-380D-9B0F-73F8-C4279897F189", "id-154-2-71", -4432880139075606878, "BAAEC20A-F667-FE22-6693-E4B4CA366889", 51624, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", 51628, 3286076625991780524, "665243A8-FD34-86F8-322F-FE9B0B392083", 52087, 51632, "id-105-12-78", "3B73B720-13F2-62A4-8829-557676725A95", "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", "id-88-5-45", 51656, 51660, "892BD779-F186-9BD1-A97A-5783EFB6F56D", "id-32-1-41", 51665, "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "B57516F9-E364-7E54-A211-527ED54388E6", -9017525000097635974, "A979EBAE-EF87-FB51-152A-5453CD7DC794", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-193-1-13", "B4095706-A65C-F421-B02E-1D227B684B62", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "281A7F0B-FA5A-75AE-68DE-89AFD90EEF48", 51963]

217
src/lwb/lwb_form.xml Normal file
View File

@ -0,0 +1,217 @@
<?xml version="1.0" encoding="UTF-8"?>
<form xmlns="http://www.openpromos.com/OPPC/XMLForms"
xmlns:meta="http://www.openpromos.com/OPPC/XMLFormsMetaData" id="4B76A3C8-3E4D-4217-B54A-7C28C920748C" originalId="842F0073-DC21-A841-4E80-B1BD5E404E35">
<head>
<originalId>842F0073-DC21-A841-4E80-B1BD5E404E35</originalId>
<id>4B76A3C8-3E4D-4217-B54A-7C28C920748C</id>
<date/>
<title/>
<keywords/>
</head>
<client >
<editor name="webapp-professional" version="6.169.22"/>
<device identifier="webapp-professional_6.169.22 win_10 desktop_ff_135" name="desktop ff 135" osName="win" osVersion="10"/>
</client>
<history >
<save oldId="842F0073-DC21-A841-4E80-B1BD5E404E35" newId="4B76A3C8-3E4D-4217-B54A-7C28C920748C" userName="DEMO" timestamp="2025-02-04T22:55:03Z"/>
</history>
<actions>
<action id="01search_re_obj" includeInMenu="false" includeInQuick="false" style="cancel" title="Suchen">
<type>
<server command="search_re_obj" locksForm="true" waitForResponse="true"/>
</type>
<preconditions valid="true"/>
</action>
<action id="02load_search" includeInMenu="false" includeInQuick="false" title="Suchvariante laden">
<type>
<server command="load_search" locksForm="true" waitForResponse="true"/>
</type>
</action>
<action id="03delete_search" includeInMenu="false" includeInQuick="false" title="Suche löschen">
<type>
<server command="delete_search" locksForm="true" waitForResponse="true"/>
</type>
<confirmation>
<title>Suche löschen</title>
<message>Möchten Sie die Gespeicherte Suche endgültig löschen?</message>
<acceptTitle>Ja</acceptTitle>
<cancelTitle>Nein</cancelTitle>
</confirmation>
</action>
</actions>
<sheet refname="topmostSheet" title="Filter/Suche Objekt">
<section id="ESQ_FORM_VALIDATION" visibility="hidden">
<textfield defaultValue="true" editable="false" id="ESQ_CHANGED" text.expression="'true'" title="Technisch: Aktualisiert?" visibility="hidden">true</textfield>
<textfield defaultValue="" id="ESQ_IS_IN_CONTEXT" visibility="hidden"/>
</section>
<section title="Was suchen Sie?" visibility.expression="$cf_search_variants.empty? true : false">
<textfield defaultValue="X" editable="false" id="SO_#ISFILTERED#_I_EQ" title="Gefiltert" visibility="hidden">X</textfield>
<choicefield id="SO_#USE_TYP#_I_EQ" refname="use_typ" span="6" span.s="12" title="Art" valid.expression="self.selection.id = '0100' or self.selection.id = '0300' or self.selection.id = '0500' or self.selection.id = '0600'">
<choice id="0100" selected="true" title="Wohnung"/>
<choice id="0300" title="Gewerbe"/>
<choice id="0500" title="Garage"/>
<choice id="0600" title="Stellplatz"/>
</choicefield>
<numberfield defaultValue="50" id="SO_#PAGINGTOP#_I_EQ" maxvalue="999999999" span="6" span.s="12" title="Maximale Trefferzahl">50</numberfield>
</section>
<section id="WBS_SECTION" title="Wohnberechtigungsschein" topSeparator="false" visibility.expression="$cf_search_variants.empty? true : false">
<choicefield id="SO_#HAS_WBS#_I_EQ" multipleChoice="false" style="inline" title="Wohnberechtigungsschein" visibility="default">
<choice id="X" meta:field_id_overwrite="SO_#HAS_WBS_NO_MATTER#_I_NE" selected="false" title="Alle Angebote"/>
<choice id="X" title="WBS erforderlich" selected="false"/>
<choice id="X" meta:field_id_overwrite="SO_#HAS_WBS#_I_NE" title="WBS nicht erforderlich" selected="true" selectionIndex="0"/>
</choicefield>
</section>
<section title="Lage" visibility.expression="$cf_search_variants.empty? true : false">
<textfield id="SO_#STREET#_I_CP#MCX" maxlength="120" refname="street" span="6" span.s="12" title="Straße"/>
<textfield id="SO_#HOUSE_NUM#_I_CP" maxlength="20" span="6" span.s="12" title="Hausnummer"/>
<textfield id="SO_#POSTCODE#_I_CP" maxlength="20" span="6" span.s="12" title="Postleitzahl"/>
<textfield editable="false" id="SO_#CITY#_I_CP#MCX" maxlength="80" refname="city" span="6" span.m="12" span.s="12" title="Ort" visibility="hidden"/>
<choicefield id="SO_#DISTR_ID#_I_EQ" multipleChoice="true" refname="district" span="6" span.s="12" title="Stadtteil">
<choice id="DS13751" title="Althen-Kleinpösna"/>
<choice id="DS2757" title="Altlindenau" selected="true" selectionIndex="10"/>
<choice id="DS2758" title="Anger-Crottendorf"/>
<choice id="DS13752" title="Baalsdorf"/>
<choice id="DS13754" title="Böhlitz-Ehrenberg"/>
<choice id="DS13753" title="Burghausen-Rückmarsdorf"/>
<choice id="DS2759" title="Connewitz" selected="true" selectionIndex="10"/>
<choice id="DS2760" title="Dölitz-Dösen"/>
<choice id="DS2761" title="Engelsdorf"/>
<choice id="DS2762" title="Eutritzsch"/>
<choice id="DS2763" title="Gohlis-Mitte"/>
<choice id="DS2764" title="Gohlis-Nord"/>
<choice id="DS2765" title="Gohlis-Süd"/>
<choice id="DS2766" title="Großzschocher"/>
<choice id="DS2767" title="Grünau-Mitte"/>
<choice id="DS2768" title="Grünau-Nord"/>
<choice id="DS2769" title="Grünau-Ost"/>
<choice id="DS2770" title="Grünau-Siedlung"/>
<choice id="DS13755" title="Hartmannsd.-Knautnaund."/>
<choice id="DS2771" title="Heiterblick"/>
<choice id="DS13756" title="Holzhausen"/>
<choice id="DS2772" title="Kleinzschocher"/>
<choice id="DS2773" title="Knautkleeberg-Knauthain"/>
<choice id="DS2774" title="Lausen-Grünau"/>
<choice id="DS2775" title="Leutzsch"/>
<choice id="DS13757" title="Liebertwolkwitz"/>
<choice id="DS2776" title="Lindenau" selected="true" selectionIndex="10"/>
<choice id="DS2778" title="Lößnig" selected="true" selectionIndex="11"/>
<choice id="DS2777" title="Lpz.-Lindenthal"/>
<choice id="DS2779" title="Marienbrunn" selected="true" selectionIndex="12"/>
<choice id="DS2780" title="Meusdorf"/>
<choice id="DS13758" title="Miltitz"/>
<choice id="DS2781" title="Mockau-Nord"/>
<choice id="DS2782" title="Mockau-Süd"/>
<choice id="DS2783" title="Möckern"/>
<choice id="DS13759" title="Mölkau"/>
<choice id="DS2784" title="Neulindenau"/>
<choice id="DS2785" title="Neustadt-Neuschönefeld"/>
<choice id="DS2786" title="Paunsdorf"/>
<choice id="DS2787" title="Plagwitz" selected="true" selectionIndex="13"/>
<choice id="DS2788" title="Plaußig-Portitz"/>
<choice id="DS2789" title="Probstheida"/>
<choice id="DS2790" title="Reudnitz-Thonberg" selected="true" selectionIndex="9"/>
<choice id="DS2791" title="Schleußig" selected="true" selectionIndex="8"/>
<choice id="DS2792" title="Schönau"/>
<choice id="DS2793" title="Schönefeld-Abtnaundorf"/>
<choice id="DS2794" title="Schönefeld-Ost"/>
<choice id="DS2795" title="Sellerhausen-Stünz"/>
<choice id="DS2796" title="Stötteritz" selected="true" selectionIndex="7"/>
<choice id="DS2797" title="Südvorstadt" selected="true" selectionIndex="6"/>
<choice id="DS2798" title="Thekla"/>
<choice id="DS2799" title="Volkmarsdorf"/>
<choice id="DS2800" title="Wahren"/>
<choice id="DS2801" title="Wiederitzsch"/>
<choice id="DS3501" title="Zentrum" selected="true" selectionIndex="5"/>
<choice id="DS2803" title="Zentrum-Nord"/>
<choice id="DS2802" title="Zentrum-Nordwest" selected="true" selectionIndex="4"/>
<choice id="DS2804" title="Zentrum-Ost" selected="true" selectionIndex="2"/>
<choice id="DS2805" title="Zentrum-Süd" selected="true" selectionIndex="1"/>
<choice id="DS2806" title="Zentrum-Südost" selected="true" selectionIndex="0"/>
<choice id="DS2807" title="Zentrum-West" selected="true" selectionIndex="3"/>
</choicefield>
<choicefield editable.expression="$city.filledOut and ( $street.filledOut or $district.filledOut ) and $district.selection.count &lt;= 1" id="SO_#DISTANCE#_I_EQ" span="6" span.m="12" span.s="12" title="Umkreis" tooltip="" visibility="hidden" visibility.expression="self.editable" editable="false">
<choice id="1" title="1 km"/>
<choice id="2" title="2 km"/>
<choice id="3" title="3 km"/>
<choice id="4" title="4 km"/>
<choice id="5" title="5 km"/>
<choice id="10" title="10 km"/>
<choice id="15" title="15 km"/>
<choice id="20" title="20 km"/>
<choice id="50" title="50 km"/>
</choicefield>
</section>
<section title="Objekt" visibility.expression="$cf_search_variants.empty? true : false">
<numberfield editable.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950'" id="SO_#ROOM_FROM#_I_GE" maxvalue="999" minvalue="0" placeholder="min." span="6" span.s="12" title="Zimmer von" visibility.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950'"/>
<numberfield editable.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950'" id="SO_#ROOM_TO#_I_LE" maxvalue="999" minvalue="0" placeholder="max." span="6" span.s="12" title="Zimmer bis" visibility.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950'"/>
<numberfield editable.expression="$use_typ.selection.id!='0600'" id="SO_#SQMETER_FROM#_I_GE" maxvalue="99999" minvalue="0" placeholder="in m²" span="6" span.s="12" suffix="m²" title="Fläche von" visibility.expression="$use_typ.selection.id!='0600'"/>
<numberfield editable.expression="$use_typ.selection.id!='0600'" id="SO_#SQMETER_TO#_I_LE" maxvalue="99999" minvalue="0" placeholder="in m²" span="6" span.s="12" suffix="m²" title="Fläche bis" visibility.expression="$use_typ.selection.id!='0600'"/>
<choicefield editable.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950' and $use_typ.selection.id!='0500'" id="SO_#FLOOR_FROM#_I_GE" placeholder="min." span="6" span.s="12" title="Geschoss von" visibility.expression="false" visibility="hidden">
<choice id="001" title="1. OG"/>
<choice id="002" title="2. OG"/>
<choice id="003" title="3. OG"/>
<choice id="004" title="4. OG"/>
<choice id="005" title="5. OG"/>
<choice id="006" title="6. OG"/>
<choice id="007" title="7. OG"/>
<choice id="008" title="8. OG"/>
<choice id="009" title="9. OG"/>
<choice id="010" title="10. OG"/>
<choice id="011" title="11. OG"/>
<choice id="012" title="12. OG"/>
<choice id="013" title="13. OG"/>
<choice id="014" title="14. OG"/>
<choice id="015" title="15. OG"/>
<choice id="016" title="16. OG"/>
<choice id="017" title="17. OG"/>
<choice id="018" title="18. OG"/>
<choice id="019" title="19. OG"/>
<choice id="020" title="20. OG"/>
<choice id="080" title="EG"/>
<choice id="096" title="Souterrain"/>
<choice id="097" title="Keller"/>
<choice id="099" title="Dachgeschoss"/>
</choicefield>
<choicefield editable.expression="$use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950' and $use_typ.selection.id!='0500'" id="SO_#FLOOR_TO#_I_LE" placeholder="max." span="6" span.s="12" title="Geschoss bis" visibility.expression="false" visibility="hidden">
<choice id="001" title="1. OG"/>
<choice id="002" title="2. OG"/>
<choice id="003" title="3. OG"/>
<choice id="004" title="4. OG"/>
<choice id="005" title="5. OG"/>
<choice id="006" title="6. OG"/>
<choice id="007" title="7. OG"/>
<choice id="008" title="8. OG"/>
<choice id="009" title="9. OG"/>
<choice id="010" title="10. OG"/>
<choice id="011" title="11. OG"/>
<choice id="012" title="12. OG"/>
<choice id="013" title="13. OG"/>
<choice id="014" title="14. OG"/>
<choice id="015" title="15. OG"/>
<choice id="016" title="16. OG"/>
<choice id="017" title="17. OG"/>
<choice id="018" title="18. OG"/>
<choice id="019" title="19. OG"/>
<choice id="020" title="20. OG"/>
<choice id="080" title="EG"/>
<choice id="096" title="Souterrain"/>
<choice id="097" title="Keller"/>
<choice id="099" title="Dachgeschoss"/>
</choicefield>
<numberfield decimaldigits="2" id="SO_#NETCD#_I_LE" maxvalue="999999" minvalue="0" span="6" span.s="12" suffix=" €" title="Kaltmiete bis"/>
<numberfield decimaldigits="2" id="SO_#GROSSCD#_I_LE" maxvalue="999999" minvalue="0" span="6" span.s="12" suffix=" €" title="Gesamtmiete bis">900.00</numberfield>
</section>
<section title="Ausstattung" visibility.expression="($use_typ.selection.id!='0600' and $use_typ.selection.id!='0700' and $use_typ.selection.id!='0950' and $cf_search_variants.empty)? true : false">
<choicefield id="SO_#ATTR_ID#_I_EQ" multipleChoice="true" span="6" span.s="12" title="Bitte wählen Sie jeweils nur ein Neubauprojekt aus.">
<choice id="1020" title="Aufzug"/>
</choicefield>
</section>
<section title="Suche speichern" visibility.expression="$cf_search_variants.empty? true : false">
<checkboxfield id="SAVE_FILTER" meta:noFilterText="X" refname="SAVE_FILTER" title="Diese Suche speichern" tooltip="Speichern Sie Ihre Suche ab, um zu einem späteren Zeitpunkt darauf zurückzugreifen." checked="false"/>
<checkboxfield id="SO_#NOTIFICATIONS#_I_EQ" meta:noFilterText="X" title="Benachrichtigung erhalten" tooltip="" visibility.expression="$SAVE_FILTER.checked" visibility="hidden"/>
<separator leftSeparator="false" topSeparator="false"/>
<button editable="true" id="BTN_SEARCH" span.s="12" title="Suchen" topSeparator="false" url="oppc://action?id=01search_re_obj"/>
</section>
</sheet>
</form>

View File

@ -1,13 +1,19 @@
import requests
import xml.etree.ElementTree as ET
import src.lwb.format as format
import gzip
# Portal entry page; create_session() requests it first to obtain the
# "cookiesession1" cookie.
SESSION_CREATE_URL = "https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL"
# Session identifiers. NOTE: both values are interpolated into
# EASYSQUARE_HEADERS["Cookie"] when that dict literal is evaluated at import
# time, so reassigning them later does not by itself change the header.
SAP_SESSIONID = ""
COOKIE_SESSION = ""
# Base URL of the xmlforms endpoint (SETUP_QUERY_PARAMS_URL targets the same path).
EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms"
EASYSQUARE_HEADERS = {
"DNT": "1",
"Host": "portal1s.easysquare.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Cookie": "SAP_SESSIONID_PP0_581=zVLUPPaDGFVgBQTlA65yRXm5VdjdXBHvhiYKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
"Cookie": f"SAP_SESSIONID_PP0_581={SAP_SESSIONID}; sap-usercontext=sap-language=D&sap-client=581; cookiesession1={COOKIE_SESSION}",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
@ -23,6 +29,8 @@ EASYSQUARE_PARAMS = {
"_": "1736761256321"
}
# "save" action on the xmlforms endpoint; setup_query_params() POSTs the search
# form to it. The `id` and `originalId` query values match the ids declared in
# src/lwb/lwb_form.xml, so they have to be changed together with that file.
SETUP_QUERY_PARAMS_URL = "https://portal1s.easysquare.com/prorex/xmlforms?application=ESQ_IA_REOBJ&sap-client=581&command=action&name=save&id=4B76A3C8-3E4D-4217-B54A-7C28C920748C&api=6.169&head-oppc-version=6.169.22&originalId=842F0073-DC21-A841-4E80-B1BD5E404E35&resourceOrigin=form"
# curl --location 'https://portal1s.easysquare.com/prorex/xmlforms?application=ESQ_IA_REOBJ&sap-client=581&command=action&name=boxlist&api=6.169&head-oppc-version=6.169.22&_=1736761255682' \
# --header 'DNT: 1' \
# --header 'UTC: 1736761256321' \
@ -44,6 +52,89 @@ EASYSQUARE_PARAMS = {
# --header 'X-Requested-With: XMLHttpRequest'
# setup query params for lwb session
def setup_query_params(timeout=30):
    """Upload the saved search form so the server applies its filter settings.

    Reads ``src/lwb/lwb_form.xml`` (relative to the current working directory)
    and POSTs it to ``SETUP_QUERY_PARAMS_URL`` with ``EASYSQUARE_HEADERS``.

    Args:
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        The raw response body (``bytes``) on HTTP 200, otherwise an empty list.
    """
    # Send the file as raw bytes. The document declares UTF-8 and contains
    # non-ASCII characters (umlauts, "²", "€"); reading it in text mode would
    # decode it with the platform default encoding, and a ``str`` body is
    # re-encoded by the HTTP stack with a charset that depends on the
    # installed urllib3 version (latin-1 cannot represent "€").
    with open("src/lwb/lwb_form.xml", "rb") as file:
        xml_form = file.read()

    response = requests.post(
        SETUP_QUERY_PARAMS_URL,
        data=xml_form,
        headers=EASYSQUARE_HEADERS,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
        return []

    print(response.content)
    return response.content
# Call the session-create URL and read the session IDs from the response cookies
def create_session(timeout=30):
    """Log in to the Easysquare portal and store the resulting session cookies.

    Two requests are made:

    1. GET ``SESSION_CREATE_URL`` to obtain the ``cookiesession1`` cookie.
    2. POST the login payload to the ``authenticate`` endpoint to obtain the
       ``SAP_SESSIONID_PP0_581`` cookie.

    On success the module globals ``COOKIE_SESSION`` and ``SAP_SESSIONID`` are
    updated and ``EASYSQUARE_HEADERS["Cookie"]`` is rebuilt from them. The
    header was formatted once at import time from empty strings, so without
    this refresh the new session would never be sent with later requests.

    Args:
        timeout: Seconds to wait for each request before ``requests`` raises
            a timeout error.

    Returns:
        An empty list when a request fails or an expected cookie is missing;
        ``None`` on success (unchanged, so existing callers keep working).
    """
    global SAP_SESSIONID, COOKIE_SESSION

    # Step 1: load the portal page to receive the "cookiesession1" cookie.
    with requests.Session() as session:
        response = session.get(SESSION_CREATE_URL, allow_redirects=True, timeout=timeout)
        if response.status_code != 200:
            print(f"Fehler Session von Easysquare: {response.status_code}")
            return []
        # Response.cookies only holds cookies set by the final response. If the
        # cookie was set on a redirect hop it is only present in the session jar.
        cookie_session = (
            response.cookies.get("cookiesession1")
            or session.cookies.get("cookiesession1")
        )

    if not cookie_session:
        print("Fehler Session von Easysquare: Cookie 'cookiesession1' fehlt")
        return []

    # Step 2: authenticate.
    url = "https://portal1s.easysquare.com/meinelwb/api5/authenticate?api=6.169&sap-language=de"
    payload = {
        # Base64 of a "user=...&password=..." form string for the DEMO account.
        'sap-field_b64': "dXNlcj1ERU1PJnBhc3N3b3JkPXByb21vczE2"
    }
    headers = {
        'DNT': '1',
        # NOTE(review): fixed value captured from a browser session; it looks
        # like an epoch-milliseconds timestamp -- confirm whether the server
        # expects the current time here.
        'UTC': '1738713279005',
        'Host': 'portal1s.easysquare.com',
        'Accept': 'text/html, */*; q=0.01',
        'Cookie': f'esq-alias=%2fmeinelwb; sap-usercontext=sap-language=de&sap-client=581; cookiesession1={cookie_session}',
        'Origin': 'https://portal1s.easysquare.com',
        'Referer': 'https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL',
        'Sec-GPC': '1',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-CSRF-Token': 'fetch',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
        'X-Requested-With': 'XMLHttpRequest'
    }

    # Headers, response body and cookie values are deliberately not printed:
    # they contain session identifiers.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    if response.status_code != 200:
        print(f"Fehler beim Session Erstellen via Easysquare: {response.status_code}")
        return []

    sap_sessionid = response.cookies.get("SAP_SESSIONID_PP0_581")
    if not sap_sessionid:
        print("Fehler beim Session Erstellen via Easysquare: Cookie 'SAP_SESSIONID_PP0_581' fehlt")
        return []

    # Publish the new session only once both cookies are known.
    COOKIE_SESSION = cookie_session
    SAP_SESSIONID = sap_sessionid
    EASYSQUARE_HEADERS["Cookie"] = (
        f"SAP_SESSIONID_PP0_581={SAP_SESSIONID}; "
        "sap-usercontext=sap-language=D&sap-client=581; "
        f"cookiesession1={COOKIE_SESSION}"
    )
# Funktion: Scrape von Easysquare
def scrape_easysquare():
session = requests.Session()
@ -51,6 +142,8 @@ def scrape_easysquare():
if response.status_code != 200:
print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
# print("Versuche Session zu erstellen")
# create_session()
return []
# XML-Daten parsen