From a6c147a18d457891cc138a8494021d0766ab8b1a Mon Sep 17 00:00:00 2001 From: Elmar Kresse Date: Wed, 5 Feb 2025 01:17:33 +0100 Subject: [PATCH] feat: enhance scraper with session management and query parameter setup --- known_properties.json | 2 +- src/lipsia/__pycache__/lipsia.cpython-310.pyc | Bin 1255 -> 1255 bytes src/lwb/__pycache__/scraper.cpython-310.pyc | Bin 2680 -> 4982 bytes src/lwb/lwb_form.xml | 217 ++++++++++++++++++ src/lwb/scraper.py | 95 +++++++- 5 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 src/lwb/lwb_form.xml diff --git a/known_properties.json b/known_properties.json index 49a560f..35866be 100644 --- a/known_properties.json +++ b/known_properties.json @@ -1 +1 @@ -[51968, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", "B57516F9-E364-7E54-A211-527ED54388E6", "665243A8-FD34-86F8-322F-FE9B0B392083", "id-88-5-45", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", "3B73B720-13F2-62A4-8829-557676725A95", "B4095706-A65C-F421-B02E-1D227B684B62", "id-193-1-13", -4432880139075606878, 51624, 51628, "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", 3286076625991780524, 51632, "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "A979EBAE-EF87-FB51-152A-5453CD7DC794", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-202-1-11", "E7B71D28-C557-CFE4-805D-42C2793E9248", 51656, "id-32-1-41", 51660, 51665, "21C20126-380D-9B0F-73F8-C4279897F189", "id-154-2-71", "281A7F0B-FA5A-75AE-68DE-89AFD90EEF48", "892BD779-F186-9BD1-A97A-5783EFB6F56D", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "id-105-12-78", -9017525000097635974, 51963, "BAAEC20A-F667-FE22-6693-E4B4CA366889", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1"] \ No newline at end of file +[51968, "9A0B42A2-6D9B-331A-DAA7-624A5FA49606", "id-202-1-11", "7421A7DD-D9B4-72D3-0A91-5C3DA60C26E4", "id-147-1-1", "E7B71D28-C557-CFE4-805D-42C2793E9248", "id-145-1-9", "21C20126-380D-9B0F-73F8-C4279897F189", "id-154-2-71", -4432880139075606878, "BAAEC20A-F667-FE22-6693-E4B4CA366889", 51624, "803DF7B0-1125-2AA4-90AC-CF0CAEAC625A", 51628, 3286076625991780524, "665243A8-FD34-86F8-322F-FE9B0B392083", 52087, 51632, "id-105-12-78", "3B73B720-13F2-62A4-8829-557676725A95", "F7992488-7C24-DFA9-F8B2-94DDC18E66A3", "id-88-5-45", 51656, 51660, "892BD779-F186-9BD1-A97A-5783EFB6F56D", "id-32-1-41", 51665, "FA45C0B6-813C-DE65-496A-EDD8DA3F2526", "B57516F9-E364-7E54-A211-527ED54388E6", -9017525000097635974, "A979EBAE-EF87-FB51-152A-5453CD7DC794", "172DFAD2-7CDB-51B4-212E-E6F9C7F0601A", "57446DF5-CB9F-951C-A40A-4BA775DA7426", "id-193-1-13", "B4095706-A65C-F421-B02E-1D227B684B62", "C02892BE-F34F-5A8A-E174-4A79549DC9A9", "BD41AC52-BADB-428F-AF4B-11BBC374F2F1", "281A7F0B-FA5A-75AE-68DE-89AFD90EEF48", 51963] \ No newline at end of file diff --git a/src/lipsia/__pycache__/lipsia.cpython-310.pyc b/src/lipsia/__pycache__/lipsia.cpython-310.pyc index f822c0681ad4bdf55c1132cdbe9fd63f32fe9b19..bed686feefced569ad682c889f3d0104bde16e07 100644 GIT binary patch delta 28 icmaFP`J9tGpO=@50SInQUbK-rnu#edeRBp=Jre+RvIrai delta 28 icmaFP`J9tGpO=@50SLZ)XxYde&BTg8Uo(TYY`3QXg diff --git a/src/lwb/__pycache__/scraper.cpython-310.pyc b/src/lwb/__pycache__/scraper.cpython-310.pyc index 5341b23212a51264103cece031e984c5a506a6aa..19defa9283dc325f9c3bbfe68a25908b434270a1 100644 GIT binary patch literal 4982 zcmb7INpl;=6`l<{2rl9x*_H*{5^dPT#$A#b86-i8vP4k?X~|Pcp@!%tIOG5W^$fU7 z4XRS5oRYY5aOD!qRZ^CNE9cyD^CdqaHP@VcNM&_mGl`wA2cQH=u~o*f-}HOkJ>9RD z?|V(Nrza}l_tkHHQT;F}2!G^C_b&qF9VF}L5JVuNA*k)QSd%0{6l$`nh;2)t7W6G4 zHPo?$)i9CN2$8iYk<}Pc)Hn%{APJGM9sNik5gPkQ_*fxP5+l5%=YCl2Aqmn;`ba++ zAScKm8M+@*6J%KJB_rhI4+XW4oFXsa-A`U5FX25v#FCIY{Taq5tQ?iMZM$Jz$YvWR zvo#}cWvFHyT6dcoqnV0X%hsq~r^fzfR<9F!klD6t#w9{&!_e!ybLYmFilv+7;bpDg)cm5tr-{Z&gC-a$KK4o>E~vh;Zk9>Tq>4I%Qse*7t5=wx$@L(e$IL4 zV$#wY>83?l#jNvQ=D2QX^=ebA(z(S^u2u}4*6lg8xR|V%=B`dHYFQWozOZ`Tbacbe zvQwE{@{L=VFLU2YuB<2XxlHb25{Bu?i^&5n*xrTw#AGIyN?xXwT{AnL%jfWyPcG?< zZkY#Jzw~IN>gbKJB%xb|X4A3cCL2pSmQ9XE2pyyK^hU{dY6f{fGm#2=^5V+6r(7{D z+Y1&d723di!5`^SbOX+%3srb<)Y~pg7wZ+1==JI&!?ljX&Z&(?m1%^gm+Ka-G#O2= z(YsA**_ImbUUXH)Ysml3`gnG|5cXY7s}oPDYc=Z0T0?jGx2Z1C?F?eB?bs+E3U|{PYS)9$K z=kxQ6Q^m>LWN~6@lu^rUvI@Q7x0~Y!rovD1@lQr3jI@Ac*$_l*3AR8aBHxpK8gGfN z_&|Ipw1iv0AO+7oSrqEwykHIQV>bEoK9Vi%$hP7Nc7P}!$Suj0c7i0ZE3qAr7B*RPi`J8SW<6Pagh>~YsfeeT4O;gUhPGaqb#|qvH29_;E)~}| zR?9axifiweR|{)}>!tF>+BI*WSSY<;y17wUE0(Vm3ya0Ik{7dV&2Cy{90BSD8ceU- zeDmS-*X@+7hA~)w)Sj}X8;z3h3Y%68%u*`~nGXf$a4q7zTN8j4Si6A`h;C93~@>C;e7n8sxz*F*_YXTXm0MX)iC(W}80LPNQ39N;*cKbXq&jCh+pg4x@@ICO zns^fGo%QEo&Lh$U225qOroD|F#kqEVVgof@RPIX$VKCAU|M%1*wx zwvk`GTA$ypOsq_7PF&qxy*+=pIiBCEIK^?s&pJ~eJb7b%!3Wvdnf%1~%=uhyit%WK z1$e2ljl;os?BWbRrqCqk#U!W5T%N^v;d|7&o7N0nvq0`!$9_DA8wq_1Jf9&)HP{9$ z=gf1aP7STmEy^fk5+6>eL@Vjbs|#Lq!K~M@WB`6Ax{LUO4-!le=e52o^Matig?Ulx z#L*o`f%N)egF3k`&d465_xJbH9L&;9W>C(bl)&L_jp3ZnZLBY)XPx-%^g?NEDZOs) z(z+VhqIPB5NpNSDcs9KV9`ypW*02w|&DZfQRjHWF>b912n>sNI1e9*m1?z#=i~|DW z$e<=}U%z$*aa@O>QxZ$?Y3aLtOxcvAZ|U~7r>x#6t)qCDHyz@2GTo4Sd;p5fvZ(>z zl6$)L1o)~$|2KR}McIq^nuq1`qajbOQrjEogjwZ|0n_J-_f9Sq=cj| zm9X@M5|RD|$o{7ilRj7C(m#|Q>F-KH`kT@#{Z;9c{-X3tf0hO$G5n|S@PHJ>)9FKs zON^uc6Zn1#hF&Be-~aFozS|1$P6Q8ggbxx42>&X0SmxR>!b4Yl{0!d%fBUQ)c@Ra7 z7_c6QPlCXAxhk|2FhHDyT=}82D1867t$-VN&_lvp-$}H9y3isWE#zy_w)UVGaYNX* z#JHsuaU3qrExOAOEIFc z)(Q6n+MPhVLHEQ?g8hU`-g{j{?ICxN4DL$oXYNp|_ihW?5VZTy`h0B|+ApE?``QS! zPoNFlZIP2MzeAmJ{X0}^n7rWjK!5RLxi#Wq9^DX9FH#>;Khgl^HrP68_qiwUiLHdI zJm_~5SOF2P6Ge$e2qGNIlM&89 z0BLzky@uQcYIF-51xt;a9(#p13vJtMl+22I$9p11Ap#}l#*qZDjx4aGE z1={D)G}7M=>&uV-N%CXVwhV}{*yl4zSPX(q;xf1dn4iEiAjShxMEP+kDkdcG z3(6I-585CYC)kz1IzQ->YYAmw5dV>o<9SJpey$|gZFHDA;cpOi2-lPhwHc+E zqCsoeQ|rj90pFy?x@_!S?pgv@lnkyYnf99Dr0;LCn<()V{{IG+MV3u)F~!9)7ksO+ zt6ZGsf?rVl&E*fIYmy$%RQTRtKfI!%p#T5? delta 1214 zcmY+C-*4Mg6vus?#CDutj*~R~wW%Ads9L+Fls2iL19wT+j;eHZQ+1IbGP$>I+Qx2d zw_@R1LmIpSm4!5EMM@A4?0F#Z$RpyJC&M$+{sA=JIQIImUF&}D`J8j^y+_yQ-^myG zw3$kY42;T8kM)0YXX$aYcXqvWn{5a8@-zzFHCrKB(uGuml2BI7ihWmqpEB+)taiVeYCMwZEkLs zn)8*}rQXhZeRET*-dWkvm+x&KDGT!twst%7JNN+aF0Y>Mx9@*gTfZ}}==!xe?EQs4 zm{=%Rl&Z2&R;nwd%Iw_YV)^aT!b)v%<(=8Nx!Ku@H^y$Eo@cPb+r1(t)5MsmtM7J~ zN;k@2hsNBL(1=tww*Ba;X*)irv|8jC*4w6e*d{%(ZWC)p(TU;oGB0B@HKS$XwxRbt zpZ(dpn;b=dcqIAe!!d`)5sD7DC(QTkBZL^8Q5i?*$H6BKkjn;7*+yq{OX{Zwj4Om5G4Qx?8t=0>F+dMpF*K8KRU%AC zg$YTTgoHGBQ=vC=acmk(F8n`Nf|-RWLo|W2K@>`wc5^fb`()Ucr@4ca^;Mw3^#+3M z0?p&xA!~h23vQ-+3iN&+@H;>$!~)PcP&T9?pdWzb?kO%(@d<;6X#(CU>JH%%ngSm6 z{!CrbGj#Y7a?>>0muMP~QLe~P-pTf5ni(Kib5sh~Jk1Ue9ijpr2XBEwnY4&6ox?jI zraMEU(1e#ardXffCmJS}?Q_SPrFHD*ze8j8yia1+o_(JF28lj1mGId%wz6JPsxJ-s zf384Gcf(kk1_NZ)rE@^Y+E< zP>_9W8aDB6WlMJpP`)(^(%>`<5!=cH9tp&cwL6+l8d6*9=%fij^TcYAHEiXBl%YV1 zfeZ&S?){p5t1uFnmp~fazGm;QRi83e*%P_zSF?7@(vC@!X!ZlUd!kvS1Znm@h=3#{ zvXVC`e<~~jPbIC(-na5iH0k{&j~w0rmvtqOtAV@`NRVcgUzZA?FTB?=@yQXng@Ej7 Y5P^}AjG` + +
+ + 842F0073-DC21-A841-4E80-B1BD5E404E35 + 4B76A3C8-3E4D-4217-B54A-7C28C920748C + + + <keywords/> + </head> + <client > + <editor name="webapp-professional" version="6.169.22"/> + <device identifier="webapp-professional_6.169.22 win_10 desktop_ff_135" name="desktop ff 135" osName="win" osVersion="10"/> + </client> + <history > + <save oldId="842F0073-DC21-A841-4E80-B1BD5E404E35" newId="4B76A3C8-3E4D-4217-B54A-7C28C920748C" userName="DEMO" timestamp="2025-02-04T22:55:03Z"/> + </history> + <actions> + <action id="01search_re_obj" includeInMenu="false" includeInQuick="false" style="cancel" title="Suchen"> + <type> + <server command="search_re_obj" locksForm="true" waitForResponse="true"/> + </type> + <preconditions valid="true"/> + </action> + <action id="02load_search" includeInMenu="false" includeInQuick="false" title="Suchvariante laden"> + <type> + <server command="load_search" locksForm="true" waitForResponse="true"/> + </type> + </action> + <action id="03delete_search" includeInMenu="false" includeInQuick="false" title="Suche löschen"> + <type> + <server command="delete_search" locksForm="true" waitForResponse="true"/> + </type> + <confirmation> + <title>Suche löschen + Möchten Sie die Gespeicherte Suche endgültig löschen? + Ja + Nein + + + + +
+ true + +
+
+ X + + + + + + + 50 +
+
+ + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 900.00 +
+
+ + + +
+
+ + + +
+
+ \ No newline at end of file diff --git a/src/lwb/scraper.py b/src/lwb/scraper.py index ef41602..d6c47fc 100644 --- a/src/lwb/scraper.py +++ b/src/lwb/scraper.py @@ -1,13 +1,19 @@ import requests import xml.etree.ElementTree as ET import src.lwb.format as format +import gzip + +SESSION_CREATE_URL = "https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL" + +SAP_SESSIONID = "" +COOKIE_SESSION = "" EASYSQUARE_URL = "https://portal1s.easysquare.com/prorex/xmlforms" EASYSQUARE_HEADERS = { "DNT": "1", "Host": "portal1s.easysquare.com", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Cookie": "SAP_SESSIONID_PP0_581=zVLUPPaDGFVgBQTlA65yRXm5VdjdXBHvhiYKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118", + "Cookie": f"SAP_SESSIONID_PP0_581={SAP_SESSIONID}; sap-usercontext=sap-language=D&sap-client=581; cookiesession1={COOKIE_SESSION}", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0", "Accept-Encoding": "gzip, deflate, br, zstd", "Accept-Language": "de,en-US;q=0.7,en;q=0.3", @@ -23,6 +29,8 @@ EASYSQUARE_PARAMS = { "_": "1736761256321" } +SETUP_QUERY_PARAMS_URL = "https://portal1s.easysquare.com/prorex/xmlforms?application=ESQ_IA_REOBJ&sap-client=581&command=action&name=save&id=4B76A3C8-3E4D-4217-B54A-7C28C920748C&api=6.169&head-oppc-version=6.169.22&originalId=842F0073-DC21-A841-4E80-B1BD5E404E35&resourceOrigin=form" + # curl --location 'https://portal1s.easysquare.com/prorex/xmlforms?application=ESQ_IA_REOBJ&sap-client=581&command=action&name=boxlist&api=6.169&head-oppc-version=6.169.22&_=1736761255682' \ # --header 'DNT: 1' \ # --header 'UTC: 1736761256321' \ @@ -44,6 +52,89 @@ EASYSQUARE_PARAMS = { # --header 'X-Requested-With: XMLHttpRequest' +# setup query params for lwb session +def setup_query_params(): + + # request this url with POST an xml form + + # load xml form from file + xml_form = "" + with open("src/lwb/lwb_form.xml", "r") as file: + xml_form = file.read() + + # post xml form to SETUP_QUERY_PARAMS_URL + response = requests.post(SETUP_QUERY_PARAMS_URL, data=xml_form, headers=EASYSQUARE_HEADERS) + + if response.status_code != 200: + print(f"Fehler beim Abrufen von Easysquare: {response.status_code}") + return [] + + print(response.content) + + return response.content + +# Call Session Create and get the session from teh response cookies +def create_session(): + # request url with chromium browser and get the cookies + session = requests.Session() + response = session.get(SESSION_CREATE_URL, allow_redirects=True) + + if response.status_code != 200: + print(f"Fehler Session von Easysquare: {response.status_code}") + return [] + + # get the cookies from the response + cookies = response.cookies + COOKIE_SESSION = cookies.get("cookiesession1") + print(COOKIE_SESSION) + + + url = "https://portal1s.easysquare.com/meinelwb/api5/authenticate?api=6.169&sap-language=de" + + payload = { + 'sap-field_b64': "dXNlcj1ERU1PJnBhc3N3b3JkPXByb21vczE2" + } + headers = { + 'DNT': '1', + 'UTC': '1738713279005', + 'Host': 'portal1s.easysquare.com', + 'host': 'portal1s.easysquare.com', + 'Accept': 'text/html, */*; q=0.01', + 'Cookie': f'esq-alias=%2fmeinelwb; sap-usercontext=sap-language=de&sap-client=581; cookiesession1={COOKIE_SESSION}', + 'Origin': 'https://portal1s.easysquare.com', + 'Referer': 'https://portal1s.easysquare.com/meinelwb/index.html?deeplink=%2FESQ_IA_REOBJ%2FESQ_VM_REOBJ_ALL', + 'Sec-GPC': '1', + 'Connection': 'keep-alive', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'X-CSRF-Token': 'fetch', + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'Accept-Encoding': 'gzip, deflate, br, zstd', + 'Accept-Language': 'de,en-US;q=0.7,en;q=0.3', + 'X-Requested-With': 'XMLHttpRequest' + } + + print(headers) + + response = requests.request("POST", url, headers=headers, data=payload) + + print(response.text) + + if response.status_code != 200: + print(f"Fehler beim Session Erstellen via Easysquare: {response.status_code}") + return [] + + # get the cookies from the response + cookies = response.cookies + global SAP_SESSIONID + SAP_SESSIONID = cookies.get("SAP_SESSIONID_PP0_581") + + print(SAP_SESSIONID) + + + # Funktion: Scrape von Easysquare def scrape_easysquare(): session = requests.Session() @@ -51,6 +142,8 @@ def scrape_easysquare(): if response.status_code != 200: print(f"Fehler beim Abrufen von Easysquare: {response.status_code}") + # print("Versuche Session zu erstellen") + # create_session() return [] # XML-Daten parsen