refactor known_properties.json and update scraper logic for improved session handling and image scraping

This commit is contained in:
Elmar Kresse
2025-01-28 10:50:32 +01:00
parent b468fc6578
commit 7effa15ddc
6 changed files with 7 additions and 23 deletions

View File

@@ -1,25 +1,5 @@
import requests
EASYSQUARE_HEADERS = {
"DNT": "1",
"Host": "portal1s.easysquare.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Cookie": "SAP_SESSIONID_PP0_581=zqFIhvNbEsOs_n3cgRTIO1V7ZaLQCxHvhYgKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
"Upgrade-Insecure-Requests": "1"
}
EASYSQUARE_PARAMS = {
"application": "ESQ_IA_REOBJ",
"sap-client": "581",
"command": "action",
"name": "boxlist",
"api": "6.169",
"head-oppc-version": "6.169.22",
"_": "1736595414769"
}
from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
def scrape_image(url):
session = requests.Session()

View File

@@ -7,7 +7,7 @@ EASYSQUARE_HEADERS = {
"DNT": "1",
"Host": "portal1s.easysquare.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Cookie": "SAP_SESSIONID_PP0_581=ePxgeqJDhl8MWlBQQ9-sjz1zLYTSYRHvhfgKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
"Cookie": "SAP_SESSIONID_PP0_581=zVLUPPaDGFVgBQTlA65yRXm5VdjdXBHvhiYKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
@@ -99,6 +99,10 @@ def scrape_easysquare():
# https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id=1EC8D4E6-191A-A827-47FF-72D8C5379070&name=get
image_url = f"https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id={iamge_resourceId}&name=get"
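# the id should be a hash created from the title, subtitle, rooms, size, rent, and availability
# NOTE(review): the built-in hash() of a str is salted per process unless PYTHONHASHSEED is fixed, so this id may differ between runs - confirm whether a stable digest (e.g. hashlib) is needed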
hashID = f"{prop_title}{subtitle}{rooms}{size}{rent}{availability}"
id = hash(hashID)
properties.append({
"id": id,
"title": "LWB - " + prop_title,