mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2026-01-17 11:42:25 +01:00
refactor known_properties.json and update scraper logic for improved session handling and image scraping
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,25 +1,5 @@
|
||||
import requests
|
||||
|
||||
EASYSQUARE_HEADERS = {
|
||||
"DNT": "1",
|
||||
"Host": "portal1s.easysquare.com",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Cookie": "SAP_SESSIONID_PP0_581=zqFIhvNbEsOs_n3cgRTIO1V7ZaLQCxHvhYgKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
|
||||
"Upgrade-Insecure-Requests": "1"
|
||||
}
|
||||
EASYSQUARE_PARAMS = {
|
||||
"application": "ESQ_IA_REOBJ",
|
||||
"sap-client": "581",
|
||||
"command": "action",
|
||||
"name": "boxlist",
|
||||
"api": "6.169",
|
||||
"head-oppc-version": "6.169.22",
|
||||
"_": "1736595414769"
|
||||
}
|
||||
|
||||
from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
|
||||
|
||||
def scrape_image(url):
|
||||
session = requests.Session()
|
||||
|
||||
@@ -7,7 +7,7 @@ EASYSQUARE_HEADERS = {
|
||||
"DNT": "1",
|
||||
"Host": "portal1s.easysquare.com",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Cookie": "SAP_SESSIONID_PP0_581=ePxgeqJDhl8MWlBQQ9-sjz1zLYTSYRHvhfgKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
|
||||
"Cookie": "SAP_SESSIONID_PP0_581=zVLUPPaDGFVgBQTlA65yRXm5VdjdXBHvhiYKELG5Agg%3d; sap-usercontext=sap-language=D&sap-client=581; cookiesession1=678ADA67ADF0813997206FE9F4133118",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
|
||||
@@ -99,6 +99,10 @@ def scrape_easysquare():
|
||||
# https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id=1EC8D4E6-191A-A827-47FF-72D8C5379070&name=get
|
||||
image_url = f"https://portal1s.easysquare.com/prorex/xmlforms/image.jpg?application=ESQ_IA_REOBJ&command=action&id={iamge_resourceId}&name=get"
|
||||
|
||||
# the id should be a hash created from the title, subtitle, rooms, size, rent and availability
|
||||
hashID = f"{prop_title}{subtitle}{rooms}{size}{rent}{availability}"
|
||||
id = hash(hashID)
|
||||
|
||||
properties.append({
|
||||
"id": id,
|
||||
"title": "LWB - " + prop_title,
|
||||
|
||||
Reference in New Issue
Block a user