Added Playwright-based LWB scraping using headless Chromium

This commit is contained in:
Elmar Kresse
2025-08-15 15:00:29 +02:00
parent 4f62e2709a
commit 78152dafc0
7 changed files with 597 additions and 343 deletions

View File

@@ -1,5 +1,5 @@
import requests
from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
from src.lwb.scraper import EASYSQUARE_HEADERS
def scrape_image(url, owner):
session = requests.Session()
@@ -19,10 +19,14 @@ def scrape_image(url, owner):
# return empty image
return b''
if owner == "LWB":
response = session.get(url, headers=EASYSQUARE_HEADERS, params=EASYSQUARE_PARAMS)
if owner == "LWB":
# Image URL already carries the required params; only send headers
response = session.get(url, headers=EASYSQUARE_HEADERS)
if response.status_code != 200:
print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
print(
f"Fehler beim Abrufen von Easysquare: "
f"{response.status_code}"
)
# return empty image
return b''
@@ -30,8 +34,10 @@ def scrape_image(url, owner):
if response is None:
response = session.get(url)
if response.status_code != 200:
print(f"Fehler beim Abrufen der Standardquelle: {response.status_code}")
print(
f"Fehler beim Abrufen der Standardquelle: "
f"{response.status_code}"
)
return b''
return response.content