mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2026-01-16 14:32:26 +01:00
Added Playwright-based LWB scraping with headless Chromium
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import requests
|
||||
from src.lwb.scraper import EASYSQUARE_HEADERS, EASYSQUARE_PARAMS
|
||||
from src.lwb.scraper import EASYSQUARE_HEADERS
|
||||
|
||||
def scrape_image(url, owner):
|
||||
session = requests.Session()
|
||||
@@ -19,10 +19,14 @@ def scrape_image(url, owner):
|
||||
# return empty image
|
||||
return b''
|
||||
|
||||
if owner == "LWB":
|
||||
response = session.get(url, headers=EASYSQUARE_HEADERS, params=EASYSQUARE_PARAMS)
|
||||
if owner == "LWB":
|
||||
# Image URL already carries the required params; only send headers
|
||||
response = session.get(url, headers=EASYSQUARE_HEADERS)
|
||||
if response.status_code != 200:
|
||||
print(f"Fehler beim Abrufen von Easysquare: {response.status_code}")
|
||||
print(
|
||||
f"Fehler beim Abrufen von Easysquare: "
|
||||
f"{response.status_code}"
|
||||
)
|
||||
# return empty image
|
||||
return b''
|
||||
|
||||
@@ -30,8 +34,10 @@ def scrape_image(url, owner):
|
||||
if response is None:
|
||||
response = session.get(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Fehler beim Abrufen der Standardquelle: {response.status_code}")
|
||||
print(
|
||||
f"Fehler beim Abrufen der Standardquelle: "
|
||||
f"{response.status_code}"
|
||||
)
|
||||
return b''
|
||||
|
||||
|
||||
|
||||
return response.content
|
||||
|
||||
Reference in New Issue
Block a user