From 683403e8bffdcb75bba3e47a09cce8fff8e23e1d Mon Sep 17 00:00:00 2001 From: Elmar Kresse Date: Sun, 16 Feb 2025 22:58:19 +0100 Subject: [PATCH] feat: update property handling in VLW scraper and main process, fix image URL formatting --- known_properties.json | 2 +- main.py | 2 +- src/vlw/scraper.py | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/known_properties.json b/known_properties.json index 35e856b..cf80cde 100644 --- a/known_properties.json +++ b/known_properties.json @@ -1 +1 @@ -["b36ab45e1c5899b4c9add5b1134e731e6c97858fb7de80373df925c2246c824d", "449c368b2b7bca515840659acd6702071fa62e59917ba7a29a180d49ca19ad82", "id-58-4-30", "8901.039", "2702.004", "id-148-1-30", "7e59929357819219851c5dd0f4addb2c6d472fe5e1001b1c6a0021597d6ec4cc", "fe1245cb4d60d639f47edbf5b4ccf44c2639595723ef412d059760b3c3b075e2", "3207.006", "8ee618d09d89849e48da7a66efafe046f52c833baa08babbd98d8574578a259f", "e804d28f1dea007b5a3ff762dfdd7cc29fb1f42104ac0f15bfac74b9254174bd", "10104.013", 52087, "dc552b2bfb958b166d98425b8ee43420130a8eaea7e494da003fb1f7734cb1fb", "1809676f5b586fb5ad89ae3ec6082c45dad170be9b67c18fa887cd97df0b375c", "id-158-2-18", "9101.046"] \ No newline at end of file +["8ee618d09d89849e48da7a66efafe046f52c833baa08babbd98d8574578a259f", "449c368b2b7bca515840659acd6702071fa62e59917ba7a29a180d49ca19ad82", "fe1245cb4d60d639f47edbf5b4ccf44c2639595723ef412d059760b3c3b075e2", "7e59929357819219851c5dd0f4addb2c6d472fe5e1001b1c6a0021597d6ec4cc", "id-148-1-30", "3207.006", "b36ab45e1c5899b4c9add5b1134e731e6c97858fb7de80373df925c2246c824d", "6101.107", 52147, "id-80-1-2", "7401.011", 52151, "8901.039", "id-58-4-30", "id-158-2-18", "e804d28f1dea007b5a3ff762dfdd7cc29fb1f42104ac0f15bfac74b9254174bd", "id-128-1-20", "1301.013", "333110", "id-32-2-193", "5901.035", "1809676f5b586fb5ad89ae3ec6082c45dad170be9b67c18fa887cd97df0b375c", "2502.013", "4801.037", "id-80-3-104", "2702.004", "1003.032", "dc552b2bfb958b166d98425b8ee43420130a8eaea7e494da003fb1f7734cb1fb", "10104.013", "9101.046", "id-147-1-1", 52087, "333111", "7401.038"] \ No newline at end of file diff --git a/main.py b/main.py index 06785d4..3713ca4 100644 --- a/main.py +++ b/main.py @@ -47,7 +47,7 @@ def main(): properties_vlw = vlw_scraper.scrape_vlw() print("Scraped " + str(len(properties_vlw)) + " properties from VLW") - properties += properties_vlw + properties = properties_vlw for prop in properties: diff --git a/src/vlw/scraper.py b/src/vlw/scraper.py index ec9a8ba..e317d1f 100644 --- a/src/vlw/scraper.py +++ b/src/vlw/scraper.py @@ -34,6 +34,9 @@ def scrape_vlw(): for estate in estate_items: #
image_url = estate.find("div", class_="image-wrapper")["style"].split("'")[1] + # remove all whitespaces + image_url = image_url.replace(" ", "") + image_url = "https://vlw-eg.de/" + image_url # title

3-Raumwohnung sucht Nachmieter – Großartiger Weitblick inklusive!!

title = estate.find("h4", class_="heading_h4").text @@ -62,7 +65,7 @@ def scrape_vlw(): properties.append({ "id": property_id, - "title": "Wogetra - "+ title, + "title": "VLW - "+ title, "subtitle": subtitle, "rooms": rooms, "size": size,