mirror of
https://gitlab.dit.htwk-leipzig.de/fsr-im/tools/flatscraper.git
synced 2025-07-15 11:08:48 +02:00
feat: update property handling in VLW scraper and main process, fix image URL formatting
This commit is contained in:
@ -1 +1 @@
|
||||
["b36ab45e1c5899b4c9add5b1134e731e6c97858fb7de80373df925c2246c824d", "449c368b2b7bca515840659acd6702071fa62e59917ba7a29a180d49ca19ad82", "id-58-4-30", "8901.039", "2702.004", "id-148-1-30", "7e59929357819219851c5dd0f4addb2c6d472fe5e1001b1c6a0021597d6ec4cc", "fe1245cb4d60d639f47edbf5b4ccf44c2639595723ef412d059760b3c3b075e2", "3207.006", "8ee618d09d89849e48da7a66efafe046f52c833baa08babbd98d8574578a259f", "e804d28f1dea007b5a3ff762dfdd7cc29fb1f42104ac0f15bfac74b9254174bd", "10104.013", 52087, "dc552b2bfb958b166d98425b8ee43420130a8eaea7e494da003fb1f7734cb1fb", "1809676f5b586fb5ad89ae3ec6082c45dad170be9b67c18fa887cd97df0b375c", "id-158-2-18", "9101.046"]
|
||||
["8ee618d09d89849e48da7a66efafe046f52c833baa08babbd98d8574578a259f", "449c368b2b7bca515840659acd6702071fa62e59917ba7a29a180d49ca19ad82", "fe1245cb4d60d639f47edbf5b4ccf44c2639595723ef412d059760b3c3b075e2", "7e59929357819219851c5dd0f4addb2c6d472fe5e1001b1c6a0021597d6ec4cc", "id-148-1-30", "3207.006", "b36ab45e1c5899b4c9add5b1134e731e6c97858fb7de80373df925c2246c824d", "6101.107", 52147, "id-80-1-2", "7401.011", 52151, "8901.039", "id-58-4-30", "id-158-2-18", "e804d28f1dea007b5a3ff762dfdd7cc29fb1f42104ac0f15bfac74b9254174bd", "id-128-1-20", "1301.013", "333110", "id-32-2-193", "5901.035", "1809676f5b586fb5ad89ae3ec6082c45dad170be9b67c18fa887cd97df0b375c", "2502.013", "4801.037", "id-80-3-104", "2702.004", "1003.032", "dc552b2bfb958b166d98425b8ee43420130a8eaea7e494da003fb1f7734cb1fb", "10104.013", "9101.046", "id-147-1-1", 52087, "333111", "7401.038"]
|
2
main.py
2
main.py
@ -47,7 +47,7 @@ def main():
|
||||
|
||||
properties_vlw = vlw_scraper.scrape_vlw()
|
||||
print("Scraped " + str(len(properties_vlw)) + " properties from VLW")
|
||||
properties += properties_vlw
|
||||
properties = properties_vlw
|
||||
|
||||
|
||||
for prop in properties:
|
||||
|
@ -34,6 +34,9 @@ def scrape_vlw():
|
||||
for estate in estate_items:
|
||||
# <div class="image-wrapper" style="background-image: url(' income/actual/new/42da0fdb1bcaed578d2256f1a0599bf6.jpg ');">
|
||||
image_url = estate.find("div", class_="image-wrapper")["style"].split("'")[1]
|
||||
# remove all whitespaces
|
||||
image_url = image_url.replace(" ", "")
|
||||
image_url = "https://vlw-eg.de/" + image_url
|
||||
|
||||
# title <h4 class="heading_h4">3-Raumwohnung sucht Nachmieter – Großartiger Weitblick inklusive!!</h4>
|
||||
title = estate.find("h4", class_="heading_h4").text
|
||||
@ -62,7 +65,7 @@ def scrape_vlw():
|
||||
|
||||
properties.append({
|
||||
"id": property_id,
|
||||
"title": "Wogetra - "+ title,
|
||||
"title": "VLW - "+ title,
|
||||
"subtitle": subtitle,
|
||||
"rooms": rooms,
|
||||
"size": size,
|
||||
|
Reference in New Issue
Block a user