import requests
import re
import json
+import sys

HEADERS = {
    "Access-Control-Allow-Origin": "*",
@@ -108,7 +109,41 @@ def scrap_listings(self, url, check_all_pages=False):
        with open('otodom_listing.json', 'w', encoding='utf-8') as json_file:
            json.dump(listing_json, json_file, ensure_ascii=False, indent=2)

+    def generate_url(self):
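+        """Build the otodom.pl search URL from the filters in otodom_settings.json."""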
+        with open('otodom_settings.json') as f:
+            data = json.load(f)
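+        # Note: otodom_settings.json is not included in this diff; judging by the
+        # keys read below, it presumably looks something like this (values are
+        # illustrative guesses, not taken from the repository):
+        # {
+        #   "base_url": "https://www.otodom.pl/",
+        #   "only_for_sale": true,
+        #   "only_for_rent": false,
+        #   "property_type": "mieszkanie",
+        #   "province": "",
+        #   "city": "",
+        #   "price_min": "",
+        #   "price_max": ""
+        # }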
+        url = data["base_url"] + "pl/wyniki"
+
+        if data["only_for_sale"]:
+            url += "/sprzedaz"
+
+        if data["only_for_rent"]:
+            url += "/wynajem"
+        url += "/" + data["property_type"] + "/"
+        if len(data["province"]) > 0:
+            url += data["province"] + "/" + data["city"] + "?"
+        else:
+            url += "cala-polska?"
+
+        url += "limit=36"
+
+        if len(data["price_min"]) > 0:
+            url += "&priceMin=" + data["price_min"]
+
+        if len(data["price_max"]) > 0:
+            url += "&priceMax=" + data["price_max"]
+
+        url += "&by=LATEST&direction=DESC&viewType=listing"
+        # print("Generated link:\n", url)
+        return url
+
if __name__ == '__main__':
-    crawler = Crawler()
-    url = input("Enter url: ")
-    crawler.scrap_listings(url, check_all_pages=False)
+    crawler = Crawler()
+
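+    # With "-u <url>" on the command line, the given URL is scraped directly;
+    # otherwise the search URL is built from otodom_settings.json.
+    # Usage (the script filename here is an assumption):
+    #   python crawler.py -u "https://www.otodom.pl/pl/wyniki/..."
+    #   python crawler.py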
+    if len(sys.argv) > 2 and sys.argv[1] == "-u":
+        print(sys.argv[2])
+        crawler.scrap_listings(sys.argv[2], check_all_pages=False)
+    else:
+        crawler.scrap_listings(crawler.generate_url(), check_all_pages=False)
+
+