1
+ import unicodedata
2
+
3
+
1
4
class HouseItem :
2
5
base_url = "https://www.otodom.pl"
3
6
@@ -19,6 +22,11 @@ def __init__(self, url: str):
19
22
},
20
23
}
21
24
25
def convert_to_ascii(self, text):
    """Return *text* transliterated to plain ASCII.

    The letters "ł" and "Ł" have no Unicode decomposition, so they are
    mapped to "l" and "L" explicitly. The result is then NFKD-normalized,
    which splits accented letters into a base letter followed by combining
    marks, and every character that is still not ASCII is dropped.

    Args:
        text: The string to convert.

    Returns:
        An ASCII-only ``str``.
    """
    # NFKD cannot decompose the stroked L; without the uppercase mapping the
    # letter would be silently discarded ("Łódź" -> "odz").
    replaced = text.replace("ł", "l").replace("Ł", "L")
    normalized = unicodedata.normalize("NFKD", replaced)
    return normalized.encode("ascii", "ignore").decode("ascii")
29
+
22
30
def setPrice (self , priceStr : str ):
23
31
clean_price = priceStr .strip ().replace (" zł" , "" ).replace (" " , "" )
24
32
try :
@@ -30,11 +38,11 @@ def setPrice(self, priceStr: str):
30
38
return self
31
39
32
40
def setTitle(self, title: str):
    """Store *title*, passed through ``convert_to_ascii``, under "title".

    Returns:
        ``self``, so that setter calls can be chained.
    """
    ascii_title = self.convert_to_ascii(title)
    self.dictionary["title"] = ascii_title
    return self
35
43
36
44
def setArea(self, area: str):
    """Parse the leading number of *area* and store it under "area".

    The text is stripped and only the part before the first plain space is
    kept. No-break spaces inside that part are removed and a decimal comma
    is turned into a dot before conversion. The value is stored as an
    ``int`` (fractional part truncated), or as ``None`` when nothing is
    left to parse.

    Args:
        area: Text that starts with a number, e.g. ``"45,5 m2"``.

    Returns:
        ``self``, so that setter calls can be chained.

    Raises:
        ValueError: If the leading part is non-empty but not a number.
    """
    token = area.strip().split(" ")[0]
    # The token was split on plain spaces, so it can only contain bare
    # no-break spaces; a pattern padded with spaces would never match here.
    number_text = token.replace("\xa0", "").replace(",", ".")
    self.dictionary["area"] = int(float(number_text)) if number_text else None
    return self
40
48
@@ -45,17 +53,21 @@ def setRooms(self, rooms: str):
45
53
def setLocalization(self, address: str):
    """Split *address* on ", " and fill the "localization" sub-dictionary.

    Parts are taken from the end of the split list. Each field is written,
    after passing through ``convert_to_ascii``, only when the address has at
    least the required number of parts:

    * "street"   - fifth part from the end, needs 5 or more parts
    * "district" - third part from the end, needs 4 or more parts
    * "city"     - second part from the end, needs 3 or more parts
    * "province" - last part, needs 1 or more parts

    Returns:
        ``self``, so that setter calls can be chained.
    """
    parts = address.split(", ")
    count = len(parts)
    # (minimum number of parts, index from the end, destination key)
    rules = (
        (5, -5, "street"),
        (4, -3, "district"),
        (3, -2, "city"),
        (1, -1, "province"),
    )
    for minimum, index, key in rules:
        if count < minimum:
            continue
        value = self.convert_to_ascii(parts[index])
        self.dictionary["localization"][key] = value
    return self
56
68
57
69
def setEstateAgency(self, agency: str):
    """Store *agency*, passed through ``convert_to_ascii``, under "estate_agency".

    Returns:
        ``self``, so that setter calls can be chained.
    """
    ascii_agency = self.convert_to_ascii(agency)
    self.dictionary["estate_agency"] = ascii_agency
    return self
60
72
61
73
def toDictionary (self ):
0 commit comments