1+ import unicodedata
2+
3+
14class HouseItem :
25 base_url = "https://www.otodom.pl"
36
@@ -19,6 +22,11 @@ def __init__(self, url: str):
1922 },
2023 }
2124
def convert_to_ascii(self, text):
    """Return ``text`` reduced to plain ASCII characters.

    The Polish letters ``ł`` and ``Ł`` are mapped to ``l`` and ``L`` first:
    they have no Unicode decomposition, so without the explicit mapping the
    ASCII encoding step below would drop them entirely (e.g. "Łódź" would
    become "odz"). The remaining text is NFKD-normalized so accented letters
    split into a base letter plus combining marks, and every character that
    is still outside ASCII is discarded.

    Args:
        text: The string to convert.

    Returns:
        The ASCII-only version of ``text``.
    """
    # Handle both cases of the stroke-L before normalizing.
    transliterated = text.translate(str.maketrans("łŁ", "lL"))
    decomposed = unicodedata.normalize("NFKD", transliterated)
    # "ignore" silently drops combining marks and any other non-ASCII residue.
    return decomposed.encode("ascii", "ignore").decode("ascii")
29+
2230 def setPrice (self , priceStr : str ):
2331 clean_price = priceStr .strip ().replace (" zł" , "" ).replace (" " , "" )
2432 try :
@@ -30,11 +38,11 @@ def setPrice(self, priceStr: str):
3038 return self
3139
def setTitle(self, title: str):
    """Store the ASCII-converted ``title`` under the ``"title"`` key.

    Args:
        title: The title text; it is passed through ``convert_to_ascii``
            before being stored.

    Returns:
        ``self``, so setter calls can be chained.
    """
    ascii_title = self.convert_to_ascii(title)
    self.dictionary["title"] = ascii_title
    return self
3543
def setArea(self, area: str):
    """Parse the leading number of ``area`` and store it as a whole number.

    The value is taken from the first space-separated token of the stripped
    text. A decimal comma is accepted. No-break spaces (``\\xa0``) are handled
    explicitly: one that is directly followed by a digit is treated as a
    thousands separator and removed, while any other one ends the number, so
    a unit attached with a no-break space ("45,5\\xa0m²") no longer breaks
    the conversion.

    Args:
        area: Raw area text, e.g. "45,5 m²".

    Returns:
        ``self``, so setter calls can be chained.

    Raises:
        ValueError: If the extracted token is non-empty but not a number.
    """
    import re  # local import keeps this method self-contained

    token = area.strip().split(" ")[0]
    # "1\xa0234,5" -> "1234,5": a no-break space between digits groups thousands.
    token = re.sub(r"\xa0+(?=\d)", "", token)
    # "45,5\xa0m²" -> "45,5": any remaining no-break space separates the unit.
    token = token.split("\xa0")[0]
    number = token.replace(",", ".")
    # The fractional part is truncated; empty input is stored as None.
    self.dictionary["area"] = int(float(number)) if number != "" else None
    return self
4048
@@ -45,17 +53,21 @@ def setRooms(self, rooms: str):
def setLocalization(self, address: str):
    """Split a ", "-separated address and store its ASCII-converted parts.

    Parts are read from the end of the list. Each field is written only when
    the address has at least the stated number of parts:

    * ``street``   - 5th part from the end, needs at least 5 parts
    * ``district`` - 3rd part from the end, needs at least 4 parts
    * ``city``     - 2nd part from the end, needs at least 3 parts
    * ``province`` - last part, needs at least 1 part

    Args:
        address: Address text whose components are separated by ", ".

    Returns:
        ``self``, so setter calls can be chained.
    """
    parts = address.split(", ")
    part_count = len(parts)
    # (minimum number of parts, index from the end, destination key)
    field_rules = (
        (5, -5, "street"),
        (4, -3, "district"),
        (3, -2, "city"),
        (1, -1, "province"),
    )
    for minimum, offset, key in field_rules:
        if part_count < minimum:
            continue
        self.dictionary["localization"][key] = self.convert_to_ascii(parts[offset])
    return self
5668
def setEstateAgency(self, agency: str):
    """Store the ASCII-converted ``agency`` under the ``"estate_agency"`` key.

    Args:
        agency: The agency text; it is passed through ``convert_to_ascii``
            before being stored.

    Returns:
        ``self``, so setter calls can be chained.
    """
    ascii_agency = self.convert_to_ascii(agency)
    self.dictionary["estate_agency"] = ascii_agency
    return self
6072
6173 def toDictionary (self ):
0 commit comments