Skip to content

Commit

Permalink
Task 1 - fix nonascii chars parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Valentine-456 committed Dec 4, 2023
1 parent 3eb0a18 commit 8f94f6e
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
9 changes: 9 additions & 0 deletions otodom/task_1/Valentyn/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"base_url": "str",
"price_min": "str",
"price_max": "str",
"city": "str",
"property_type": "str",
"only_for_sale": "bool",
"only_for_rent": "bool"
}
26 changes: 19 additions & 7 deletions otodom/task_1/Valentyn/src/HouseItem.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import unicodedata


class HouseItem:
base_url = "https://www.otodom.pl"

Expand All @@ -19,6 +22,11 @@ def __init__(self, url: str):
},
}

def convert_to_ascii(self, text):
    """Return *text* reduced to plain ASCII characters.

    Accented letters are decomposed with NFKD normalization so that the
    base letter survives once the combining marks are dropped. The Polish
    stroke-L has no Unicode decomposition, so both cases are transliterated
    explicitly first; otherwise "Łódź" would lose its initial letter.

    Args:
        text: The string to convert.

    Returns:
        The ASCII-only version of ``text``; characters with no ASCII
        equivalent after normalization are removed.
    """
    # Handle both cases: "ł" -> "l" and "Ł" -> "L".
    transliterated = text.translate(str.maketrans("łŁ", "lL"))
    normalized = unicodedata.normalize("NFKD", transliterated)
    # "ignore" drops the combining marks (and anything else non-ASCII).
    return normalized.encode("ascii", "ignore").decode("ascii")

def setPrice(self, priceStr: str):
clean_price = priceStr.strip().replace(" zł", "").replace(" ", "")
try:
Expand All @@ -30,11 +38,11 @@ def setPrice(self, priceStr: str):
return self

def setTitle(self, title: str) -> "HouseItem":
    """Store the ASCII-converted offer title under the ``"title"`` key.

    Args:
        title: The title text to store.

    Returns:
        ``self``, so setter calls can be chained.
    """
    # Store only the converted value; the raw title is never kept.
    self.dictionary["title"] = self.convert_to_ascii(title)
    return self

def setArea(self, area: str) -> "HouseItem":
    """Parse the area text and store it as a whole number under ``"area"``.

    The first space-separated token of the stripped text is taken as the
    number. A decimal comma is turned into a dot and non-breaking spaces
    inside the number are removed before conversion. The fractional part
    is truncated. An empty token stores ``None``.

    Args:
        area: The area text, e.g. ``"45,7 m²"``.

    Returns:
        ``self``, so setter calls can be chained.

    Raises:
        ValueError: If the first token is non-empty but not a number.
    """
    first_token = area.strip().split(" ")[0]
    clean_number = first_token.replace(",", ".").replace("\xa0", "")
    self.dictionary["area"] = (
        int(float(clean_number)) if clean_number != "" else None
    )
    return self

Expand All @@ -45,17 +53,21 @@ def setRooms(self, rooms: str):
def setLocalization(self, address: str) -> "HouseItem":
    """Split a comma-separated address and store its ASCII-converted parts.

    The address is split on ``", "`` and parts are read from the end:
    province is the last part, city the second to last, district the third
    from last and street the fifth from last. Each field is written only
    when the address has at least the number of parts required below.

    Args:
        address: The address text with parts separated by ``", "``.

    Returns:
        ``self``, so setter calls can be chained.
    """
    parts = address.split(", ")
    count = len(parts)
    localization = self.dictionary["localization"]
    # Thresholds are kept exactly as written; the fourth-from-last part
    # is intentionally not read by this method.
    if count >= 5:
        localization["street"] = self.convert_to_ascii(parts[-5])
    if count >= 4:
        localization["district"] = self.convert_to_ascii(parts[-3])
    if count >= 3:
        localization["city"] = self.convert_to_ascii(parts[-2])
    if count >= 1:
        localization["province"] = self.convert_to_ascii(parts[-1])
    return self

def setEstateAgency(self, agency: str) -> "HouseItem":
    """Store the ASCII-converted agency name under ``"estate_agency"``.

    Args:
        agency: The agency name to store.

    Returns:
        ``self``, so setter calls can be chained.
    """
    # Store only the converted value; the raw name is never kept.
    self.dictionary["estate_agency"] = self.convert_to_ascii(agency)
    return self

def toDictionary(self):
Expand Down

0 comments on commit 8f94f6e

Please sign in to comment.