Skip to content

Commit

Permalink
[WIP] issue codeforpdx#1063 parse DA
Browse files Browse the repository at this point in the history
This commit is strictly for parsing and testing the district attorney (DA) number.
I am unsure whether to go with the least invasive additions or a fully robust implementation.
The new `distr_atty` member on `CaseSummary` is optional (it defaults to an empty string) so that no arguments have to be added when calling
  the constructor throughout the project.
You'll see in the new parsing method `__parse_distr_atty` that there is potential to return
  all the values from the upper right of a given case HTML file, but for now I return only the DA number.
Would it be preferred to return more values? Or to return None instead of an empty str?
I also added test methods to test_case_parser.
  • Loading branch information
victorjz committed Jun 28, 2020
1 parent 0c8bc5b commit 996a2a7
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/backend/expungeservice/crawler/parsers/case_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ class CaseParserData:
hashed_dispo_data: Dict[int, Dict[str, str]]
balance_due: str
probation_revoked: Optional[date]
distr_atty: str


class CaseParser:
@staticmethod
def feed(data) -> CaseParserData:
soup = BeautifulSoup(data, "html.parser")
distr_atty = CaseParser.__parse_distr_atty(soup)
hashed_charge_data = CaseParser.__build_charge_table_data(soup)
(
hashed_dispo_data,
Expand All @@ -37,8 +39,19 @@ def feed(data) -> CaseParserData:
probation_revoked = date.fromdatetime(datetime.strptime(probation_revoked_date_string, "%m/%d/%Y"))
else:
probation_revoked = None # type: ignore
return CaseParserData(hashed_charge_data, hashed_dispo_data, balance_due, probation_revoked)
return CaseParserData(hashed_charge_data, hashed_dispo_data, balance_due, probation_revoked, distr_atty)

@staticmethod
def __parse_distr_atty(soup) -> str:
    """Return the district attorney number shown on a case page, or "".

    Scans the first ten ``th`` cells with class ``ssTableHeaderLabel`` for
    the label "District Attorney Number:" and reads the text of the first
    ``td`` found under that label's parent element.

    Args:
        soup: Parsed case page. Only ``find_all`` is called on it; each
            returned label must expose ``string`` and ``parent.find``.

    Returns:
        The district attorney number as a plain ``str``. An empty string is
        returned when the label is absent, when its row has no ``td``, or
        when that cell has no single text value. If the label occurs more
        than once, the last occurrence wins.
    """
    distr_atty_key = "District Attorney Number:"
    distr_atty = ""
    for label in soup.find_all("th", "ssTableHeaderLabel", limit=10):
        # Only the DA row is dereferenced, so an unrelated header row that
        # lacks a value cell cannot raise while we are looking for ours.
        if label.string != distr_atty_key:
            continue
        value_cell = label.parent.find("td")
        # find() gives None for a missing cell, and .string gives None for
        # an empty or multi-child cell; both are normalised to "".
        value = value_cell.string if value_cell is not None else None
        # str() yields a plain string rather than a node tied to the tree.
        distr_atty = str(value) if value is not None else ""
    return distr_atty

@staticmethod
def __build_charge_table_data(soup) -> Dict[int, Dict[str, str]]:
hashed_charge_data = {}
Expand Down
3 changes: 3 additions & 0 deletions src/backend/expungeservice/models/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class CaseSummary:
case_detail_link: str
balance_due_in_cents: int
edit_status: EditStatus
distr_atty: str=""

def get_balance_due(self):
    # balance_due_in_cents is declared as an int; true division by 100
    # converts it to whole currency units (presumably dollars) as a float.
    return self.balance_due_in_cents / 100
Expand Down Expand Up @@ -46,6 +47,7 @@ def empty(case_number: str):
CaseSummary(
name="",
birth_year=1900,
# distr_atty="",
case_number=case_number,
citation_number="",
location="",
Expand Down Expand Up @@ -90,6 +92,7 @@ def create(
case_detail_link,
balance_due_in_cents,
EditStatus.UNCHANGED,
"", # distr_atty
)

@staticmethod
Expand Down
25 changes: 25 additions & 0 deletions src/backend/tests/parser/test_case_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def test_charge_data_is_formatted(self):
def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # No district attorney number is expected for this case, so the
    # parser's empty-string fallback must be reported.
    # NOTE(review): self.parser is built outside this view (presumably in setUp) - confirm.
    assert self.parser.distr_atty == ""


class TestCaseWithoutFinancialTable(unittest.TestCase):
def setUp(self):
Expand All @@ -75,6 +78,10 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case.
    # NOTE(review): self.parser is built outside this view (presumably in setUp) - confirm.
    assert self.parser.distr_atty == "01234567"



class TestCaseWithPartialDisposition(unittest.TestCase):
Expand Down Expand Up @@ -117,6 +124,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case;
    # the value includes a "-1" suffix, so it must be kept as a string.
    assert self.parser.distr_atty == "555555-1"


class TestCaseWithoutDisposition(unittest.TestCase):
Expand Down Expand Up @@ -152,6 +162,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case;
    # the value includes a "-1" suffix, so it must be kept as a string.
    assert self.parser.distr_atty == "2377315-1"


class TestParkingViolationCase(unittest.TestCase):
Expand All @@ -176,6 +189,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # No district attorney number is expected for this case, so the
    # parser's empty-string fallback must be reported.
    # NOTE(review): self.parser is built outside this view (presumably in setUp) - confirm.
    assert self.parser.distr_atty == ""


class TestCaseWithRelatedCases(unittest.TestCase):
Expand Down Expand Up @@ -203,6 +219,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case;
    # the value ends in a letter suffix, so it must be kept as a string.
    assert self.parser.distr_atty == "555555-B"


class TestFelicia(unittest.TestCase):
Expand Down Expand Up @@ -237,6 +256,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # No probation revocation is expected for this case, so the parsed
    # field must be falsy (the parser stores None when no date is found).
    assert not self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case.
    # NOTE(review): self.parser is built outside this view (presumably in setUp) - confirm.
    assert self.parser.distr_atty == "555555"


class TestRevokedProbation(unittest.TestCase):
Expand Down Expand Up @@ -271,6 +293,9 @@ def test_charge_data_is_formatted(self):

def test_probation_revoked_is_parsed(self):
    # Unlike the sibling test classes, this case is expected to carry a
    # parsed probation-revocation date, so the field must be truthy.
    assert self.parser.probation_revoked

def test_distr_atty_is_parsed(self):
    # Pins the exact district attorney number expected for this case.
    # NOTE(review): self.parser is built outside this view (presumably in setUp) - confirm.
    assert self.parser.distr_atty == "55555555"


class TestSpacesExistingInChargeInfoCells(unittest.TestCase):
Expand Down

0 comments on commit 996a2a7

Please sign in to comment.