Skip to content

Commit

Permalink
Merge pull request #2 from dadosjusbr/tratamento-mppi
Browse files Browse the repository at this point in the history
mppi -> casos sem cargo e lotacao
  • Loading branch information
danielfireman authored Aug 29, 2024
2 parents b2326bf + 54a2ea1 commit 074b154
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 23 deletions.
54 changes: 32 additions & 22 deletions src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _readCSV(file):
return data


def load(file_names, year, month, court, output_folder):
def load(file_names, data):
"""Carrega os arquivos passados como parâmetros.
:param file_names: slice contendo os arquivos baixados pelo coletor.
Os nomes dos arquivos devem seguir uma convenção e começar com
Expand All @@ -59,13 +59,15 @@ def load(file_names, year, month, court, output_folder):
:return um objeto Data() pronto para operar com os arquivos
"""

if court.casefold() in ["mppa", "mpsc", "mprr"]:
contracheque = _readXLS([c for c in file_names if "contracheque" in c][0])
indenizatorias = _readXLS([i for i in file_names if "indenizacoes" in i][0])
if data.court.casefold() in ["mppa", "mpsc", "mprr"]:
data.contracheque = _readXLS([c for c in file_names if "contracheque" in c][0])
data.indenizatorias = _readXLS(
[i for i in file_names if "indenizacoes" in i][0]
)

return Data(contracheque, indenizatorias, year, month, court, output_folder)
return data

elif court.casefold() in [
elif data.court.casefold() in [
"mpsp",
"mprj",
"mpse",
Expand All @@ -74,29 +76,37 @@ def load(file_names, year, month, court, output_folder):
"mppi",
"mpac",
"mpba",
] or (court.casefold() == "mpes" and int(year) != 2021):
contracheque = _readODS([c for c in file_names if "contracheque" in c][0])
indenizatorias = _readODS([i for i in file_names if "indenizacoes" in i][0])

return Data(contracheque, indenizatorias, year, month, court, output_folder)
] or (data.court.casefold() == "mpes" and int(year) != 2021):
data.contracheque = _readODS([c for c in file_names if "contracheque" in c][0])
data.indenizatorias = _readODS(
[i for i in file_names if "indenizacoes" in i][0]
)

elif court.casefold() in ["mprs", "mpal"]:
contracheque = _readCSV([c for c in file_names if "contracheque" in c][0])
indenizatorias = _readCSV([i for i in file_names if "indenizacoes" in i][0])
return data

return Data(contracheque, indenizatorias, year, month, court, output_folder)
elif data.court.casefold() in ["mprs", "mpal"]:
data.contracheque = _readCSV([c for c in file_names if "contracheque" in c][0])
data.indenizatorias = _readCSV(
[i for i in file_names if "indenizacoes" in i][0]
)

elif court.casefold() in ["mppe"] or (
court.casefold() == "mpes" and int(year) == 2021
return data

elif data.court.casefold() in ["mppe"] or (
data.court.casefold() == "mpes" and int(year) == 2021
):
contracheque = _readXLSX([c for c in file_names if "contracheque" in c][0])
indenizatorias = _readXLSX([i for i in file_names if "indenizacoes" in i][0])
data.contracheque = _readXLSX([c for c in file_names if "contracheque" in c][0])
data.indenizatorias = _readXLSX(
[i for i in file_names if "indenizacoes" in i][0]
)

return Data(contracheque, indenizatorias, year, month, court, output_folder)
return data


class Data:
def __init__(self, contracheque, indenizatorias, year, month, court, output_folder):
def __init__(
self, year, month, court, output_folder, contracheque=None, indenizatorias=None
):
self.year = year
self.month = month
self.court = court
Expand All @@ -117,7 +127,7 @@ def validate(self):
glob.glob(
f"{self.output_folder}/{self.court}-contracheques-{self.month}-{self.year}.*"
)
or glob.glob(
and glob.glob(
f"{self.output_folder}/{self.court}-indenizacoes-{self.month}-{self.year}.*"
)
):
Expand Down
4 changes: 3 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ def parse_execution(data, file_names):
def main():
file_names = [f.rstrip() for f in sys.stdin.readlines()]

dados = data.load(file_names, year, month, court, output_path)
dados = data.Data(year, month, court, output_path)

dados.validate()

dados = data.load(file_names, dados)

parse_execution(dados, file_names)


Expand Down
8 changes: 8 additions & 0 deletions src/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ def parse_employees(file, colect_key, court):
and "rendimento" not in registration.casefold()
and "mês" not in registration.casefold()
):
# MPPI does not report the position (cargo) and assignment (lotação) for every member;
# it may leave both fields null or replace one of them with " ".
# This makes iterating over the pay items (rubricas) harder, since there is no consistent pattern and the data is not strictly tabular.
if court == "mppi" and len(new_row) != 18:
new_row = ["" if item == " " else item for item in new_row]
while len(new_row) != 18:
new_row.insert(2, "")

# MPPA possui uma linha com o somatório de cada rubrica
if (
(court == "mppa" and len(new_row) == 15)
Expand Down

0 comments on commit 074b154

Please sign in to comment.