1- import pandas as pd
1+ from decimal import Decimal
22from pathlib import Path
33
4+ import pandas as pd
5+
46from .Institution import Institution
57from .MappingDatabase import MappingDatabase
68from .read_config import read_config
@@ -37,7 +39,16 @@ def read_transaction(self, file_name: str) -> pd.DataFrame:
3739 pd.DataFrame
3840 A dataframe after pre-processing.
3941 """
40- df = pd .read_csv (file_name )
42+ converters = {
43+ "利用日" : pd .to_datetime ,
44+ "利用店名・商品名" : str ,
45+ "利用者" : str ,
46+ # "支払方法": "Payment method",
47+ # "利用金額": "Amount",
48+ # "支払手数料": "Commission paid",
49+ "支払総額" : str ,
50+ }
51+ df = pd .read_csv (file_name , converters = converters )
4152 print (f"Found { len (df .index )} transactions in { file_name } " )
4253
4354 # Rename column names to English.
@@ -55,20 +66,25 @@ def read_transaction(self, file_name: str) -> pd.DataFrame:
5566 }
5667 df .rename (columns = column_names , inplace = True )
5768
58- df ["date" ] = pd .to_datetime (df ["date" ], format = "%Y/%m/%d" )
59-
60- # Remove rows with empty 支払総額 cell.
61- # These are extra info such as name of ETC gate or currency exchange rate.
62- # TODO record as metadata
63- extra = df .loc [pd .isnull (df ["amount" ])]
64- df .drop (extra .index , inplace = True )
65- # Convert to int type because currency is JPY
66- df = df .astype ({"amount" : int })
69+ # An ETC transaction has a second row.
70+ # Update each first row's memo to be the concatenation of the two memos.
71+ # Then delete the second rows.
72+ etc_index = df .loc [df ["user" ] == "ETC" ].index
73+ df .loc [etc_index , "memo" ] = (
74+ df .loc [etc_index , "memo" ].values
75+ + " "
76+ + df .loc [etc_index + 1 , "memo" ].values
77+ )
78+ df .drop (df .loc [df ["user" ] == "" ].index , inplace = True )
6779
6880 # Remove rows with zero 支払総額. These are refunds.
81+ # Also currency exchange rate(?)
6982 # TODO record as metadata.
70- refund = df .loc [df ["amount" ] == 0 ]
71- df .drop (refund .index , inplace = True )
83+ # refund = df.loc[df["amount"] == 0]
84+ # df.drop(refund.index, inplace=True)
85+
86+ # TODO this will fail if the refund rows are not removed
87+ df ["amount" ] = df ["amount" ].apply (Decimal )
7288
7389 # Reverse row order because the oldest transaction is on the bottom
7490 # Note: the index column is also reversed
0 commit comments