1
- import pandas as pd
1
+ from decimal import Decimal
2
2
from pathlib import Path
3
3
4
+ import pandas as pd
5
+
4
6
from .Institution import Institution
5
7
from .MappingDatabase import MappingDatabase
6
8
from .read_config import read_config
@@ -37,7 +39,16 @@ def read_transaction(self, file_name: str) -> pd.DataFrame:
37
39
pd.DataFrame
38
40
A dataframe after pre-processing.
39
41
"""
40
- df = pd .read_csv (file_name )
42
+ converters = {
43
+ "利用日" : pd .to_datetime ,
44
+ "利用店名・商品名" : str ,
45
+ "利用者" : str ,
46
+ # "支払方法": "Payment method",
47
+ # "利用金額": "Amount",
48
+ # "支払手数料": "Commission paid",
49
+ "支払総額" : str ,
50
+ }
51
+ df = pd .read_csv (file_name , converters = converters )
41
52
print (f"Found { len (df .index )} transactions in { file_name } " )
42
53
43
54
# Rename column names to English.
@@ -55,20 +66,25 @@ def read_transaction(self, file_name: str) -> pd.DataFrame:
55
66
}
56
67
df .rename (columns = column_names , inplace = True )
57
68
58
- df ["date" ] = pd .to_datetime (df ["date" ], format = "%Y/%m/%d" )
59
-
60
- # Remove rows with empty 支払総額 cell.
61
- # These are extra info such as name of ETC gate or currency exchange rate.
62
- # TODO record as metadata
63
- extra = df .loc [pd .isnull (df ["amount" ])]
64
- df .drop (extra .index , inplace = True )
65
- # Convert to int type because currency is JPY
66
- df = df .astype ({"amount" : int })
69
+ # Each ETC transaction has a second row.
70
+ # Update each first row's memo to be the concatenation of the two memos.
71
+ # Then delete the second rows.
72
+ etc_index = df .loc [df ["user" ] == "ETC" ].index
73
+ df .loc [etc_index , "memo" ] = (
74
+ df .loc [etc_index , "memo" ].values
75
+ + " "
76
+ + df .loc [etc_index + 1 , "memo" ].values
77
+ )
78
+ df .drop (df .loc [df ["user" ] == "" ].index , inplace = True )
67
79
68
80
# Remove rows with zero 支払総額. These are refunds.
81
+ # Also currency exchange rate(?)
69
82
# TODO record as metadata.
70
- refund = df .loc [df ["amount" ] == 0 ]
71
- df .drop (refund .index , inplace = True )
83
+ # refund = df.loc[df["amount"] == 0]
84
+ # df.drop(refund.index, inplace=True)
85
+
86
+ # TODO this will fail if the refund rows are not removed
87
+ df ["amount" ] = df ["amount" ].apply (Decimal )
72
88
73
89
# Reverse row order because the oldest transaction is on the bottom
74
90
# Note: the index column is also reversed
0 commit comments