Skip to content

Commit 499943c

Browse files
committed
Black code style applied
1 parent: 66e91db · commit: 499943c

File tree

9 files changed: 194 additions (+194) and 98 deletions (-98)

ml_skeleton_py/etl/generate_dataset.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -61,18 +61,22 @@ def remove_outliers(df: pd.DataFrame) -> pd.DataFrame:
6161

6262
# Fit a basic local outlier factor to detect outliers
6363
lof = LocalOutlierFactor()
64-
df_outlier_removed['is_outlier'] = lof.fit_predict(
65-
df_outlier_removed[["V10", "V12", "V14"]])
64+
df_outlier_removed["is_outlier"] = lof.fit_predict(
65+
df_outlier_removed[["V10", "V12", "V14"]]
66+
)
6667

6768
df_outlier_removed = df_outlier_removed[
68-
df_outlier_removed.is_outlier != -1] # -1 represents outliers
69+
df_outlier_removed.is_outlier != -1
70+
] # -1 represents outliers
6971

7072
# Report number of removed rows
7173
n_filtered_rows = df_outlier_removed.shape[0]
72-
logger.info("{} outliers are filtered out of {} rows."
73-
.format(n_rows - n_filtered_rows, n_filtered_rows)
74-
)
74+
logger.info(
75+
"{} outliers are filtered out of {} rows.".format(
76+
n_rows - n_filtered_rows, n_filtered_rows
77+
)
78+
)
7579

7680
# Remove temporary is_outlier column
77-
df_outlier_removed = df_outlier_removed.drop('is_outlier', axis=1)
81+
df_outlier_removed = df_outlier_removed.drop("is_outlier", axis=1)
7882
return df_outlier_removed

ml_skeleton_py/model/train.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,9 +20,7 @@
2020
logging.getLogger().setLevel(logging.INFO)
2121

2222

23-
def train(
24-
dataset_loc: str, model_dir: str, model_name: str = "lr"
25-
) -> None:
23+
def train(dataset_loc: str, model_dir: str, model_name: str = "lr") -> None:
2624
"""
2725
Train models using X_train and y_train with a specific classifier.
2826
@@ -72,8 +70,10 @@ def train(
7270

7371

7472
def dump_model(
75-
pipeline: sklearn.pipeline, model_name: str,
76-
training_score: np.ndarray, model_dir: str
73+
pipeline: sklearn.pipeline,
74+
model_name: str,
75+
training_score: np.ndarray,
76+
model_dir: str,
7777
) -> None:
7878
"""
7979
Dump serialized trained pipeline to disk

ml_skeleton_py/settings.py

Lines changed: 20 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -7,40 +7,41 @@
77
# Directories
88
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
99

10-
DATA_DIR = os.path.join(ROOT_DIR, 'data')
10+
DATA_DIR = os.path.join(ROOT_DIR, "data")
1111

12-
DATA_RAW = os.path.join(DATA_DIR, 'raw')
12+
DATA_RAW = os.path.join(DATA_DIR, "raw")
1313

14-
DATA_TRANSFORMED = os.path.join(DATA_DIR, 'transformed')
14+
DATA_TRANSFORMED = os.path.join(DATA_DIR, "transformed")
1515

16-
DATA_STAGING = os.path.join(DATA_DIR, 'staging')
16+
DATA_STAGING = os.path.join(DATA_DIR, "staging")
1717

18-
DATA_PREDICTIONS = os.path.join(DATA_DIR, 'predictions')
18+
DATA_PREDICTIONS = os.path.join(DATA_DIR, "predictions")
1919

20-
ETL_DIR = os.path.join(ROOT_DIR, 'ml_skeleton_py', 'etl')
20+
ETL_DIR = os.path.join(ROOT_DIR, "ml_skeleton_py", "etl")
2121

22-
MODEL_DIR = os.path.join(ROOT_DIR, 'models')
22+
MODEL_DIR = os.path.join(ROOT_DIR, "models")
2323

24-
MODEL_METADATA_DIR = os.path.join(ROOT_DIR, 'models', 'metadata')
24+
MODEL_METADATA_DIR = os.path.join(ROOT_DIR, "models", "metadata")
2525

2626
# Model Variables
27-
TARGET_VARIABLE = 'Class'
27+
TARGET_VARIABLE = "Class"
2828

29-
DATASET_NAME = 'creditcard.csv'
29+
DATASET_NAME = "creditcard.csv"
3030

3131
# ---------- PYTEST VARIABLES ---------
3232
# TEST Variables
33-
TEST_DATASET_NAME = 'sample_creditcard.csv'
33+
TEST_DATASET_NAME = "sample_creditcard.csv"
3434

3535
# TEST DIRECTORIES
36-
ASSETS_DIR = os.path.join(ROOT_DIR, 'tests', 'assets')
37-
38-
EXPECTED_TEMP_TRANSFORMED_DATA_LOC = os.path.join(ASSETS_DIR, "transformed",
39-
"temp_sample_creditcard.csv")
40-
EXPECTED_TRANSFORMED_DATA_LOC = os.path.join(ASSETS_DIR, "transformed",
41-
TEST_DATASET_NAME)
42-
UNEXPECTED_TRANSFORMED_DATA_LOC = os.path.join(ASSETS_DIR, "transformed",
43-
"dummy.csv")
36+
ASSETS_DIR = os.path.join(ROOT_DIR, "tests", "assets")
37+
38+
EXPECTED_TEMP_TRANSFORMED_DATA_LOC = os.path.join(
39+
ASSETS_DIR, "transformed", "temp_sample_creditcard.csv"
40+
)
41+
EXPECTED_TRANSFORMED_DATA_LOC = os.path.join(
42+
ASSETS_DIR, "transformed", TEST_DATASET_NAME
43+
)
44+
UNEXPECTED_TRANSFORMED_DATA_LOC = os.path.join(ASSETS_DIR, "transformed", "dummy.csv")
4445

4546
EXPECTED_RAW_DATA_LOC = os.path.join(ASSETS_DIR, "raw", TEST_DATASET_NAME)
4647
UNEXPECTED_RAW_DATA_LOC = os.path.join(ASSETS_DIR, "raw", "dummy.csv")

scripts/generate_dataset.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,11 @@ def generate() -> None:
1414
Load the dataset, remove outliers and store in data directory.
1515
"""
1616
parser = argparse.ArgumentParser()
17-
parser.add_argument("--dataset", default="creditcard.csv",
18-
help="raw dataset to generate train and test data")
17+
parser.add_argument(
18+
"--dataset",
19+
default="creditcard.csv",
20+
help="raw dataset to generate train and test data",
21+
)
1922
args = parser.parse_args()
2023

2124
input_location = os.path.join(s.DATA_RAW, args.dataset)

scripts/train.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,16 @@ def train() -> None:
88
Train a model on a dataset and store the model.
99
"""
1010
parser = argparse.ArgumentParser()
11-
parser.add_argument("--dataset",
12-
default="creditcard.csv",
13-
help="raw dataset to generate train and test data")
14-
parser.add_argument("--model-name",
15-
default="lr",
16-
help="the serialized model name default lr "
17-
"referring to logistic regression")
11+
parser.add_argument(
12+
"--dataset",
13+
default="creditcard.csv",
14+
help="raw dataset to generate train and test data",
15+
)
16+
parser.add_argument(
17+
"--model-name",
18+
default="lr",
19+
help="the serialized model name default lr " "referring to logistic regression",
20+
)
1821
args = parser.parse_args()
1922
model.train(args.dataset, args.model_name)
2023

setup.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,14 @@
88
"tox>=3.14.0",
99
"flake8>=3.7.9",
1010
"flake8-annotations>=1.1.3",
11-
"pytest-cov>=2.8.1"
11+
"pytest-cov>=2.8.1",
1212
]
1313

1414
serve_deps = [
1515
"dploy-kickstart>=0.1.5",
1616
]
1717

18-
extras = {
19-
"test": test_deps,
20-
"serve": serve_deps
21-
}
18+
extras = {"test": test_deps, "serve": serve_deps}
2219

2320
setup(
2421
name="ml-skeleton-py",
@@ -28,9 +25,7 @@
2825
author_email="[email protected]",
2926
description="Description of my ml-skeleton package",
3027
packages=find_packages(),
31-
install_requires=[
32-
"pandas>=1.1.0",
33-
"scikit-learn>=0.23.2"
34-
],
28+
install_requires=["pandas>=1.1.0", "scikit-learn>=0.23.2"],
3529
tests_require=test_deps,
36-
extras_require=extras)
30+
extras_require=extras,
31+
)

tests/test_generate_dataset.py

Lines changed: 35 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,39 @@
88
from ml_skeleton_py.etl.generate_dataset import remove_outliers
99

1010
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
11-
EXPECTED_HEADERS = ['Time', 'V1', 'V2', 'V3', 'V4',
12-
'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
13-
'V11', 'V12', 'V13', 'V14', 'V15', 'V16',
14-
'V17', 'V18', 'V19', 'V20', 'V21', 'V22',
15-
'V23', 'V24', 'V25', 'V26', 'V27', 'V28',
16-
'Amount', 'Class']
11+
EXPECTED_HEADERS = [
12+
"Time",
13+
"V1",
14+
"V2",
15+
"V3",
16+
"V4",
17+
"V5",
18+
"V6",
19+
"V7",
20+
"V8",
21+
"V9",
22+
"V10",
23+
"V11",
24+
"V12",
25+
"V13",
26+
"V14",
27+
"V15",
28+
"V16",
29+
"V17",
30+
"V18",
31+
"V19",
32+
"V20",
33+
"V21",
34+
"V22",
35+
"V23",
36+
"V24",
37+
"V25",
38+
"V26",
39+
"V27",
40+
"V28",
41+
"Amount",
42+
"Class",
43+
]
1744

1845
EXPECTED_N_HEADERS = len(EXPECTED_HEADERS)
1946
UNEXPECTED_N_HEADERS = len(EXPECTED_HEADERS) - 10
@@ -22,10 +49,8 @@
2249
@pytest.mark.parametrize(
2350
"raw_data_loc, transformed_data_loc, error_expected",
2451
[
25-
(s.EXPECTED_RAW_DATA_LOC,
26-
s.EXPECTED_TEMP_TRANSFORMED_DATA_LOC, False),
27-
(s.UNEXPECTED_RAW_DATA_LOC,
28-
s.EXPECTED_TEMP_TRANSFORMED_DATA_LOC, True),
52+
(s.EXPECTED_RAW_DATA_LOC, s.EXPECTED_TEMP_TRANSFORMED_DATA_LOC, False),
53+
(s.UNEXPECTED_RAW_DATA_LOC, s.EXPECTED_TEMP_TRANSFORMED_DATA_LOC, True),
2954
],
3055
)
3156
def test_generate(

tests/test_predict.py

Lines changed: 97 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -7,44 +7,110 @@
77
from ml_skeleton_py.model import train
88
from ml_skeleton_py.model.predict import load_model
99

10-
features_1 = [-0.51056756, -4.76915766, 4.17380769, -6.18019076,
11-
5.54479825, -6.07673393, -2.83891627, -12.14473542,
12-
11.95168444, -5.89969894, -12.93298794, 4.58542528,
13-
-13.04122239, 0.80026314, -15.05300726, 0.80569352,
14-
-11.45602963, -23.21915935, -7.54677977, 3.40316942,
15-
0.04731062, 6.27192486, 0.1867837, -5.35273187,
16-
0.65159854, -0.06661776, 0.71556094, 1.68012583,
17-
-1.25077894, -0.30741284]
18-
19-
features_2 = [-0.51056756, -4.76915766, 4.17380769, -6.18019076,
20-
5.54479825, -6.07673393, -2.83891627, -12.14473542,
21-
11.95168444, -5.89969894, -12.93298794, 4.58542528,
22-
-13.04122239, 0.80026314, -15.05300726, 0.80569352,
23-
-11.45602963, -23.21915935, -7.54677977, 3.40316942,
24-
0.04731062, 6.27192486, 0.1867837, -5.35273187,
25-
0.65159854, -0.06661776, 0.71556094, 1.68012583,
26-
]
27-
28-
features_3 = [-0.51056756, -4.76915766, 4.17380769, -6.18019076,
29-
5.54479825, None, None, None,
30-
11.95168444, -5.89969894, -12.93298794, 4.58542528,
31-
-13.04122239, 0.80026314, -15.05300726, 0.80569352,
32-
-11.45602963, -23.21915935, -7.54677977, 3.40316942,
33-
0.04731062, 6.27192486, 0.1867837, -5.35273187,
34-
0.65159854, -0.06661776, 0.71556094, 1.68012583,
35-
-1.25077894, -0.30741284]
10+
features_1 = [
11+
-0.51056756,
12+
-4.76915766,
13+
4.17380769,
14+
-6.18019076,
15+
5.54479825,
16+
-6.07673393,
17+
-2.83891627,
18+
-12.14473542,
19+
11.95168444,
20+
-5.89969894,
21+
-12.93298794,
22+
4.58542528,
23+
-13.04122239,
24+
0.80026314,
25+
-15.05300726,
26+
0.80569352,
27+
-11.45602963,
28+
-23.21915935,
29+
-7.54677977,
30+
3.40316942,
31+
0.04731062,
32+
6.27192486,
33+
0.1867837,
34+
-5.35273187,
35+
0.65159854,
36+
-0.06661776,
37+
0.71556094,
38+
1.68012583,
39+
-1.25077894,
40+
-0.30741284,
41+
]
42+
43+
features_2 = [
44+
-0.51056756,
45+
-4.76915766,
46+
4.17380769,
47+
-6.18019076,
48+
5.54479825,
49+
-6.07673393,
50+
-2.83891627,
51+
-12.14473542,
52+
11.95168444,
53+
-5.89969894,
54+
-12.93298794,
55+
4.58542528,
56+
-13.04122239,
57+
0.80026314,
58+
-15.05300726,
59+
0.80569352,
60+
-11.45602963,
61+
-23.21915935,
62+
-7.54677977,
63+
3.40316942,
64+
0.04731062,
65+
6.27192486,
66+
0.1867837,
67+
-5.35273187,
68+
0.65159854,
69+
-0.06661776,
70+
0.71556094,
71+
1.68012583,
72+
]
73+
74+
features_3 = [
75+
-0.51056756,
76+
-4.76915766,
77+
4.17380769,
78+
-6.18019076,
79+
5.54479825,
80+
None,
81+
None,
82+
None,
83+
11.95168444,
84+
-5.89969894,
85+
-12.93298794,
86+
4.58542528,
87+
-13.04122239,
88+
0.80026314,
89+
-15.05300726,
90+
0.80569352,
91+
-11.45602963,
92+
-23.21915935,
93+
-7.54677977,
94+
3.40316942,
95+
0.04731062,
96+
6.27192486,
97+
0.1867837,
98+
-5.35273187,
99+
0.65159854,
100+
-0.06661776,
101+
0.71556094,
102+
1.68012583,
103+
-1.25077894,
104+
-0.30741284,
105+
]
36106

37107
# Need to train first to test predict
38108
train(s.EXPECTED_TRANSFORMED_DATA_LOC, s.EXPECTED_MODEL_LOC, "test_model")
39109

40110

41111
@pytest.mark.parametrize(
42112
"features, error_expected",
43-
[
44-
(features_1, False),
45-
(features_2, True),
46-
(features_3, True),
47-
],
113+
[(features_1, False), (features_2, True), (features_3, True), ],
48114
)
49115
def test_pred(features: list, error_expected: bool) -> None:
50116
"""

0 commit comments

Comments
 (0)