Merge pull request #19 from datarootsio/refac

baturayo · web-flow · commit 014fb4d32663 · 2021-08-26T16:04:32.000+02:00
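Refactor: add a `make install` target and use it in CI and docs, test on Python 3.9, bump dependency minimum versions (dropping tox), and apply black formatting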
diff --git a/.github/workflows/test-and-train.yml b/.github/workflows/test-and-train.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.7, 3.8, 3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -22,8 +22,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install -e ".[test]"
+          make install
       - name: Lint with flake8 & black
         run: |
           make lint
@@ -50,8 +49,7 @@ jobs:
           python-version: 3.7
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install -e .
+          make install
       - name: Train the model
         run: |
           make train
diff --git a/HOWTO.md b/HOWTO.md
@@ -14,12 +14,12 @@ While the project is heavily opinionated, opinions are welcomed to be discussed:
     ```
 
 2. Install dependencies using [pip](https://pip.pypa.io/en/stable/installing/). The following command
-will install the dependencies from `setup.py`. Note that installing dependencies with `-e` 
+will install the dependencies from `setup.py`. Under the hood, `make install` runs `pip install -e ".[test, serve]"`. Note that installing dependencies with `-e`
 editable mode is needed to properly run unit tests. `[test, serve]` is optional. `test` refers to
 unit test dependencies and `serve` refers to deployment dependencies.
 
     ```bash
-    pip install -e ".[test, serve]"
+    make install
     ```
 
 ## Running the project
@@ -49,7 +49,7 @@ Note the dependency: `generate-dataset` > `train` > `serve`.
 
 ## Docker
 
-Currently you can find the following docker files:  
+Currently, you can find the following docker files:  
 1. `jupyter.Dockerfile` builds an image for running notebooks.  
 2. `test.Dockerfile` builds an image to run all tests in (`make test-docker`).
 3. `serve.Dockerfile` build an image to serve the trained model via a REST api.
diff --git a/Makefile b/Makefile
@@ -13,6 +13,8 @@ DATASET := data/transformed/creditcard.csv
 ###############################################################
 # COMMANDS                                                    #
 ###############################################################
+install: ## install dependencies
+	pip install -e ".[test, serve]"
 
 clean: ## clean artifacts
 	@echo ">>> cleaning files"
@@ -32,7 +34,7 @@ serve: ## serve trained model with a REST API using dploy-kickstart
 	@echo ">>> serving the trained model"
 	kickstart serve -e ml_skeleton_py/model/predict.py -l .
 
-run-pipeline: clean generate-dataset train serve  ## clean artifacts -> generate dataset -> train -> serve
+run-pipeline: install clean generate-dataset train serve  ## install dependencies -> clean artifacts -> generate dataset -> train -> serve
 
 lint: ## flake8 linting and black code style
 	@echo ">>> black files"
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 
 [![maintained by dataroots](https://img.shields.io/badge/maintained%20by-dataroots-%2300b189)](https://dataroots.io)
-[![PythonVersion](https://img.shields.io/badge/python-3.7%20%7C%203.8-blue)](https://img.shields.io/badge/python-3.7%20%7C%203.8-blue)
+[![PythonVersion](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)
 [![tests](https://github.com/datarootsio/ml-skeleton-py/workflows/tests/badge.svg?branch=master)](https://github.com/datarootsio/ml-skeleton-py/actions)
 [![Codecov](https://codecov.io/github/datarootsio/ml-skeleton-py/badge.svg?branch=master&service=github)](https://github.com/datarootsio/ml-skeleton-py/actions)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
diff --git a/ml_skeleton_py/etl/generate_dataset.py b/ml_skeleton_py/etl/generate_dataset.py
@@ -68,7 +68,7 @@ def remove_outliers(df: pd.DataFrame, **kwargs: int) -> pd.DataFrame:
 
     df_outlier_removed = df_outlier_removed[
         df_outlier_removed.is_outlier != -1
-        ]  # -1 represents outliers
+    ]  # -1 represents outliers
 
     # Report number of removed rows
     n_filtered_rows = df_outlier_removed.shape[0]
diff --git a/ml_skeleton_py/model/train.py b/ml_skeleton_py/model/train.py
@@ -63,10 +63,7 @@ def train(dataset_loc: str, model_dir: str, model_name: str = "lr") -> None:
 
     auc_roc = round(training_score.mean(), 2)
     logger.info(f"Classifier: {pipeline.__class__.__name__}")
-    logger.info(
-        "Has a training score "
-        + f"of {auc_roc} roc_auc"
-    )
+    logger.info("Has a training score " + f"of {auc_roc} roc_auc")
     check_performance(auc_roc)
     # Serialize and dump trained pipeline to disk
     pred_result = {
@@ -83,9 +80,11 @@ def train(dataset_loc: str, model_dir: str, model_name: str = "lr") -> None:
 
 def check_performance(auc_roc: float) -> None:
     if auc_roc < s.EXPECTED_MIN_AUC:
-        raise Exception("The auc roc is less than the expected, "
-                        "please check your data manipulation or "
-                        "training parameters!")
+        raise Exception(
+            "The auc roc is less than the expected, "
+            "please check your data manipulation or "
+            "training parameters!"
+        )
     else:
         # Performance is more than the expected
         pass
diff --git a/setup.py b/setup.py
@@ -2,14 +2,13 @@
 
 
 test_deps = [
-    "pytest>=5.3.5",
-    "pytest-flask>=1.0.0",
-    "pip>=20.0.0",
-    "tox>=3.14.0",
-    "flake8>=3.7.9",
-    "flake8-annotations>=1.1.3",
-    "pytest-cov>=2.8.1",
-    "black>=19.10b0"
+    "pytest>=6.2.3",
+    "pytest-flask>=1.2.0",
+    "pip>=21.0.1",
+    "flake8>=3.9.2",
+    "flake8-annotations>=2.6.2",
+    "pytest-cov>=2.12.1",
+    "black>=21.7b0"
 ]
 
 serve_deps = [
@@ -26,7 +25,7 @@
     author_email="info@dataroots.io",
     description="Description of my ml-skeleton package",
     packages=find_packages(),
-    install_requires=["pandas>=1.1.0", "scikit-learn>=0.23.2"],
+    install_requires=["pandas>=1.3.2", "scikit-learn>=0.24.2"],
     tests_require=test_deps,
     extras_require=extras,
 )