tryolabs · Ludecan · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/.github/workflows/style-checks.yaml b/.github/workflows/style-checks.yaml
@@ -7,10 +7,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string, not a float
 
       - name: Install Poetry
         uses: snok/install-poetry@v1
@@ -21,20 +23,20 @@ jobs:
       # Allow loading a cached venv created in a previous run if the lock file is identical
       - name: Load cached venv if it exists
         id: venv-cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: .venv
           key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock', '**/pyproject.toml') }}
 
       - name: Install dependencies
         if: steps.venv-cache.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction
+        run: poetry install --no-interaction --only dev
 
-      - name: Check format with black
-        run: poetry run black --check .
+      - name: Check linting with Ruff
+        run: poetry run ruff check
 
-      - name: Check style with flake8
-        run: poetry run flake8 .
+      - name: Check format with Ruff
+        run: poetry run ruff format --check
 
-      - name: Check import sorting with isort
-        run: poetry run isort --check .
+      - name: Check docstring coverage
+        run: shopt -s globstar && poetry run docstr-coverage ./**/*.py --fail-under 20 --verbose=2 --skip-file-doc
diff --git a/.github/workflows/unit-testing.yaml b/.github/workflows/unit-testing.yaml
@@ -7,10 +7,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string, not a float
 
       - name: Install Poetry
         uses: snok/install-poetry@v1
@@ -21,7 +23,7 @@ jobs:
       # Allow loading a cached venv created in a previous run if the lock file is identical
       - name: Load cached venv if it exists
         id: venv-cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: .venv
           key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock', '**/pyproject.toml') }}

diff --git a/.gitignore b/.gitignore
@@ -8,7 +8,6 @@ __pycache__/
 
 *.joblib
 *.bin
-*.json
 *.pkl
 
 # Autogluon
@@ -18,8 +17,6 @@ AutogluonModels/
 mlruns/
 runs/
 
-examples/
-
 # Distribution / packaging
 .Python
 build/

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -1,9 +1,7 @@
 {
     "recommendations": [
         "ms-python.python",
-        "ms-python.isort",
-        "ms-python.flake8",
-        "ms-python.black-formatter",
-        "njpwerner.autodocstring",
+        "charliermarsh.ruff",
+        "njpwerner.autodocstring"
     ]
-}
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,16 +1,6 @@
 {
-    //
-    // Set correct python path to venv's one
-    //
     "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
-    //`
-    // Very optional: type checking. Remove the line if your project doesn't really use or respect
-    // type hints. You should give it a try, though. They're great.
-    //
-    "python.analysis.typeCheckingMode": "basic",
-    //
-    // Hide .venv from explorer and searchbar
-    //
+    "python.analysis.typeCheckingMode": "off",
     "files.watcherExclude": {
         "**/.venv/**": true,
         "**/__pycache__/**": true
@@ -23,40 +13,30 @@
         "**/.venv/": true,
         "**/__pycache__/**": true
     },
-    //
-    // Linting and formatting
-    //
     "editor.formatOnSave": true,
     "editor.codeActionsOnSave": {
+        "source.fixAll": "explicit",
         "source.organizeImports": "explicit"
     },
-    "black-formatter.importStrategy": "fromEnvironment",
-    "isort.importStrategy": "fromEnvironment",
-    "flake8.importStrategy": "fromEnvironment",
-    "isort.args": [
-        "--settings-path",
-        "${workspaceFolder}/pyproject.toml"
-    ],
-    "flake8.args": [
-        "--config=${workspaceFolder}/.flake8"
-    ],
     "editor.rulers": [
-        100 // if changing line length, also do it in .flake8 and pyproject.toml's [tool.black] section
+        100
     ],
     "editor.wordWrapColumn": 100,
     "files.trimFinalNewlines": true,
     "files.trimTrailingWhitespace": true,
-    //
-    // Jupyter
-    //
     "jupyter.notebookFileRoot": "${workspaceFolder}",
     "jupyter.interactiveWindow.textEditor.executeSelection": true,
-    // TODO: this setting is showing a deprecation warning. Maybe we should drop it?
-    "jupyter.generateSVGPlots": true,
     "autoDocstring.docstringFormat": "numpy",
     "python.testing.pytestArgs": [
         "tests"
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
+    "[python]": {
+        "editor.defaultFormatter": "charliermarsh.ruff"
+    },
+    "ruff.organizeImports": true,
+    "ruff.fixAll": true,
+    "ruff.importStrategy": "fromEnvironment",
+    "ruff.lint.run": "onSave"
 }
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@ The key components of the pipeline include Pipeline Steps, which are predefined
 - Evaluation metrics calculation and reporting
 - Explainable AI (XAI) dashboard for model interpretability
 - Extensible architecture for adding custom pipeline steps
+- MLOps best practices for ensuring consistent results between training and serving
 
 ## Installation
 

diff --git a/examples/ames_housing/configs/1_ames_housing_baseline.json b/examples/ames_housing/configs/1_ames_housing_baseline.json
@@ -25,9 +25,9 @@
             {
                 "step_type": "TabularSplitStep",
                 "parameters": {
-                    "train_percentage": 0.7,
+                    "train_percentage": 0.6,
                     "validation_percentage": 0.2,
-                    "test_percentage": 0.1
+                    "test_percentage": 0.2
                 }
             },
             {

diff --git a/examples/ames_housing/configs/2_ames_housing_hp_tuning.json b/examples/ames_housing/configs/2_ames_housing_hp_tuning.json
@@ -21,9 +21,9 @@
             {
                 "step_type": "TabularSplitStep",
                 "parameters": {
-                    "train_percentage": 0.7,
+                    "train_percentage": 0.6,
                     "validation_percentage": 0.2,
-                    "test_percentage": 0.1
+                    "test_percentage": 0.2
                 }
             },
             {

diff --git a/examples/ames_housing/configs/3_ames_housing_hp_tuned.json b/examples/ames_housing/configs/3_ames_housing_hp_tuned.json
@@ -25,9 +25,9 @@
             {
                 "step_type": "TabularSplitStep",
                 "parameters": {
-                    "train_percentage": 0.7,
+                    "train_percentage": 0.6,
                     "validation_percentage": 0.2,
-                    "test_percentage": 0.1
+                    "test_percentage": 0.2
                 }
             },
             {
@@ -45,12 +45,12 @@
                         "eval_metric": "rmse",
                         "tree_method": "hist",
                         "early_stopping_rounds": 20,
-                        "max_depth": 15,
-                        "eta": 0.08311222976823307,
-                        "n_estimators": 374,
-                        "min_child_weight": 6,
-                        "subsample": 0.5272883435658126,
-                        "colsample_bytree": 0.946222179438676
+                        "max_depth": 5,
+                        "eta": 0.15805002999964826,
+                        "n_estimators": 1019,
+                        "min_child_weight": 3,
+                        "subsample": 0.8807043595486204,
+                        "colsample_bytree": 0.8754815170751743
                     }
                 }
             },

diff --git a/examples/ames_housing/configs/4_ames_housing_autogluon.json b/examples/ames_housing/configs/4_ames_housing_autogluon.json
@@ -0,0 +1,70 @@
+{
+    "pipeline": {
+        "name": "XGBoostTrainingPipeline",
+        "description": "Training pipeline for XGBoost models.",
+        "parameters": {
+            "save_data_path": "ames_housing.pkl",
+            "target": "SalePrice",
+            "task": "regression",
+            "tracking": {
+                "experiment": "ames_housing",
+                "run": "AutoGluon"
+            }
+        },
+        "steps": [
+            {
+                "step_type": "GenerateStep",
+                "parameters": {
+                    "train_path": "examples/ames_housing/data/train.csv",
+                    "predict_path": "examples/ames_housing/data/test.csv",
+                    "drop_columns": [
+                        "Id"
+                    ],
+                    "optimize_dtypes": true
+                }
+            },
+            {
+                "step_type": "TabularSplitStep",
+                "parameters": {
+                    "train_percentage": 0.6,
+                    "validation_percentage": 0.2,
+                    "test_percentage": 0.2
+                }
+            },
+            {
+                "step_type": "CleanStep"
+            },
+            {
+                "step_type": "EncodeStep"
+            },
+            {
+                "step_type": "AutoGluonModelStep",
+                "parameters": {
+                    "model_class": "AutoGluon",
+                    "autogluon_create_params": {
+                        "verbosity": 2
+                    },
+                    "autogluon_fit_params": {
+                        "presets": [
+                            "high_quality",
+                            "optimize_for_deployment"
+                        ],
+                        "save_bag_folds": true,
+                        "time_limit": 1800,
+                        "num_stack_levels": 1,
+                        "dynamic_stacking": false
+                    }
+                }
+            },
+            {
+                "step_type": "CalculateMetricsStep"
+            },
+            {
+                "step_type": "ExplainerDashboardStep",
+                "parameters": {
+                    "enable_step": false
+                }
+            }
+        ]
+    }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,7 +8,6 @@ __pycache__/ @@
     *.joblib
     *.bin
-    *.json
     *.pkl
     # Autogluon
@@ Expand All / @@ -18,8 +17,6 @@ AutogluonModels/ @@
     mlruns/
     runs/
-    examples/
     # Distribution / packaging
     .Python
     build/
@@ Expand Down @@