update readme
diegomarvid committed Mar 26, 2024
1 parent 58bca73 commit 6276cc5
Showing 1 changed file (README.md) with 24 additions and 78 deletions.
To install the Pipeline Library, you need to have Python 3.9 or higher and Poetry installed.

Here's an example of how to use the library to run an XGBoost pipeline:
1. Create a `config.json` file with the following content:
```json
{
    "pipeline": {
        ...
        "steps": [
            {
                "step_type": "GenerateStep",
                "parameters": {
                    "train_path": "examples/ocf/data/trainset_forecast.parquet",
                    "predict_path": "examples/ocf/data/testset_forecast.parquet"
                }
            },
            {
                "step_type": "CleanStep",
                "parameters": {
                    "drop_na_columns": [
                        "average_power_kw",
                        "diffuse_radiation"
                    ],
                    "drop_ids": {
                        "ss_id": [
                            ...
                        ]
                    }
                }
            },
            ...
            {
                "step_type": "FitModelStep",
                "parameters": {
                    "model_class": "XGBoostModel",
                    "target": "average_power_kw",
                    "drop_columns": [
                        "ss_id",
                        "operational_at",
                        "total_energy_kwh"
                    ],
                    "model_params": {
                        "max_depth": 12,
                        "eta": 0.12410097733370863,
                        "objective": "reg:squarederror",
                        "eval_metric": "mae",
                        "n_jobs": -1,
                        "n_estimators": 40,
                        "min_child_weight": 7,
                        "subsample": 0.8057743223537057,
                        "colsample_bytree": 0.6316852278944352,
                        "early_stopping_rounds": 10
                    },
                    "save_path": "model_forecast.joblib"
                }
            },
            {
                "step_type": "PredictStep",
                "parameters": {}
            },
            {
                "step_type": "CalculateTrainMetricsStep",
                "parameters": {}
            }
        ]
    }
}
```
The `...` placeholders mark parts of the configuration omitted here for brevity.
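Because the step wiring lives entirely in this JSON file, a malformed config only fails once the pipeline is already running. A quick structural check with the standard library can catch problems earlier. This is a minimal sketch that assumes only the `{"pipeline": {"steps": [...]}}` layout shown above; `validate_config` is an illustrative helper, not part of the library:

```python
import json


def validate_config(raw: str) -> list[str]:
    """Return the list of step types, raising on obviously broken configs."""
    config = json.loads(raw)
    steps = config["pipeline"]["steps"]
    for step in steps:
        if "parameters" not in step:
            raise ValueError(f"step {step['step_type']!r} is missing 'parameters'")
    return [step["step_type"] for step in steps]


example = """
{
    "pipeline": {
        "steps": [
            {"step_type": "GenerateStep", "parameters": {}},
            {"step_type": "FitModelStep", "parameters": {}}
        ]
    }
}
"""
print(validate_config(example))  # ['GenerateStep', 'FitModelStep']
```

A check like this is cheap to run in CI, so a typo in a step name or a missing `parameters` block is caught before any data is loaded.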
2. Run the pipeline in train mode using the following code:
```python
import logging

from pipeline_lib import Pipeline

logging.basicConfig(level=logging.INFO)

data = Pipeline.from_json("config.json").run(is_train=True)
```
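The train pipeline finishes with `CalculateTrainMetricsStep`, and the example model is tuned for `mae`. For reference, mean absolute error is just the average absolute gap between targets and predictions; this plain-Python illustration is independent of the library:

```python
def mean_absolute_error(y_true, y_pred):
    # MAE = (1/n) * sum over i of |y_true[i] - y_pred[i]|
    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")
    return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)


# Three toy power readings (kW) vs. predictions: absolute errors 1.0, 1.0, 3.0.
print(mean_absolute_error([2.0, 4.0, 6.0], [1.0, 5.0, 9.0]))  # 5/3 ~ 1.667
```

Unlike squared-error metrics, MAE penalizes all errors linearly, which makes it easy to read in the target's own units (kW here).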
3. To run the pipeline in prediction mode, set `is_train` to `False`:
```python
data = Pipeline.from_json("config.json").run(is_train=False)
```
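Since train and prediction mode differ only in the `is_train` flag, a small CLI wrapper makes switching convenient. This is a generic sketch: it only parses the flag, and `is_train=not args.predict` would be passed to the `run` call shown above:

```python
import argparse


def build_parser() -> argparse.ArgumentParser:
    # --predict flips the pipeline from train mode to prediction mode.
    parser = argparse.ArgumentParser(description="Run a pipeline config.")
    parser.add_argument("config", help="path to the pipeline JSON config")
    parser.add_argument("--predict", action="store_true",
                        help="run in prediction mode instead of train mode")
    return parser


args = build_parser().parse_args(["config.json", "--predict"])
print(args.config, not args.predict)  # config.json False  -> is_train=False
```

With this, `python run.py config.json` trains and `python run.py config.json --predict` predicts, without editing any code between runs.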
The library allows users to define custom steps for data generation, cleaning, and preprocessing, which can be seamlessly integrated into the pipeline.
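A custom step could look roughly like the sketch below. Note that `PipelineStep`, the `execute` method, and the dict-of-rows data format are all assumptions made for illustration; check the library's actual base class before subclassing:

```python
class PipelineStep:
    """Assumed base class: each step transforms the pipeline's data payload."""

    def execute(self, data: dict) -> dict:
        raise NotImplementedError


class DropNegativePowerStep(PipelineStep):
    """Hypothetical cleaning step: remove rows with negative power readings."""

    def __init__(self, column: str):
        self.column = column

    def execute(self, data: dict) -> dict:
        # Keep only rows whose reading in the configured column is non-negative.
        data["rows"] = [r for r in data["rows"] if r[self.column] >= 0]
        return data


step = DropNegativePowerStep("average_power_kw")
result = step.execute({"rows": [{"average_power_kw": 1.5},
                                {"average_power_kw": -0.2}]})
print(len(result["rows"]))  # 1
```

The pattern is the same regardless of the exact base-class API: encapsulate one transformation per step, take its settings through the constructor (so they can come from the JSON `parameters` block), and return the transformed data for the next step.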
## Contributing
Contributions to the Pipeline Library are welcome! If you encounter any issues, have suggestions for improvements, or want to add new features, please open an issue or submit a pull request on the GitHub repository.
