From 1606b8f8a19bdde9152f32538c45072ad6d4ed84 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Wed, 28 Jun 2023 18:05:49 -0400 Subject: [PATCH 01/17] docs: updated docs for `settings` module and satisfied clippy --- src/algorithms/mod.rs | 15 +++++ src/lib.rs | 12 ++-- src/settings/knn_classifier_parameters.rs | 6 ++ src/settings/knn_regressor_parameters.rs | 6 ++ src/settings/mod.rs | 70 ++++++++++---------- src/settings/settings_struct.rs | 81 ++++++++++++++--------- src/settings/svc_parameters.rs | 14 ++-- src/settings/svr_parameters.rs | 6 ++ 8 files changed, 134 insertions(+), 76 deletions(-) diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index c1339b8..09fc94c 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -50,7 +50,22 @@ use smartcore::model_selection::CrossValidationResult; use crate::settings::FinalModel; use std::time::{Duration, Instant}; +/// Trait for wrapping models pub trait ModelWrapper { + /// Perform cross-validation and return the results + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `y` - The output data + /// * `settings` - The settings for the model + /// + /// # Returns + /// + /// * `CrossValidationResult` - The cross-validation results + /// * `Algorithm` - The algorithm used + /// * `Duration` - The time taken to perform the cross-validation + /// * `Vec` - The final model fn cv_model( x: &DenseMatrix, y: &Vec, diff --git a/src/lib.rs b/src/lib.rs index 00f3722..e6ea837 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -600,17 +600,17 @@ impl SupervisedModel { // Train the model // let model = LassoRegressorWrapper::train(&x_train, &y_train, &self.settings); - let model = (*algo.get_trainer())(&x_train, &y_train, &self.settings); + let model = algo.get_trainer()(&x_train, &y_train, &self.settings); // Score the model - let train_score = (*self.settings.get_metric())( + let train_score = self.settings.get_metric()( &y_train, - &(*algo.get_predictor())(&x_train, &model, &self.settings), + 
&algo.get_predictor()(&x_train, &model, &self.settings), // &LassoRegressorWrapper::predict(&x_train, &model, &self.settings), ); - let test_score = (*self.settings.get_metric())( + let test_score = self.settings.get_metric()( &y_test, - &(*algo.get_predictor())(&x_test, &model, &self.settings), + &algo.get_predictor()(&x_test, &model, &self.settings), // &LassoRegressorWrapper::predict(&x_test, &model, &self.settings), ); @@ -638,7 +638,7 @@ impl SupervisedModel { let metamodel = &self.metamodel.model; // Train the model - (*algo.get_predictor())(&xdm, metamodel, &self.settings) + algo.get_predictor()(&xdm, metamodel, &self.settings) } fn predict_by_model(&self, x: &DenseMatrix, model: &Model) -> Vec { diff --git a/src/settings/knn_classifier_parameters.rs b/src/settings/knn_classifier_parameters.rs index f94fef6..ed4be3f 100644 --- a/src/settings/knn_classifier_parameters.rs +++ b/src/settings/knn_classifier_parameters.rs @@ -1,12 +1,18 @@ +//! KNN classifier parameters + use crate::utils::Distance; pub use smartcore::{algorithm::neighbour::KNNAlgorithmName, neighbors::KNNWeightFunction}; /// Parameters for k-nearest neighbors (KNN) classification #[derive(serde::Serialize, serde::Deserialize)] pub struct KNNClassifierParameters { + /// Number of nearest neighbors to use pub(crate) k: usize, + /// Weighting function to use with KNN classification pub(crate) weight: KNNWeightFunction, + /// Search algorithm to use with KNN classification pub(crate) algorithm: KNNAlgorithmName, + /// Distance metric to use with KNN classification pub(crate) distance: Distance, } diff --git a/src/settings/knn_regressor_parameters.rs b/src/settings/knn_regressor_parameters.rs index da1e6c6..fab8090 100644 --- a/src/settings/knn_regressor_parameters.rs +++ b/src/settings/knn_regressor_parameters.rs @@ -1,12 +1,18 @@ +//! 
KNN regressor parameters + use crate::utils::Distance; pub use smartcore::{algorithm::neighbour::KNNAlgorithmName, neighbors::KNNWeightFunction}; /// Parameters for k-nearest neighbor (KNN) regression #[derive(serde::Serialize, serde::Deserialize)] pub struct KNNRegressorParameters { + /// Number of nearest neighbors to use pub(crate) k: usize, + /// Weighting function to use with KNN regression pub(crate) weight: KNNWeightFunction, + /// Search algorithm to use with KNN regression pub(crate) algorithm: KNNAlgorithmName, + /// Distance metric to use with KNN regression pub(crate) distance: Distance, } diff --git a/src/settings/mod.rs b/src/settings/mod.rs index b359009..7d5659c 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -263,51 +263,49 @@ pub enum Algorithm { } impl Algorithm { + /// Get the `predict` method for the underlying algorithm. pub(crate) fn get_predictor( &self, - ) -> Box, &Vec, &Settings) -> Vec> { + ) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { - Algorithm::Linear => Box::new(LinearRegressorWrapper::predict), - Algorithm::Lasso => Box::new(LassoRegressorWrapper::predict), - Algorithm::Ridge => Box::new(RidgeRegressorWrapper::predict), - Algorithm::ElasticNet => Box::new(ElasticNetRegressorWrapper::predict), - Algorithm::RandomForestRegressor => Box::new(RandomForestRegressorWrapper::predict), - Algorithm::KNNRegressor => Box::new(KNNRegressorWrapper::predict), - Algorithm::SVR => Box::new(SupportVectorRegressorWrapper::predict), - Algorithm::DecisionTreeRegressor => Box::new(DecisionTreeRegressorWrapper::predict), - Algorithm::LogisticRegression => Box::new(LogisticRegressionWrapper::predict), - Algorithm::RandomForestClassifier => Box::new(RandomForestClassifierWrapper::predict), - Algorithm::DecisionTreeClassifier => Box::new(DecisionTreeClassifierWrapper::predict), - Algorithm::KNNClassifier => Box::new(KNNClassifierWrapper::predict), - Algorithm::SVC => Box::new(SupportVectorClassifierWrapper::predict), - 
Algorithm::GaussianNaiveBayes => Box::new(GaussianNaiveBayesClassifierWrapper::predict), - Algorithm::CategoricalNaiveBayes => { - Box::new(CategoricalNaiveBayesClassifierWrapper::predict) - } + Algorithm::Linear => LinearRegressorWrapper::predict, + Algorithm::Lasso => LassoRegressorWrapper::predict, + Algorithm::Ridge => RidgeRegressorWrapper::predict, + Algorithm::ElasticNet => ElasticNetRegressorWrapper::predict, + Algorithm::RandomForestRegressor => RandomForestRegressorWrapper::predict, + Algorithm::KNNRegressor => KNNRegressorWrapper::predict, + Algorithm::SVR => SupportVectorRegressorWrapper::predict, + Algorithm::DecisionTreeRegressor => DecisionTreeRegressorWrapper::predict, + Algorithm::LogisticRegression => LogisticRegressionWrapper::predict, + Algorithm::RandomForestClassifier => RandomForestClassifierWrapper::predict, + Algorithm::DecisionTreeClassifier => DecisionTreeClassifierWrapper::predict, + Algorithm::KNNClassifier => KNNClassifierWrapper::predict, + Algorithm::SVC => SupportVectorClassifierWrapper::predict, + Algorithm::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::predict, + Algorithm::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::predict, } } + /// Get the `train` method for the underlying algorithm. 
pub(crate) fn get_trainer( &self, - ) -> Box, &Vec, &Settings) -> Vec> { + ) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { - Algorithm::Linear => Box::new(LinearRegressorWrapper::train), - Algorithm::Lasso => Box::new(LassoRegressorWrapper::train), - Algorithm::Ridge => Box::new(RidgeRegressorWrapper::train), - Algorithm::ElasticNet => Box::new(ElasticNetRegressorWrapper::train), - Algorithm::RandomForestRegressor => Box::new(RandomForestRegressorWrapper::train), - Algorithm::KNNRegressor => Box::new(KNNRegressorWrapper::train), - Algorithm::SVR => Box::new(SupportVectorRegressorWrapper::train), - Algorithm::DecisionTreeRegressor => Box::new(DecisionTreeRegressorWrapper::train), - Algorithm::LogisticRegression => Box::new(LogisticRegressionWrapper::train), - Algorithm::RandomForestClassifier => Box::new(RandomForestClassifierWrapper::train), - Algorithm::DecisionTreeClassifier => Box::new(DecisionTreeClassifierWrapper::train), - Algorithm::KNNClassifier => Box::new(KNNClassifierWrapper::train), - Algorithm::SVC => Box::new(SupportVectorClassifierWrapper::train), - Algorithm::GaussianNaiveBayes => Box::new(GaussianNaiveBayesClassifierWrapper::train), - Algorithm::CategoricalNaiveBayes => { - Box::new(CategoricalNaiveBayesClassifierWrapper::train) - } + Algorithm::Linear => LinearRegressorWrapper::train, + Algorithm::Lasso => LassoRegressorWrapper::train, + Algorithm::Ridge => RidgeRegressorWrapper::train, + Algorithm::ElasticNet => ElasticNetRegressorWrapper::train, + Algorithm::RandomForestRegressor => RandomForestRegressorWrapper::train, + Algorithm::KNNRegressor => KNNRegressorWrapper::train, + Algorithm::SVR => SupportVectorRegressorWrapper::train, + Algorithm::DecisionTreeRegressor => DecisionTreeRegressorWrapper::train, + Algorithm::LogisticRegression => LogisticRegressionWrapper::train, + Algorithm::RandomForestClassifier => RandomForestClassifierWrapper::train, + Algorithm::DecisionTreeClassifier => DecisionTreeClassifierWrapper::train, + 
Algorithm::KNNClassifier => KNNClassifierWrapper::train, + Algorithm::SVC => SupportVectorClassifierWrapper::train, + Algorithm::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::train, + Algorithm::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::train, } } } diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index 2c37370..013097a 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -1,3 +1,5 @@ +//! Settings for the automl crate + use comfy_table::{ modifiers::UTF8_SOLID_INNER_BORDERS, presets::UTF8_FULL, Attribute, Cell, Table, }; @@ -25,30 +27,55 @@ use std::fmt::{Display, Formatter}; use std::io::{Read, Write}; /// Settings for supervised models +/// +/// Any algorithms in the `skiplist` member will be skipped during training. #[derive(serde::Serialize, serde::Deserialize)] pub struct Settings { + /// The metric to sort by pub(crate) sort_by: Metric, + /// The type of model to train model_type: ModelType, + /// The algorithms to skip pub(crate) skiplist: Vec, + /// The number of folds for cross-validation number_of_folds: usize, + /// Whether or not to shuffle the data pub(crate) shuffle: bool, + /// Whether or not to be verbose verbose: bool, + /// The approach to use for the final model pub(crate) final_model_approach: FinalModel, + /// The kind of preprocessing to perform pub(crate) preprocessing: PreProcessing, + /// Optional settings for linear regression pub(crate) linear_settings: Option, + /// Optional settings for support vector regressor pub(crate) svr_settings: Option, + /// Optional settings for lasso regression pub(crate) lasso_settings: Option>, + /// Optional settings for ridge regression pub(crate) ridge_settings: Option>, + /// Optional settings for elastic net pub(crate) elastic_net_settings: Option>, + /// Optional settings for decision tree regressor pub(crate) decision_tree_regressor_settings: Option, + /// Optional settings for random forest regressor 
pub(crate) random_forest_regressor_settings: Option, + /// Optional settings for KNN regressor pub(crate) knn_regressor_settings: Option, + /// Optional settings for logistic regression pub(crate) logistic_settings: Option>, + /// Optional settings for random forest classifier pub(crate) random_forest_classifier_settings: Option, + /// Optional settings for KNN classifier pub(crate) knn_classifier_settings: Option, + /// Optional settings for support vector classifier pub(crate) svc_settings: Option, + /// Optional settings for decision tree classifier pub(crate) decision_tree_classifier_settings: Option, + /// Optional settings for Gaussian Naive Bayes pub(crate) gaussian_nb_settings: Option>, + /// Optional settings for Categorical Naive Bayes pub(crate) categorical_nb_settings: Option>, } @@ -99,20 +126,22 @@ impl Default for Settings { } impl Settings { + /// Get the k-fold cross-validator pub(crate) fn get_kfolds(&self) -> KFold { KFold::default() .with_n_splits(self.number_of_folds) .with_shuffle(self.shuffle) } - pub(crate) fn get_metric(&self) -> Box, &Vec) -> f32> { - Box::new(match self.sort_by { + /// Get the metric to sort by + pub(crate) fn get_metric(&self) -> fn(&Vec, &Vec) -> f32 { + match self.sort_by { Metric::RSquared => r2, Metric::MeanAbsoluteError => mean_absolute_error, Metric::MeanSquaredError => mean_squared_error, Metric::Accuracy => accuracy, Metric::None => panic!("A metric must be set."), - }) + } } /// Creates default settings for regression @@ -208,10 +237,10 @@ impl Settings { /// ``` pub fn new_from_file(file_name: &str) -> Self { let mut buf: Vec = Vec::new(); - std::fs::File::open(&file_name) + std::fs::File::open(file_name) .and_then(|mut f| f.read_to_end(&mut buf)) .expect("Cannot read settings file."); - serde_yaml::from_slice(&*buf).expect("Cannot deserialize settings file.") + serde_yaml::from_slice(&buf).expect("Cannot deserialize settings file.") } /// Save the current settings to a file for later use @@ -224,7 +253,7 @@ impl 
Settings 
{ pub fn save(&self, file_name: &str) { let serial = serde_yaml::to_string(&self).expect("Cannot serialize settings."); std::fs::File::create(file_name) - .and_then(|mut f| f.write_all((&serial).as_ref())) + .and_then(|mut f| f.write_all(serial.as_ref())) .expect("Cannot write settings to file.") } @@ -577,11 +606,11 @@ impl Display for Settings { // Get list of algorithms to skip let mut skiplist = String::new(); - if self.skiplist.len() == 0 { + if self.skiplist.is_empty() { skiplist.push_str("None "); } else { for algorithm_to_skip in &self.skiplist { - skiplist.push_str(&*format!("{}\n", algorithm_to_skip)); + skiplist.push_str(&format!("{}\n", algorithm_to_skip)); } } @@ -608,7 +637,7 @@ impl Display for Settings { ]) .add_row(vec![ " Skipped Algorithms", - &*format!("{}", &skiplist[0..skiplist.len() - 1]), + &skiplist[0..skiplist.len() - 1], ]); if !self.skiplist.contains(&Algorithm::Linear) { table @@ -792,20 +821,14 @@ impl Display for Settings { ]) .add_row(vec![ " Search algorithm", - &*format!( - "{}", - print_knn_search_algorithm( - &self.knn_regressor_settings.as_ref().unwrap().algorithm - ) + &print_knn_search_algorithm( + &self.knn_regressor_settings.as_ref().unwrap().algorithm ), ]) .add_row(vec![ " Weighting function", - &*format!( - "{}", - print_knn_weight_function( - &self.knn_regressor_settings.as_ref().unwrap().weight - ) + &print_knn_weight_function( + &self.knn_regressor_settings.as_ref().unwrap().weight ), ]) .add_row(vec![ @@ -922,20 +945,14 @@ impl Display for Settings { ]) .add_row(vec![ " Search algorithm", - &*format!( - "{}", - print_knn_search_algorithm( - &self.knn_classifier_settings.as_ref().unwrap().algorithm - ) + &print_knn_search_algorithm( + &self.knn_classifier_settings.as_ref().unwrap().algorithm ), ]) .add_row(vec![ " Weighting function", - &*format!( - "{}", - print_knn_weight_function( - &self.knn_classifier_settings.as_ref().unwrap().weight - ) + &print_knn_weight_function( + 
&self.knn_classifier_settings.as_ref().unwrap().weight ), ]) .add_row(vec![ @@ -1041,14 +1058,18 @@ impl Display for Settings { ]); } - write!(f, "{}\n", table) + writeln!(f, "{table}") } } +/// Model type to train #[derive(serde::Serialize, serde::Deserialize)] enum ModelType { + /// No model type specified None, + /// Regression model Regression, + /// Classification model Classification, } diff --git a/src/settings/svc_parameters.rs b/src/settings/svc_parameters.rs index d2d8db4..f57c546 100644 --- a/src/settings/svc_parameters.rs +++ b/src/settings/svc_parameters.rs @@ -1,34 +1,40 @@ +//! Support Vector Classification parameters + pub use crate::utils::Kernel; /// Parameters for support vector classification #[derive(serde::Serialize, serde::Deserialize)] pub struct SVCParameters { + /// Number of epochs to use in the epsilon-SVC model pub(crate) epoch: usize, + /// Regulation penalty to use with the SVC model pub(crate) c: f32, + /// Convergence tolerance to use with the SVC model pub(crate) tol: f32, + /// Kernel to use with the SVC model pub(crate) kernel: Kernel, } impl SVCParameters { - /// Define the value of epsilon to use in the epsilon-SVR model. + /// Define the number of epochs to use in the epsilon-SVC model. 
pub fn with_epoch(mut self, epoch: usize) -> Self { self.epoch = epoch; self } - /// Define the regulation penalty to use with the SVR Model + /// Define the regulation penalty to use with the SVC Model pub fn with_c(mut self, c: f32) -> Self { self.c = c; self } - /// Define the convergence tolerance to use with the SVR model + /// Define the convergence tolerance to use with the SVC model pub fn with_tol(mut self, tol: f32) -> Self { self.tol = tol; self } - /// Define which kernel to use with the SVR model + /// Define which kernel to use with the SVC model pub fn with_kernel(mut self, kernel: Kernel) -> Self { self.kernel = kernel; self diff --git a/src/settings/svr_parameters.rs b/src/settings/svr_parameters.rs index f2bb76b..5c341e5 100644 --- a/src/settings/svr_parameters.rs +++ b/src/settings/svr_parameters.rs @@ -1,11 +1,17 @@ +//! Support Vector Regression parameters + pub use crate::utils::Kernel; /// Parameters for support vector regression #[derive(serde::Serialize, serde::Deserialize)] pub struct SVRParameters { + /// Epsilon in the epsilon-SVR model. pub(crate) eps: f32, + /// Regularization parameter. pub(crate) c: f32, + /// Tolerance for stopping criterion. 
pub(crate) tol: f32, + /// Kernel to use for the SVR model pub(crate) kernel: Kernel, } From a915c2a2d18a7e67c465c14ba356dc75e4c413bd Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 11:48:27 -0400 Subject: [PATCH 02/17] fix: satisfied clippy for tests module --- tests/classification.rs | 2 +- tests/new_from_dataset.rs | 4 ++-- tests/regression.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/classification.rs b/tests/classification.rs index 7c89073..e576e1e 100644 --- a/tests/classification.rs +++ b/tests/classification.rs @@ -58,6 +58,6 @@ mod classification_tests { classifier.train(); // Try to predict something - classifier.predict(vec![vec![5.0 as f32; 30]; 10]); + classifier.predict(vec![vec![5.0_f32; 30]; 10]); } } diff --git a/tests/new_from_dataset.rs b/tests/new_from_dataset.rs index 0d7ca95..75b267e 100644 --- a/tests/new_from_dataset.rs +++ b/tests/new_from_dataset.rs @@ -16,7 +16,7 @@ mod new_from_dataset { classifier.train(); // Try to predict something from a vector - classifier.predict(vec![vec![5.0 as f32; 30]; 10]); + classifier.predict(vec![vec![5.0_f32; 30]; 10]); // Try to predict something from ndarray #[cfg(feature = "nd")] @@ -37,7 +37,7 @@ mod new_from_dataset { regressor.train(); // Try to predict something from a vector - regressor.predict(vec![vec![5.0 as f32; 10]; 10]); + regressor.predict(vec![vec![5.0_f32; 10]; 10]); // Try to predict something from ndarray #[cfg(feature = "nd")] diff --git a/tests/regression.rs b/tests/regression.rs index 127efc4..5fbd1d2 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -76,6 +76,6 @@ mod regression_tests { regressor.train(); // Try to predict something - regressor.predict(vec![vec![5.0 as f32; 10]; 10]); + regressor.predict(vec![vec![5.0_f32; 10]; 10]); } } From 0b581fd37c795fc1ebe49adf997130fc7bbebe01 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 12:16:29 -0400 Subject: [PATCH 03/17] docs: added docs for algorithms and 
utils --- .../categorical_naive_bayes_classifier.rs | 17 +++-- src/algorithms/decision_tree_classifier.rs | 8 +- src/algorithms/decision_tree_regressor.rs | 8 +- src/algorithms/elastic_net_regressor.rs | 8 +- .../gaussian_naive_bayes_classifier.rs | 13 +++- src/algorithms/knn_classifier.rs | 19 +++-- src/algorithms/knn_regressor.rs | 16 ++-- src/algorithms/lasso_regressor.rs | 8 +- src/algorithms/linear_regressor.rs | 8 +- src/algorithms/logistic_regression.rs | 8 +- src/algorithms/mod.rs | 33 ++++++++- src/algorithms/random_forest_classifier.rs | 12 ++- src/algorithms/random_forest_regressor.rs | 12 ++- src/algorithms/ridge_regressor.rs | 8 +- src/algorithms/support_vector_classifier.rs | 14 +++- src/algorithms/support_vector_regressor.rs | 74 +++---------------- src/utils.rs | 11 ++- 17 files changed, 171 insertions(+), 106 deletions(-) diff --git a/src/algorithms/categorical_naive_bayes_classifier.rs b/src/algorithms/categorical_naive_bayes_classifier.rs index 8494f06..04a1276 100644 --- a/src/algorithms/categorical_naive_bayes_classifier.rs +++ b/src/algorithms/categorical_naive_bayes_classifier.rs @@ -1,10 +1,17 @@ -use smartcore::linalg::naive::dense_matrix::DenseMatrix; -use smartcore::model_selection::cross_validate; -use smartcore::naive_bayes::categorical::CategoricalNB; +//! Categorical Naive Bayes Classifier. + +use smartcore::{ + linalg::naive::dense_matrix::DenseMatrix, + model_selection::{cross_validate, CrossValidationResult}, + naive_bayes::categorical::CategoricalNB, +}; use crate::{Algorithm, Settings}; -use smartcore::model_selection::CrossValidationResult; +/// The Categorical Naive Bayes Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#categorical-naive-bayes) +/// for a more in-depth description of the algorithm. 
pub(crate) struct CategoricalNaiveBayesClassifierWrapper {} impl super::ModelWrapper for CategoricalNaiveBayesClassifierWrapper { @@ -41,7 +48,7 @@ impl super::ModelWrapper for CategoricalNaiveBayesClassifierWrapper { fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { let model: CategoricalNB> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/decision_tree_classifier.rs b/src/algorithms/decision_tree_classifier.rs index 6496cf7..c1e609a 100644 --- a/src/algorithms/decision_tree_classifier.rs +++ b/src/algorithms/decision_tree_classifier.rs @@ -1,3 +1,5 @@ +//! Decision Tree Classifier. + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, model_selection::{cross_validate, CrossValidationResult}, @@ -6,6 +8,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The Decision Tree Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#classification) +/// for a more in-depth description of the algorithm. pub(crate) struct DecisionTreeClassifierWrapper {} impl super::ModelWrapper for DecisionTreeClassifierWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for DecisionTreeClassifierWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: DecisionTreeClassifier = bincode::deserialize(&*final_model).unwrap(); + let model: DecisionTreeClassifier = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/decision_tree_regressor.rs b/src/algorithms/decision_tree_regressor.rs index 58fe63d..0b8b5d5 100644 --- a/src/algorithms/decision_tree_regressor.rs +++ b/src/algorithms/decision_tree_regressor.rs @@ -1,3 +1,5 @@ +//! Decision Tree Regressor. 
+ use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, model_selection::{cross_validate, CrossValidationResult}, @@ -6,6 +8,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The Decision Tree Regressor. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#regression) +/// for a more in-depth description of the algorithm. pub(crate) struct DecisionTreeRegressorWrapper {} impl super::ModelWrapper for DecisionTreeRegressorWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for DecisionTreeRegressorWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: DecisionTreeRegressor = bincode::deserialize(&*final_model).unwrap(); + let model: DecisionTreeRegressor = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/elastic_net_regressor.rs b/src/algorithms/elastic_net_regressor.rs index 528d6ef..fc6e4e3 100644 --- a/src/algorithms/elastic_net_regressor.rs +++ b/src/algorithms/elastic_net_regressor.rs @@ -1,3 +1,5 @@ +//! Elastic Net Regressor. + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, linear::elastic_net::ElasticNet, model_selection::cross_validate, model_selection::CrossValidationResult, @@ -5,6 +7,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The Elastic Net Regressor. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#elastic-net) +/// for a more in-depth description of the algorithm. 
pub(crate) struct ElasticNetRegressorWrapper {} impl super::ModelWrapper for ElasticNetRegressorWrapper { @@ -40,7 +46,7 @@ impl super::ModelWrapper for ElasticNetRegressorWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: ElasticNet> = bincode::deserialize(&*final_model).unwrap(); + let model: ElasticNet> = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/gaussian_naive_bayes_classifier.rs b/src/algorithms/gaussian_naive_bayes_classifier.rs index 3b3d91a..3852df3 100644 --- a/src/algorithms/gaussian_naive_bayes_classifier.rs +++ b/src/algorithms/gaussian_naive_bayes_classifier.rs @@ -1,10 +1,17 @@ +//! Gaussian Naive Bayes Classifier + use smartcore::{ - linalg::naive::dense_matrix::DenseMatrix, model_selection::cross_validate, - model_selection::CrossValidationResult, naive_bayes::gaussian::GaussianNB, + linalg::naive::dense_matrix::DenseMatrix, + model_selection::{cross_validate, CrossValidationResult}, + naive_bayes::gaussian::GaussianNB, }; use crate::{Algorithm, Settings}; +/// The Gaussian Naive Bayes Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#gaussian-naive-bayes) +/// for a more in-depth description of the algorithm. pub(crate) struct GaussianNaiveBayesClassifierWrapper {} impl super::ModelWrapper for GaussianNaiveBayesClassifierWrapper { @@ -40,7 +47,7 @@ impl super::ModelWrapper for GaussianNaiveBayesClassifierWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: GaussianNB> = bincode::deserialize(&*final_model).unwrap(); + let model: GaussianNB> = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/knn_classifier.rs b/src/algorithms/knn_classifier.rs index 13ee395..27cb497 100644 --- a/src/algorithms/knn_classifier.rs +++ b/src/algorithms/knn_classifier.rs @@ -1,18 +1,23 @@ +//! 
KNN Classifier + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, math::distance::{ euclidian::Euclidian, hamming::Hamming, mahalanobis::Mahalanobis, manhattan::Manhattan, minkowski::Minkowski, Distances, }, - model_selection::cross_validate, + model_selection::{cross_validate, CrossValidationResult}, neighbors::knn_classifier::{ KNNClassifier, KNNClassifierParameters as SmartcoreKNNClassifierParameters, }, }; use crate::{Algorithm, Distance, Settings}; -use smartcore::model_selection::CrossValidationResult; +/// The KNN Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#classification) +/// for a more in-depth description of the algorithm. pub(crate) struct KNNClassifierWrapper {} impl super::ModelWrapper for KNNClassifierWrapper { @@ -281,27 +286,27 @@ impl super::ModelWrapper for KNNClassifierWrapper { match settings.knn_classifier_settings.as_ref().unwrap().distance { Distance::Euclidean => { let model: KNNClassifier = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Manhattan => { let model: KNNClassifier = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Minkowski(_) => { let model: KNNClassifier = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Mahalanobis => { let model: KNNClassifier>> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Hamming => { let model: KNNClassifier = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/knn_regressor.rs b/src/algorithms/knn_regressor.rs index 3d91594..23331be 100644 --- a/src/algorithms/knn_regressor.rs +++ 
b/src/algorithms/knn_regressor.rs @@ -1,3 +1,5 @@ +//! KNN Regressor + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, math::distance::{ @@ -13,6 +15,10 @@ use smartcore::{ use crate::{Algorithm, Distance, Settings}; +/// The KNN Regressor. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#regression) +/// for a more in-depth description of the algorithm. pub(crate) struct KNNRegressorWrapper {} impl super::ModelWrapper for KNNRegressorWrapper { @@ -288,27 +294,27 @@ impl super::ModelWrapper for KNNRegressorWrapper { match settings.knn_regressor_settings.as_ref().unwrap().distance { Distance::Euclidean => { let model: KNNRegressor = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Manhattan => { let model: KNNRegressor = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Minkowski(_) => { let model: KNNRegressor = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Mahalanobis => { let model: KNNRegressor>> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Distance::Hamming => { let model: KNNRegressor = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/lasso_regressor.rs b/src/algorithms/lasso_regressor.rs index 19096cc..d6eeeef 100644 --- a/src/algorithms/lasso_regressor.rs +++ b/src/algorithms/lasso_regressor.rs @@ -1,3 +1,5 @@ +//! LASSO regression algorithm. 
+ use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, linear::lasso::Lasso, model_selection::cross_validate, model_selection::CrossValidationResult, @@ -5,6 +7,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The LASSO regression algorithm. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#lasso) +/// for a more in-depth description of the algorithm. pub(crate) struct LassoRegressorWrapper {} impl super::ModelWrapper for LassoRegressorWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for LassoRegressorWrapper { fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { let model: Lasso> = - bincode::deserialize(&*final_model).expect("Cannot deserialize trained model."); + bincode::deserialize(final_model).expect("Cannot deserialize trained model."); model.predict(x).expect("Error during inference.") } } diff --git a/src/algorithms/linear_regressor.rs b/src/algorithms/linear_regressor.rs index 16cabf6..ac7ceb0 100644 --- a/src/algorithms/linear_regressor.rs +++ b/src/algorithms/linear_regressor.rs @@ -1,3 +1,5 @@ +//! Linear regression algorithm. + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, linear::linear_regression::LinearRegression, model_selection::cross_validate, model_selection::CrossValidationResult, @@ -5,6 +7,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The Linear regression algorithm. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares) +/// for a more in-depth description of the algorithm. 
pub(crate) struct LinearRegressorWrapper {} impl super::ModelWrapper for LinearRegressorWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for LinearRegressorWrapper { fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { let model: LinearRegression> = - bincode::deserialize(&*final_model).expect("Cannot deserialize trained model."); + bincode::deserialize(final_model).expect("Cannot deserialize trained model."); model.predict(x).expect("Error during inference.") } } diff --git a/src/algorithms/logistic_regression.rs b/src/algorithms/logistic_regression.rs index 82806fe..afc1447 100644 --- a/src/algorithms/logistic_regression.rs +++ b/src/algorithms/logistic_regression.rs @@ -1,9 +1,15 @@ +//! Logistic Regression + use crate::{Algorithm, Settings}; use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, linear::logistic_regression::LogisticRegression, model_selection::cross_validate, model_selection::CrossValidationResult, }; +/// The Logistic Regression algorithm. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression) +/// for a more in-depth description of the algorithm. pub(crate) struct LogisticRegressionWrapper {} impl super::ModelWrapper for LogisticRegressionWrapper { @@ -36,7 +42,7 @@ impl super::ModelWrapper for LogisticRegressionWrapper { fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { let model: LogisticRegression> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index 09fc94c..e45a19e 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -1,3 +1,30 @@ +//! # Algorithms +//! +//! This module contains the wrappers for the algorithms provided by this crate. +//! The algorithms are all available through the common interface of the `ModelWrapper` trait. +//! +//! 
The available algorithms include: +//! +//! * Classification algorithms: +//! - Logistic Regression +//! - Random Forest Classifier +//! - K-Nearest Neighbors Classifier +//! - Decision Tree Classifier +//! - Gaussian Naive Bayes Classifier +//! - Categorical Naive Bayes Classifier +//! - Support Vector Classifier +//! +//! * Regression algorithms: +//! - Linear Regression +//! - Elastic Net Regressor +//! - Lasso Regressor +//! - K-Nearest Neighbors Regressor +//! - Ridge Regressor +//! - Random Forest Regressor +//! - Decision Tree Regressor +//! - Support Vector Regressor +//! + mod linear_regressor; pub(crate) use linear_regressor::LinearRegressorWrapper; @@ -85,16 +112,16 @@ pub trait ModelWrapper { ) } - // Perform cross-validation + /// Perform cross-validation fn cv( x: &DenseMatrix, y: &Vec, settings: &Settings, ) -> (CrossValidationResult, Algorithm); - // Train a model + /// Train a model fn train(x: &DenseMatrix, y: &Vec, settings: &Settings) -> Vec; - // Perform a prediction + /// Perform a prediction fn predict(x: &DenseMatrix, final_model: &Vec, settings: &Settings) -> Vec; } diff --git a/src/algorithms/random_forest_classifier.rs b/src/algorithms/random_forest_classifier.rs index 76b8846..46ebc4b 100644 --- a/src/algorithms/random_forest_classifier.rs +++ b/src/algorithms/random_forest_classifier.rs @@ -1,11 +1,17 @@ +//! Random Forest Classifier + use smartcore::{ ensemble::random_forest_classifier::RandomForestClassifier, - linalg::naive::dense_matrix::DenseMatrix, model_selection::cross_validate, - model_selection::CrossValidationResult, + linalg::naive::dense_matrix::DenseMatrix, + model_selection::{cross_validate, CrossValidationResult}, }; use crate::{Algorithm, Settings}; +/// The Random Forest Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) +/// for a more in-depth description of the algorithm. 
pub(crate) struct RandomForestClassifierWrapper {} impl super::ModelWrapper for RandomForestClassifierWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for RandomForestClassifierWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: RandomForestClassifier = bincode::deserialize(&*final_model).unwrap(); + let model: RandomForestClassifier = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/random_forest_regressor.rs b/src/algorithms/random_forest_regressor.rs index 81bdee1..1df39f2 100644 --- a/src/algorithms/random_forest_regressor.rs +++ b/src/algorithms/random_forest_regressor.rs @@ -1,11 +1,17 @@ +//! Random Forest Regressor + use smartcore::{ ensemble::random_forest_regressor::RandomForestRegressor, - linalg::naive::dense_matrix::DenseMatrix, model_selection::cross_validate, - model_selection::CrossValidationResult, + linalg::naive::dense_matrix::DenseMatrix, + model_selection::{cross_validate, CrossValidationResult}, }; use crate::{Algorithm, Settings}; +/// The Random Forest Regressor. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) +/// for a more in-depth description of the algorithm. pub(crate) struct RandomForestRegressorWrapper {} impl super::ModelWrapper for RandomForestRegressorWrapper { @@ -49,7 +55,7 @@ impl super::ModelWrapper for RandomForestRegressorWrapper { } fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { - let model: RandomForestRegressor = bincode::deserialize(&*final_model).unwrap(); + let model: RandomForestRegressor = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/ridge_regressor.rs b/src/algorithms/ridge_regressor.rs index 9e48fa3..5e161ce 100644 --- a/src/algorithms/ridge_regressor.rs +++ b/src/algorithms/ridge_regressor.rs @@ -1,3 +1,5 @@ +//! Ridge regression algorithm. 
+ use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, linear::ridge_regression::RidgeRegression, model_selection::cross_validate, model_selection::CrossValidationResult, @@ -5,6 +7,10 @@ use smartcore::{ use crate::{Algorithm, Settings}; +/// The Ridge regression algorithm. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ridge-regression) +/// for a more in-depth description of the algorithm. pub(crate) struct RidgeRegressorWrapper {} impl super::ModelWrapper for RidgeRegressorWrapper { @@ -36,7 +42,7 @@ impl super::ModelWrapper for RidgeRegressorWrapper { fn predict(x: &DenseMatrix, final_model: &Vec, _settings: &Settings) -> Vec { let model: RidgeRegression> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/support_vector_classifier.rs b/src/algorithms/support_vector_classifier.rs index cfcadc3..2917e24 100644 --- a/src/algorithms/support_vector_classifier.rs +++ b/src/algorithms/support_vector_classifier.rs @@ -1,3 +1,5 @@ +//! Support Vector Classifier + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, model_selection::cross_validate, @@ -10,6 +12,10 @@ use smartcore::{ use crate::{Algorithm, Kernel, Settings}; +/// The Support Vector Classifier. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-classification) +/// for a more in-depth description of the algorithm. 
pub(crate) struct SupportVectorClassifierWrapper {} impl super::ModelWrapper for SupportVectorClassifierWrapper { @@ -120,22 +126,22 @@ impl super::ModelWrapper for SupportVectorClassifierWrapper { match settings.svc_settings.as_ref().unwrap().kernel { Kernel::Linear => { let model: SVC, LinearKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::Polynomial(_, _, _) => { let model: SVC, PolynomialKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::RBF(_) => { let model: SVC, RBFKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::Sigmoid(_, _) => { let model: SVC, SigmoidKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/support_vector_regressor.rs b/src/algorithms/support_vector_regressor.rs index f336b6a..0c8f536 100644 --- a/src/algorithms/support_vector_regressor.rs +++ b/src/algorithms/support_vector_regressor.rs @@ -1,3 +1,5 @@ +//! Support Vector Regressor + use smartcore::{ linalg::naive::dense_matrix::DenseMatrix, model_selection::cross_validate, @@ -10,6 +12,10 @@ use smartcore::{ use crate::{Algorithm, Kernel, Settings}; +/// The Support Vector Regressor. +/// +/// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-regression) +/// for a more in-depth description of the algorithm. 
pub(crate) struct SupportVectorRegressorWrapper {} impl super::ModelWrapper for SupportVectorRegressorWrapper { @@ -120,84 +126,24 @@ impl super::ModelWrapper for SupportVectorRegressorWrapper { match settings.svr_settings.as_ref().unwrap().kernel { Kernel::Linear => { let model: SVR, LinearKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::Polynomial(_, _, _) => { let model: SVR, PolynomialKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::RBF(_) => { let model: SVR, RBFKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } Kernel::Sigmoid(_, _) => { let model: SVR, SigmoidKernel> = - bincode::deserialize(&*final_model).unwrap(); + bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } } } - -// -// let start = Instant::now(); -// let cv = match self.settings.svr_settings.as_ref().unwrap().kernel { -// Kernel::Linear => cross_validate( -// SVR::fit, -// &self.x, -// &self.y, -// SmartcoreSVRParameters::default() -// .with_tol(self.settings.svr_settings.as_ref().unwrap().tol) -// .with_c(self.settings.svr_settings.as_ref().unwrap().c) -// .with_eps(self.settings.svr_settings.as_ref().unwrap().c) -// .with_kernel(Kernels::linear()), -// self.get_kfolds(), -// metric, -// ) -// .unwrap(), -// Kernel::Polynomial(degree, gamma, coef) => cross_validate( -// SVR::fit, -// &self.x, -// &self.y, -// SmartcoreSVRParameters::default() -// .with_tol(self.settings.svr_settings.as_ref().unwrap().tol) -// .with_c(self.settings.svr_settings.as_ref().unwrap().c) -// .with_eps(self.settings.svr_settings.as_ref().unwrap().c) -// .with_kernel(Kernels::polynomial(degree, gamma, coef)), -// self.get_kfolds(), -// metric, -// ) -// .unwrap(), -// Kernel::RBF(gamma) => cross_validate( -// SVR::fit, -// &self.x, -// 
&self.y, -// SmartcoreSVRParameters::default() -// .with_tol(self.settings.svr_settings.as_ref().unwrap().tol) -// .with_c(self.settings.svr_settings.as_ref().unwrap().c) -// .with_eps(self.settings.svr_settings.as_ref().unwrap().c) -// .with_kernel(Kernels::rbf(gamma)), -// self.get_kfolds(), -// metric, -// ) -// .unwrap(), -// Kernel::Sigmoid(gamma, coef) => cross_validate( -// SVR::fit, -// &self.x, -// &self.y, -// SmartcoreSVRParameters::default() -// .with_tol(self.settings.svr_settings.as_ref().unwrap().tol) -// .with_c(self.settings.svr_settings.as_ref().unwrap().c) -// .with_eps(self.settings.svr_settings.as_ref().unwrap().c) -// .with_kernel(Kernels::sigmoid(gamma, coef)), -// self.get_kfolds(), -// metric, -// ) -// .unwrap(), -// }; -// let end = Instant::now(); -// let d = end.duration_since(start); -// self.add_model(Algorithm::SVR, cv, d); diff --git a/src/utils.rs b/src/utils.rs index 91ea5d9..2f19e67 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,19 +1,25 @@ +//! Utility functions for the crate. + use smartcore::{algorithm::neighbour::KNNAlgorithmName, neighbors::KNNWeightFunction}; use std::fmt::{Debug, Display, Formatter}; +/// Convert an Option to a String for printing in display mode. pub(crate) fn print_option(x: Option) -> String { match x { None => "None".to_string(), - Some(y) => format!("{}", y), + Some(y) => format!("{y}"), } } + +/// Convert an Option to a String for printing in debug mode. pub(crate) fn debug_option(x: Option) -> String { match x { None => "None".to_string(), - Some(y) => format!("{:#?}", y), + Some(y) => format!("{y:#?}"), } } +/// Get the name for a knn weight function. pub(crate) fn print_knn_weight_function(f: &KNNWeightFunction) -> String { match f { KNNWeightFunction::Uniform => "Uniform".to_string(), @@ -21,6 +27,7 @@ pub(crate) fn print_knn_weight_function(f: &KNNWeightFunction) -> String { } } +/// Get the name for a knn search algorithm. 
pub(crate) fn print_knn_search_algorithm(a: &KNNAlgorithmName) -> String { match a { KNNAlgorithmName::LinearSearch => "Linear Search".to_string(), From d9cc9cca789a8bc824abf4fd71cee13f090860cd Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 12:38:08 -0400 Subject: [PATCH 04/17] docs: added docs all over and satisfied clippy for most things --- examples/classification_save_best.rs | 2 +- src/algorithms/linear_regressor.rs | 5 +- src/lib.rs | 173 ++++++++++++++++++--------- 3 files changed, 119 insertions(+), 61 deletions(-) diff --git a/examples/classification_save_best.rs b/examples/classification_save_best.rs index 7b364ff..7d33258 100644 --- a/examples/classification_save_best.rs +++ b/examples/classification_save_best.rs @@ -17,7 +17,7 @@ fn main() { // Load that model for use directly in SmartCore let mut buf: Vec = Vec::new(); - std::fs::File::open(&file_name) + std::fs::File::open(file_name) .and_then(|mut f| f.read_to_end(&mut buf)) .expect("Cannot load model from file."); let model: LogisticRegression> = diff --git a/src/algorithms/linear_regressor.rs b/src/algorithms/linear_regressor.rs index ac7ceb0..f2312cc 100644 --- a/src/algorithms/linear_regressor.rs +++ b/src/algorithms/linear_regressor.rs @@ -1,8 +1,9 @@ //! Linear regression algorithm. use smartcore::{ - linalg::naive::dense_matrix::DenseMatrix, linear::linear_regression::LinearRegression, - model_selection::cross_validate, model_selection::CrossValidationResult, + linalg::naive::dense_matrix::DenseMatrix, + linear::linear_regression::LinearRegression, + model_selection::{cross_validate, CrossValidationResult}, }; use crate::{Algorithm, Settings}; diff --git a/src/lib.rs b/src/lib.rs index e6ea837..c34d5c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -158,14 +158,23 @@ impl IntoLabels for Array1 { /// Trains and compares supervised models #[derive(serde::Serialize, serde::Deserialize)] pub struct SupervisedModel { + /// Settings for the model. 
settings: Settings, + /// The training data. x_train: DenseMatrix, + /// The training labels. y_train: Vec, + /// The validation data. x_val: DenseMatrix, + /// The validation labels. y_val: Vec, + /// The number of classes in the data. number_of_classes: usize, + /// The results of the model comparison. comparison: Vec, + /// The final model. metamodel: Model, + /// The preprocessing pipeline. preprocessing: ( Option>>, Option>>, @@ -243,7 +252,7 @@ impl SupervisedModel { /// ``` pub fn new_from_file(file_name: &str) -> Self { let mut buf: Vec = Vec::new(); - std::fs::File::open(&file_name) + std::fs::File::open(file_name) .and_then(|mut f| f.read_to_end(&mut buf)) .expect("Cannot load model from file."); bincode::deserialize(&buf).expect("Can not deserialize the model") @@ -297,7 +306,7 @@ impl SupervisedModel { where X: IntoFeatures, { - let x = &self.preprocess(x.to_dense_matrix().clone()); + let x = &self.preprocess(x.to_dense_matrix()); match self.settings.final_model_approach { FinalModel::None => panic!(""), FinalModel::Best => self.predict_by_model(x, &self.comparison[0]), @@ -511,14 +520,11 @@ impl SupervisedModel { )); } - match self.settings.final_model_approach { - FinalModel::Blending { - algorithm, - meta_training_fraction, - meta_testing_fraction, - } => self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction), - _ => {} - } + if let FinalModel::Blending { + algorithm, + meta_training_fraction, + meta_testing_fraction, + } = self.settings.final_model_approach { self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction) } } /// Save the supervised model to a file for later use @@ -563,20 +569,34 @@ impl SupervisedModel { /// Private functions go here impl SupervisedModel { + /// Build a new supervised model + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `y` - The output data + /// * `settings` - The settings for the model fn build(x: DenseMatrix, y: Vec, settings: Settings) -> Self 
{ Self { settings, - x_train: x.clone(), - y_train: y.clone(), + x_train: x, + number_of_classes: Self::count_classes(&y), + y_train: y, x_val: DenseMatrix::new(0, 0, vec![]), y_val: vec![], - number_of_classes: Self::count_classes(&y), comparison: vec![], preprocessing: (None, None), metamodel: Default::default(), } } + /// Train the supervised model. + /// + /// # Arguments + /// + /// * `algo` - The algorithm to use + /// * `training_fraction` - The fraction of the data to use for training + /// * `testing_fraction` - The fraction of the data to use for testing fn train_blended_model( &mut self, algo: Algorithm, @@ -625,12 +645,22 @@ impl SupervisedModel { }; } + /// Predict using all of the trained models. + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `algo` - The algorithm to use + /// + /// # Returns + /// + /// * The predicted values fn predict_blended_model(&self, x: &DenseMatrix, algo: Algorithm) -> Vec { // Make the data let mut meta_x: Vec> = Vec::new(); for i in 0..self.comparison.len() { let model = &self.comparison[i]; - meta_x.push(self.predict_by_model(&x, model)) + meta_x.push(self.predict_by_model(x, model)) } // @@ -641,49 +671,23 @@ impl SupervisedModel { algo.get_predictor()(&xdm, metamodel, &self.settings) } + /// Predict using a single model. 
+ /// + /// # Arguments + /// + /// * `x` - The input data + /// * `model` - The model to use + /// + /// # Returns + /// + /// * The predicted values fn predict_by_model(&self, x: &DenseMatrix, model: &Model) -> Vec { - let saved_model = &model.model; - match model.name { - Algorithm::Linear => LinearRegressorWrapper::predict(x, saved_model, &self.settings), - Algorithm::Lasso => LassoRegressorWrapper::predict(x, saved_model, &self.settings), - Algorithm::Ridge => RidgeRegressorWrapper::predict(x, saved_model, &self.settings), - Algorithm::ElasticNet => { - ElasticNetRegressorWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::RandomForestRegressor => { - RandomForestRegressorWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::KNNRegressor => KNNRegressorWrapper::predict(x, saved_model, &self.settings), - Algorithm::SVR => { - SupportVectorRegressorWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::DecisionTreeRegressor => { - DecisionTreeRegressorWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::LogisticRegression => { - LogisticRegressionWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::RandomForestClassifier => { - RandomForestClassifierWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::DecisionTreeClassifier => { - DecisionTreeClassifierWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::KNNClassifier => { - KNNClassifierWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::SVC => { - SupportVectorClassifierWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::GaussianNaiveBayes => { - GaussianNaiveBayesClassifierWrapper::predict(x, saved_model, &self.settings) - } - Algorithm::CategoricalNaiveBayes => { - CategoricalNaiveBayesClassifierWrapper::predict(x, saved_model, &self.settings) - } - } + model.name.get_predictor()(x, &model.model, &self.settings) } + /// Get interaction features for the data. 
+ /// + /// # Arguments fn interaction_features(mut x: DenseMatrix) -> DenseMatrix { let (_, width) = x.shape(); for i in 0..width { @@ -696,10 +700,20 @@ impl SupervisedModel { x } + /// Get polynomial features for the data. + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `order` - The order of the polynomial + /// + /// # Returns + /// + /// * The data with polynomial features fn polynomial_features(mut x: DenseMatrix, order: usize) -> DenseMatrix { let (height, width) = x.shape(); for n in 2..=order { - let combinations = (0..width).into_iter().combinations_with_replacement(n); + let combinations = (0..width).combinations_with_replacement(n); for combo in combinations { let mut feature = vec![1.0; height]; for column in combo { @@ -712,6 +726,12 @@ impl SupervisedModel { x } + /// Train PCA on the data for preprocessing. + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `n` - The number of components to use fn train_pca(&mut self, x: DenseMatrix, n: usize) { let pca = PCA::fit( &x, @@ -723,7 +743,12 @@ impl SupervisedModel { self.preprocessing.0 = Some(pca); } - fn pca_features(&self, x: DenseMatrix, n: usize) -> DenseMatrix { + /// Get PCA features for the data using the trained PCA preprocessor. + /// + /// # Arguments + /// + /// * `x` - The input data + fn pca_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { self.preprocessing .0 .as_ref() @@ -732,12 +757,19 @@ impl SupervisedModel { .unwrap() } + /// Train SVD on the data for preprocessing. + /// + /// # Arguments + /// + /// * `x` - The input data + /// * `n` - The number of components to use fn train_svd(&mut self, x: DenseMatrix, n: usize) { let svd = SVD::fit(&x, SVDParameters::default().with_n_components(n)).unwrap(); self.preprocessing.1 = Some(svd); } - fn svd_features(&self, x: DenseMatrix, n: usize) -> DenseMatrix { + /// Get SVD features for the data. 
+ fn svd_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { self.preprocessing .1 .as_ref() @@ -746,6 +778,15 @@ impl SupervisedModel { .unwrap() } + /// Preprocess the data. + /// + /// # Arguments + /// + /// * `x` - The input data + /// + /// # Returns + /// + /// * The preprocessed data fn preprocess(&self, x: DenseMatrix) -> DenseMatrix { match self.settings.preprocessing { PreProcessing::None => x, @@ -762,13 +803,23 @@ impl SupervisedModel { } } + /// Count the number of classes in the data. + /// + /// # Arguments + /// + /// * `y` - The data to count the classes in + /// + /// # Returns + /// + /// * The number of classes fn count_classes(y: &Vec) -> usize { let mut sorted_targets = y.clone(); - sorted_targets.sort_by(|a, b| a.partial_cmp(&b).unwrap_or(Equal)); + sorted_targets.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Equal)); sorted_targets.dedup(); sorted_targets.len() } + /// Record a model in the comparison. fn record_model(&mut self, model: (CrossValidationResult, Algorithm, Duration, Vec)) { self.comparison.push(Model { score: model.0, @@ -779,6 +830,7 @@ impl SupervisedModel { self.sort(); } + /// Sort the models in the comparison by their mean test scores. fn sort(&mut self) { self.comparison.sort_by(|a, b| { a.score @@ -855,10 +907,14 @@ impl Display for SupervisedModel { /// This contains the results of a single model #[derive(serde::Serialize, serde::Deserialize)] struct Model { + /// The cross validation score of the model #[serde(with = "CrossValidationResultDef")] score: CrossValidationResult, + /// The algorithm used name: Algorithm, + /// The time it took to train the model duration: Duration, + /// What is this? 
TODO model: Vec, } @@ -876,6 +932,7 @@ impl Default for Model { } } +/// This is a wrapper for the CrossValidationResult #[derive(serde::Serialize, serde::Deserialize)] #[serde(remote = "CrossValidationResult::")] struct CrossValidationResultDef { From f18d6424ce8f3b88b7e181ffbc3b558a0bf50781 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 12:46:02 -0400 Subject: [PATCH 05/17] chore: ran cargo fmt --- .../categorical_naive_bayes_classifier.rs | 2 +- src/algorithms/decision_tree_classifier.rs | 2 +- src/algorithms/decision_tree_regressor.rs | 2 +- src/algorithms/elastic_net_regressor.rs | 2 +- .../gaussian_naive_bayes_classifier.rs | 2 +- src/algorithms/knn_classifier.rs | 5 +- src/algorithms/knn_regressor.rs | 5 +- src/algorithms/lasso_regressor.rs | 2 +- src/algorithms/linear_regressor.rs | 2 +- src/algorithms/logistic_regression.rs | 2 +- src/algorithms/mod.rs | 21 +++--- src/algorithms/random_forest_classifier.rs | 2 +- src/algorithms/random_forest_regressor.rs | 2 +- src/algorithms/ridge_regressor.rs | 2 +- src/algorithms/support_vector_classifier.rs | 2 +- src/algorithms/support_vector_regressor.rs | 2 +- src/lib.rs | 71 ++++++++++--------- src/settings/mod.rs | 8 +-- src/settings/settings_struct.rs | 10 +-- src/utils.rs | 2 +- 20 files changed, 74 insertions(+), 74 deletions(-) diff --git a/src/algorithms/categorical_naive_bayes_classifier.rs b/src/algorithms/categorical_naive_bayes_classifier.rs index 04a1276..2200e57 100644 --- a/src/algorithms/categorical_naive_bayes_classifier.rs +++ b/src/algorithms/categorical_naive_bayes_classifier.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Categorical Naive Bayes Classifier. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#categorical-naive-bayes) /// for a more in-depth description of the algorithm. 
pub(crate) struct CategoricalNaiveBayesClassifierWrapper {} diff --git a/src/algorithms/decision_tree_classifier.rs b/src/algorithms/decision_tree_classifier.rs index c1e609a..86a3b0d 100644 --- a/src/algorithms/decision_tree_classifier.rs +++ b/src/algorithms/decision_tree_classifier.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Decision Tree Classifier. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#classification) /// for a more in-depth description of the algorithm. pub(crate) struct DecisionTreeClassifierWrapper {} diff --git a/src/algorithms/decision_tree_regressor.rs b/src/algorithms/decision_tree_regressor.rs index 0b8b5d5..1482489 100644 --- a/src/algorithms/decision_tree_regressor.rs +++ b/src/algorithms/decision_tree_regressor.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Decision Tree Regressor. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#regression) /// for a more in-depth description of the algorithm. pub(crate) struct DecisionTreeRegressorWrapper {} diff --git a/src/algorithms/elastic_net_regressor.rs b/src/algorithms/elastic_net_regressor.rs index fc6e4e3..dba8dd3 100644 --- a/src/algorithms/elastic_net_regressor.rs +++ b/src/algorithms/elastic_net_regressor.rs @@ -8,7 +8,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Elastic Net Regressor. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#elastic-net) /// for a more in-depth description of the algorithm. 
pub(crate) struct ElasticNetRegressorWrapper {} diff --git a/src/algorithms/gaussian_naive_bayes_classifier.rs b/src/algorithms/gaussian_naive_bayes_classifier.rs index 3852df3..e6ea887 100644 --- a/src/algorithms/gaussian_naive_bayes_classifier.rs +++ b/src/algorithms/gaussian_naive_bayes_classifier.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Gaussian Naive Bayes Classifier. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#gaussian-naive-bayes) /// for a more in-depth description of the algorithm. pub(crate) struct GaussianNaiveBayesClassifierWrapper {} diff --git a/src/algorithms/knn_classifier.rs b/src/algorithms/knn_classifier.rs index 27cb497..d22dd03 100644 --- a/src/algorithms/knn_classifier.rs +++ b/src/algorithms/knn_classifier.rs @@ -15,7 +15,7 @@ use smartcore::{ use crate::{Algorithm, Distance, Settings}; /// The KNN Classifier. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#classification) /// for a more in-depth description of the algorithm. pub(crate) struct KNNClassifierWrapper {} @@ -305,8 +305,7 @@ impl super::ModelWrapper for KNNClassifierWrapper { model.predict(x).unwrap() } Distance::Hamming => { - let model: KNNClassifier = - bincode::deserialize(final_model).unwrap(); + let model: KNNClassifier = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/knn_regressor.rs b/src/algorithms/knn_regressor.rs index 23331be..913f85d 100644 --- a/src/algorithms/knn_regressor.rs +++ b/src/algorithms/knn_regressor.rs @@ -16,7 +16,7 @@ use smartcore::{ use crate::{Algorithm, Distance, Settings}; /// The KNN Regressor. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#regression) /// for a more in-depth description of the algorithm. 
pub(crate) struct KNNRegressorWrapper {} @@ -313,8 +313,7 @@ impl super::ModelWrapper for KNNRegressorWrapper { model.predict(x).unwrap() } Distance::Hamming => { - let model: KNNRegressor = - bincode::deserialize(final_model).unwrap(); + let model: KNNRegressor = bincode::deserialize(final_model).unwrap(); model.predict(x).unwrap() } } diff --git a/src/algorithms/lasso_regressor.rs b/src/algorithms/lasso_regressor.rs index d6eeeef..2d75a75 100644 --- a/src/algorithms/lasso_regressor.rs +++ b/src/algorithms/lasso_regressor.rs @@ -8,7 +8,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The LASSO regression algorithm. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#lasso) /// for a more in-depth description of the algorithm. pub(crate) struct LassoRegressorWrapper {} diff --git a/src/algorithms/linear_regressor.rs b/src/algorithms/linear_regressor.rs index f2312cc..e26c143 100644 --- a/src/algorithms/linear_regressor.rs +++ b/src/algorithms/linear_regressor.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Linear regression algorithm. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares) /// for a more in-depth description of the algorithm. pub(crate) struct LinearRegressorWrapper {} diff --git a/src/algorithms/logistic_regression.rs b/src/algorithms/logistic_regression.rs index afc1447..823a80b 100644 --- a/src/algorithms/logistic_regression.rs +++ b/src/algorithms/logistic_regression.rs @@ -7,7 +7,7 @@ use smartcore::{ }; /// The Logistic Regression algorithm. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression) /// for a more in-depth description of the algorithm. 
pub(crate) struct LogisticRegressionWrapper {} diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index e45a19e..60c9edc 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -1,10 +1,10 @@ //! # Algorithms -//! +//! //! This module contains the wrappers for the algorithms provided by this crate. //! The algorithms are all available through the common interface of the `ModelWrapper` trait. -//! +//! //! The available algorithms include: -//! +//! //! * Classification algorithms: //! - Logistic Regression //! - Random Forest Classifier @@ -13,7 +13,7 @@ //! - Gaussian Naive Bayes Classifier //! - Categorical Naive Bayes Classifier //! - Support Vector Classifier -//! +//! //! * Regression algorithms: //! - Linear Regression //! - Elastic Net Regressor @@ -23,7 +23,7 @@ //! - Random Forest Regressor //! - Decision Tree Regressor //! - Support Vector Regressor -//! +//! mod linear_regressor; pub(crate) use linear_regressor::LinearRegressorWrapper; @@ -80,15 +80,15 @@ use std::time::{Duration, Instant}; /// Trait for wrapping models pub trait ModelWrapper { /// Perform cross-validation and return the results - /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `y` - The output data /// * `settings` - The settings for the model - /// + /// /// # Returns - /// + /// /// * `CrossValidationResult` - The cross-validation results /// * `Algorithm` - The algorithm used /// * `Duration` - The time taken to perform the cross-validation @@ -113,6 +113,7 @@ pub trait ModelWrapper { } /// Perform cross-validation + #[allow(clippy::ptr_arg)] fn cv( x: &DenseMatrix, y: &Vec, @@ -120,8 +121,10 @@ pub trait ModelWrapper { ) -> (CrossValidationResult, Algorithm); /// Train a model + #[allow(clippy::ptr_arg)] fn train(x: &DenseMatrix, y: &Vec, settings: &Settings) -> Vec; /// Perform a prediction + #[allow(clippy::ptr_arg)] fn predict(x: &DenseMatrix, final_model: &Vec, settings: &Settings) -> Vec; } diff --git 
a/src/algorithms/random_forest_classifier.rs b/src/algorithms/random_forest_classifier.rs index 46ebc4b..f4e0886 100644 --- a/src/algorithms/random_forest_classifier.rs +++ b/src/algorithms/random_forest_classifier.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Random Forest Classifier. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) /// for a more in-depth description of the algorithm. pub(crate) struct RandomForestClassifierWrapper {} diff --git a/src/algorithms/random_forest_regressor.rs b/src/algorithms/random_forest_regressor.rs index 1df39f2..828e298 100644 --- a/src/algorithms/random_forest_regressor.rs +++ b/src/algorithms/random_forest_regressor.rs @@ -9,7 +9,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Random Forest Regressor. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) /// for a more in-depth description of the algorithm. pub(crate) struct RandomForestRegressorWrapper {} diff --git a/src/algorithms/ridge_regressor.rs b/src/algorithms/ridge_regressor.rs index 5e161ce..ed434d9 100644 --- a/src/algorithms/ridge_regressor.rs +++ b/src/algorithms/ridge_regressor.rs @@ -8,7 +8,7 @@ use smartcore::{ use crate::{Algorithm, Settings}; /// The Ridge regression algorithm. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ridge-regression) /// for a more in-depth description of the algorithm. pub(crate) struct RidgeRegressorWrapper {} diff --git a/src/algorithms/support_vector_classifier.rs b/src/algorithms/support_vector_classifier.rs index 2917e24..939857c 100644 --- a/src/algorithms/support_vector_classifier.rs +++ b/src/algorithms/support_vector_classifier.rs @@ -13,7 +13,7 @@ use smartcore::{ use crate::{Algorithm, Kernel, Settings}; /// The Support Vector Classifier. 
-/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-classification) /// for a more in-depth description of the algorithm. pub(crate) struct SupportVectorClassifierWrapper {} diff --git a/src/algorithms/support_vector_regressor.rs b/src/algorithms/support_vector_regressor.rs index 0c8f536..0274713 100644 --- a/src/algorithms/support_vector_regressor.rs +++ b/src/algorithms/support_vector_regressor.rs @@ -13,7 +13,7 @@ use smartcore::{ use crate::{Algorithm, Kernel, Settings}; /// The Support Vector Regressor. -/// +/// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-regression) /// for a more in-depth description of the algorithm. pub(crate) struct SupportVectorRegressorWrapper {} diff --git a/src/lib.rs b/src/lib.rs index c34d5c5..a1d124e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -524,7 +524,10 @@ impl SupervisedModel { algorithm, meta_training_fraction, meta_testing_fraction, - } = self.settings.final_model_approach { self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction) } + } = self.settings.final_model_approach + { + self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction) + } } /// Save the supervised model to a file for later use @@ -570,9 +573,9 @@ impl SupervisedModel { /// Private functions go here impl SupervisedModel { /// Build a new supervised model - /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `y` - The output data /// * `settings` - The settings for the model @@ -591,9 +594,9 @@ impl SupervisedModel { } /// Train the supervised model. - /// + /// /// # Arguments - /// + /// /// * `algo` - The algorithm to use /// * `training_fraction` - The fraction of the data to use for training /// * `testing_fraction` - The fraction of the data to use for testing @@ -646,14 +649,14 @@ impl SupervisedModel { } /// Predict using all of the trained models. 
- /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `algo` - The algorithm to use - /// + /// /// # Returns - /// + /// /// * The predicted values fn predict_blended_model(&self, x: &DenseMatrix, algo: Algorithm) -> Vec { // Make the data @@ -672,21 +675,21 @@ impl SupervisedModel { } /// Predict using a single model. - /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `model` - The model to use - /// + /// /// # Returns - /// + /// /// * The predicted values fn predict_by_model(&self, x: &DenseMatrix, model: &Model) -> Vec { model.name.get_predictor()(x, &model.model, &self.settings) } /// Get interaction features for the data. - /// + /// /// # Arguments fn interaction_features(mut x: DenseMatrix) -> DenseMatrix { let (_, width) = x.shape(); @@ -701,14 +704,14 @@ impl SupervisedModel { } /// Get polynomial features for the data. - /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `order` - The order of the polynomial - /// + /// /// # Returns - /// + /// /// * The data with polynomial features fn polynomial_features(mut x: DenseMatrix, order: usize) -> DenseMatrix { let (height, width) = x.shape(); @@ -727,9 +730,9 @@ impl SupervisedModel { } /// Train PCA on the data for preprocessing. - /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `n` - The number of components to use fn train_pca(&mut self, x: DenseMatrix, n: usize) { @@ -744,9 +747,9 @@ impl SupervisedModel { } /// Get PCA features for the data using the trained PCA preprocessor. - /// + /// /// # Arguments - /// + /// /// * `x` - The input data fn pca_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { self.preprocessing @@ -758,9 +761,9 @@ impl SupervisedModel { } /// Train SVD on the data for preprocessing. 
- /// + /// /// # Arguments - /// + /// /// * `x` - The input data /// * `n` - The number of components to use fn train_svd(&mut self, x: DenseMatrix, n: usize) { @@ -779,13 +782,13 @@ impl SupervisedModel { } /// Preprocess the data. - /// + /// /// # Arguments - /// + /// /// * `x` - The input data - /// + /// /// # Returns - /// + /// /// * The preprocessed data fn preprocess(&self, x: DenseMatrix) -> DenseMatrix { match self.settings.preprocessing { @@ -804,16 +807,16 @@ impl SupervisedModel { } /// Count the number of classes in the data. - /// + /// /// # Arguments - /// + /// /// * `y` - The data to count the classes in - /// + /// /// # Returns - /// + /// /// * The number of classes - fn count_classes(y: &Vec) -> usize { - let mut sorted_targets = y.clone(); + fn count_classes(y: &[f32]) -> usize { + let mut sorted_targets = y.to_vec(); sorted_targets.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Equal)); sorted_targets.dedup(); sorted_targets.len() diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 7d5659c..c456fcd 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -264,9 +264,7 @@ pub enum Algorithm { impl Algorithm { /// Get the `predict` method for the underlying algorithm. - pub(crate) fn get_predictor( - &self, - ) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { + pub(crate) fn get_predictor(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { Algorithm::Linear => LinearRegressorWrapper::predict, Algorithm::Lasso => LassoRegressorWrapper::predict, @@ -287,9 +285,7 @@ impl Algorithm { } /// Get the `train` method for the underlying algorithm. 
- pub(crate) fn get_trainer( - &self, - ) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { + pub(crate) fn get_trainer(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { Algorithm::Linear => LinearRegressorWrapper::train, Algorithm::Lasso => LassoRegressorWrapper::train, diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index 013097a..c610469 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -27,7 +27,7 @@ use std::fmt::{Display, Formatter}; use std::io::{Read, Write}; /// Settings for supervised models -/// +/// /// Any algorithms in the `skiplist` member will be skipped during training. #[derive(serde::Serialize, serde::Deserialize)] pub struct Settings { @@ -822,13 +822,13 @@ impl Display for Settings { .add_row(vec![ " Search algorithm", &print_knn_search_algorithm( - &self.knn_regressor_settings.as_ref().unwrap().algorithm + &self.knn_regressor_settings.as_ref().unwrap().algorithm, ), ]) .add_row(vec![ " Weighting function", &print_knn_weight_function( - &self.knn_regressor_settings.as_ref().unwrap().weight + &self.knn_regressor_settings.as_ref().unwrap().weight, ), ]) .add_row(vec![ @@ -946,13 +946,13 @@ impl Display for Settings { .add_row(vec![ " Search algorithm", &print_knn_search_algorithm( - &self.knn_classifier_settings.as_ref().unwrap().algorithm + &self.knn_classifier_settings.as_ref().unwrap().algorithm, ), ]) .add_row(vec![ " Weighting function", &print_knn_weight_function( - &self.knn_classifier_settings.as_ref().unwrap().weight + &self.knn_classifier_settings.as_ref().unwrap().weight, ), ]) .add_row(vec![ diff --git a/src/utils.rs b/src/utils.rs index 2f19e67..8a28f25 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -100,7 +100,7 @@ impl Display for Distance { } /// Function to do element-wise multiplication fo two vectors -pub fn elementwise_multiply(v1: &Vec, v2: &Vec) -> Vec { +pub fn elementwise_multiply(v1: &[f32], v2: &[f32]) -> Vec { v1.iter().zip(v2).map(|(&i1, 
&i2)| i1 * i2).collect() } From d5d4a3fd538c8e553323be2ff9eb3bb1094d80ee Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:03:56 -0400 Subject: [PATCH 06/17] chore: changed `pub(crate)` to `pub` inside private modules --- .../categorical_naive_bayes_classifier.rs | 2 +- src/algorithms/decision_tree_classifier.rs | 2 +- src/algorithms/decision_tree_regressor.rs | 2 +- src/algorithms/elastic_net_regressor.rs | 2 +- .../gaussian_naive_bayes_classifier.rs | 2 +- src/algorithms/knn_classifier.rs | 2 +- src/algorithms/knn_regressor.rs | 2 +- src/algorithms/lasso_regressor.rs | 2 +- src/algorithms/linear_regressor.rs | 2 +- src/algorithms/logistic_regression.rs | 2 +- src/algorithms/mod.rs | 31 +++++++++---------- src/algorithms/random_forest_classifier.rs | 2 +- src/algorithms/random_forest_regressor.rs | 2 +- src/algorithms/ridge_regressor.rs | 2 +- src/algorithms/support_vector_classifier.rs | 2 +- src/algorithms/support_vector_regressor.rs | 2 +- src/lib.rs | 5 ++- src/utils.rs | 8 ++--- 18 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/algorithms/categorical_naive_bayes_classifier.rs b/src/algorithms/categorical_naive_bayes_classifier.rs index 2200e57..cad8fef 100644 --- a/src/algorithms/categorical_naive_bayes_classifier.rs +++ b/src/algorithms/categorical_naive_bayes_classifier.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#categorical-naive-bayes) /// for a more in-depth description of the algorithm. 
-pub(crate) struct CategoricalNaiveBayesClassifierWrapper {} +pub struct CategoricalNaiveBayesClassifierWrapper {} impl super::ModelWrapper for CategoricalNaiveBayesClassifierWrapper { fn cv( diff --git a/src/algorithms/decision_tree_classifier.rs b/src/algorithms/decision_tree_classifier.rs index 86a3b0d..fa90225 100644 --- a/src/algorithms/decision_tree_classifier.rs +++ b/src/algorithms/decision_tree_classifier.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#classification) /// for a more in-depth description of the algorithm. -pub(crate) struct DecisionTreeClassifierWrapper {} +pub struct DecisionTreeClassifierWrapper {} impl super::ModelWrapper for DecisionTreeClassifierWrapper { fn cv( diff --git a/src/algorithms/decision_tree_regressor.rs b/src/algorithms/decision_tree_regressor.rs index 1482489..4bb6dc2 100644 --- a/src/algorithms/decision_tree_regressor.rs +++ b/src/algorithms/decision_tree_regressor.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/tree.html#regression) /// for a more in-depth description of the algorithm. -pub(crate) struct DecisionTreeRegressorWrapper {} +pub struct DecisionTreeRegressorWrapper {} impl super::ModelWrapper for DecisionTreeRegressorWrapper { fn cv( diff --git a/src/algorithms/elastic_net_regressor.rs b/src/algorithms/elastic_net_regressor.rs index dba8dd3..86eff40 100644 --- a/src/algorithms/elastic_net_regressor.rs +++ b/src/algorithms/elastic_net_regressor.rs @@ -11,7 +11,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#elastic-net) /// for a more in-depth description of the algorithm. 
-pub(crate) struct ElasticNetRegressorWrapper {} +pub struct ElasticNetRegressorWrapper {} impl super::ModelWrapper for ElasticNetRegressorWrapper { fn cv( diff --git a/src/algorithms/gaussian_naive_bayes_classifier.rs b/src/algorithms/gaussian_naive_bayes_classifier.rs index e6ea887..a638278 100644 --- a/src/algorithms/gaussian_naive_bayes_classifier.rs +++ b/src/algorithms/gaussian_naive_bayes_classifier.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/naive_bayes.html#gaussian-naive-bayes) /// for a more in-depth description of the algorithm. -pub(crate) struct GaussianNaiveBayesClassifierWrapper {} +pub struct GaussianNaiveBayesClassifierWrapper {} impl super::ModelWrapper for GaussianNaiveBayesClassifierWrapper { fn cv( diff --git a/src/algorithms/knn_classifier.rs b/src/algorithms/knn_classifier.rs index d22dd03..f8f48c5 100644 --- a/src/algorithms/knn_classifier.rs +++ b/src/algorithms/knn_classifier.rs @@ -18,7 +18,7 @@ use crate::{Algorithm, Distance, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#classification) /// for a more in-depth description of the algorithm. -pub(crate) struct KNNClassifierWrapper {} +pub struct KNNClassifierWrapper {} impl super::ModelWrapper for KNNClassifierWrapper { fn cv( diff --git a/src/algorithms/knn_regressor.rs b/src/algorithms/knn_regressor.rs index 913f85d..7672db8 100644 --- a/src/algorithms/knn_regressor.rs +++ b/src/algorithms/knn_regressor.rs @@ -19,7 +19,7 @@ use crate::{Algorithm, Distance, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/neighbors.html#regression) /// for a more in-depth description of the algorithm. 
-pub(crate) struct KNNRegressorWrapper {} +pub struct KNNRegressorWrapper {} impl super::ModelWrapper for KNNRegressorWrapper { fn cv( diff --git a/src/algorithms/lasso_regressor.rs b/src/algorithms/lasso_regressor.rs index 2d75a75..9e546c8 100644 --- a/src/algorithms/lasso_regressor.rs +++ b/src/algorithms/lasso_regressor.rs @@ -11,7 +11,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#lasso) /// for a more in-depth description of the algorithm. -pub(crate) struct LassoRegressorWrapper {} +pub struct LassoRegressorWrapper {} impl super::ModelWrapper for LassoRegressorWrapper { fn cv( diff --git a/src/algorithms/linear_regressor.rs b/src/algorithms/linear_regressor.rs index e26c143..97a42ad 100644 --- a/src/algorithms/linear_regressor.rs +++ b/src/algorithms/linear_regressor.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares) /// for a more in-depth description of the algorithm. -pub(crate) struct LinearRegressorWrapper {} +pub struct LinearRegressorWrapper {} impl super::ModelWrapper for LinearRegressorWrapper { fn cv( diff --git a/src/algorithms/logistic_regression.rs b/src/algorithms/logistic_regression.rs index 823a80b..22b65b6 100644 --- a/src/algorithms/logistic_regression.rs +++ b/src/algorithms/logistic_regression.rs @@ -10,7 +10,7 @@ use smartcore::{ /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression) /// for a more in-depth description of the algorithm. -pub(crate) struct LogisticRegressionWrapper {} +pub struct LogisticRegressionWrapper {} impl super::ModelWrapper for LogisticRegressionWrapper { fn cv( diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index 60c9edc..1eb73de 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -23,52 +23,51 @@ //! 
- Random Forest Regressor //! - Decision Tree Regressor //! - Support Vector Regressor -//! mod linear_regressor; -pub(crate) use linear_regressor::LinearRegressorWrapper; +pub use linear_regressor::LinearRegressorWrapper; mod elastic_net_regressor; -pub(crate) use elastic_net_regressor::ElasticNetRegressorWrapper; +pub use elastic_net_regressor::ElasticNetRegressorWrapper; mod lasso_regressor; -pub(crate) use lasso_regressor::LassoRegressorWrapper; +pub use lasso_regressor::LassoRegressorWrapper; mod knn_regressor; -pub(crate) use knn_regressor::KNNRegressorWrapper; +pub use knn_regressor::KNNRegressorWrapper; mod ridge_regressor; -pub(crate) use ridge_regressor::RidgeRegressorWrapper; +pub use ridge_regressor::RidgeRegressorWrapper; mod logistic_regression; -pub(crate) use logistic_regression::LogisticRegressionWrapper; +pub use logistic_regression::LogisticRegressionWrapper; mod random_forest_classifier; -pub(crate) use random_forest_classifier::RandomForestClassifierWrapper; +pub use random_forest_classifier::RandomForestClassifierWrapper; mod random_forest_regressor; -pub(crate) use random_forest_regressor::RandomForestRegressorWrapper; +pub use random_forest_regressor::RandomForestRegressorWrapper; mod knn_classifier; -pub(crate) use knn_classifier::KNNClassifierWrapper; +pub use knn_classifier::KNNClassifierWrapper; mod decision_tree_classifier; -pub(crate) use decision_tree_classifier::DecisionTreeClassifierWrapper; +pub use decision_tree_classifier::DecisionTreeClassifierWrapper; mod decision_tree_regressor; -pub(crate) use decision_tree_regressor::DecisionTreeRegressorWrapper; +pub use decision_tree_regressor::DecisionTreeRegressorWrapper; mod gaussian_naive_bayes_classifier; -pub(crate) use gaussian_naive_bayes_classifier::GaussianNaiveBayesClassifierWrapper; +pub use gaussian_naive_bayes_classifier::GaussianNaiveBayesClassifierWrapper; mod categorical_naive_bayes_classifier; -pub(crate) use 
categorical_naive_bayes_classifier::CategoricalNaiveBayesClassifierWrapper; +pub use categorical_naive_bayes_classifier::CategoricalNaiveBayesClassifierWrapper; mod support_vector_classifier; -pub(crate) use support_vector_classifier::SupportVectorClassifierWrapper; +pub use support_vector_classifier::SupportVectorClassifierWrapper; mod support_vector_regressor; -pub(crate) use support_vector_regressor::SupportVectorRegressorWrapper; +pub use support_vector_regressor::SupportVectorRegressorWrapper; use crate::{Algorithm, Settings}; use smartcore::linalg::naive::dense_matrix::DenseMatrix; diff --git a/src/algorithms/random_forest_classifier.rs b/src/algorithms/random_forest_classifier.rs index f4e0886..09eb111 100644 --- a/src/algorithms/random_forest_classifier.rs +++ b/src/algorithms/random_forest_classifier.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) /// for a more in-depth description of the algorithm. -pub(crate) struct RandomForestClassifierWrapper {} +pub struct RandomForestClassifierWrapper {} impl super::ModelWrapper for RandomForestClassifierWrapper { fn cv( diff --git a/src/algorithms/random_forest_regressor.rs b/src/algorithms/random_forest_regressor.rs index 828e298..54642ba 100644 --- a/src/algorithms/random_forest_regressor.rs +++ b/src/algorithms/random_forest_regressor.rs @@ -12,7 +12,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/ensemble.html#random-forests) /// for a more in-depth description of the algorithm. 
-pub(crate) struct RandomForestRegressorWrapper {} +pub struct RandomForestRegressorWrapper {} impl super::ModelWrapper for RandomForestRegressorWrapper { fn cv( diff --git a/src/algorithms/ridge_regressor.rs b/src/algorithms/ridge_regressor.rs index ed434d9..6ce0b25 100644 --- a/src/algorithms/ridge_regressor.rs +++ b/src/algorithms/ridge_regressor.rs @@ -11,7 +11,7 @@ use crate::{Algorithm, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/linear_model.html#ridge-regression) /// for a more in-depth description of the algorithm. -pub(crate) struct RidgeRegressorWrapper {} +pub struct RidgeRegressorWrapper {} impl super::ModelWrapper for RidgeRegressorWrapper { fn cv( diff --git a/src/algorithms/support_vector_classifier.rs b/src/algorithms/support_vector_classifier.rs index 939857c..4d76f06 100644 --- a/src/algorithms/support_vector_classifier.rs +++ b/src/algorithms/support_vector_classifier.rs @@ -16,7 +16,7 @@ use crate::{Algorithm, Kernel, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-classification) /// for a more in-depth description of the algorithm. -pub(crate) struct SupportVectorClassifierWrapper {} +pub struct SupportVectorClassifierWrapper {} impl super::ModelWrapper for SupportVectorClassifierWrapper { fn cv( diff --git a/src/algorithms/support_vector_regressor.rs b/src/algorithms/support_vector_regressor.rs index 0274713..a34a557 100644 --- a/src/algorithms/support_vector_regressor.rs +++ b/src/algorithms/support_vector_regressor.rs @@ -16,7 +16,7 @@ use crate::{Algorithm, Kernel, Settings}; /// /// See [scikit-learn's user guide](https://scikit-learn.org/stable/modules/svm.html#svm-regression) /// for a more in-depth description of the algorithm. 
-pub(crate) struct SupportVectorRegressorWrapper {} +pub struct SupportVectorRegressorWrapper {} impl super::ModelWrapper for SupportVectorRegressorWrapper { fn cv( diff --git a/src/lib.rs b/src/lib.rs index a1d124e..b706770 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,7 @@ -#![warn(clippy::all)] +#![warn( + clippy::all, + clippy::nursery, +)] #![warn(missing_docs)] #![warn(rustdoc::missing_doc_code_examples)] #![warn(clippy::missing_docs_in_private_items)] diff --git a/src/utils.rs b/src/utils.rs index 8a28f25..026c475 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,7 +4,7 @@ use smartcore::{algorithm::neighbour::KNNAlgorithmName, neighbors::KNNWeightFunc use std::fmt::{Debug, Display, Formatter}; /// Convert an Option to a String for printing in display mode. -pub(crate) fn print_option(x: Option) -> String { +pub fn print_option(x: Option) -> String { match x { None => "None".to_string(), Some(y) => format!("{y}"), @@ -12,7 +12,7 @@ pub(crate) fn print_option(x: Option) -> String { } /// Convert an Option to a String for printing in debug mode. -pub(crate) fn debug_option(x: Option) -> String { +pub fn debug_option(x: Option) -> String { match x { None => "None".to_string(), Some(y) => format!("{y:#?}"), @@ -20,7 +20,7 @@ pub(crate) fn debug_option(x: Option) -> String { } /// Get the name for a knn weight function. -pub(crate) fn print_knn_weight_function(f: &KNNWeightFunction) -> String { +pub fn print_knn_weight_function(f: &KNNWeightFunction) -> String { match f { KNNWeightFunction::Uniform => "Uniform".to_string(), KNNWeightFunction::Distance => "Distance".to_string(), @@ -28,7 +28,7 @@ pub(crate) fn print_knn_weight_function(f: &KNNWeightFunction) -> String { } /// Get the name for a knn search algorithm. 
-pub(crate) fn print_knn_search_algorithm(a: &KNNAlgorithmName) -> String { +pub fn print_knn_search_algorithm(a: &KNNAlgorithmName) -> String { match a { KNNAlgorithmName::LinearSearch => "Linear Search".to_string(), KNNAlgorithmName::CoverTree => "Cover Tree".to_string(), From bb8d638697f5f27edd11f71e33a9a375800289f9 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:06:38 -0400 Subject: [PATCH 07/17] chore: using `Self` keyword where applicable --- src/lib.rs | 10 ++- src/settings/mod.rs | 110 ++++++++++++++++---------------- src/settings/settings_struct.rs | 12 ++-- src/utils.rs | 18 +++--- 4 files changed, 74 insertions(+), 76 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b706770..7d68048 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -239,7 +239,7 @@ impl SupervisedModel { D: IntoSupervisedData, { let (x, y) = data.to_supervised_data(); - SupervisedModel::build(x, y, settings) + Self::build(x, y, settings) } /// Load the supervised model from a file saved previously @@ -796,10 +796,8 @@ impl SupervisedModel { fn preprocess(&self, x: DenseMatrix) -> DenseMatrix { match self.settings.preprocessing { PreProcessing::None => x, - PreProcessing::AddInteractions => SupervisedModel::interaction_features(x), - PreProcessing::AddPolynomial { order } => { - SupervisedModel::polynomial_features(x, order) - } + PreProcessing::AddInteractions => Self::interaction_features(x), + PreProcessing::AddPolynomial { order } => Self::polynomial_features(x, order), PreProcessing::ReplaceWithPCA { number_of_components, } => self.pca_features(x, number_of_components), @@ -926,7 +924,7 @@ struct Model { impl Default for Model { fn default() -> Self { - Model { + Self { score: CrossValidationResult { test_score: vec![], train_score: vec![], diff --git a/src/settings/mod.rs b/src/settings/mod.rs index c456fcd..802c89b 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -218,11 +218,11 @@ pub enum Metric { impl Display for Metric { fn fmt(&self, f: &mut 
Formatter<'_>) -> std::fmt::Result { match self { - Metric::RSquared => write!(f, "R^2"), - Metric::MeanAbsoluteError => write!(f, "MAE"), - Metric::MeanSquaredError => write!(f, "MSE"), - Metric::Accuracy => write!(f, "Accuracy"), - Metric::None => panic!("A metric must be set."), + Self::RSquared => write!(f, "R^2"), + Self::MeanAbsoluteError => write!(f, "MAE"), + Self::MeanSquaredError => write!(f, "MSE"), + Self::Accuracy => write!(f, "Accuracy"), + Self::None => panic!("A metric must be set."), } } } @@ -266,42 +266,42 @@ impl Algorithm { /// Get the `predict` method for the underlying algorithm. pub(crate) fn get_predictor(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { - Algorithm::Linear => LinearRegressorWrapper::predict, - Algorithm::Lasso => LassoRegressorWrapper::predict, - Algorithm::Ridge => RidgeRegressorWrapper::predict, - Algorithm::ElasticNet => ElasticNetRegressorWrapper::predict, - Algorithm::RandomForestRegressor => RandomForestRegressorWrapper::predict, - Algorithm::KNNRegressor => KNNRegressorWrapper::predict, - Algorithm::SVR => SupportVectorRegressorWrapper::predict, - Algorithm::DecisionTreeRegressor => DecisionTreeRegressorWrapper::predict, - Algorithm::LogisticRegression => LogisticRegressionWrapper::predict, - Algorithm::RandomForestClassifier => RandomForestClassifierWrapper::predict, - Algorithm::DecisionTreeClassifier => DecisionTreeClassifierWrapper::predict, - Algorithm::KNNClassifier => KNNClassifierWrapper::predict, - Algorithm::SVC => SupportVectorClassifierWrapper::predict, - Algorithm::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::predict, - Algorithm::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::predict, + Self::Linear => LinearRegressorWrapper::predict, + Self::Lasso => LassoRegressorWrapper::predict, + Self::Ridge => RidgeRegressorWrapper::predict, + Self::ElasticNet => ElasticNetRegressorWrapper::predict, + Self::RandomForestRegressor => 
RandomForestRegressorWrapper::predict, + Self::KNNRegressor => KNNRegressorWrapper::predict, + Self::SVR => SupportVectorRegressorWrapper::predict, + Self::DecisionTreeRegressor => DecisionTreeRegressorWrapper::predict, + Self::LogisticRegression => LogisticRegressionWrapper::predict, + Self::RandomForestClassifier => RandomForestClassifierWrapper::predict, + Self::DecisionTreeClassifier => DecisionTreeClassifierWrapper::predict, + Self::KNNClassifier => KNNClassifierWrapper::predict, + Self::SVC => SupportVectorClassifierWrapper::predict, + Self::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::predict, + Self::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::predict, } } /// Get the `train` method for the underlying algorithm. pub(crate) fn get_trainer(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { - Algorithm::Linear => LinearRegressorWrapper::train, - Algorithm::Lasso => LassoRegressorWrapper::train, - Algorithm::Ridge => RidgeRegressorWrapper::train, - Algorithm::ElasticNet => ElasticNetRegressorWrapper::train, - Algorithm::RandomForestRegressor => RandomForestRegressorWrapper::train, - Algorithm::KNNRegressor => KNNRegressorWrapper::train, - Algorithm::SVR => SupportVectorRegressorWrapper::train, - Algorithm::DecisionTreeRegressor => DecisionTreeRegressorWrapper::train, - Algorithm::LogisticRegression => LogisticRegressionWrapper::train, - Algorithm::RandomForestClassifier => RandomForestClassifierWrapper::train, - Algorithm::DecisionTreeClassifier => DecisionTreeClassifierWrapper::train, - Algorithm::KNNClassifier => KNNClassifierWrapper::train, - Algorithm::SVC => SupportVectorClassifierWrapper::train, - Algorithm::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::train, - Algorithm::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::train, + Self::Linear => LinearRegressorWrapper::train, + Self::Lasso => LassoRegressorWrapper::train, + Self::Ridge => RidgeRegressorWrapper::train, + 
Self::ElasticNet => ElasticNetRegressorWrapper::train, + Self::RandomForestRegressor => RandomForestRegressorWrapper::train, + Self::KNNRegressor => KNNRegressorWrapper::train, + Self::SVR => SupportVectorRegressorWrapper::train, + Self::DecisionTreeRegressor => DecisionTreeRegressorWrapper::train, + Self::LogisticRegression => LogisticRegressionWrapper::train, + Self::RandomForestClassifier => RandomForestClassifierWrapper::train, + Self::DecisionTreeClassifier => DecisionTreeClassifierWrapper::train, + Self::KNNClassifier => KNNClassifierWrapper::train, + Self::SVC => SupportVectorClassifierWrapper::train, + Self::GaussianNaiveBayes => GaussianNaiveBayesClassifierWrapper::train, + Self::CategoricalNaiveBayes => CategoricalNaiveBayesClassifierWrapper::train, } } } @@ -309,21 +309,21 @@ impl Algorithm { impl Display for Algorithm { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - Algorithm::DecisionTreeRegressor => write!(f, "Decision Tree Regressor"), - Algorithm::KNNRegressor => write!(f, "KNN Regressor"), - Algorithm::RandomForestRegressor => write!(f, "Random Forest Regressor"), - Algorithm::Linear => write!(f, "Linear Regressor"), - Algorithm::Ridge => write!(f, "Ridge Regressor"), - Algorithm::Lasso => write!(f, "LASSO Regressor"), - Algorithm::ElasticNet => write!(f, "Elastic Net Regressor"), - Algorithm::SVR => write!(f, "Support Vector Regressor"), - Algorithm::DecisionTreeClassifier => write!(f, "Decision Tree Classifier"), - Algorithm::KNNClassifier => write!(f, "KNN Classifier"), - Algorithm::RandomForestClassifier => write!(f, "Random Forest Classifier"), - Algorithm::LogisticRegression => write!(f, "Logistic Regression Classifier"), - Algorithm::SVC => write!(f, "Support Vector Classifier"), - Algorithm::GaussianNaiveBayes => write!(f, "Gaussian Naive Bayes"), - Algorithm::CategoricalNaiveBayes => write!(f, "Categorical Naive Bayes"), + Self::DecisionTreeRegressor => write!(f, "Decision Tree Regressor"), + Self::KNNRegressor 
=> write!(f, "KNN Regressor"), + Self::RandomForestRegressor => write!(f, "Random Forest Regressor"), + Self::Linear => write!(f, "Linear Regressor"), + Self::Ridge => write!(f, "Ridge Regressor"), + Self::Lasso => write!(f, "LASSO Regressor"), + Self::ElasticNet => write!(f, "Elastic Net Regressor"), + Self::SVR => write!(f, "Support Vector Regressor"), + Self::DecisionTreeClassifier => write!(f, "Decision Tree Classifier"), + Self::KNNClassifier => write!(f, "KNN Classifier"), + Self::RandomForestClassifier => write!(f, "Random Forest Classifier"), + Self::LogisticRegression => write!(f, "Logistic Regression Classifier"), + Self::SVC => write!(f, "Support Vector Classifier"), + Self::GaussianNaiveBayes => write!(f, "Gaussian Naive Bayes"), + Self::CategoricalNaiveBayes => write!(f, "Categorical Naive Bayes"), } } } @@ -355,12 +355,12 @@ pub enum PreProcessing { impl Display for PreProcessing { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - PreProcessing::None => write!(f, "None"), - PreProcessing::AddInteractions => write!(f, "Interaction terms added"), - PreProcessing::AddPolynomial { order } => { + Self::None => write!(f, "None"), + Self::AddInteractions => write!(f, "Interaction terms added"), + Self::AddPolynomial { order } => { write!(f, "Polynomial terms added (order = {})", order) } - PreProcessing::ReplaceWithPCA { + Self::ReplaceWithPCA { number_of_components, } => write!( f, @@ -368,7 +368,7 @@ impl Display for PreProcessing { number_of_components ), - PreProcessing::ReplaceWithSVD { + Self::ReplaceWithSVD { number_of_components, } => write!( f, diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index c610469..3242111 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -81,7 +81,7 @@ pub struct Settings { impl Default for Settings { fn default() -> Self { - Settings { + Self { sort_by: Metric::RSquared, model_type: ModelType::None, final_model_approach: 
FinalModel::Best, @@ -150,7 +150,7 @@ impl Settings { /// let settings = Settings::default_regression(); /// ``` pub fn default_regression() -> Self { - Settings { + Self { sort_by: Metric::RSquared, model_type: ModelType::Regression, final_model_approach: FinalModel::Best, @@ -191,7 +191,7 @@ impl Settings { /// let settings = Settings::default_classification(); /// ``` pub fn default_classification() -> Self { - Settings { + Self { sort_by: Metric::Accuracy, model_type: ModelType::Classification, final_model_approach: FinalModel::Best, @@ -1076,9 +1076,9 @@ enum ModelType { impl Display for ModelType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - ModelType::None => write!(f, "None"), - ModelType::Regression => write!(f, "Regression"), - ModelType::Classification => write!(f, "Classification"), + Self::None => write!(f, "None"), + Self::Regression => write!(f, "Regression"), + Self::Classification => write!(f, "Classification"), } } } diff --git a/src/utils.rs b/src/utils.rs index 026c475..81c8c45 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -54,14 +54,14 @@ pub enum Kernel { impl Display for Kernel { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - Kernel::Linear => write!(f, "Linear"), - Kernel::Polynomial(degree, gamma, coef) => write!( + Self::Linear => write!(f, "Linear"), + Self::Polynomial(degree, gamma, coef) => write!( f, "Polynomial\n degree = {}\n gamma = {}\n coef = {}", degree, gamma, coef ), - Kernel::RBF(gamma) => write!(f, "RBF\n gamma = {}", gamma), - Kernel::Sigmoid(gamma, coef) => { + Self::RBF(gamma) => write!(f, "RBF\n gamma = {}", gamma), + Self::Sigmoid(gamma, coef) => { write!(f, "Sigmoid\n gamma = {}\n coef = {}", gamma, coef) } } @@ -90,11 +90,11 @@ pub enum Distance { impl Display for Distance { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - Distance::Euclidean => write!(f, "Euclidean"), - Distance::Manhattan => write!(f, "Manhattan"), - 
Distance::Minkowski(n) => write!(f, "Minkowski\n p = {}", n), - Distance::Mahalanobis => write!(f, "Mahalanobis"), - Distance::Hamming => write!(f, "Hamming"), + Self::Euclidean => write!(f, "Euclidean"), + Self::Manhattan => write!(f, "Manhattan"), + Self::Minkowski(n) => write!(f, "Minkowski\n p = {}", n), + Self::Mahalanobis => write!(f, "Mahalanobis"), + Self::Hamming => write!(f, "Hamming"), } } } From ae55737848b6637c804e69de5bd41a523b767793 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:07:09 -0400 Subject: [PATCH 08/17] chore: simplified private type for clippy --- src/lib.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7d68048..e02ef17 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,11 +177,10 @@ pub struct SupervisedModel { comparison: Vec, /// The final model. metamodel: Model, - /// The preprocessing pipeline. - preprocessing: ( - Option>>, - Option>>, - ), + /// PCA model for preprocessing. + preprocessing_pca: Option>>, + /// SVD model for preprocessing. + preprocessing_svd: Option>>, } impl SupervisedModel { @@ -591,8 +590,9 @@ impl SupervisedModel { x_val: DenseMatrix::new(0, 0, vec![]), y_val: vec![], comparison: vec![], - preprocessing: (None, None), metamodel: Default::default(), + preprocessing_pca: None, + preprocessing_svd: None, } } @@ -746,7 +746,7 @@ impl SupervisedModel { .with_use_correlation_matrix(true), ) .unwrap(); - self.preprocessing.0 = Some(pca); + self.preprocessing_pca = Some(pca); } /// Get PCA features for the data using the trained PCA preprocessor. 
@@ -755,8 +755,7 @@ impl SupervisedModel { /// /// * `x` - The input data fn pca_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { - self.preprocessing - .0 + self.preprocessing_pca .as_ref() .unwrap() .transform(&x) @@ -771,13 +770,12 @@ impl SupervisedModel { /// * `n` - The number of components to use fn train_svd(&mut self, x: DenseMatrix, n: usize) { let svd = SVD::fit(&x, SVDParameters::default().with_n_components(n)).unwrap(); - self.preprocessing.1 = Some(svd); + self.preprocessing_svd = Some(svd); } /// Get SVD features for the data. fn svd_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { - self.preprocessing - .1 + self.preprocessing_svd .as_ref() .unwrap() .transform(&x) From 0e8f123000d8091980d87661aa82cce2cf77dda5 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:07:52 -0400 Subject: [PATCH 09/17] chore: removed some unused imports --- src/utils.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 81c8c45..e7cedd1 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -105,10 +105,7 @@ pub fn elementwise_multiply(v1: &[f32], v2: &[f32]) -> Vec { } #[cfg(any(feature = "csv"))] -use polars::prelude::{ - BooleanChunked, BooleanChunkedBuilder, CsvReader, DataFrame, DataType, NamedFrom, PolarsError, - SerReader, Series, -}; +use polars::prelude::{CsvReader, DataFrame, PolarsError, SerReader}; #[cfg(any(feature = "csv"))] pub(crate) fn validate_and_read

(file_path: P) -> DataFrame From aeb60e17954a49909dd0e10cbfecdeef0f89cfc5 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:08:08 -0400 Subject: [PATCH 10/17] chore: ignoring unused mut --- src/utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils.rs b/src/utils.rs index e7cedd1..14336ec 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -155,6 +155,7 @@ trait Cleanup { #[cfg(any(feature = "csv"))] impl Cleanup for DataFrame { + #[allow(unused_mut)] fn convert_to_float(mut self) -> Result { // Work in progress // for field in self.schema().fields() { From c31899d876eb07e3565fa8bebe6c14076727e617 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:08:32 -0400 Subject: [PATCH 11/17] chore: worked through some more clippy lints --- src/lib.rs | 2 +- src/settings/mod.rs | 4 ++-- src/utils.rs | 10 ++-------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e02ef17..cc8d9db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -564,7 +564,7 @@ impl SupervisedModel { /// # std::fs::remove_file("tests/save_best.sc"); /// ``` pub fn save_best(&self, file_name: &str) { - if let FinalModel::Best = self.settings.final_model_approach { + if matches!(self.settings.final_model_approach, FinalModel::Best) { std::fs::File::create(file_name) .and_then(|mut f| f.write_all(&self.comparison[0].model)) .expect("Cannot write model to file."); diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 802c89b..fa16532 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -201,7 +201,7 @@ pub use settings_struct::Settings; /// Metrics for evaluating algorithms #[non_exhaustive] -#[derive(PartialEq, serde::Serialize, serde::Deserialize)] +#[derive(PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum Metric { /// Sort by R^2 RSquared, @@ -228,7 +228,7 @@ impl Display for Metric { } /// Algorithm options -#[derive(PartialEq, Copy, Clone, serde::Serialize, serde::Deserialize)] 
+#[derive(PartialEq, Eq, Copy, Clone, serde::Serialize, serde::Deserialize)] pub enum Algorithm { /// Decision tree regressor DecisionTreeRegressor, diff --git a/src/utils.rs b/src/utils.rs index 14336ec..f4c8381 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -5,18 +5,12 @@ use std::fmt::{Debug, Display, Formatter}; /// Convert an Option to a String for printing in display mode. pub fn print_option(x: Option) -> String { - match x { - None => "None".to_string(), - Some(y) => format!("{y}"), - } + x.map_or_else(|| "None".to_string(), |y| format!("{y}")) } /// Convert an Option to a String for printing in debug mode. pub fn debug_option(x: Option) -> String { - match x { - None => "None".to_string(), - Some(y) => format!("{y:#?}"), - } + x.map_or_else(|| "None".to_string(), |y| format!("{y:#?}")) } /// Get the name for a knn weight function. From cecbb293ecb8b1419a1ba51c0df00e02d1a67473 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:13:25 -0400 Subject: [PATCH 12/17] feat: made several const functions as per clippy::nursery --- src/settings/knn_classifier_parameters.rs | 8 ++--- src/settings/knn_regressor_parameters.rs | 8 ++--- src/settings/mod.rs | 2 +- src/settings/settings_struct.rs | 41 ++++++++++++----------- src/settings/svc_parameters.rs | 8 ++--- src/settings/svr_parameters.rs | 8 ++--- 6 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/settings/knn_classifier_parameters.rs b/src/settings/knn_classifier_parameters.rs index ed4be3f..dee1f6c 100644 --- a/src/settings/knn_classifier_parameters.rs +++ b/src/settings/knn_classifier_parameters.rs @@ -18,25 +18,25 @@ pub struct KNNClassifierParameters { impl KNNClassifierParameters { /// Define the number of nearest neighbors to use - pub fn with_k(mut self, k: usize) -> Self { + pub const fn with_k(mut self, k: usize) -> Self { self.k = k; self } /// Define the weighting function to use with KNN regression - pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self { + 
pub const fn with_weight(mut self, weight: KNNWeightFunction) -> Self { self.weight = weight; self } /// Define the search algorithm to use with KNN regression - pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { + pub const fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { self.algorithm = algorithm; self } /// Define the distance metric to use with KNN regression - pub fn with_distance(mut self, distance: Distance) -> Self { + pub const fn with_distance(mut self, distance: Distance) -> Self { self.distance = distance; self } diff --git a/src/settings/knn_regressor_parameters.rs b/src/settings/knn_regressor_parameters.rs index fab8090..1de8fcc 100644 --- a/src/settings/knn_regressor_parameters.rs +++ b/src/settings/knn_regressor_parameters.rs @@ -18,25 +18,25 @@ pub struct KNNRegressorParameters { impl KNNRegressorParameters { /// Define the number of nearest neighbors to use - pub fn with_k(mut self, k: usize) -> Self { + pub const fn with_k(mut self, k: usize) -> Self { self.k = k; self } /// Define the weighting function to use with KNN regression - pub fn with_weight(mut self, weight: KNNWeightFunction) -> Self { + pub const fn with_weight(mut self, weight: KNNWeightFunction) -> Self { self.weight = weight; self } /// Define the search algorithm to use with KNN regression - pub fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { + pub const fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { self.algorithm = algorithm; self } /// Define the distance metric to use with KNN regression - pub fn with_distance(mut self, distance: Distance) -> Self { + pub const fn with_distance(mut self, distance: Distance) -> Self { self.distance = distance; self } diff --git a/src/settings/mod.rs b/src/settings/mod.rs index fa16532..07e74c6 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -404,7 +404,7 @@ pub enum FinalModel { impl FinalModel { /// Default values for a blending model (linear 
regression, 30% of all data reserved for training the blending model) - pub fn default_blending() -> FinalModel { + pub const fn default_blending() -> Self { Self::Blending { algorithm: Algorithm::Linear, meta_training_fraction: 0.15, diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index 3242111..f1b47a1 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -262,7 +262,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().with_number_of_folds(3); /// ``` - pub fn with_number_of_folds(mut self, n: usize) -> Self { + pub const fn with_number_of_folds(mut self, n: usize) -> Self { self.number_of_folds = n; self } @@ -272,7 +272,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().shuffle_data(true); /// ``` - pub fn shuffle_data(mut self, shuffle: bool) -> Self { + pub const fn shuffle_data(mut self, shuffle: bool) -> Self { self.shuffle = shuffle; self } @@ -282,7 +282,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().verbose(true); /// ``` - pub fn verbose(mut self, verbose: bool) -> Self { + pub const fn verbose(mut self, verbose: bool) -> Self { self.verbose = verbose; self } @@ -293,7 +293,7 @@ impl Settings { /// use automl::settings::PreProcessing; /// let settings = Settings::default().with_preprocessing(PreProcessing::AddInteractions); /// ``` - pub fn with_preprocessing(mut self, pre: PreProcessing) -> Self { + pub const fn with_preprocessing(mut self, pre: PreProcessing) -> Self { self.preprocessing = pre; self } @@ -304,7 +304,7 @@ impl Settings { /// use automl::settings::FinalModel; /// let settings = Settings::default().with_final_model(FinalModel::Best); /// ``` - pub fn with_final_model(mut self, approach: FinalModel) -> Self { + pub const fn with_final_model(mut self, approach: FinalModel) -> Self { self.final_model_approach = approach; self } @@ -338,7 +338,7 @@ impl Settings { /// 
use automl::settings::Metric; /// let settings = Settings::default().sorted_by(Metric::RSquared); /// ``` - pub fn sorted_by(mut self, sort_by: Metric) -> Self { + pub const fn sorted_by(mut self, sort_by: Metric) -> Self { self.sort_by = sort_by; self } @@ -356,7 +356,7 @@ impl Settings { /// .with_min_samples_split(20) /// ); /// ``` - pub fn with_random_forest_classifier_settings( + pub const fn with_random_forest_classifier_settings( mut self, settings: RandomForestClassifierParameters, ) -> Self { @@ -371,7 +371,7 @@ impl Settings { /// let settings = Settings::default() /// .with_logistic_settings(LogisticRegressionParameters::default()); /// ``` - pub fn with_logistic_settings(mut self, settings: LogisticRegressionParameters) -> Self { + pub const fn with_logistic_settings(mut self, settings: LogisticRegressionParameters) -> Self { self.logistic_settings = Some(settings); self } @@ -388,7 +388,7 @@ impl Settings { /// .with_kernel(Kernel::Linear) /// ); /// ``` - pub fn with_svc_settings(mut self, settings: SVCParameters) -> Self { + pub const fn with_svc_settings(mut self, settings: SVCParameters) -> Self { self.svc_settings = Some(settings); self } @@ -404,7 +404,7 @@ impl Settings { /// .with_min_samples_leaf(20) /// ); /// ``` - pub fn with_decision_tree_classifier_settings( + pub const fn with_decision_tree_classifier_settings( mut self, settings: DecisionTreeClassifierParameters, ) -> Self { @@ -425,7 +425,7 @@ impl Settings { /// .with_weight(KNNWeightFunction::Uniform) /// ); /// ``` - pub fn with_knn_classifier_settings(mut self, settings: KNNClassifierParameters) -> Self { + pub const fn with_knn_classifier_settings(mut self, settings: KNNClassifierParameters) -> Self { self.knn_classifier_settings = Some(settings); self } @@ -439,6 +439,7 @@ impl Settings { /// .with_priors(vec![1.0, 1.0]) /// ); /// ``` + #[allow(clippy::missing_const_for_fn)] pub fn with_gaussian_nb_settings(mut self, settings: GaussianNBParameters) -> Self { 
self.gaussian_nb_settings = Some(settings); self @@ -453,7 +454,7 @@ impl Settings { /// .with_alpha(1.0) /// ); /// ``` - pub fn with_categorical_nb_settings(mut self, settings: CategoricalNBParameters) -> Self { + pub const fn with_categorical_nb_settings(mut self, settings: CategoricalNBParameters) -> Self { self.categorical_nb_settings = Some(settings); self } @@ -467,7 +468,7 @@ impl Settings { /// .with_solver(LinearRegressionSolverName::QR) /// ); /// ``` - pub fn with_linear_settings(mut self, settings: LinearRegressionParameters) -> Self { + pub const fn with_linear_settings(mut self, settings: LinearRegressionParameters) -> Self { self.linear_settings = Some(settings); self } @@ -484,7 +485,7 @@ impl Settings { /// .with_max_iter(10_000) /// ); /// ``` - pub fn with_lasso_settings(mut self, settings: LassoParameters) -> Self { + pub const fn with_lasso_settings(mut self, settings: LassoParameters) -> Self { self.lasso_settings = Some(settings); self } @@ -500,7 +501,7 @@ impl Settings { /// .with_solver(RidgeRegressionSolverName::Cholesky) /// ); /// ``` - pub fn with_ridge_settings(mut self, settings: RidgeRegressionParameters) -> Self { + pub const fn with_ridge_settings(mut self, settings: RidgeRegressionParameters) -> Self { self.ridge_settings = Some(settings); self } @@ -518,7 +519,7 @@ impl Settings { /// .with_l1_ratio(0.5) /// ); /// ``` - pub fn with_elastic_net_settings(mut self, settings: ElasticNetParameters) -> Self { + pub const fn with_elastic_net_settings(mut self, settings: ElasticNetParameters) -> Self { self.elastic_net_settings = Some(settings); self } @@ -536,7 +537,7 @@ impl Settings { /// .with_weight(KNNWeightFunction::Uniform) /// ); /// ``` - pub fn with_knn_regressor_settings(mut self, settings: KNNRegressorParameters) -> Self { + pub const fn with_knn_regressor_settings(mut self, settings: KNNRegressorParameters) -> Self { self.knn_regressor_settings = Some(settings); self } @@ -553,7 +554,7 @@ impl Settings { /// 
.with_kernel(Kernel::Linear) /// ); /// ``` - pub fn with_svr_settings(mut self, settings: SVRParameters) -> Self { + pub const fn with_svr_settings(mut self, settings: SVRParameters) -> Self { self.svr_settings = Some(settings); self } @@ -571,7 +572,7 @@ impl Settings { /// .with_min_samples_split(20) /// ); /// ``` - pub fn with_random_forest_regressor_settings( + pub const fn with_random_forest_regressor_settings( mut self, settings: RandomForestRegressorParameters, ) -> Self { @@ -590,7 +591,7 @@ impl Settings { /// .with_min_samples_leaf(20) /// ); /// ``` - pub fn with_decision_tree_regressor_settings( + pub const fn with_decision_tree_regressor_settings( mut self, settings: DecisionTreeRegressorParameters, ) -> Self { diff --git a/src/settings/svc_parameters.rs b/src/settings/svc_parameters.rs index f57c546..4ae3de4 100644 --- a/src/settings/svc_parameters.rs +++ b/src/settings/svc_parameters.rs @@ -17,25 +17,25 @@ pub struct SVCParameters { impl SVCParameters { /// Define the number of epochs to use in the epsilon-SVC model. 
- pub fn with_epoch(mut self, epoch: usize) -> Self { + pub const fn with_epoch(mut self, epoch: usize) -> Self { self.epoch = epoch; self } /// Define the regulation penalty to use with the SVC Model - pub fn with_c(mut self, c: f32) -> Self { + pub const fn with_c(mut self, c: f32) -> Self { self.c = c; self } /// Define the convergence tolerance to use with the SVC model - pub fn with_tol(mut self, tol: f32) -> Self { + pub const fn with_tol(mut self, tol: f32) -> Self { self.tol = tol; self } /// Define which kernel to use with the SVC model - pub fn with_kernel(mut self, kernel: Kernel) -> Self { + pub const fn with_kernel(mut self, kernel: Kernel) -> Self { self.kernel = kernel; self } diff --git a/src/settings/svr_parameters.rs b/src/settings/svr_parameters.rs index 5c341e5..dba47b5 100644 --- a/src/settings/svr_parameters.rs +++ b/src/settings/svr_parameters.rs @@ -17,25 +17,25 @@ pub struct SVRParameters { impl SVRParameters { /// Define the value of epsilon to use in the epsilon-SVR model. 
- pub fn with_eps(mut self, eps: f32) -> Self { + pub const fn with_eps(mut self, eps: f32) -> Self { self.eps = eps; self } /// Define the regulation penalty to use with the SVR Model - pub fn with_c(mut self, c: f32) -> Self { + pub const fn with_c(mut self, c: f32) -> Self { self.c = c; self } /// Define the convergence tolerance to use with the SVR model - pub fn with_tol(mut self, tol: f32) -> Self { + pub const fn with_tol(mut self, tol: f32) -> Self { self.tol = tol; self } /// Define which kernel to use with the SVR model - pub fn with_kernel(mut self, kernel: Kernel) -> Self { + pub const fn with_kernel(mut self, kernel: Kernel) -> Self { self.kernel = kernel; self } From 3c3e1f5d67819301bf827b9eca1aa35389f07fe9 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:43:07 -0400 Subject: [PATCH 13/17] chore: made clippy more pedantic and worked through some lints --- src/lib.rs | 54 ++++++++++++++++++--------------- src/settings/mod.rs | 12 +++----- src/settings/settings_struct.rs | 5 +-- src/utils.rs | 9 +++--- 4 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cc8d9db..6bedfc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,12 @@ #![warn( clippy::all, + clippy::pedantic, clippy::nursery, )] +#![allow( + clippy::module_name_repetitions, + clippy::too_many_lines, +)] #![warn(missing_docs)] #![warn(rustdoc::missing_doc_code_examples)] #![warn(clippy::missing_docs_in_private_items)] @@ -304,10 +309,11 @@ impl SupervisedModel { /// #[cfg(any(feature = "csv"))] /// model.predict("data/diabetes_without_target.csv"); /// ``` - pub fn predict(&self, x: X) -> Vec - where - X: IntoFeatures, - { + /// + /// # Panics + /// + /// If the model has not been trained, this function will panic. 
+ pub fn predict(&self, x: X) -> Vec { let x = &self.preprocess(x.to_dense_matrix()); match self.settings.final_model_approach { FinalModel::None => panic!(""), @@ -332,13 +338,13 @@ impl SupervisedModel { number_of_components, } = self.settings.preprocessing { - self.train_pca(self.x_train.clone(), number_of_components); + self.train_pca(&self.x_train.clone(), number_of_components); } if let PreProcessing::ReplaceWithSVD { number_of_components, } = self.settings.preprocessing { - self.train_svd(self.x_train.clone(), number_of_components); + self.train_svd(&self.x_train.clone(), number_of_components); } // Preprocess the data @@ -528,7 +534,7 @@ impl SupervisedModel { meta_testing_fraction, } = self.settings.final_model_approach { - self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction) + self.train_blended_model(algorithm, meta_training_fraction, meta_testing_fraction); } } @@ -590,7 +596,7 @@ impl SupervisedModel { x_val: DenseMatrix::new(0, 0, vec![]), y_val: vec![], comparison: vec![], - metamodel: Default::default(), + metamodel: Model::default(), preprocessing_pca: None, preprocessing_svd: None, } @@ -612,7 +618,7 @@ impl SupervisedModel { // Make the data let mut meta_x: Vec> = Vec::new(); for model in &self.comparison { - meta_x.push(self.predict_by_model(&self.x_val, model)) + meta_x.push(self.predict_by_model(&self.x_val, model)); } let xdm = DenseMatrix::from_2d_vec(&meta_x).transpose(); @@ -646,7 +652,7 @@ impl SupervisedModel { train_score: vec![train_score; 1], }, name: algo, - duration: Default::default(), + duration: Duration::default(), model, }; } @@ -666,7 +672,7 @@ impl SupervisedModel { let mut meta_x: Vec> = Vec::new(); for i in 0..self.comparison.len() { let model = &self.comparison[i]; - meta_x.push(self.predict_by_model(x, model)) + meta_x.push(self.predict_by_model(x, model)); } // @@ -738,9 +744,9 @@ impl SupervisedModel { /// /// * `x` - The input data /// * `n` - The number of components to use - fn 
train_pca(&mut self, x: DenseMatrix, n: usize) { + fn train_pca(&mut self, x: &DenseMatrix, n: usize) { let pca = PCA::fit( - &x, + x, PCAParameters::default() .with_n_components(n) .with_use_correlation_matrix(true), @@ -754,11 +760,11 @@ impl SupervisedModel { /// # Arguments /// /// * `x` - The input data - fn pca_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { + fn pca_features(&self, x: &DenseMatrix, _: usize) -> DenseMatrix { self.preprocessing_pca .as_ref() .unwrap() - .transform(&x) + .transform(x) .unwrap() } @@ -768,21 +774,21 @@ impl SupervisedModel { /// /// * `x` - The input data /// * `n` - The number of components to use - fn train_svd(&mut self, x: DenseMatrix, n: usize) { - let svd = SVD::fit(&x, SVDParameters::default().with_n_components(n)).unwrap(); + fn train_svd(&mut self, x: &DenseMatrix, n: usize) { + let svd = SVD::fit(x, SVDParameters::default().with_n_components(n)).unwrap(); self.preprocessing_svd = Some(svd); } /// Get SVD features for the data. - fn svd_features(&self, x: DenseMatrix, _: usize) -> DenseMatrix { + fn svd_features(&self, x: &DenseMatrix, _: usize) -> DenseMatrix { self.preprocessing_svd .as_ref() .unwrap() - .transform(&x) + .transform(x) .unwrap() } - /// Preprocess the data. + /// Pre process the data. 
/// /// # Arguments /// @@ -798,10 +804,10 @@ impl SupervisedModel { PreProcessing::AddPolynomial { order } => Self::polynomial_features(x, order), PreProcessing::ReplaceWithPCA { number_of_components, - } => self.pca_features(x, number_of_components), + } => self.pca_features(&x, number_of_components), PreProcessing::ReplaceWithSVD { number_of_components, - } => self.svd_features(x, number_of_components), + } => self.svd_features(&x, number_of_components), } } @@ -902,7 +908,7 @@ impl Display for SupervisedModel { meta_table.add_row(row_vec); // Write - write!(f, "{}\n{}", table, meta_table) + write!(f, "{table}\n{meta_table}") } } @@ -934,7 +940,7 @@ impl Default for Model { } } -/// This is a wrapper for the CrossValidationResult +/// This is a wrapper for the `CrossValidationResult` #[derive(serde::Serialize, serde::Deserialize)] #[serde(remote = "CrossValidationResult::")] struct CrossValidationResultDef { diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 07e74c6..2758890 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -264,7 +264,7 @@ pub enum Algorithm { impl Algorithm { /// Get the `predict` method for the underlying algorithm. - pub(crate) fn get_predictor(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { + pub(crate) fn get_predictor(self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { Self::Linear => LinearRegressorWrapper::predict, Self::Lasso => LassoRegressorWrapper::predict, @@ -285,7 +285,7 @@ impl Algorithm { } /// Get the `train` method for the underlying algorithm. 
- pub(crate) fn get_trainer(&self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { + pub(crate) fn get_trainer(self) -> fn(&DenseMatrix, &Vec, &Settings) -> Vec { match self { Self::Linear => LinearRegressorWrapper::train, Self::Lasso => LassoRegressorWrapper::train, @@ -358,22 +358,20 @@ impl Display for PreProcessing { Self::None => write!(f, "None"), Self::AddInteractions => write!(f, "Interaction terms added"), Self::AddPolynomial { order } => { - write!(f, "Polynomial terms added (order = {})", order) + write!(f, "Polynomial terms added (order = {order})") } Self::ReplaceWithPCA { number_of_components, } => write!( f, - "Replaced with PCA features (n = {})", - number_of_components + "Replaced with PCA features (n = {number_of_components})" ), Self::ReplaceWithSVD { number_of_components, } => write!( f, - "Replaced with SVD features (n = {})", - number_of_components + "Replaced with SVD features (n = {number_of_components})" ), } } diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index f1b47a1..b9730bf 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -254,7 +254,7 @@ impl Settings { let serial = serde_yaml::to_string(&self).expect("Cannot serialize settings."); std::fs::File::create(file_name) .and_then(|mut f| f.write_all(serial.as_ref())) - .expect("Cannot write settings to file.") + .expect("Cannot write settings to file."); } /// Specify number of folds for cross-validation @@ -262,6 +262,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().with_number_of_folds(3); /// ``` + #[must_use] pub const fn with_number_of_folds(mut self, n: usize) -> Self { self.number_of_folds = n; self @@ -611,7 +612,7 @@ impl Display for Settings { skiplist.push_str("None "); } else { for algorithm_to_skip in &self.skiplist { - skiplist.push_str(&format!("{}\n", algorithm_to_skip)); + skiplist.push_str(&format!("{algorithm_to_skip}\n")); } } diff --git a/src/utils.rs 
b/src/utils.rs index f4c8381..f3e5530 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -51,12 +51,11 @@ impl Display for Kernel { Self::Linear => write!(f, "Linear"), Self::Polynomial(degree, gamma, coef) => write!( f, - "Polynomial\n degree = {}\n gamma = {}\n coef = {}", - degree, gamma, coef + "Polynomial\n degree = {degree}\n gamma = {gamma}\n coef = {coef}" ), - Self::RBF(gamma) => write!(f, "RBF\n gamma = {}", gamma), + Self::RBF(gamma) => write!(f, "RBF\n gamma = {gamma}"), Self::Sigmoid(gamma, coef) => { - write!(f, "Sigmoid\n gamma = {}\n coef = {}", gamma, coef) + write!(f, "Sigmoid\n gamma = {gamma}\n coef = {coef}") } } } @@ -86,7 +85,7 @@ impl Display for Distance { match self { Self::Euclidean => write!(f, "Euclidean"), Self::Manhattan => write!(f, "Manhattan"), - Self::Minkowski(n) => write!(f, "Minkowski\n p = {}", n), + Self::Minkowski(n) => write!(f, "Minkowski\n p = {n}"), Self::Mahalanobis => write!(f, "Mahalanobis"), Self::Hamming => write!(f, "Hamming"), } From 9b1b5474d871cf98b6a59e5f2b74c43f03bb8000 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 13:32:41 -0400 Subject: [PATCH 14/17] chore: added must_use macro as per clippy::pedandic --- src/lib.rs | 1 + src/settings/knn_classifier_parameters.rs | 4 ++++ src/settings/knn_regressor_parameters.rs | 4 ++++ src/settings/mod.rs | 1 + src/settings/settings_struct.rs | 27 ++++++++++++++++++++++- src/settings/svc_parameters.rs | 4 ++++ src/settings/svr_parameters.rs | 4 ++++ 7 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6bedfc8..fd8d722 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -257,6 +257,7 @@ impl SupervisedModel { /// let model = SupervisedModel::new_from_file("tests/load_that_model.aml"); /// # std::fs::remove_file("tests/load_that_model.aml"); /// ``` + #[must_use] pub fn new_from_file(file_name: &str) -> Self { let mut buf: Vec = Vec::new(); std::fs::File::open(file_name) diff --git a/src/settings/knn_classifier_parameters.rs 
b/src/settings/knn_classifier_parameters.rs index dee1f6c..f79e704 100644 --- a/src/settings/knn_classifier_parameters.rs +++ b/src/settings/knn_classifier_parameters.rs @@ -18,24 +18,28 @@ pub struct KNNClassifierParameters { impl KNNClassifierParameters { /// Define the number of nearest neighbors to use + #[must_use] pub const fn with_k(mut self, k: usize) -> Self { self.k = k; self } /// Define the weighting function to use with KNN regression + #[must_use] pub const fn with_weight(mut self, weight: KNNWeightFunction) -> Self { self.weight = weight; self } /// Define the search algorithm to use with KNN regression + #[must_use] pub const fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { self.algorithm = algorithm; self } /// Define the distance metric to use with KNN regression + #[must_use] pub const fn with_distance(mut self, distance: Distance) -> Self { self.distance = distance; self diff --git a/src/settings/knn_regressor_parameters.rs b/src/settings/knn_regressor_parameters.rs index 1de8fcc..89bd78b 100644 --- a/src/settings/knn_regressor_parameters.rs +++ b/src/settings/knn_regressor_parameters.rs @@ -18,24 +18,28 @@ pub struct KNNRegressorParameters { impl KNNRegressorParameters { /// Define the number of nearest neighbors to use + #[must_use] pub const fn with_k(mut self, k: usize) -> Self { self.k = k; self } /// Define the weighting function to use with KNN regression + #[must_use] pub const fn with_weight(mut self, weight: KNNWeightFunction) -> Self { self.weight = weight; self } /// Define the search algorithm to use with KNN regression + #[must_use] pub const fn with_algorithm(mut self, algorithm: KNNAlgorithmName) -> Self { self.algorithm = algorithm; self } /// Define the distance metric to use with KNN regression + #[must_use] pub const fn with_distance(mut self, distance: Distance) -> Self { self.distance = distance; self diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 2758890..001fc19 100644 --- 
a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -402,6 +402,7 @@ pub enum FinalModel { impl FinalModel { /// Default values for a blending model (linear regression, 30% of all data reserved for training the blending model) + #[must_use] pub const fn default_blending() -> Self { Self::Blending { algorithm: Algorithm::Linear, diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index b9730bf..8c9ba27 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -149,6 +149,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default_regression(); /// ``` + #[must_use] pub fn default_regression() -> Self { Self { sort_by: Metric::RSquared, @@ -190,6 +191,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default_classification(); /// ``` + #[must_use] pub fn default_classification() -> Self { Self { sort_by: Metric::Accuracy, @@ -235,6 +237,7 @@ impl Settings { /// let settings = Settings::new_from_file("tests/load_those_settings.yaml"); /// # std::fs::remove_file("tests/load_those_settings.yaml"); /// ``` + #[must_use] pub fn new_from_file(file_name: &str) -> Self { let mut buf: Vec = Vec::new(); std::fs::File::open(file_name) @@ -273,6 +276,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().shuffle_data(true); /// ``` + #[must_use] pub const fn shuffle_data(mut self, shuffle: bool) -> Self { self.shuffle = shuffle; self @@ -283,6 +287,7 @@ impl Settings { /// # use automl::Settings; /// let settings = Settings::default().verbose(true); /// ``` + #[must_use] pub const fn verbose(mut self, verbose: bool) -> Self { self.verbose = verbose; self @@ -294,6 +299,7 @@ impl Settings { /// use automl::settings::PreProcessing; /// let settings = Settings::default().with_preprocessing(PreProcessing::AddInteractions); /// ``` + #[must_use] pub const fn with_preprocessing(mut self, pre: PreProcessing) -> Self { self.preprocessing = pre; 
self @@ -305,6 +311,7 @@ impl Settings { /// use automl::settings::FinalModel; /// let settings = Settings::default().with_final_model(FinalModel::Best); /// ``` + #[must_use] pub const fn with_final_model(mut self, approach: FinalModel) -> Self { self.final_model_approach = approach; self @@ -316,6 +323,7 @@ impl Settings { /// use automl::settings::Algorithm; /// let settings = Settings::default().skip(Algorithm::RandomForestRegressor); /// ``` + #[must_use] pub fn skip(mut self, skip: Algorithm) -> Self { self.skiplist.push(skip); self @@ -327,6 +335,7 @@ impl Settings { /// use automl::settings::Algorithm; /// let settings = Settings::default().only(Algorithm::RandomForestRegressor); /// ``` + #[must_use] pub fn only(mut self, only: Algorithm) -> Self { self.skiplist = Self::default().skiplist; self.skiplist.retain(|&algo| algo != only); @@ -339,12 +348,13 @@ impl Settings { /// use automl::settings::Metric; /// let settings = Settings::default().sorted_by(Metric::RSquared); /// ``` + #[must_use] pub const fn sorted_by(mut self, sort_by: Metric) -> Self { self.sort_by = sort_by; self } - /// Specify settings for random_forest + /// Specify settings for Random Forest Classifier /// ``` /// # use automl::Settings; /// use automl::settings::RandomForestClassifierParameters; @@ -357,6 +367,7 @@ impl Settings { /// .with_min_samples_split(20) /// ); /// ``` + #[must_use] pub const fn with_random_forest_classifier_settings( mut self, settings: RandomForestClassifierParameters, @@ -372,6 +383,7 @@ impl Settings { /// let settings = Settings::default() /// .with_logistic_settings(LogisticRegressionParameters::default()); /// ``` + #[must_use] pub const fn with_logistic_settings(mut self, settings: LogisticRegressionParameters) -> Self { self.logistic_settings = Some(settings); self @@ -389,6 +401,7 @@ impl Settings { /// .with_kernel(Kernel::Linear) /// ); /// ``` + #[must_use] pub const fn with_svc_settings(mut self, settings: SVCParameters) -> Self { 
self.svc_settings = Some(settings); self @@ -405,6 +418,7 @@ impl Settings { /// .with_min_samples_leaf(20) /// ); /// ``` + #[must_use] pub const fn with_decision_tree_classifier_settings( mut self, settings: DecisionTreeClassifierParameters, @@ -426,6 +440,7 @@ impl Settings { /// .with_weight(KNNWeightFunction::Uniform) /// ); /// ``` + #[must_use] pub const fn with_knn_classifier_settings(mut self, settings: KNNClassifierParameters) -> Self { self.knn_classifier_settings = Some(settings); self @@ -441,6 +456,7 @@ impl Settings { /// ); /// ``` #[allow(clippy::missing_const_for_fn)] + #[must_use] pub fn with_gaussian_nb_settings(mut self, settings: GaussianNBParameters) -> Self { self.gaussian_nb_settings = Some(settings); self @@ -455,6 +471,7 @@ impl Settings { /// .with_alpha(1.0) /// ); /// ``` + #[must_use] pub const fn with_categorical_nb_settings(mut self, settings: CategoricalNBParameters) -> Self { self.categorical_nb_settings = Some(settings); self @@ -469,6 +486,7 @@ impl Settings { /// .with_solver(LinearRegressionSolverName::QR) /// ); /// ``` + #[must_use] pub const fn with_linear_settings(mut self, settings: LinearRegressionParameters) -> Self { self.linear_settings = Some(settings); self @@ -486,6 +504,7 @@ impl Settings { /// .with_max_iter(10_000) /// ); /// ``` + #[must_use] pub const fn with_lasso_settings(mut self, settings: LassoParameters) -> Self { self.lasso_settings = Some(settings); self @@ -502,6 +521,7 @@ impl Settings { /// .with_solver(RidgeRegressionSolverName::Cholesky) /// ); /// ``` + #[must_use] pub const fn with_ridge_settings(mut self, settings: RidgeRegressionParameters) -> Self { self.ridge_settings = Some(settings); self @@ -520,6 +540,7 @@ impl Settings { /// .with_l1_ratio(0.5) /// ); /// ``` + #[must_use] pub const fn with_elastic_net_settings(mut self, settings: ElasticNetParameters) -> Self { self.elastic_net_settings = Some(settings); self @@ -538,6 +559,7 @@ impl Settings { /// 
.with_weight(KNNWeightFunction::Uniform) /// ); /// ``` + #[must_use] pub const fn with_knn_regressor_settings(mut self, settings: KNNRegressorParameters) -> Self { self.knn_regressor_settings = Some(settings); self @@ -555,6 +577,7 @@ impl Settings { /// .with_kernel(Kernel::Linear) /// ); /// ``` + #[must_use] pub const fn with_svr_settings(mut self, settings: SVRParameters) -> Self { self.svr_settings = Some(settings); self @@ -573,6 +596,7 @@ impl Settings { /// .with_min_samples_split(20) /// ); /// ``` + #[must_use] pub const fn with_random_forest_regressor_settings( mut self, settings: RandomForestRegressorParameters, @@ -592,6 +616,7 @@ impl Settings { /// .with_min_samples_leaf(20) /// ); /// ``` + #[must_use] pub const fn with_decision_tree_regressor_settings( mut self, settings: DecisionTreeRegressorParameters, diff --git a/src/settings/svc_parameters.rs b/src/settings/svc_parameters.rs index 4ae3de4..9199393 100644 --- a/src/settings/svc_parameters.rs +++ b/src/settings/svc_parameters.rs @@ -17,24 +17,28 @@ pub struct SVCParameters { impl SVCParameters { /// Define the number of epochs to use in the epsilon-SVC model. 
+ #[must_use] pub const fn with_epoch(mut self, epoch: usize) -> Self { self.epoch = epoch; self } /// Define the regulation penalty to use with the SVC Model + #[must_use] pub const fn with_c(mut self, c: f32) -> Self { self.c = c; self } /// Define the convergence tolerance to use with the SVC model + #[must_use] pub const fn with_tol(mut self, tol: f32) -> Self { self.tol = tol; self } /// Define which kernel to use with the SVC model + #[must_use] pub const fn with_kernel(mut self, kernel: Kernel) -> Self { self.kernel = kernel; self diff --git a/src/settings/svr_parameters.rs b/src/settings/svr_parameters.rs index dba47b5..2bc424d 100644 --- a/src/settings/svr_parameters.rs +++ b/src/settings/svr_parameters.rs @@ -17,24 +17,28 @@ pub struct SVRParameters { impl SVRParameters { /// Define the value of epsilon to use in the epsilon-SVR model. + #[must_use] pub const fn with_eps(mut self, eps: f32) -> Self { self.eps = eps; self } /// Define the regulation penalty to use with the SVR Model + #[must_use] pub const fn with_c(mut self, c: f32) -> Self { self.c = c; self } /// Define the convergence tolerance to use with the SVR model + #[must_use] pub const fn with_tol(mut self, tol: f32) -> Self { self.tol = tol; self } /// Define which kernel to use with the SVR model + #[must_use] pub const fn with_kernel(mut self, kernel: Kernel) -> Self { self.kernel = kernel; self From 61c957d9ef8fd740ea3cbc6694860999a4bd3db6 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 14:33:23 -0400 Subject: [PATCH 15/17] chore: final pass with clippy on all features --- src/lib.rs | 19 ++++++------ src/settings/mod.rs | 10 ++---- src/settings/settings_struct.rs | 10 ++++-- src/utils.rs | 55 +++++++++++++++++---------------- tests/classification.rs | 2 +- tests/regression.rs | 4 +-- 6 files changed, 51 insertions(+), 49 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fd8d722..6974dfc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,16 @@ 
+#![deny(clippy::correctness)] #![warn( clippy::all, + clippy::suspicious, + clippy::complexity, + clippy::perf, + clippy::style, clippy::pedantic, clippy::nursery, + clippy::missing_docs_in_private_items )] -#![allow( - clippy::module_name_repetitions, - clippy::too_many_lines, -)] -#![warn(missing_docs)] -#![warn(rustdoc::missing_doc_code_examples)] -#![warn(clippy::missing_docs_in_private_items)] +#![allow(clippy::module_name_repetitions, clippy::too_many_lines)] +#![warn(missing_docs, rustdoc::missing_doc_code_examples)] #![doc = include_str!("../README.md")] pub mod settings; @@ -310,9 +311,9 @@ impl SupervisedModel { /// #[cfg(any(feature = "csv"))] /// model.predict("data/diabetes_without_target.csv"); /// ``` - /// + /// /// # Panics - /// + /// /// If the model has not been trained, this function will panic. pub fn predict(&self, x: X) -> Vec { let x = &self.preprocess(x.to_dense_matrix()); diff --git a/src/settings/mod.rs b/src/settings/mod.rs index 001fc19..96cbdf4 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -362,17 +362,11 @@ impl Display for PreProcessing { } Self::ReplaceWithPCA { number_of_components, - } => write!( - f, - "Replaced with PCA features (n = {number_of_components})" - ), + } => write!(f, "Replaced with PCA features (n = {number_of_components})"), Self::ReplaceWithSVD { number_of_components, - } => write!( - f, - "Replaced with SVD features (n = {number_of_components})" - ), + } => write!(f, "Replaced with SVD features (n = {number_of_components})"), } } } diff --git a/src/settings/settings_struct.rs b/src/settings/settings_struct.rs index 8c9ba27..c5e2206 100644 --- a/src/settings/settings_struct.rs +++ b/src/settings/settings_struct.rs @@ -384,7 +384,10 @@ impl Settings { /// .with_logistic_settings(LogisticRegressionParameters::default()); /// ``` #[must_use] - pub const fn with_logistic_settings(mut self, settings: LogisticRegressionParameters) -> Self { + pub const fn with_logistic_settings( + mut self, + settings: 
LogisticRegressionParameters, + ) -> Self { self.logistic_settings = Some(settings); self } @@ -472,7 +475,10 @@ impl Settings { /// ); /// ``` #[must_use] - pub const fn with_categorical_nb_settings(mut self, settings: CategoricalNBParameters) -> Self { + pub const fn with_categorical_nb_settings( + mut self, + settings: CategoricalNBParameters, + ) -> Self { self.categorical_nb_settings = Some(settings); self } diff --git a/src/utils.rs b/src/utils.rs index f3e5530..b02cbc7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -101,48 +101,49 @@ pub fn elementwise_multiply(v1: &[f32], v2: &[f32]) -> Vec { use polars::prelude::{CsvReader, DataFrame, PolarsError, SerReader}; #[cfg(any(feature = "csv"))] -pub(crate) fn validate_and_read

(file_path: P) -> DataFrame +/// Read and validate a csv file or URL into a polars `DataFrame`. +pub fn validate_and_read

(file_path: P) -> DataFrame where P: AsRef, { let file_path_as_str = file_path.as_ref().to_str().unwrap(); - match CsvReader::from_path(file_path_as_str) { - Ok(csv) => csv - .infer_schema(Some(10)) - .has_header( - csv_sniffer::Sniffer::new() - .sniff_path(file_path_as_str.clone()) - .expect("Cannot sniff file") - .dialect - .header - .has_header_row, - ) - .finish() - .expect("Cannot read file as CSV") - .drop_nulls(None) - .expect("Cannot remove null values") - .convert_to_float() - .expect("Cannot convert types"), - Err(_) => { - if let Ok(_) = url::Url::parse(file_path_as_str) { + CsvReader::from_path(file_path_as_str).map_or_else( + |_| { + if url::Url::parse(file_path_as_str).is_ok() { let file_contents = minreq::get(file_path_as_str) .send() .expect("Could not open URL"); let temp = temp_file::with_contents(file_contents.as_bytes()); - validate_and_read(temp.path().to_str().unwrap()) } else { - panic!( - "The string {} is not a valid URL or file path.", - file_path_as_str - ) + panic!("The string {file_path_as_str} is not a valid URL or file path.") } - } - } + }, + |csv| { + csv.infer_schema(Some(10)) + .has_header( + csv_sniffer::Sniffer::new() + .sniff_path(file_path_as_str) + .expect("Cannot sniff file") + .dialect + .header + .has_header_row, + ) + .finish() + .expect("Cannot read file as CSV") + .drop_nulls(None) + .expect("Cannot remove null values") + .convert_to_float() + .expect("Cannot convert types") + }, + ) } + +/// Trait to convert to a polars `DataFrame`. #[cfg(any(feature = "csv"))] trait Cleanup { + /// Convert to a polars `DataFrame` with all columns of type float. 
fn convert_to_float(self) -> Result; } diff --git a/tests/classification.rs b/tests/classification.rs index e576e1e..4af0bcb 100644 --- a/tests/classification.rs +++ b/tests/classification.rs @@ -17,7 +17,7 @@ mod classification_tests { classifier.train(); // Try to predict something - classifier.predict(vec![vec![5.0 as f32; 30]; 10]); + classifier.predict(vec![vec![5.0_f32; 30]; 10]); classifier.predict("data/breast_cancer_without_target.csv"); #[cfg(feature = "nd")] classifier.predict(ndarray::Array2::from_shape_vec((10, 30), vec![5.0; 300]).unwrap()); diff --git a/tests/regression.rs b/tests/regression.rs index 5fbd1d2..a395e1e 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -17,7 +17,7 @@ mod regression_tests { regressor.train(); // Try to predict something - regressor.predict(vec![vec![5.0 as f32; 10]; 10]); + regressor.predict(vec![vec![5.0_f32; 10]; 10]); regressor.predict("data/diabetes_without_target.csv"); #[cfg(feature = "nd")] regressor.predict(ndarray::Array2::from_shape_vec((10, 10), vec![5.0; 100]).unwrap()); @@ -38,7 +38,7 @@ mod regression_tests { regressor.train(); // Try to predict something - regressor.predict(vec![vec![5.0 as f32; 8]; 8]); + regressor.predict(vec![vec![5.0_f32; 8]; 8]); } #[test] From f29722956d58e775e7869c9f6069e8fd43864e8f Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Thu, 29 Jun 2023 14:50:59 -0400 Subject: [PATCH 16/17] build: bump version --- .gitignore | 3 ++- Cargo.toml | 2 +- README.md | 21 +++++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index b7b4f51..eb53182 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ Cargo.lock .idea/ /examples/*.aml /examples/*.yaml -/examples/*.sc \ No newline at end of file +/examples/*.sc +.vscode diff --git a/Cargo.toml b/Cargo.toml index 8165e32..8b43581 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "automl" -version = "0.2.7" +version = "0.3.0" authors = ["Chris McComb "] 
description = "Automated machine learning for classification and regression" edition = "2021" diff --git a/README.md b/README.md index 9379cdd..b2a7eb4 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,16 @@ [![docs.rs](https://img.shields.io/docsrs/automl/latest?logo=rust)](https://docs.rs/automl) # AutoML with SmartCore + AutoML is _Automated Machine Learning_, referring to processes and methods to make machine learning more accessible for a general audience. This crate builds on top of the [smartcore](https://docs.rs/smartcore/) machine learning framework, and provides some utilities to quickly train and compare models. # Install + To use the latest released version of `AutoML`, add this to your `Cargo.toml`: ```toml -automl = "0.2.7" +automl = "0.3.0" ``` To use the bleeding edge instead, add this: ```toml @@ -18,14 +20,18 @@ automl = { git = "https://github.com/cmccomb/rust-automl" } ``` # Usage + Running the following: + ```rust let dataset = smartcore::dataset::breast_cancer::load_dataset(); let settings = automl::Settings::default_classification(); let mut classifier = automl::SupervisedModel::new(dataset, settings); classifier.train(); ``` + will perform a comparison of classifier models using cross-validation. Printing the classifier object will yield: + ```text ┌────────────────────────────────┬─────────────────────┬───────────────────┬──────────────────┐ │ Model │ Time │ Training Accuracy │ Testing Accuracy │ @@ -45,17 +51,20 @@ will perform a comparison of classifier models using cross-validation. Printing │ Support Vector Classifier │ 4s 187ms 61us 708ns │ 0.57 │ 0.57 │ └────────────────────────────────┴─────────────────────┴───────────────────┴──────────────────┘ ``` + You can then perform inference using the best model with the `predict` method. 
## Features -This crate has several features that add some additional methods -| Feature | Description | -|:----------|:----------------------------------------------------------------------------------------------------------| -| `nd` | Adds methods for predicting/reading data using [`ndarray`](https://crates.io/crates/ndarray). | -| `csv` | Adds methods for predicting/reading data from a .csv using [`polars`](https://crates.io/crates/polars). | +This crate has several features that add some additional methods. + +| Feature | Description | +| :------ | :------------------------------------------------------------------------------------------------------ | +| `nd` | Adds methods for predicting/reading data using [`ndarray`](https://crates.io/crates/ndarray). | +| `csv` | Adds methods for predicting/reading data from a .csv using [`polars`](https://crates.io/crates/polars). | ## Capabilities + - Feature Engineering - PCA - SVD From 2d5ec3212ce9bd120a08dfdd8104887e551b3662 Mon Sep 17 00:00:00 2001 From: Chris McComb Date: Tue, 11 Jul 2023 16:32:00 -0400 Subject: [PATCH 17/17] Update tests.yml Updating egui dependencies to fix build error --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8ad88a8..cfda8cf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Install egui dependencies - run: sudo apt-get install libxcb-render0-dev libxcb-shape0-dev libxcb-xfixes0-dev libspeechd-dev libxkbcommon-dev libssl-dev + run: sudo apt-get install -y libclang-dev libgtk-3-dev libxcb-render0-dev libxcb-shape0-dev libxcb-xfixes0-dev libxkbcommon-dev libssl-dev - name: Build run: cargo build --release --verbose --all-features - name: Run tests