From 21850fecd037c94151d3cc19952977fc9e16bf05 Mon Sep 17 00:00:00 2001
From: Pratik Fandade
Date: Thu, 24 Oct 2024 04:03:43 -0400
Subject: [PATCH 1/2] Adding logistic regression & optimizing the gradient
 descent algorithm

---
 DIRECTORY.md                                |  1 +
 src/machine_learning/logistic_regression.rs | 62 +++++++++++++++++++
 src/machine_learning/mod.rs                 |  2 +
 .../optimization/gradient_descent.rs        |  2 +-
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 src/machine_learning/logistic_regression.rs

diff --git a/DIRECTORY.md b/DIRECTORY.md
index f4e1fa0e58c..8559cb34c93 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -156,6 +156,7 @@
   * [Cholesky](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/cholesky.rs)
   * [K Means](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/k_means.rs)
   * [Linear Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/linear_regression.rs)
+  * [Logistic Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/logistic_regression.rs)
   * Loss Function
     * [Average Margin Ranking Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/average_margin_ranking_loss.rs)
     * [Hinge Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/hinge_loss.rs)
diff --git a/src/machine_learning/logistic_regression.rs b/src/machine_learning/logistic_regression.rs
new file mode 100644
index 00000000000..cae85814671
--- /dev/null
+++ b/src/machine_learning/logistic_regression.rs
@@ -0,0 +1,62 @@
+use super::optimization::gradient_descent;
+use std::f64::consts::E;
+
+/// Returns the weights after performing logistic regression on the input data points.
+pub fn logistic_regression(
+    data_points: Vec<(Vec<f64>, f64)>,
+    iterations: usize,
+    learning_rate: f64,
+) -> Option<Vec<f64>> {
+    if data_points.is_empty() {
+        return None;
+    }
+
+    let num_features = data_points[0].0.len();
+    let mut params = vec![0.0; num_features];
+
+    let derivative_fn = |params: &[f64]| derivative(params, &data_points);
+
+    gradient_descent(derivative_fn, &mut params, learning_rate, iterations as i32);
+
+    Some(params)
+}
+
+fn derivative(params: &[f64], data_points: &[(Vec<f64>, f64)]) -> Vec<f64> {
+    let num_features = params.len();
+    let mut gradients = vec![0.0; num_features];
+
+    for (features, y_i) in data_points {
+        let z = params.iter().zip(features).map(|(p, x)| p * x).sum::<f64>();
+        let prediction = 1.0 / (1.0 + E.powf(-z));
+
+        for (i, x_i) in features.iter().enumerate() {
+            gradients[i] += (prediction - y_i) * x_i;
+        }
+    }
+
+    gradients
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_logistic_regression() {
+        let data = vec![
+            (vec![0.0, 0.0], 0.0),
+            (vec![1.0, 1.0], 1.0),
+            (vec![2.0, 2.0], 1.0),
+        ];
+        let result = logistic_regression(data, 10000, 0.1);
+        assert!(result.is_some());
+        let params = result.unwrap();
+        assert!((params[0] - 6.902976808251308).abs() < 1e-6);
+        assert!((params[1] - 2000.4659358334482).abs() < 1e-6);
+    }
+
+    #[test]
+    fn test_empty_list_logistic_regression() {
+        assert_eq!(logistic_regression(vec![], 10000, 0.1), None);
+    }
+}
diff --git a/src/machine_learning/mod.rs b/src/machine_learning/mod.rs
index c77fd65116b..534326d2121 100644
--- a/src/machine_learning/mod.rs
+++ b/src/machine_learning/mod.rs
@@ -1,12 +1,14 @@
 mod cholesky;
 mod k_means;
 mod linear_regression;
+mod logistic_regression;
 mod loss_function;
 mod optimization;
 
 pub use self::cholesky::cholesky;
 pub use self::k_means::k_means;
 pub use self::linear_regression::linear_regression;
+pub use self::logistic_regression::logistic_regression;
 pub use self::loss_function::average_margin_ranking_loss;
 pub use self::loss_function::hinge_loss;
 pub use self::loss_function::huber_loss;
diff --git a/src/machine_learning/optimization/gradient_descent.rs b/src/machine_learning/optimization/gradient_descent.rs
index 6701a688d15..fd322a23ff3 100644
--- a/src/machine_learning/optimization/gradient_descent.rs
+++ b/src/machine_learning/optimization/gradient_descent.rs
@@ -23,7 +23,7 @@
 /// A reference to the optimized parameter vector `x`.
 
 pub fn gradient_descent(
-    derivative_fn: fn(&[f64]) -> Vec<f64>,
+    derivative_fn: impl Fn(&[f64]) -> Vec<f64>,
     x: &mut Vec<f64>,
     learning_rate: f64,
     num_iterations: i32,
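Note on the gradient_descent signature change at the end of PATCH 1/2: `logistic_regression` hands gradient_descent a closure that captures `data_points` from its environment, and a capturing closure cannot coerce to a plain `fn` pointer, which is why the parameter is generalized to `impl Fn(&[f64]) -> Vec<f64>`. A minimal standalone sketch of the distinction, using hypothetical names that are not part of this patch:

    // A plain `fn` pointer only accepts free functions and non-capturing closures.
    fn apply_fn_pointer(f: fn(f64) -> f64, x: f64) -> f64 {
        f(x)
    }

    // `impl Fn` accepts any type implementing the `Fn` trait, including
    // closures that capture variables from their environment.
    fn apply_impl_fn(f: impl Fn(f64) -> f64, x: f64) -> f64 {
        f(x)
    }

    fn main() {
        let offset = 1.0;
        let shift = |x: f64| x + offset; // captures `offset`

        // apply_fn_pointer(shift, 2.0); // would not compile: `shift` captures
        //                               // its environment, so it cannot coerce
        //                               // to `fn(f64) -> f64`
        assert_eq!(apply_impl_fn(shift, 2.0), 3.0);
    }

As a side effect, `impl Fn` also monomorphizes the closure's concrete type into gradient_descent, avoiding the indirection of a function-pointer call.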
From da1b10d159630bc83bd25eb2f981adeadb0fafc4 Mon Sep 17 00:00:00 2001
From: Pratik Fandade
Date: Fri, 25 Oct 2024 20:13:34 -0400
Subject: [PATCH 2/2] Fixing the test cases and minor adjustment to the
 algorithm

---
 src/machine_learning/logistic_regression.rs | 54 ++++++++++++++++-----
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/machine_learning/logistic_regression.rs b/src/machine_learning/logistic_regression.rs
index cae85814671..fc020a795ac 100644
--- a/src/machine_learning/logistic_regression.rs
+++ b/src/machine_learning/logistic_regression.rs
@@ -11,7 +11,7 @@ pub fn logistic_regression(
         return None;
     }
 
-    let num_features = data_points[0].0.len();
+    let num_features = data_points[0].0.len() + 1;
     let mut params = vec![0.0; num_features];
 
     let derivative_fn = |params: &[f64]| derivative(params, &data_points);
@@ -26,11 +26,17 @@ fn derivative(params: &[f64], data_points: &[(Vec<f64>, f64)]) -> Vec<f64> {
     let mut gradients = vec![0.0; num_features];
 
     for (features, y_i) in data_points {
-        let z = params.iter().zip(features).map(|(p, x)| p * x).sum::<f64>();
+        let z = params[0]
+            + params[1..]
+                .iter()
+                .zip(features)
+                .map(|(p, x)| p * x)
+                .sum::<f64>();
         let prediction = 1.0 / (1.0 + E.powf(-z));
 
+        gradients[0] += prediction - y_i;
         for (i, x_i) in features.iter().enumerate() {
-            gradients[i] += (prediction - y_i) * x_i;
+            gradients[i + 1] += (prediction - y_i) * x_i;
         }
     }
 
@@ -42,21 +48,45 @@ mod test {
     use super::*;
 
     #[test]
-    fn test_logistic_regression() {
+    fn test_logistic_regression_simple() {
         let data = vec![
-            (vec![0.0, 0.0], 0.0),
-            (vec![1.0, 1.0], 1.0),
-            (vec![2.0, 2.0], 1.0),
+            (vec![0.0], 0.0),
+            (vec![1.0], 0.0),
+            (vec![2.0], 0.0),
+            (vec![3.0], 1.0),
+            (vec![4.0], 1.0),
+            (vec![5.0], 1.0),
         ];
-        let result = logistic_regression(data, 10000, 0.1);
+
+        let result = logistic_regression(data, 10000, 0.05);
         assert!(result.is_some());
+
+        let params = result.unwrap();
+        assert!((params[0] + 17.65).abs() < 1.0);
+        assert!((params[1] - 7.13).abs() < 1.0);
+    }
+
+    #[test]
+    fn test_logistic_regression_extreme_data() {
+        let data = vec![
+            (vec![-100.0], 0.0),
+            (vec![-10.0], 0.0),
+            (vec![0.0], 0.0),
+            (vec![10.0], 1.0),
+            (vec![100.0], 1.0),
+        ];
+
+        let result = logistic_regression(data, 10000, 0.05);
+        assert!(result.is_some());
+
         let params = result.unwrap();
-        assert!((params[0] - 6.902976808251308).abs() < 1e-6);
-        assert!((params[1] - 2000.4659358334482).abs() < 1e-6);
+        assert!((params[0] + 6.20).abs() < 1.0);
+        assert!((params[1] - 5.5).abs() < 1.0);
     }
 
     #[test]
-    fn test_empty_list_logistic_regression() {
-        assert_eq!(logistic_regression(vec![], 10000, 0.1), None);
+    fn test_logistic_regression_no_data() {
+        let result = logistic_regression(vec![], 5000, 0.1);
+        assert_eq!(result, None);
     }
 }
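After PATCH 2/2 the returned vector stores the intercept (bias) in `params[0]` and the per-feature weights in `params[1..]`, so a caller recovers a probability by re-applying the sigmoid to the intercept plus the dot product of weights and features. A hedged usage sketch of the final API; the crate name `the_algorithms_rust` and the dataset are illustrative assumptions, with the re-export coming from the mod.rs change in PATCH 1/2:

    use the_algorithms_rust::machine_learning::logistic_regression; // assumed crate name

    fn main() {
        // Illustrative data: the label flips from 0.0 to 1.0 past x = 2.5.
        let data = vec![
            (vec![0.0], 0.0),
            (vec![1.0], 0.0),
            (vec![2.0], 0.0),
            (vec![3.0], 1.0),
            (vec![4.0], 1.0),
        ];

        let params = logistic_regression(data, 10_000, 0.05)
            .expect("input is non-empty, so a model is returned");

        // params[0] is the intercept, params[1..] the feature weights,
        // so the predicted probability is sigmoid(intercept + w * x).
        let x = 3.5;
        let z = params[0] + params[1] * x;
        let probability = 1.0 / (1.0 + (-z).exp());
        assert!(probability > 0.5); // x = 3.5 lies on the positive side
    }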