// logistic_regression.rs (from a fork of zotroneneis/machine_learning_basics)

use ndarray::*;
use std::ops::SubAssign;
use utils::{make_blobs, shuffle2, sigmoid, train_test_split};

/// A binary logistic regression model: p(y = 1 | x) = sigmoid(weights . x + bias).
struct LogisticRegressor {
    weights: Array1<f64>,
    bias: f64,
}

impl LogisticRegressor {
    /// Construct and train a logistic regressor with batch gradient descent.
    /// x: a matrix [n_samples, n_features] of examples
    /// y: a vector [n_samples] of binary targets (0.0 or 1.0)
    /// n_iters: number of training iterations
    /// learning_rate: step size for gradient descent
    pub fn new(
        x: Array2<f64>,
        y: Array1<f64>,
        n_iters: usize,
        learning_rate: f64,
    ) -> LogisticRegressor {
        // 1 / n_samples, used to average the loss and gradients over the data set
        let scale = 1.0 / x.rows() as f64;
        let n_features = x.cols();
        let mut lgr = LogisticRegressor {
            weights: Array::zeros(n_features),
            bias: 0.0,
        };
        for i in 0..n_iters {
            let y_pred = lgr.act(&x);
            // Negative binary cross-entropy for each data point:
            // y * ln(y_pred) + (1 - y) * ln(1 - y_pred)
            let crents = y.clone() * y_pred.mapv(f64::ln)
                + (1.0 - y.clone()) * (1.0 - y_pred.clone()).mapv(f64::ln);
            // Average binary cross-entropy over the data set
            let cost = -scale * crents.scalar_sum();
            // Compute gradients of the cost w.r.t. weights and bias
            let err = y_pred - y.clone();
            let dw: Array1<f64> = scale * err.dot(&x);
            let db: f64 = scale * err.scalar_sum();
            // Update parameters with (non-stochastic, full-batch) gradient descent
            lgr.weights.sub_assign(&(learning_rate * dw));
            lgr.bias.sub_assign(learning_rate * db);
            if i % 100 == 0 {
                println!("Cost iteration {}: {}", i, cost);
            }
        }
        lgr
    }
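
    // Note on the update rule in `new` above (a sketch of the usual derivation,
    // not part of the original file): for binary cross-entropy paired with a
    // sigmoid activation, the gradients simplify to
    //     dL/dw = (1/n) * X^T (y_pred - y)    and    dL/db = (1/n) * sum(y_pred - y),
    // which is what `scale * err.dot(&x)` and `scale * err.scalar_sum()` compute.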

    /// Given a matrix [n_samples, n_features] of examples 'x',
    /// compute sigmoid(x . weights + bias) for each example (row).
    /// Returns a vector of length [n_samples].
    fn act<S: Data<Elem = f64>>(&self, x: &ArrayBase<S, Ix2>) -> Array1<f64> {
        let mut out = x.dot(&self.weights) + self.bias;
        out.mapv_inplace(sigmoid);
        out
    }

    /// Predicts the binary label (0.0 or 1.0) for a single example.
    pub fn predict<S: Data<Elem = f64>>(&self, example: ArrayBase<S, Ix1>) -> f64 {
        // Treat the example as a 1-row matrix so `act` can be reused
        let mut y_pred = self.act(&example.insert_axis(Axis(0)));
        // Threshold the probability to 1.0 or 0.0
        y_pred.mapv_inplace(|e| if e > 0.5 { 1.0 } else { 0.0 });
        y_pred[[0]]
    }

    /// Evaluate the regressor on a data set; returns the accuracy in percent.
    pub fn test(&self, x: &Array2<f64>, y: &Array1<f64>) -> f64 {
        let y_preds: Array1<f64> = x
            .outer_iter()
            .map(|example| self.predict(example))
            .collect();
        // With 0/1 labels, the mean absolute error is the misclassification rate
        let acc = 100.0 - (y - &y_preds).mapv(f64::abs).mean_axis(Axis(0)) * 100.0;
        acc[[]]
    }
}
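
// Example of driving the trainer below (a sketch, not part of the original file;
// the crate's actual entry point lives elsewhere and the argument values here
// are only illustrative):
//
//     run(2000, 0.1, 0.8, None);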

pub fn run(
    n_iters: usize,
    learning_rate: f64,
    train_test_split_ratio: f64,
    rng_seed: Option<[u8; 32]>,
) {
    // Generate a synthetic two-class blob data set, shuffle it, and convert the
    // integer class labels to 0.0 / 1.0 targets
    let (x, y): (Array2<f64>, Array1<usize>) = make_blobs(1000, 2, 2);
    let (x, y) = shuffle2(x, y, rng_seed);
    let y = y.mapv(|e| e as f64);
    let dataset = train_test_split(x, y, train_test_split_ratio);
    let lgr = LogisticRegressor::new(
        dataset.x_train.clone(),
        dataset.y_train.clone(),
        n_iters,
        learning_rate,
    );
    println!(
        "Training set accuracy: {} %",
        lgr.test(&dataset.x_train, &dataset.y_train)
    );
    println!(
        "Test set accuracy: {} %",
        lgr.test(&dataset.x_test, &dataset.y_test)
    );
}
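
// A minimal smoke-test sketch (not part of the original file): it trains the
// regressor on a tiny, hand-built, linearly separable data set and checks that
// the training points are classified correctly. It assumes only the APIs defined
// above plus ndarray's array constructors; the iteration count and learning rate
// are illustrative.
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array;

    #[test]
    fn separates_two_clusters() {
        // Three points near (0, 0) labelled 0.0 and three near (3, 3) labelled 1.0
        let x = Array::from_shape_vec(
            (6, 2),
            vec![0.0, 0.2, 0.1, 0.0, 0.2, 0.1, 3.0, 3.1, 2.9, 3.0, 3.1, 2.8],
        )
        .unwrap();
        let y = Array::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0]);
        let lgr = LogisticRegressor::new(x.clone(), y.clone(), 500, 0.1);
        // A trivially separable set, so training accuracy should reach 100 %
        assert!(lgr.test(&x, &y) > 99.0);
    }
}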