-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathplot_robust_cv_example.py
55 lines (47 loc) · 1.49 KB
/
plot_robust_cv_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# -*- coding: utf-8 -*-
"""
================================================================
An example of a robust cross-validation evaluation in regression
================================================================
In this example we compare `LinearRegression` (OLS) with `HuberRegressor` from
scikit-learn using cross-validation.
We show that a robust cross-validation scheme gives a better
evaluation of the generalisation error in a corrupted dataset.
"""
# Echo the module docstring so the example's description appears in its output.
print(__doc__)
import numpy as np
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn_extra.robust import make_huber_metric
from sklearn.linear_model import LinearRegression, HuberRegressor
# Deterministic toy regression problem: one feature, noiseless linear target.
rng = np.random.RandomState(42)
X = rng.uniform(size=100).reshape(-1, 1)
y = 3 * X[:, 0]

# The features are drawn from uniform()'s default [0, 1) range, so every clean
# target is below 3; the three samples overwritten here are gross outliers.
outlier_rows = [42 // 2, 42, 42 * 2]
y[outlier_rows] = 200

# Metric built from the plain MSE via make_huber_metric, with parameter c=9.
robust_mse = make_huber_metric(mean_squared_error, c=9)
# Cross-validate both estimators, scoring every fold with the plain MSE.
print("Non robust error:")
plain_scorer = make_scorer(mean_squared_error)
for estimator in (LinearRegression(), HuberRegressor()):
    fold_scores = cross_val_score(estimator, X, y, scoring=plain_scorer)
    # Report the score averaged over the cross-validation folds.
    print(estimator, " mse : %.2F" % np.mean(fold_scores))
# Same comparison again, this time scoring every fold with robust_mse.
print("\n")
print("Robust error:")
huber_scorer = make_scorer(robust_mse)
for estimator in (LinearRegression(), HuberRegressor()):
    fold_scores = cross_val_score(estimator, X, y, scoring=huber_scorer)
    # Report the score averaged over the cross-validation folds.
    print(estimator, " mse : %.2F" % np.mean(fold_scores))