@@ -18,7 +18,7 @@ public static class PermutationFeatureImportanceExtensions
18
18
/// <remarks>
19
19
/// <para>
20
20
/// Permutation feature importance (PFI) is a technique to determine the global importance of features in a trained
21
- /// machine learning model. PFI is a simple yet powerul technique motivated by Breiman in his Random Forest paper, section 10
21
+ /// machine learning model. PFI is a simple yet powerful technique motivated by Breiman in his Random Forest paper, section 10
22
22
/// (Breiman. <a href='https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf'>"Random Forests."</a> Machine Learning, 2001.)
23
23
/// The advantage of the PFI method is that it is model agnostic -- it works with any model that can be
24
24
/// evaluated -- and it can use any dataset, not just the training set, to compute feature importance metrics.
@@ -33,7 +33,7 @@ public static class PermutationFeatureImportanceExtensions
33
33
/// </para>
34
34
/// <para>
35
35
/// In this implementation, PFI computes the change in all possible regression evaluation metrics for each feature, and an
36
- /// <code>ImmutableArray</code> of <code>RegressionEvaluator.Result </code> objects is returned. See the sample below for an
36
+ /// <code>ImmutableArray</code> of <code>RegressionMetrics </code> objects is returned. See the sample below for an
37
37
/// example of working with these results to analyze the feature importance of a model.
38
38
/// </para>
39
39
/// </remarks>
@@ -85,10 +85,37 @@ private static RegressionMetrics RegressionDelta(
85
85
}
86
86
87
87
/// <summary>
88
- /// Permutation Feature Importance is a technique that calculates how much each feature 'matters' to the predictions.
89
- /// Namely, how much the model's predictions will change if we randomly permute the values of one feature across the evaluation set.
90
- /// If the quality doesn't change much, this feature is not very important. If the quality drops drastically, this was a really important feature.
88
+ /// Permutation Feature Importance (PFI) for Binary Classification
91
89
/// </summary>
90
+ /// <remarks>
91
+ /// <para>
92
+ /// Permutation feature importance (PFI) is a technique to determine the global importance of features in a trained
93
+ /// machine learning model. PFI is a simple yet powerful technique motivated by Breiman in his Random Forest paper, section 10
94
+ /// (Breiman. <a href='https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf'>"Random Forests."</a> Machine Learning, 2001.)
95
+ /// The advantage of the PFI method is that it is model agnostic -- it works with any model that can be
96
+ /// evaluated -- and it can use any dataset, not just the training set, to compute feature importance metrics.
97
+ /// </para>
98
+ /// <para>
99
+ /// PFI works by taking a labeled dataset, choosing a feature, and permuting the values
100
+ /// for that feature across all the examples, so that each example now has a random value for the feature and
101
+ /// the original values for all other features. The evaluation metric (e.g. AUC or R-squared) is then calculated
102
+ /// for this modified dataset, and the change in the evaluation metric from the original dataset is computed.
103
+ /// The larger the change in the evaluation metric, the more important the feature is to the model.
104
+ /// PFI works by performing this permutation analysis across all the features of a model, one after another.
105
+ /// </para>
106
+ /// <para>
107
+ /// In this implementation, PFI computes the change in all possible binary classification evaluation metrics for each feature, and an
108
+ /// <code>ImmutableArray</code> of <code>BinaryClassificationMetrics</code> objects is returned. See the sample below for an
109
+ /// example of working with these results to analyze the feature importance of a model.
110
+ /// </para>
111
+ /// </remarks>
112
+ /// <example>
113
+ /// <format type="text/markdown">
114
+ /// <![CDATA[
116
+ /// ]]>
117
+ /// </format>
118
+ /// </example>
92
119
/// <param name="ctx">The binary classification context.</param>
93
120
/// <param name="model">The model to evaluate.</param>
94
121
/// <param name="data">The evaluation data set.</param>
0 commit comments