diff --git a/orangecontrib/explain/inspection.py b/orangecontrib/explain/inspection.py index 1c030ba..3af0263 100644 --- a/orangecontrib/explain/inspection.py +++ b/orangecontrib/explain/inspection.py @@ -189,13 +189,7 @@ def individual_condition_expectation( _check_data(data) # implicit check if feature in data.domain - needs_pp = _check_model(model, data) - - # values should not be preprocessed - orig_values = data[:, feature].X.flatten() - _, index = np.unique(orig_values, return_index=True) - orig_values = orig_values[index] - if needs_pp: + if _check_model(model, data): data = model.data_to_model_domain(data) assert feature.name in [a.name for a in data.domain.attributes] @@ -209,7 +203,7 @@ def dummy_fit(*_, **__): model.fit_ = dummy_fit if model.domain.class_var.is_discrete: model._estimator_type = "classifier" - model.classes_ = model.domain.class_var.values + model.classes_ = np.array(model.domain.class_var.values) else: model._estimator_type = "regressor" @@ -221,7 +215,7 @@ def dummy_fit(*_, **__): grid_resolution=grid_resolution, kind=kind) - results = {"average": dep["average"], "values": orig_values} + results = {"average": dep["average"], "values": dep["grid_values"][0]} if kind == "both": results["individual"] = dep["individual"] diff --git a/orangecontrib/explain/tests/test_explainer.py b/orangecontrib/explain/tests/test_explainer.py index 77c8700..de061a4 100644 --- a/orangecontrib/explain/tests/test_explainer.py +++ b/orangecontrib/explain/tests/test_explainer.py @@ -2,7 +2,6 @@ import unittest import numpy as np -from numpy.testing import assert_array_equal from Orange.classification import ( LogisticRegressionLearner, RandomForestLearner, @@ -11,6 +10,10 @@ TreeLearner, ThresholdLearner, ) +try: + from Orange.classification import ScoringSheetLearner +except ImportError: + ScoringSheetLearner = None from Orange.data import Table, Domain, ContinuousVariable from Orange.regression import LinearRegressionLearner, CurveFitLearner from Orange.tests import test_regression, test_classification @@ -206,7 +209,7 @@ def test_gradient_boosting_shape(self): ) self.assertEqual(len(shap_values), 2) self.assertEqual(len(base_value), 2) - assert_array_equal(-shap_values[0], shap_values[1]) + np.testing.assert_array_almost_equal(-shap_values[0], shap_values[1]) learner = GBLearner() model = learner(self.hearth_disease) @@ -215,7 +218,7 @@ def test_gradient_boosting_shape(self): ) self.assertEqual(len(shap_values), 2) self.assertEqual(len(base_value), 2) - assert_array_equal(-shap_values[0], shap_values[1]) + np.testing.assert_array_almost_equal(-shap_values[0], shap_values[1]) learner = XGBRFLearner() model = learner(self.hearth_disease) @@ -224,7 +227,7 @@ def test_gradient_boosting_shape(self): ) self.assertEqual(len(shap_values), 2) self.assertEqual(len(base_value), 2) - assert_array_equal(-shap_values[0], shap_values[1]) + np.testing.assert_array_almost_equal(-shap_values[0], shap_values[1]) @unittest.skipIf(XGBLearner is None, "Missing 'xgboost' package") def test_remove_workaround(self): @@ -253,8 +256,8 @@ def test_all_classifiers(self): """ Test explanation for all classifiers """ for learner in test_classification.all_learners(): with self.subTest(learner): - if learner == ThresholdLearner: - # ThresholdLearner require binary class + if learner in (ThresholdLearner, ScoringSheetLearner): + # ThresholdLearner and ScoringSheetLearner require binary class continue kwargs = {} if "base_learner" in inspect.signature(learner).parameters: diff --git a/orangecontrib/explain/tests/test_inspection.py b/orangecontrib/explain/tests/test_inspection.py index 1b05f9c..15697f9 100644 --- a/orangecontrib/explain/tests/test_inspection.py +++ b/orangecontrib/explain/tests/test_inspection.py @@ -319,17 +319,6 @@ def test_continuous_class(self): self.assertEqual(res["individual"].shape, (1, 506, 504)) self.assertEqual(res["values"].shape, (504,)) - def test_retain_original_values(self): - data = self.housing - nn = NNRegressionLearner(random_state=0)(data) - res_nn = individual_condition_expectation(nn, data, data.domain[0]) - rf = RandomForestRegressionLearner(n_estimators=10, random_state=0)(data) - res_rf = individual_condition_expectation(rf, data, data.domain[0]) - self.assertEqual(res_nn["values"].min(), res_rf["values"].min()) - self.assertEqual(res_nn["values"].max(), res_rf["values"].max()) - self.assertEqual(res_nn["values"].shape, res_rf["values"].shape) - self.assertEqual(len(set(res_nn["values"])), len(res_rf["values"])) - def test_multi_class(self): data = self.iris model = RandomForestLearner(n_estimators=10, random_state=0)(data) diff --git a/orangecontrib/explain/widgets/owice.py b/orangecontrib/explain/widgets/owice.py index 914b45b..5e68bb5 100644 --- a/orangecontrib/explain/widgets/owice.py +++ b/orangecontrib/explain/widgets/owice.py @@ -508,6 +508,9 @@ class Error(OWWidget.Error): not_enough_data = Msg("At least two instances are needed.") no_cont_features = Msg("At least one numeric feature is required.") + class Warning(OWWidget.Warning): + pp_feature = Msg("Selected feature has been preprocessed.") + class Information(OWWidget.Information): data_sampled = Msg("Data has been sampled.") @@ -762,6 +765,7 @@ def clear(self): self.cancel() self.Error.domain_transform_err.clear() self.Error.unknown_err.clear() + self.Warning.pp_feature.clear() self.graph.clear_all() def setup_plot(self): @@ -773,6 +777,13 @@ def setup_plot(self): y_average = self.__results.y_average[self.target_index] y_individual = self.__results.y_individual[self.target_index] + data = self.data[self.__sampled_mask] + orig_values = data[:, self.feature].X.flatten() + _, index = np.unique(orig_values, return_index=True) + orig_values = orig_values[index] + if len(orig_values) != len(x_data) or (orig_values != x_data).any(): + self.Warning.pp_feature() + class_var: Variable = self.model.original_domain.class_var if class_var.is_discrete: cls_val = class_var.values[self.target_index] diff --git a/orangecontrib/explain/widgets/tests/test_owice.py b/orangecontrib/explain/widgets/tests/test_owice.py index c1538f6..ee46aed 100644 --- a/orangecontrib/explain/widgets/tests/test_owice.py +++ b/orangecontrib/explain/widgets/tests/test_owice.py @@ -5,7 +5,8 @@ from AnyQt.QtCore import Qt, QPointF from Orange.classification import RandomForestLearner, CalibratedLearner, \ - ThresholdLearner, SimpleRandomForestLearner as SimpleRandomForestClassifier + ThresholdLearner, NaiveBayesLearner, \ + SimpleRandomForestLearner as SimpleRandomForestClassifier from Orange.data import Table from Orange.data.table import DomainTransformationError from Orange.regression import RandomForestRegressionLearner, \ @@ -177,6 +178,18 @@ def test_send_report(self): self.send_signal(self.widget.Inputs.model, self.rf_reg) self.widget.send_report() + def test_naive_bayes(self): + data = self.iris + self.send_signal(self.widget.Inputs.data, data) + + self.send_signal(self.widget.Inputs.model, NaiveBayesLearner()(data)) + self.wait_until_finished() + self.assertTrue(self.widget.Warning.pp_feature.is_shown()) + + self.send_signal(self.widget.Inputs.model, self.rf_cls) + self.wait_until_finished() + self.assertFalse(self.widget.Warning.pp_feature.is_shown()) + if __name__ == "__main__": unittest.main() diff --git a/tox.ini b/tox.ini index 60358a5..c8e7f24 100644 --- a/tox.ini +++ b/tox.ini @@ -28,6 +28,7 @@ deps = oldest: pandas==1.4.0 oldest: scikit-learn==1.3.0 oldest: scipy==1.9.0 + oldest: xgboost==2.0.0 latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3 latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core latest: https://github.com/biolab/orange-widget-base/archive/refs/heads/master.zip#egg=orange-widget-base