Skip to content

Commit a783728

Browse files
Merge branch 'main' into tongy/removeBindPart2
2 parents 159d264 + 6b6e1fb commit a783728

File tree

8 files changed

+76
-14
lines changed

8 files changed

+76
-14
lines changed

responsibleai/responsibleai/rai_insights/rai_insights.py

+41-1
Original file line number | Diff line number | Diff line change
@@ -29,12 +29,19 @@
2929
_TARGET_COLUMN = 'target_column'
3030
_TASK_TYPE = 'task_type'
3131
_CLASSES = 'classes'
32+
_FEATURE_COLUMNS = 'feature_columns'
33+
_FEATURE_RANGES = 'feature_ranges'
3234
_CATEGORICAL_FEATURES = 'categorical_features'
3335
_META_JSON = Metadata.META_JSON
3436
_TRAIN_LABELS = 'train_labels'
3537
_JSON_EXTENSION = '.json'
3638
_PREDICT = 'predict'
3739
_PREDICT_PROBA = 'predict_proba'
40+
_COLUMN_NAME = 'column_name'
41+
_RANGE_TYPE = 'range_type'
42+
_UNIQUE_VALUES = 'unique_values'
43+
_MIN_VALUE = 'min_value'
44+
_MAX_VALUE = 'max_value'
3845

3946

4047
class RAIInsights(RAIBaseInsights):
@@ -92,6 +99,11 @@ def __init__(self, model: Optional[Any], train: pd.DataFrame,
9299
target_column=target_column,
93100
classes=classes
94101
)
102+
self._feature_columns = \
103+
test.drop(columns=[target_column]).columns.tolist()
104+
self._feature_ranges = RAIInsights._get_feature_ranges(
105+
test=test, categorical_features=categorical_features,
106+
feature_columns=self._feature_columns)
95107
self.categorical_features = categorical_features
96108

97109
super(RAIInsights, self).__init__(
@@ -528,11 +540,36 @@ def _save_metadata(self, path):
528540
_TARGET_COLUMN: self.target_column,
529541
_TASK_TYPE: self.task_type,
530542
_CATEGORICAL_FEATURES: self.categorical_features,
531-
_CLASSES: classes
543+
_CLASSES: classes,
544+
_FEATURE_COLUMNS: self._feature_columns,
545+
_FEATURE_RANGES: self._feature_ranges
546+
532547
}
533548
with open(top_dir / _META_JSON, 'w') as file:
534549
json.dump(meta, file)
535550

551+
@staticmethod
def _get_feature_ranges(test, categorical_features, feature_columns):
    """Get feature ranges like min, max and unique values for all columns.

    :param test: The dataset the ranges are computed from; it is indexed
        by column name, so it is expected to be a pandas DataFrame.
    :type test: pandas.DataFrame
    :param categorical_features: Names of the columns to describe by
        their unique values. ``None`` is treated the same as an empty
        list, i.e. no column is categorical.
    :type categorical_features: list[str] or None
    :param feature_columns: Names of the columns to describe, in the
        order in which they appear in the result.
    :type feature_columns: list[str]
    :return: One dictionary per entry of ``feature_columns``. Every
        dictionary holds the column name and a range type; categorical
        columns additionally hold their unique values, all other
        columns hold their minimum and maximum as floats.
    :rtype: list[dict]
    """
    # ``col in None`` raises TypeError, so normalise a missing list
    # first; a set also keeps each membership test O(1).
    categorical = set(categorical_features or ())
    result = []
    for col in feature_columns:
        # The column name is common to both kinds of entry.
        res_object = {_COLUMN_NAME: col}
        if col in categorical:
            res_object[_RANGE_TYPE] = "categorical"
            res_object[_UNIQUE_VALUES] = test[col].unique().tolist()
        else:
            # Every non-categorical column is labelled "integer", even
            # though the bounds themselves are stored as floats.
            res_object[_RANGE_TYPE] = "integer"
            res_object[_MIN_VALUE] = float(test[col].min())
            res_object[_MAX_VALUE] = float(test[col].max())
        result.append(res_object)
    return result
572+
536573
@staticmethod
537574
def _load_metadata(inst, path):
538575
"""Load the metadata.
@@ -562,6 +599,9 @@ def _load_metadata(inst, path):
562599
classes=classes
563600
)
564601

602+
inst.__dict__['_' + _FEATURE_COLUMNS] = meta[_FEATURE_COLUMNS]
603+
inst.__dict__['_' + _FEATURE_RANGES] = meta[_FEATURE_RANGES]
604+
565605
@staticmethod
566606
def load(path):
567607
"""Load the RAIInsights from the given path.

responsibleai/tests/causal/conftest.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
@pytest.fixture(scope='session')
1616
def adult_data():
1717
X_train_df, X_test_df, y_train, y_test,\
18-
_, _, target_name, _ = create_adult_income_dataset()
18+
_, _, target_name, _, _, _ = create_adult_income_dataset()
1919
train_df = copy.deepcopy(X_train_df)
2020
test_df = copy.deepcopy(X_test_df)
2121
train_df[target_name] = y_train

responsibleai/tests/causal/test_causal_general.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@ def test_causal_classification_scikitlearn_issue():
1212
# This test gets stuck on SciKit-Learn v1.1.0
1313
# See PR #1429
1414
data_train, data_test, _, _, categorical_features, \
15-
_, target_name, classes = create_adult_income_dataset()
15+
_, target_name, classes, _, _ = \
16+
create_adult_income_dataset()
1617

1718
rai_i = RAIInsights(
1819
model=None,

responsibleai/tests/common_utils.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,8 @@ def create_adult_income_dataset():
134134
target_name = 'income'
135135
target = dataset[target_name]
136136
classes = list(np.unique(target))
137+
feature_columns = dataset.drop(columns=[target_name]).columns.tolist()
138+
feature_range_keys = ['column_name', 'range_type', 'unique_values']
137139
categorical_features = list(set(dataset.columns) -
138140
set(continuous_features) -
139141
set([target_name]))
@@ -142,7 +144,8 @@ def create_adult_income_dataset():
142144
dataset, target,
143145
test_size=5000, random_state=7, stratify=target)
144146
return data_train, data_test, y_train, y_test, categorical_features, \
145-
continuous_features, target_name, classes
147+
continuous_features, target_name, classes, \
148+
feature_columns, feature_range_keys
146149

147150

148151
def create_complex_classification_pipeline(

responsibleai/tests/databalanceanalysis/conftest.py

+2
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ def adult_data():
2020
_,
2121
target_col,
2222
_,
23+
_,
24+
_,
2325
) = create_adult_income_dataset()
2426
train_df = copy.deepcopy(data_train)
2527
test_df = copy.deepcopy(data_test)

responsibleai/tests/test_model_analysis.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,8 @@ def test_model_analysis_binary(self, manager_type):
110110
def test_model_analysis_binary_mixed_types(self, manager_type):
111111

112112
data_train, data_test, y_train, y_test, categorical_features, \
113-
continuous_features, target_name, classes = \
113+
continuous_features, target_name, classes, \
114+
feature_columns, feature_range_keys = \
114115
create_adult_income_dataset()
115116
X_train = data_train.drop([target_name], axis=1)
116117

responsibleai/tests/test_rai_insights.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,8 @@ def test_rai_insights_binary(self, manager_type):
109109
def test_rai_insights_binary_mixed_types(self, manager_type):
110110

111111
data_train, data_test, y_train, y_test, categorical_features, \
112-
continuous_features, target_name, classes = \
112+
continuous_features, target_name, classes, \
113+
feature_columns, feature_range_keys = \
113114
create_adult_income_dataset()
114115
X_train = data_train.drop([target_name], axis=1)
115116

responsibleai/tests/test_rai_insights_save_and_load_scenarios.py

+22-8
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def test_rai_insights_empty_save_load_save(self):
5353
# Validate, but this isn't the main check
5454
validate_rai_insights(
5555
rai_2, X_train, X_test,
56-
LABELS, ModelTask.CLASSIFICATION, None)
56+
LABELS, ModelTask.CLASSIFICATION, None, None, None)
5757

5858
# Save again (this is where Issue #1046 manifested)
5959
rai_2.save(save_2)
@@ -68,7 +68,8 @@ def test_rai_insights_empty_save_load_save(self):
6868
ManagerNames.COUNTERFACTUAL])
6969
def test_rai_insights_save_load_add_save(self, manager_type):
7070
data_train, data_test, y_train, y_test, categorical_features, \
71-
continuous_features, target_name, classes = \
71+
continuous_features, target_name, classes, \
72+
feature_columns, feature_range_keys = \
7273
create_adult_income_dataset()
7374
X_train = data_train.drop([target_name], axis=1)
7475

@@ -120,7 +121,9 @@ def test_rai_insights_save_load_add_save(self, manager_type):
120121
validate_rai_insights(
121122
rai_2, data_train, data_test,
122123
target_name, ModelTask.CLASSIFICATION,
123-
categorical_features=categorical_features)
124+
categorical_features=categorical_features,
125+
feature_range_keys=feature_range_keys,
126+
feature_columns=feature_columns)
124127

125128
# Save again (this is where Issue #1046 manifested)
126129
rai_2.save(save_2)
@@ -135,7 +138,8 @@ def test_load_missing_dirs(self, target_dir):
135138
# The exception is the Explainer, which always creates a file
136139
# in its subdirectory
137140
data_train, data_test, y_train, y_test, categorical_features, \
138-
continuous_features, target_name, classes = \
141+
continuous_features, target_name, classes, \
142+
feature_columns, feature_range_keys = \
139143
create_adult_income_dataset()
140144
X_train = data_train.drop([target_name], axis=1)
141145

@@ -201,7 +205,8 @@ def test_loading_rai_insights_without_model_file(self):
201205
ManagerNames.COUNTERFACTUAL])
202206
def test_rai_insights_add_save_load_save(self, manager_type):
203207
data_train, data_test, y_train, y_test, categorical_features, \
204-
continuous_features, target_name, classes = \
208+
continuous_features, target_name, classes, \
209+
feature_columns, feature_range_keys = \
205210
create_adult_income_dataset()
206211
X_train = data_train.drop([target_name], axis=1)
207212

@@ -253,7 +258,9 @@ def test_rai_insights_add_save_load_save(self, manager_type):
253258
validate_rai_insights(
254259
rai_2, data_train, data_test,
255260
target_name, ModelTask.CLASSIFICATION,
256-
categorical_features=categorical_features)
261+
categorical_features=categorical_features,
262+
feature_range_keys=feature_range_keys,
263+
feature_columns=feature_columns)
257264

258265
# Save again (this is where Issue #1081 manifested)
259266
rai_2.save(save_2)
@@ -265,14 +272,21 @@ def validate_rai_insights(
265272
test_data,
266273
target_column,
267274
task_type,
268-
categorical_features
275+
categorical_features,
276+
feature_range_keys,
277+
feature_columns
269278
):
270-
271279
pd.testing.assert_frame_equal(rai_insights.train, train_data)
272280
pd.testing.assert_frame_equal(rai_insights.test, test_data)
273281
assert rai_insights.target_column == target_column
274282
assert rai_insights.task_type == task_type
275283
assert rai_insights.categorical_features == (categorical_features or [])
284+
if feature_range_keys is not None:
285+
assert feature_range_keys.sort() == \
286+
list(rai_insights._feature_ranges[0].keys()).sort()
287+
if feature_columns is not None:
288+
assert rai_insights._feature_columns == (feature_columns or [])
289+
assert target_column not in rai_insights._feature_columns
276290
if task_type == ModelTask.CLASSIFICATION:
277291
classes = train_data[target_column].unique()
278292
classes.sort()

0 commit comments

Comments
 (0)