MAINT: updates for Python 3.9 #233

Merged: 8 commits, May 2, 2024
2 changes: 1 addition & 1 deletion q2_sample_classifier/classify.py
@@ -83,7 +83,7 @@ def metatable(ctx,
             raise ValueError('Missing samples in metadata: %r' %
                              table_ids.difference(metadata_ids))
         else:
-            metadata = metadata.loc[sample_ids]
+            metadata = metadata.loc[list(sample_ids)]
             if len(sample_ids) < len(table_ids):
                 tab = tab.filter(
                     ids_to_keep=sample_ids, axis='sample', inplace=False)
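Why this change: recent pandas rejects set objects as `.loc` indexers (`TypeError: Passing a set as an indexer is not supported. Use a list instead.`), and `sample_ids` here is presumably a set of shared IDs. A minimal sketch of the failure and fix, with made-up data:

```python
import pandas as pd

md = pd.DataFrame({'group': ['a', 'b', 'c']}, index=['s1', 's2', 's3'])
sample_ids = {'s1', 's3'}  # hypothetical set of shared sample IDs

# md.loc[sample_ids]               # TypeError on modern pandas: sets are not valid indexers
subset = md.loc[list(sample_ids)]  # converting to a list restores row selection
print(subset)
```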
2 changes: 1 addition & 1 deletion q2_sample_classifier/tests/test_actions.py
@@ -59,7 +59,7 @@ def test_action_split_table(self):
         self.assertEqual(y_train.name, 'bugs')

         # test if complete target column is covered
-        y_all = y_train.append(y_test).sort_index()
+        y_all = pd.concat([y_train, y_test]).sort_index()
         y_all.index.name = 'SampleID'
         pdt.assert_series_equal(y_all, self.md._series)

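Why this change: `Series.append` was deprecated in pandas 1.4 and removed in 2.0; `pd.concat` is the drop-in replacement the test switches to. A small sketch with toy data:

```python
import pandas as pd

y_train = pd.Series([1, 0], index=['s2', 's1'], name='bugs')
y_test = pd.Series([1], index=['s3'], name='bugs')

# y_all = y_train.append(y_test).sort_index()      # AttributeError in pandas >= 2.0
y_all = pd.concat([y_train, y_test]).sort_index()  # same result, still a Series
print(y_all)
```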
10 changes: 5 additions & 5 deletions q2_sample_classifier/tests/test_estimators.py
@@ -117,7 +117,7 @@ def _load_cmc(md_fp, column):
             index_col=0, names=['feature', 'importance'])
         self.exp_pred = pd.read_csv(
             self.get_data_path('predictions.tsv'), sep='\t', header=0,
-            index_col=0, squeeze=True)
+            index_col=0).squeeze('columns')
         index = pd.Index(['A', 'B', 'C', 'D'], name='id')
         self.table_percnorm = qiime2.Artifact.import_data(
             FeatureTable[PercentileNormalized], pd.DataFrame(
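Why this change: the `squeeze` keyword of `pd.read_csv` was deprecated in pandas 1.4 and removed in 2.0; the replacement is to read a DataFrame and call `.squeeze('columns')`, which collapses a one-column frame into a Series. A sketch with inline data (the same pattern recurs in the transformer tests below):

```python
import io
import pandas as pd

tsv = io.StringIO('id\tprediction\na\t1\nb\t0\n')

# pred = pd.read_csv(tsv, sep='\t', index_col=0, squeeze=True)  # removed in pandas 2.0
pred = pd.read_csv(tsv, sep='\t', index_col=0).squeeze('columns')
print(type(pred))  # a single data column squeezes to a Series; wider frames are unchanged
```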
@@ -135,7 +135,7 @@ def test_extract_features(self):
         dv = DictVectorizer()
         dv.fit(dicts)
         features = table.ids('observation')
-        self.assertEqual(set(dv.get_feature_names()), set(features))
+        self.assertEqual(set(dv.get_feature_names_out()), set(features))
         self.assertEqual(len(dicts), len(table.ids()))
         for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
             for feature, count in zip(features, table_row):
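Why this change: `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2 in favor of `get_feature_names_out`, which returns an array rather than a list. A minimal `DictVectorizer` sketch:

```python
from sklearn.feature_extraction import DictVectorizer

dv = DictVectorizer()
dv.fit([{'featA': 1.0, 'featB': 2.0}])

# dv.get_feature_names()            # AttributeError in scikit-learn >= 1.2
names = dv.get_feature_names_out()  # ndarray of feature names, e.g. ['featA' 'featB']
print(set(names))
```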
@@ -398,7 +398,7 @@ def test_train_adaboost_decision_tree(self):
             parameter_tuning=True, classification=True,
             missing_samples='ignore', base_estimator="DecisionTree")
         self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
-        self.assertEqual(type(abe.named_steps.est.base_estimator),
+        self.assertEqual(type(abe.named_steps.est.estimator),
                          DecisionTreeClassifier)

     def test_train_adaboost_extra_trees(self):
@@ -408,7 +408,7 @@ def test_train_adaboost_extra_trees(self):
             parameter_tuning=True, classification=True,
             missing_samples='ignore', base_estimator="ExtraTrees")
         self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
-        self.assertEqual(type(abe.named_steps.est.base_estimator),
+        self.assertEqual(type(abe.named_steps.est.estimator),
                          ExtraTreeClassifier)

     # test some invalid inputs/edge cases
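Why these two changes: scikit-learn renamed the AdaBoost `base_estimator` parameter/attribute to `estimator` (deprecated in 1.2, removed in 1.4), so the assertions read the new attribute. A standalone sketch of the new spelling:

```python
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# AdaBoostClassifier(base_estimator=DecisionTreeClassifier())  # TypeError in sklearn >= 1.4
abe = AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=50)
assert type(abe.estimator) is DecisionTreeClassifier  # mirrors the updated assertions
```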
@@ -504,7 +504,7 @@ def test_predict_classifications(self):
             ls_pred_classes = prob.columns.tolist()
             ls_correct_range = [col for col in ls_pred_classes if
                                 prob[col].between(
-                                    0, 1, inclusive=True).all()]
+                                    0, 1, inclusive="both").all()]
             self.assertEqual(len(ls_correct_range), prob.shape[1],
                              msg='Predicted probabilities of class {}'
                              'are not in range [0,1]'.format(
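Why this change: `Series.between` stopped accepting booleans for `inclusive` (deprecated in pandas 1.3, later removed); it now takes one of 'both', 'neither', 'left', 'right'. A toy sketch of the updated bounds check:

```python
import pandas as pd

prob = pd.Series([0.0, 0.4, 1.0])

# prob.between(0, 1, inclusive=True)             # boolean form rejected by recent pandas
in_range = prob.between(0, 1, inclusive='both')  # endpoints included on both sides
assert in_range.all()
```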
6 changes: 3 additions & 3 deletions q2_sample_classifier/tests/test_types_formats_transformers.py
@@ -85,7 +85,7 @@ def test_pd_series_to_boolean_format(self):
                          name='outlier', index=exp_index)
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         self.assertEqual(sorted(exp), sorted(obs))

     def test_boolean_format_to_pd_series(self):
@@ -152,7 +152,7 @@ def test_pd_series_to_Predictions_format(self):
                         name='prediction', index=['a', 'b', 'c', 'd'])
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         pdt.assert_series_equal(obs, exp)

     def test_pd_series_to_Predictions_format_allow_nans(self):
@@ -161,7 +161,7 @@ def test_pd_series_to_Predictions_format_allow_nans(self):
                         name='prediction', index=['a', 'b', 'c', 'd'])
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         pdt.assert_series_equal(obs, exp)

     def test_Predictions_format_to_pd_series(self):
20 changes: 11 additions & 9 deletions q2_sample_classifier/utilities.py
@@ -239,7 +239,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
     # Describe top features
     n_opt = rfecv.named_steps.est.n_features_
     importance = _extract_important_features(
-        rfecv.named_steps.dv.get_feature_names(),
+        rfecv.named_steps.dv.get_feature_names_out(),
         rfecv.named_steps.est.ranking_)
     importance = sort_importances(importance, ascending=True)[:n_opt]

@@ -249,16 +249,17 @@


 def _extract_rfe_scores(rfecv):
+    grid_scores_ = rfecv.cv_results_['mean_test_score']
     n_features = len(rfecv.ranking_)
     # If using fractional step, step = integer of fraction * n_features
     if rfecv.step < 1:
         rfecv.step = int(rfecv.step * n_features)
-    # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
+    # Need to manually calculate x-axis, grid_scores_ is a 1-d array
     x = [n_features - (n * rfecv.step)
-         for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+         for n in range(len(grid_scores_)-1, -1, -1)]
     if x[0] < 1:
         x[0] = 1
-    return pd.Series(rfecv.grid_scores_, index=x, name='Accuracy')
+    return pd.Series(grid_scores_, index=x, name='Accuracy')


 def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
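Why this change: `RFECV.grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2; the per-step mean cross-validation scores now live in `cv_results_['mean_test_score']`, which is what the local `grid_scores_` variable above aliases. A self-contained sketch on synthetic data:

```python
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=60, n_features=8, random_state=0)
rfecv = RFECV(LogisticRegression(max_iter=1000), step=1, cv=3).fit(X, y)

# rfecv.grid_scores_                                # AttributeError in scikit-learn >= 1.2
mean_scores = rfecv.cv_results_['mean_test_score']  # one mean CV score per feature count
print(len(mean_scores), rfecv.n_features_)
```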
@@ -411,12 +412,12 @@ def _calculate_feature_importances(estimator):
     # feature_importances_ or coef_ to report feature importance/weights
     try:
         importances = _extract_important_features(
-            estimator.named_steps.dv.get_feature_names(),
+            estimator.named_steps.dv.get_feature_names_out(),
             estimator.named_steps.est.feature_importances_)
     # is there a better way to determine whether estimator has coef_ ?
     except AttributeError:
         importances = _extract_important_features(
-            estimator.named_steps.dv.get_feature_names(),
+            estimator.named_steps.dv.get_feature_names_out(),
             estimator.named_steps.est.coef_)
     return importances

@@ -718,7 +719,7 @@ def _mean_feature_importance(importances):
 def _null_feature_importance(table):
     feature_extractor = DictVectorizer()
     feature_extractor.fit(table)
-    imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+    imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
     imp.index.name = "feature"
     imp["importance"] = 1
     return imp
@@ -827,8 +828,9 @@ def _train_adaboost_base_estimator(table, metadata, column, base_estimator,

     return Pipeline(
         [('dv', estimator.named_steps.dv),
-         ('est', adaboost_estimator(estimator.named_steps.est,
-                                    n_estimators, random_state=random_state))])
+         ('est', adaboost_estimator(estimator=estimator.named_steps.est,
+                                    n_estimators=n_estimators,
+                                    random_state=random_state))])


 def _disable_feature_selection(estimator, optimize_feature_selection):
6 changes: 3 additions & 3 deletions q2_sample_classifier/visuals.py
@@ -167,9 +167,9 @@ def _plot_confusion_matrix(y_test, y_pred, classes, normalize, palette,
     predictions.loc["Overall Accuracy"] = ""
     predictions.loc["Baseline Accuracy"] = ""
     predictions.loc["Accuracy Ratio"] = ""
-    predictions.loc["Overall Accuracy"]["Overall Accuracy"] = accuracy
-    predictions.loc["Baseline Accuracy"]["Overall Accuracy"] = basline_accuracy
-    predictions.loc["Accuracy Ratio"]["Overall Accuracy"] = accuracy_ratio
+    predictions.loc["Overall Accuracy", "Overall Accuracy"] = accuracy
+    predictions.loc["Baseline Accuracy", "Overall Accuracy"] = basline_accuracy
+    predictions.loc["Accuracy Ratio", "Overall Accuracy"] = accuracy_ratio

     return predictions, confusion
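Why this change: the old lines used chained indexing (`.loc[row][col] = value`), which assigns into a temporary row copy; pandas emits `SettingWithCopyWarning` and, under copy-on-write, the write is silently lost. A single `.loc[row, col]` call writes in place. A minimal sketch:

```python
import pandas as pd

predictions = pd.DataFrame('', index=['Overall Accuracy'],
                           columns=['Overall Accuracy'])

# predictions.loc['Overall Accuracy']['Overall Accuracy'] = 0.9  # may modify a copy only
predictions.loc['Overall Accuracy', 'Overall Accuracy'] = 0.9    # reliable in-place write
print(predictions)
```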