From 46a6f1c7715ff93755b9cc021e91aa51ab8ab756 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 19 Sep 2023 15:59:34 -0700 Subject: [PATCH 1/3] Fixes #149 --- q2_feature_table/_filter.py | 5 +---- q2_feature_table/_subsample_ids.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/q2_feature_table/_filter.py b/q2_feature_table/_filter.py index e286e01..0b782e2 100644 --- a/q2_feature_table/_filter.py +++ b/q2_feature_table/_filter.py @@ -66,10 +66,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero, # filter on the opposite axis to remove any entities that now have a # frequency of zero if filter_opposite_axis: - filter_fn2 = _get_biom_filter_function( - ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0, - max_frequency=None, min_nonzero=1, max_nonzero=None) - table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True) + table.remove_empty(axis=_other_axis_map[axis], inplace=True) if not allow_empty_table: _validate_nonempty_table(table) diff --git a/q2_feature_table/_subsample_ids.py b/q2_feature_table/_subsample_ids.py index 6cb8ca5..e241423 100644 --- a/q2_feature_table/_subsample_ids.py +++ b/q2_feature_table/_subsample_ids.py @@ -25,7 +25,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int, # the inverted axis is always observation due to the above transpose invaxis = 'observation' - table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis) + table = table.remove_empty(axis=invaxis, inplace=False) if axis == 'feature': # reverse the transpose necessary due to biocore/biom-format#759 From b3e8028da4a9c291022798f5a6aa0e19b6d8ec8e Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 19 Sep 2023 16:15:58 -0700 Subject: [PATCH 2/3] MAINT: use Table.partition --- q2_feature_table/_split.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/q2_feature_table/_split.py b/q2_feature_table/_split.py index ab795d5..55f61ff 100644 --- a/q2_feature_table/_split.py +++ b/q2_feature_table/_split.py @@ -15,12 +15,14 @@ def split(table: biom.Table, filter_empty_features: bool = True) -> biom.Table: metadata = metadata.filter_ids(table.ids(axis='sample')) metadata_df = metadata.drop_missing_values().to_dataframe() + lookup = metadata_df[metadata.name].to_dict() - indices = metadata_df.reset_index( - ).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict() + def partition_f(i, m): + return lookup.get(i) + unique_grps = sorted(set(lookup.values())) try: - qiime2.sdk.util.validate_result_collection_keys(*indices.keys()) + qiime2.sdk.util.validate_result_collection_keys(*unique_grps) except KeyError as e: raise KeyError( "One or more invalid metadata column values identified during " @@ -29,9 +31,11 @@ def split(table: biom.Table, f"table. The original error message is as follows: {str(e)}") result = {} - for group, sample_ids in indices.items(): - t = table.filter(sample_ids, axis='sample', inplace=False) + for group, tab in table.partition(partition_f): + if group is None: + continue + if filter_empty_features: - t.remove_empty(axis='observation', inplace=True) - result[group] = t + tab.remove_empty(axis='observation', inplace=True) + result[group] = tab return result From a8f0a54685f8f0a8d9d1cbdfba8dcd1d09495161 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Tue, 17 Dec 2024 12:39:42 -0700 Subject: [PATCH 3/3] TST: adds subsample test that should have existed (prior to this PR) --- q2_feature_table/tests/test_subsample.py | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index f0e0c98..8f9b739 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -34,6 +34,19 @@ def test_subsample_samples(self): npt.assert_equal(t.data(i, axis='sample'), a.data(i, axis='sample')) + def test_subsample_samples_drop_empty_feature(self): + t = Table(np.array([[0, 0, 0], [1, 1, 2]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + a = subsample_ids(t, 2, 'sample') + self.assertEqual(a.shape, (1, 2)) + + sample_ids = frozenset(a.ids(axis='sample')) + self.assertIn(sample_ids, set([frozenset(['S1', 'S2']), + frozenset(['S1', 'S3']), + frozenset(['S2', 'S3'])])) + self.assertEqual(set(a.ids(axis='observation')), set(['O2'])) + def test_subsample_features(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], @@ -51,6 +64,19 @@ def test_subsample_features(self): npt.assert_equal(t.data(i, axis='observation'), a.data(i, axis='observation')) + def test_subsample_features_drop_empty_samples(self): + t = Table(np.array([[0, 0, 0], [1, 1, 2]]).T, + ['O1', 'O2', 'O3'], + ['S1', 'S2']) + a = subsample_ids(t, 2, 'feature') + self.assertEqual(a.shape, (2, 1)) + + sample_ids = frozenset(a.ids(axis='observation')) + self.assertIn(sample_ids, set([frozenset(['O1', 'O2']), + frozenset(['O1', 'O3']), + frozenset(['O2', 'O3'])])) + self.assertEqual(set(a.ids(axis='sample')), set(['S2'])) + def test_subsample_samples_oversample(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'],