From 6758bc2bd7c5149f915034e130e8a004b5bf5fd7 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 19 Sep 2023 15:59:34 -0700 Subject: [PATCH 1/2] Fixes #149 --- q2_feature_table/_filter.py | 5 +---- q2_feature_table/_subsample.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/q2_feature_table/_filter.py b/q2_feature_table/_filter.py index 8253587..b707902 100644 --- a/q2_feature_table/_filter.py +++ b/q2_feature_table/_filter.py @@ -57,10 +57,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero, # filter on the opposite axis to remove any entities that now have a # frequency of zero if filter_opposite_axis: - filter_fn2 = _get_biom_filter_function( - ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0, - max_frequency=None, min_nonzero=1, max_nonzero=None) - table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True) + table.remove_empty(axis=_other_axis_map[axis], inplace=True) def filter_samples(table: biom.Table, min_frequency: int = 0, diff --git a/q2_feature_table/_subsample.py b/q2_feature_table/_subsample.py index 7a29293..a2d353d 100644 --- a/q2_feature_table/_subsample.py +++ b/q2_feature_table/_subsample.py @@ -25,7 +25,7 @@ def subsample(table: biom.Table, subsampling_depth: int, # the inverted axis is always observation due to the above transpose invaxis = 'observation' - table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis) + table = table.remove_empty(axis=invaxis, inplace=False) if axis == 'feature': # reverse the transpose necessary due to biocore/biom-format#759 From 7a9a89f84225e8f44b1669d0015cb15e6aefded2 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 19 Sep 2023 16:15:58 -0700 Subject: [PATCH 2/2] MAINT: use Table.partition --- q2_feature_table/_split.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/q2_feature_table/_split.py b/q2_feature_table/_split.py index a7d846d..ebf0775 100644 --- a/q2_feature_table/_split.py +++ b/q2_feature_table/_split.py @@ -14,12 +14,14 @@ def split(table: biom.Table, metadata: qiime2.CategoricalMetadataColumn, filter_empty_features: bool = True) -> biom.Table: metadata_df = metadata.drop_missing_values().to_dataframe() + lookup = metadata_df[metadata.name].to_dict() - indices = metadata_df.reset_index( - ).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict() + def partition_f(i, m): + return lookup.get(i) + unique_grps = sorted(set(lookup.values())) try: - qiime2.sdk.util.validate_result_collection_keys(*indices.keys()) + qiime2.sdk.util.validate_result_collection_keys(*unique_grps) except KeyError as e: raise KeyError( "One or more invalid metadata column values identified during " @@ -28,9 +30,11 @@ def split(table: biom.Table, f"table. The original error message is as follows: {str(e)}") result = {} - for group, sample_ids in indices.items(): - t = table.filter(sample_ids, axis='sample', inplace=False) + for group, tab in table.partition(partition_f): + if group is None: + continue + if filter_empty_features: - t.remove_empty(axis='observation', inplace=True) - result[group] = t + tab.remove_empty(axis='observation', inplace=True) + result[group] = tab return result