Skip to content

Commit

Permalink
MAINT: various updates to support better usage of biom.Table API (qiime2#323)
Browse files Browse the repository at this point in the history

Fixes qiime2#149

Also adds a subsample test that should have existed (prior to this PR).

---------

Co-authored-by: Daniel McDonald <[email protected]>
  • Loading branch information
gregcaporaso and wasade committed Jan 23, 2025
1 parent c22729b commit 31e09f6
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 12 deletions.
5 changes: 1 addition & 4 deletions q2_feature_table/_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
# filter on the opposite axis to remove any entities that now have a
# frequency of zero
if filter_opposite_axis:
filter_fn2 = _get_biom_filter_function(
ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0,
max_frequency=None, min_nonzero=1, max_nonzero=None)
table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
table.remove_empty(axis=_other_axis_map[axis], inplace=True)

if not allow_empty_table:
_validate_nonempty_table(table)
Expand Down
18 changes: 11 additions & 7 deletions q2_feature_table/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ def split(table: biom.Table,
filter_empty_features: bool = True) -> biom.Table:
metadata = metadata.filter_ids(table.ids(axis='sample'))
metadata_df = metadata.drop_missing_values().to_dataframe()
lookup = metadata_df[metadata.name].to_dict()

indices = metadata_df.reset_index(
).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict()
def partition_f(i, m):
return lookup.get(i)

unique_grps = sorted(set(lookup.values()))
try:
qiime2.sdk.util.validate_result_collection_keys(*indices.keys())
qiime2.sdk.util.validate_result_collection_keys(*unique_grps)
except KeyError as e:
raise KeyError(
"One or more invalid metadata column values identified during "
Expand All @@ -29,9 +31,11 @@ def split(table: biom.Table,
f"table. The original error message is as follows: {str(e)}")

result = {}
for group, sample_ids in indices.items():
t = table.filter(sample_ids, axis='sample', inplace=False)
for group, tab in table.partition(partition_f):
if group is None:
continue

if filter_empty_features:
t.remove_empty(axis='observation', inplace=True)
result[group] = t
tab.remove_empty(axis='observation', inplace=True)
result[group] = tab
return result
2 changes: 1 addition & 1 deletion q2_feature_table/_subsample_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int,

# the inverted axis is always observation due to the above transpose
invaxis = 'observation'
table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis)
table = table.remove_empty(axis=invaxis, inplace=False)

if axis == 'feature':
# reverse the transpose necessary due to biocore/biom-format#759
Expand Down
26 changes: 26 additions & 0 deletions q2_feature_table/tests/test_subsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ def test_subsample_samples(self):
npt.assert_equal(t.data(i, axis='sample'),
a.data(i, axis='sample'))

def test_subsample_samples_drop_empty_feature(self):
    """Subsampling samples drops a feature that has no counts left."""
    # O1 is zero in every sample, so only O2 can remain afterwards.
    counts = np.array([[0, 0, 0], [1, 1, 2]])
    table = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])

    obs = subsample_ids(table, 2, 'sample')

    self.assertEqual(obs.shape, (1, 2))

    # Any two of the three samples are an acceptable outcome.
    acceptable = {
        frozenset(['S1', 'S2']),
        frozenset(['S1', 'S3']),
        frozenset(['S2', 'S3']),
    }
    kept_samples = frozenset(obs.ids(axis='sample'))
    self.assertIn(kept_samples, acceptable)
    self.assertEqual(set(obs.ids(axis='observation')), {'O2'})

def test_subsample_features(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
Expand All @@ -51,6 +64,19 @@ def test_subsample_features(self):
npt.assert_equal(t.data(i, axis='observation'),
a.data(i, axis='observation'))

def test_subsample_features_drop_empty_samples(self):
    """Subsampling features drops a sample that has no counts left."""
    # After the transpose S1 is zero for every feature, so only S2 can
    # remain afterwards.
    counts = np.array([[0, 0, 0], [1, 1, 2]]).T
    table = Table(counts, ['O1', 'O2', 'O3'], ['S1', 'S2'])

    obs = subsample_ids(table, 2, 'feature')

    self.assertEqual(obs.shape, (2, 1))

    # Any two of the three features are an acceptable outcome.
    acceptable = {
        frozenset(['O1', 'O2']),
        frozenset(['O1', 'O3']),
        frozenset(['O2', 'O3']),
    }
    kept_features = frozenset(obs.ids(axis='observation'))
    self.assertIn(kept_features, acceptable)
    self.assertEqual(set(obs.ids(axis='sample')), {'S2'})

def test_subsample_samples_oversample(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
Expand Down

0 comments on commit 31e09f6

Please sign in to comment.