Skip to content

Commit 31e09f6

Browse files
gregcaporaso and wasade committed
MAINT: various updates to support better usage of biom.Table API (qiime2#323)
Fixes qiime2#149. Also adds a subsample test that should have existed (prior to this PR).

Co-authored-by: Daniel McDonald <[email protected]>
1 parent c22729b commit 31e09f6

File tree

4 files changed

+39
-12
lines changed

4 files changed

+39
-12
lines changed

q2_feature_table/_filter.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
6666
# filter on the opposite axis to remove any entities that now have a
6767
# frequency of zero
6868
if filter_opposite_axis:
69-
filter_fn2 = _get_biom_filter_function(
70-
ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0,
71-
max_frequency=None, min_nonzero=1, max_nonzero=None)
72-
table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
69+
table.remove_empty(axis=_other_axis_map[axis], inplace=True)
7370

7471
if not allow_empty_table:
7572
_validate_nonempty_table(table)

q2_feature_table/_split.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ def split(table: biom.Table,
1515
filter_empty_features: bool = True) -> biom.Table:
1616
metadata = metadata.filter_ids(table.ids(axis='sample'))
1717
metadata_df = metadata.drop_missing_values().to_dataframe()
18+
lookup = metadata_df[metadata.name].to_dict()
1819

19-
indices = metadata_df.reset_index(
20-
).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict()
20+
def partition_f(i, m):
21+
return lookup.get(i)
2122

23+
unique_grps = sorted(set(lookup.values()))
2224
try:
23-
qiime2.sdk.util.validate_result_collection_keys(*indices.keys())
25+
qiime2.sdk.util.validate_result_collection_keys(*unique_grps)
2426
except KeyError as e:
2527
raise KeyError(
2628
"One or more invalid metadata column values identified during "
@@ -29,9 +31,11 @@ def split(table: biom.Table,
2931
f"table. The original error message is as follows: {str(e)}")
3032

3133
result = {}
32-
for group, sample_ids in indices.items():
33-
t = table.filter(sample_ids, axis='sample', inplace=False)
34+
for group, tab in table.partition(partition_f):
35+
if group is None:
36+
continue
37+
3438
if filter_empty_features:
35-
t.remove_empty(axis='observation', inplace=True)
36-
result[group] = t
39+
tab.remove_empty(axis='observation', inplace=True)
40+
result[group] = tab
3741
return result

q2_feature_table/_subsample_ids.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int,
2525

2626
# the inverted axis is always observation due to the above transpose
2727
invaxis = 'observation'
28-
table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis)
28+
table = table.remove_empty(axis=invaxis, inplace=False)
2929

3030
if axis == 'feature':
3131
# reverse the transpose necessary due to biocore/biom-format#759

q2_feature_table/tests/test_subsample.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,19 @@ def test_subsample_samples(self):
3434
npt.assert_equal(t.data(i, axis='sample'),
3535
a.data(i, axis='sample'))
3636

37+
def test_subsample_samples_drop_empty_feature(self):
38+
t = Table(np.array([[0, 0, 0], [1, 1, 2]]),
39+
['O1', 'O2'],
40+
['S1', 'S2', 'S3'])
41+
a = subsample_ids(t, 2, 'sample')
42+
self.assertEqual(a.shape, (1, 2))
43+
44+
sample_ids = frozenset(a.ids(axis='sample'))
45+
self.assertIn(sample_ids, set([frozenset(['S1', 'S2']),
46+
frozenset(['S1', 'S3']),
47+
frozenset(['S2', 'S3'])]))
48+
self.assertEqual(set(a.ids(axis='observation')), set(['O2']))
49+
3750
def test_subsample_features(self):
3851
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
3952
['O1', 'O2', 'O3'],
@@ -51,6 +64,19 @@ def test_subsample_features(self):
5164
npt.assert_equal(t.data(i, axis='observation'),
5265
a.data(i, axis='observation'))
5366

67+
def test_subsample_features_drop_empty_samples(self):
68+
t = Table(np.array([[0, 0, 0], [1, 1, 2]]).T,
69+
['O1', 'O2', 'O3'],
70+
['S1', 'S2'])
71+
a = subsample_ids(t, 2, 'feature')
72+
self.assertEqual(a.shape, (2, 1))
73+
74+
sample_ids = frozenset(a.ids(axis='observation'))
75+
self.assertIn(sample_ids, set([frozenset(['O1', 'O2']),
76+
frozenset(['O1', 'O3']),
77+
frozenset(['O2', 'O3'])]))
78+
self.assertEqual(set(a.ids(axis='sample')), set(['S2']))
79+
5480
def test_subsample_samples_oversample(self):
5581
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
5682
['O1', 'O2', 'O3'],

0 commit comments

Comments
 (0)