Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rebased version of #286 #323

Merged
merged 3 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions q2_feature_table/_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
# filter on the opposite axis to remove any entities that now have a
# frequency of zero
if filter_opposite_axis:
filter_fn2 = _get_biom_filter_function(
ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0,
max_frequency=None, min_nonzero=1, max_nonzero=None)
table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
table.remove_empty(axis=_other_axis_map[axis], inplace=True)

if not allow_empty_table:
_validate_nonempty_table(table)
Expand Down
18 changes: 11 additions & 7 deletions q2_feature_table/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ def split(table: biom.Table,
filter_empty_features: bool = True) -> biom.Table:
metadata = metadata.filter_ids(table.ids(axis='sample'))
metadata_df = metadata.drop_missing_values().to_dataframe()
lookup = metadata_df[metadata.name].to_dict()

indices = metadata_df.reset_index(
).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict()
def partition_f(i, m):
return lookup.get(i)

unique_grps = sorted(set(lookup.values()))
try:
qiime2.sdk.util.validate_result_collection_keys(*indices.keys())
qiime2.sdk.util.validate_result_collection_keys(*unique_grps)
except KeyError as e:
raise KeyError(
"One or more invalid metadata column values identified during "
Expand All @@ -29,9 +31,11 @@ def split(table: biom.Table,
f"table. The original error message is as follows: {str(e)}")

result = {}
for group, sample_ids in indices.items():
t = table.filter(sample_ids, axis='sample', inplace=False)
for group, tab in table.partition(partition_f):
if group is None:
continue

if filter_empty_features:
t.remove_empty(axis='observation', inplace=True)
result[group] = t
tab.remove_empty(axis='observation', inplace=True)
result[group] = tab
return result
2 changes: 1 addition & 1 deletion q2_feature_table/_subsample_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int,

# the inverted axis is always observation due to the above transpose
invaxis = 'observation'
table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis)
table = table.remove_empty(axis=invaxis, inplace=False)

if axis == 'feature':
# reverse the transpose necessary due to biocore/biom-format#759
Expand Down
26 changes: 26 additions & 0 deletions q2_feature_table/tests/test_subsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ def test_subsample_samples(self):
npt.assert_equal(t.data(i, axis='sample'),
a.data(i, axis='sample'))

def test_subsample_samples_drop_empty_feature(self):
    """Subsampling samples should not keep a feature that has no counts."""
    # O1 is zero in every sample; O2 is observed in all three samples
    counts = np.array([[0, 0, 0], [1, 1, 2]])
    t = Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3'])

    a = subsample_ids(t, 2, 'sample')

    # two samples retained, and only the non-empty feature remains
    self.assertEqual(a.shape, (1, 2))

    # any two of the three samples are an acceptable outcome
    kept_samples = frozenset(a.ids(axis='sample'))
    acceptable = {
        frozenset(['S1', 'S2']),
        frozenset(['S1', 'S3']),
        frozenset(['S2', 'S3']),
    }
    self.assertIn(kept_samples, acceptable)
    self.assertEqual(set(a.ids(axis='observation')), {'O2'})

def test_subsample_features(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
Expand All @@ -51,6 +64,19 @@ def test_subsample_features(self):
npt.assert_equal(t.data(i, axis='observation'),
a.data(i, axis='observation'))

def test_subsample_features_drop_empty_samples(self):
    """Subsampling features should not keep a sample that has no counts."""
    # after the transpose, rows are features O1-O3; S1 is zero throughout
    counts = np.array([[0, 0, 0], [1, 1, 2]]).T
    t = Table(counts, ['O1', 'O2', 'O3'], ['S1', 'S2'])

    a = subsample_ids(t, 2, 'feature')

    # two features retained, and only the non-empty sample remains
    self.assertEqual(a.shape, (2, 1))

    # any two of the three features are an acceptable outcome
    kept_features = frozenset(a.ids(axis='observation'))
    acceptable = {
        frozenset(['O1', 'O2']),
        frozenset(['O1', 'O3']),
        frozenset(['O2', 'O3']),
    }
    self.assertIn(kept_features, acceptable)
    self.assertEqual(set(a.ids(axis='sample')), {'S2'})

def test_subsample_samples_oversample(self):
t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
['O1', 'O2', 'O3'],
Expand Down
Loading