Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #149 #286

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions q2_feature_table/_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,7 @@ def _filter_table(table, min_frequency, max_frequency, min_nonzero,
# filter on the opposite axis to remove any entities that now have a
# frequency of zero
if filter_opposite_axis:
filter_fn2 = _get_biom_filter_function(
ids_to_keep=table.ids(axis=_other_axis_map[axis]), min_frequency=0,
max_frequency=None, min_nonzero=1, max_nonzero=None)
table.filter(filter_fn2, axis=_other_axis_map[axis], inplace=True)
table.remove_empty(axis=_other_axis_map[axis], inplace=True)


def filter_samples(table: biom.Table, min_frequency: int = 0,
Expand Down
18 changes: 11 additions & 7 deletions q2_feature_table/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ def split(table: biom.Table,
metadata: qiime2.CategoricalMetadataColumn,
filter_empty_features: bool = True) -> biom.Table:
metadata_df = metadata.drop_missing_values().to_dataframe()
lookup = metadata_df[metadata.name].to_dict()

indices = metadata_df.reset_index(
).groupby(metadata.name)[metadata_df.index.name].apply(list).to_dict()
def partition_f(i, m):
return lookup.get(i)

unique_grps = sorted(set(lookup.values()))
try:
qiime2.sdk.util.validate_result_collection_keys(*indices.keys())
qiime2.sdk.util.validate_result_collection_keys(*unique_grps)
except KeyError as e:
raise KeyError(
"One or more invalid metadata column values identified during "
Expand All @@ -28,9 +30,11 @@ def split(table: biom.Table,
f"table. The original error message is as follows: {str(e)}")

result = {}
for group, sample_ids in indices.items():
t = table.filter(sample_ids, axis='sample', inplace=False)
for group, tab in table.partition(partition_f):
if group is None:
continue
Comment on lines +34 to +35
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gregcaporaso, we simply skip the partition whose group is null. The null comes from the `.get` call in `partition_f`, which returns None for any sample ID that is not in the metadata lookup. Does that seem reasonable for the intended uses?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make sure I understand: you generate the partitioned table containing all samples that are missing metadata (i.e., the value is null), but just don't do anything with it so it's not included in the output? If so, that seems fine to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. Specifically, if the sample ID is not in the metadata lookup, a None is returned. The None group is then ignored.


if filter_empty_features:
t.remove_empty(axis='observation', inplace=True)
result[group] = t
tab.remove_empty(axis='observation', inplace=True)
result[group] = tab
return result
2 changes: 1 addition & 1 deletion q2_feature_table/_subsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def subsample(table: biom.Table, subsampling_depth: int,

# the inverted axis is always observation due to the above transpose
invaxis = 'observation'
table.filter(lambda v, i, m: v.sum() > 0, axis=invaxis)
table = table.remove_empty(axis=invaxis, inplace=False)

if axis == 'feature':
# reverse the transpose necessary due to biocore/biom-format#759
Expand Down