From bb8fe1f472eb4ad0e139e34bb63a69397519bd3a Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Tue, 12 Dec 2023 12:10:42 -0700 Subject: [PATCH 1/7] added minor changes to filter sequences tests --- .../tests/filter/test_filter_sequences.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/q2_feature_table/tests/filter/test_filter_sequences.py b/q2_feature_table/tests/filter/test_filter_sequences.py index b7ed927..1772db6 100644 --- a/q2_feature_table/tests/filter/test_filter_sequences.py +++ b/q2_feature_table/tests/filter/test_filter_sequences.py @@ -31,8 +31,8 @@ def setUp(self): md_full.index.name = 'FeatureID' self.md_full = qiime2.Metadata(md_full) - def filter_and_assertEqual(self, exp, md=None, exclude_ids=False, - where=None): + def _filter_and_assertEqual(self, exp, md=None, exclude_ids=False, + where=None): if md is None: md = self.md_full obs = filter_seqs(self.seqs, metadata=md, @@ -41,13 +41,13 @@ def filter_and_assertEqual(self, exp, md=None, exclude_ids=False, def test_id_based_filtering(self): # filter none - self.filter_and_assertEqual(self.seqs, - md=qiime2.Metadata(self.df_lite)) + self._filter_and_assertEqual(self.seqs, + md=qiime2.Metadata(self.df_lite)) # filter one md = qiime2.Metadata(self.df_lite.drop(['O1'])) exp = pd.Series(['GCTA', 'CCCC', 'TGTT'], index=['O2', 'O3', 'O4']) - self.filter_and_assertEqual(exp, md=md) + self._filter_and_assertEqual(exp, md=md) # filter all md = qiime2.Metadata(pd.DataFrame({}, @@ -58,12 +58,12 @@ def test_id_based_filtering(self): # exclude none md = qiime2.Metadata(pd.DataFrame({}, index=pd.Index(['foo'], name='id'))) - self.filter_and_assertEqual(self.seqs, md=md, exclude_ids=True) + self._filter_and_assertEqual(self.seqs, md=md, exclude_ids=True) # exclude one md = qiime2.Metadata(self.df_lite.drop(['O1', 'O2', 'O3'])) exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3']) - self.filter_and_assertEqual(exp, md=md, exclude_ids=True) + 
self._filter_and_assertEqual(exp, md=md, exclude_ids=True) # exclude all md = qiime2.Metadata(self.df_lite) @@ -74,17 +74,17 @@ def test_id_based_filtering_with_extra_ids(self): md = qiime2.Metadata(pd.DataFrame([], index=pd.Index(['O1', 'O3', 'foo'], name='id'))) exp = pd.Series(['ACGT', 'CCCC'], index=['O1', 'O3']) - self.filter_and_assertEqual(exp, md=md) + self._filter_and_assertEqual(exp, md=md) def test_where_param(self): # filter none where = "stuff='foo' OR stuff='bar' OR stuff='baz'" - self.filter_and_assertEqual(self.seqs, where=where) + self._filter_and_assertEqual(self.seqs, where=where) # filter one where = "stuff='foo' OR stuff='bar'" exp = pd.Series(['ACGT', 'GCTA', 'TGTT'], index=['O1', 'O2', 'O4']) - self.filter_and_assertEqual(exp, where=where) + self._filter_and_assertEqual(exp, where=where) # filter all where = "stuff='boo'" @@ -93,12 +93,12 @@ def test_where_param(self): # exclude none where = 'CAST(some_numbers AS INTEGER) < 0' - self.filter_and_assertEqual(self.seqs, exclude_ids=True, where=where) + self._filter_and_assertEqual(self.seqs, exclude_ids=True, where=where) # exclude one where = 'CAST(some_numbers AS INTEGER) > 3' exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3']) - self.filter_and_assertEqual(exp, exclude_ids=True, where=where) + self._filter_and_assertEqual(exp, exclude_ids=True, where=where) # exclude all where = 'CAST(some_numbers AS INTEGER) BETWEEN 0 AND 5' From 56424fcd271d49cb45981cb6733100e9232ae131 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Fri, 22 Dec 2023 15:10:05 -0700 Subject: [PATCH 2/7] IMP: Changed 'subsample' action to 'subsample_ids' --- q2_feature_table/__init__.py | 4 ++-- q2_feature_table/_normalize.py | 4 ++-- .../{_subsample.py => _subsample_ids.py} | 6 +++--- q2_feature_table/plugin_setup.py | 2 +- q2_feature_table/tests/test_subsample.py | 16 ++++++++-------- 5 files changed, 16 insertions(+), 16 deletions(-) rename q2_feature_table/{_subsample.py => _subsample_ids.py} (88%) diff 
--git a/q2_feature_table/__init__.py b/q2_feature_table/__init__.py index 4df9c71..d038d73 100644 --- a/q2_feature_table/__init__.py +++ b/q2_feature_table/__init__.py @@ -7,7 +7,7 @@ # ---------------------------------------------------------------------------- from ._normalize import rarefy -from ._subsample import subsample +from ._subsample_ids import subsample_ids from ._transform import (presence_absence, relative_frequency, transpose) from ._summarize import (summarize, tabulate_seqs, tabulate_sample_frequencies, tabulate_feature_frequencies, summarize_plus) @@ -28,7 +28,7 @@ 'summarize', 'merge', 'merge_seqs', 'filter_samples', 'filter_features', 'merge_taxa', 'tabulate_seqs', 'overlap_methods', 'core_features', 'group', 'heatmap', 'heatmap_choices', - 'filter_seqs', 'subsample', 'rename_ids', + 'filter_seqs', 'subsample_ids', 'rename_ids', 'filter_features_conditionally', 'split', 'tabulate_feature_frequencies', 'tabulate_sample_frequencies', 'summarize_plus'] diff --git a/q2_feature_table/_normalize.py b/q2_feature_table/_normalize.py index d9c2935..80758c7 100644 --- a/q2_feature_table/_normalize.py +++ b/q2_feature_table/_normalize.py @@ -14,8 +14,8 @@ def rarefy(table: biom.Table, sampling_depth: int, if with_replacement: table = table.filter(lambda v, i, m: v.sum() >= sampling_depth, inplace=False, axis='sample') - table = table.subsample(sampling_depth, axis='sample', by_id=False, - with_replacement=with_replacement) + table = table.subsample_ids(sampling_depth, axis='sample', by_id=False, + with_replacement=with_replacement) if table.is_empty(): raise ValueError('The rarefied table contains no samples or features. 
' diff --git a/q2_feature_table/_subsample.py b/q2_feature_table/_subsample_ids.py similarity index 88% rename from q2_feature_table/_subsample.py rename to q2_feature_table/_subsample_ids.py index 7a29293..b7e4837 100644 --- a/q2_feature_table/_subsample.py +++ b/q2_feature_table/_subsample_ids.py @@ -9,8 +9,8 @@ import biom -def subsample(table: biom.Table, subsampling_depth: int, - axis: str) -> biom.Table: +def subsample_ids(table: biom.Table, subsampling_depth: int, + axis: str) -> biom.Table: if axis == 'feature': # we are transposing the table due to biocore/biom-format#759 table = table.transpose() @@ -21,7 +21,7 @@ def subsample(table: biom.Table, subsampling_depth: int, 'is: %d.' % len(table.ids())) # the axis is always 'sample' due to the above transpose - table = table.subsample(subsampling_depth, axis='sample', by_id=True) + table = table.subsample_ids(subsampling_depth, axis='sample', by_id=True) # the inverted axis is always observation due to the above transpose invaxis = 'observation' diff --git a/q2_feature_table/plugin_setup.py b/q2_feature_table/plugin_setup.py index e7025c1..569b4d3 100644 --- a/q2_feature_table/plugin_setup.py +++ b/q2_feature_table/plugin_setup.py @@ -58,7 +58,7 @@ ) plugin.methods.register_function( - function=q2_feature_table.subsample, + function=q2_feature_table.subsample_ids, inputs={'table': FeatureTable[Frequency]}, parameters={'subsampling_depth': Int % Range(1, None), 'axis': Str % Choices(['sample', 'feature'])}, diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index e8ce177..f0e0c98 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -12,16 +12,16 @@ import numpy.testing as npt from biom.table import Table -from q2_feature_table import subsample +from q2_feature_table import subsample_ids -class SubsampleTests(TestCase): +class SubsampleIDsTests(TestCase): def test_subsample_samples(self): t = Table(np.array([[0, 1, 
3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3']) - a = subsample(t, 2, 'sample') + a = subsample_ids(t, 2, 'sample') self.assertEqual(a.shape, (2, 2)) sample_ids = frozenset(a.ids(axis='sample')) @@ -38,7 +38,7 @@ def test_subsample_features(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) - a = subsample(t, 2, 'feature') + a = subsample_ids(t, 2, 'feature') self.assertEqual(a.shape, (2, 2)) sample_ids = frozenset(a.ids(axis='observation')) @@ -56,28 +56,28 @@ def test_subsample_samples_oversample(self): ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample(t, 10, 'sample') + subsample_ids(t, 10, 'sample') def test_subsample_features_oversample(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample(t, 10, 'feature') + subsample_ids(t, 10, 'feature') def test_subsample_samples_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample(t, 2, 'sample') + subsample_ids(t, 2, 'sample') def test_subsample_features_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample(t, 2, 'feature') + subsample_ids(t, 2, 'feature') if __name__ == "__main__": From 841df17490193191bb3e113ef23ce4cb6db526eb Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Tue, 2 Jan 2024 11:45:07 -0700 Subject: [PATCH 3/7] removed name changes from biom table --- q2_feature_table/tests/test_subsample.py | 33 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index f0e0c98..174a686 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -11,17 
+11,18 @@ import numpy as np import numpy.testing as npt from biom.table import Table +from biom.table import subsample from q2_feature_table import subsample_ids -class SubsampleIDsTests(TestCase): +class SubsampleTests(TestCase): def test_subsample_samples(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3']) - a = subsample_ids(t, 2, 'sample') + a = subsample(t, 2, 'sample') self.assertEqual(a.shape, (2, 2)) sample_ids = frozenset(a.ids(axis='sample')) @@ -38,7 +39,7 @@ def test_subsample_features(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) - a = subsample_ids(t, 2, 'feature') + a = subsample(t, 2, 'feature') self.assertEqual(a.shape, (2, 2)) sample_ids = frozenset(a.ids(axis='observation')) @@ -56,28 +57,46 @@ def test_subsample_samples_oversample(self): ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample_ids(t, 10, 'sample') + subsample(t, 10, 'sample') def test_subsample_features_oversample(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample_ids(t, 10, 'feature') + subsample(t, 10, 'feature') def test_subsample_samples_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample_ids(t, 2, 'sample') + subsample(t, 2, 'sample') def test_subsample_features_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample_ids(t, 2, 'feature') + subsample(t, 2, 'feature') + + +class SubsampleIdsTests(TestCase): + + def test_subsample_ids(self): + t = np.array([[0, 1, 3], [1, 1, 2]]),['O1', 'O2'],['S1', 'S2', 'S3'] + a = subsample_ids(t, 2, 'sample') + self.assertEqual(a.shape, (2, 2)) + + sample_ids = frozenset(a.ids(axis='sample')) + 
self.assertIn(sample_ids, set([frozenset(['S1', 'S2']), + frozenset(['S1', 'S3']), + frozenset(['S2', 'S3'])])) + self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2'])) + + for i in a.ids(axis='sample'): + npt.assert_equal(t.data(i, axis='sample'), + a.data(i, axis='sample')) if __name__ == "__main__": From 11d2fd2f4ecf6cb089e41d31a2afb2536c541587 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Tue, 2 Jan 2024 11:48:47 -0700 Subject: [PATCH 4/7] flake removal --- q2_feature_table/tests/test_subsample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index 174a686..93aeed3 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -84,7 +84,7 @@ def test_subsample_features_empty(self): class SubsampleIdsTests(TestCase): def test_subsample_ids(self): - t = np.array([[0, 1, 3], [1, 1, 2]]),['O1', 'O2'],['S1', 'S2', 'S3'] + t = np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3'] a = subsample_ids(t, 2, 'sample') self.assertEqual(a.shape, (2, 2)) From 7f006703d7eae52fad0dc243b81fec2d46c63501 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Tue, 2 Jan 2024 12:02:15 -0700 Subject: [PATCH 5/7] tweaks to subsample_id placements --- q2_feature_table/_normalize.py | 4 ++-- q2_feature_table/_subsample_ids.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/q2_feature_table/_normalize.py b/q2_feature_table/_normalize.py index 80758c7..d9c2935 100644 --- a/q2_feature_table/_normalize.py +++ b/q2_feature_table/_normalize.py @@ -14,8 +14,8 @@ def rarefy(table: biom.Table, sampling_depth: int, if with_replacement: table = table.filter(lambda v, i, m: v.sum() >= sampling_depth, inplace=False, axis='sample') - table = table.subsample_ids(sampling_depth, axis='sample', by_id=False, - with_replacement=with_replacement) + table = table.subsample(sampling_depth, axis='sample', 
by_id=False, + with_replacement=with_replacement) if table.is_empty(): raise ValueError('The rarefied table contains no samples or features. ' diff --git a/q2_feature_table/_subsample_ids.py b/q2_feature_table/_subsample_ids.py index b7e4837..6cb8ca5 100644 --- a/q2_feature_table/_subsample_ids.py +++ b/q2_feature_table/_subsample_ids.py @@ -21,7 +21,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int, 'is: %d.' % len(table.ids())) # the axis is always 'sample' due to the above transpose - table = table.subsample_ids(subsampling_depth, axis='sample', by_id=True) + table = table.subsample(subsampling_depth, axis='sample', by_id=True) # the inverted axis is always observation due to the above transpose invaxis = 'observation' From ad91ea02f56c77b9b65229f3a3f46931667a6dbd Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Wed, 3 Jan 2024 09:57:29 -0700 Subject: [PATCH 6/7] removed broken subsample import --- q2_feature_table/tests/test_subsample.py | 31 +++++------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index 93aeed3..a5c6549 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -11,7 +11,6 @@ import numpy as np import numpy.testing as npt from biom.table import Table -from biom.table import subsample from q2_feature_table import subsample_ids @@ -22,7 +21,7 @@ def test_subsample_samples(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3']) - a = subsample(t, 2, 'sample') + a = subsample_ids(t, 2, 'sample') self.assertEqual(a.shape, (2, 2)) sample_ids = frozenset(a.ids(axis='sample')) @@ -39,7 +38,7 @@ def test_subsample_features(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) - a = subsample(t, 2, 'feature') + a = subsample_ids(t, 2, 'feature') self.assertEqual(a.shape, (2, 2)) sample_ids = 
frozenset(a.ids(axis='observation')) @@ -57,46 +56,28 @@ def test_subsample_samples_oversample(self): ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample(t, 10, 'sample') + subsample_ids(t, 10, 'sample') def test_subsample_features_oversample(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "depth exceeds"): - subsample(t, 10, 'feature') + subsample_ids(t, 10, 'feature') def test_subsample_samples_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample(t, 2, 'sample') + subsample_ids(t, 2, 'sample') def test_subsample_features_empty(self): t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T, ['O1', 'O2', 'O3'], ['S1', 'S2']) with self.assertRaisesRegex(ValueError, "contains no"): - subsample(t, 2, 'feature') - - -class SubsampleIdsTests(TestCase): - - def test_subsample_ids(self): - t = np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3'] - a = subsample_ids(t, 2, 'sample') - self.assertEqual(a.shape, (2, 2)) - - sample_ids = frozenset(a.ids(axis='sample')) - self.assertIn(sample_ids, set([frozenset(['S1', 'S2']), - frozenset(['S1', 'S3']), - frozenset(['S2', 'S3'])])) - self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2'])) - - for i in a.ids(axis='sample'): - npt.assert_equal(t.data(i, axis='sample'), - a.data(i, axis='sample')) + subsample_ids(t, 2, 'feature') if __name__ == "__main__": From 91702a16d57f678c2cf8bbf92756ca6047f09519 Mon Sep 17 00:00:00 2001 From: Hannah Hagen <125509369+hagenjp@users.noreply.github.com> Date: Thu, 4 Jan 2024 15:19:15 -0700 Subject: [PATCH 7/7] Update q2_feature_table/tests/test_subsample.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- q2_feature_table/tests/test_subsample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py index a5c6549..f0e0c98 100644 --- a/q2_feature_table/tests/test_subsample.py +++ b/q2_feature_table/tests/test_subsample.py @@ -15,7 +15,7 @@ from q2_feature_table import subsample_ids -class SubsampleTests(TestCase): +class SubsampleIDsTests(TestCase): def test_subsample_samples(self): t = Table(np.array([[0, 1, 3], [1, 1, 2]]),