From bb8fe1f472eb4ad0e139e34bb63a69397519bd3a Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Tue, 12 Dec 2023 12:10:42 -0700
Subject: [PATCH 1/7] added minor changes to filter sequences tests

---
 .../tests/filter/test_filter_sequences.py     | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/q2_feature_table/tests/filter/test_filter_sequences.py b/q2_feature_table/tests/filter/test_filter_sequences.py
index b7ed927..1772db6 100644
--- a/q2_feature_table/tests/filter/test_filter_sequences.py
+++ b/q2_feature_table/tests/filter/test_filter_sequences.py
@@ -31,8 +31,8 @@ def setUp(self):
         md_full.index.name = 'FeatureID'
         self.md_full = qiime2.Metadata(md_full)
 
-    def filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
-                               where=None):
+    def _filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
+                                where=None):
         if md is None:
             md = self.md_full
         obs = filter_seqs(self.seqs, metadata=md,
@@ -41,13 +41,13 @@ def filter_and_assertEqual(self, exp, md=None, exclude_ids=False,
 
     def test_id_based_filtering(self):
         # filter none
-        self.filter_and_assertEqual(self.seqs,
-                                    md=qiime2.Metadata(self.df_lite))
+        self._filter_and_assertEqual(self.seqs,
+                                     md=qiime2.Metadata(self.df_lite))
 
         # filter one
         md = qiime2.Metadata(self.df_lite.drop(['O1']))
         exp = pd.Series(['GCTA', 'CCCC', 'TGTT'], index=['O2', 'O3', 'O4'])
-        self.filter_and_assertEqual(exp, md=md)
+        self._filter_and_assertEqual(exp, md=md)
 
         # filter all
         md = qiime2.Metadata(pd.DataFrame({},
@@ -58,12 +58,12 @@ def test_id_based_filtering(self):
         # exclude none
         md = qiime2.Metadata(pd.DataFrame({},
                                           index=pd.Index(['foo'], name='id')))
-        self.filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
+        self._filter_and_assertEqual(self.seqs, md=md, exclude_ids=True)
 
         # exclude one
         md = qiime2.Metadata(self.df_lite.drop(['O1', 'O2', 'O3']))
         exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
-        self.filter_and_assertEqual(exp, md=md, exclude_ids=True)
+        self._filter_and_assertEqual(exp, md=md, exclude_ids=True)
 
         # exclude all
         md = qiime2.Metadata(self.df_lite)
@@ -74,17 +74,17 @@ def test_id_based_filtering_with_extra_ids(self):
         md = qiime2.Metadata(pd.DataFrame([],
                              index=pd.Index(['O1', 'O3', 'foo'], name='id')))
         exp = pd.Series(['ACGT', 'CCCC'], index=['O1', 'O3'])
-        self.filter_and_assertEqual(exp, md=md)
+        self._filter_and_assertEqual(exp, md=md)
 
     def test_where_param(self):
         # filter none
         where = "stuff='foo' OR stuff='bar' OR stuff='baz'"
-        self.filter_and_assertEqual(self.seqs, where=where)
+        self._filter_and_assertEqual(self.seqs, where=where)
 
         # filter one
         where = "stuff='foo' OR stuff='bar'"
         exp = pd.Series(['ACGT', 'GCTA', 'TGTT'], index=['O1', 'O2', 'O4'])
-        self.filter_and_assertEqual(exp, where=where)
+        self._filter_and_assertEqual(exp, where=where)
 
         # filter all
         where = "stuff='boo'"
@@ -93,12 +93,12 @@ def test_where_param(self):
 
         # exclude none
         where = 'CAST(some_numbers AS INTEGER) < 0'
-        self.filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
+        self._filter_and_assertEqual(self.seqs, exclude_ids=True, where=where)
 
         # exclude one
         where = 'CAST(some_numbers AS INTEGER) > 3'
         exp = pd.Series(['ACGT', 'GCTA', 'CCCC'], index=['O1', 'O2', 'O3'])
-        self.filter_and_assertEqual(exp, exclude_ids=True, where=where)
+        self._filter_and_assertEqual(exp, exclude_ids=True, where=where)
 
         # exclude all
         where = 'CAST(some_numbers AS INTEGER) BETWEEN 0 AND 5'

From 56424fcd271d49cb45981cb6733100e9232ae131 Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Fri, 22 Dec 2023 15:10:05 -0700
Subject: [PATCH 2/7] IMP: Changed 'subsample' action to 'subsample_ids'

---
 q2_feature_table/__init__.py                     |  4 ++--
 q2_feature_table/_normalize.py                   |  4 ++--
 .../{_subsample.py => _subsample_ids.py}         |  6 +++---
 q2_feature_table/plugin_setup.py                 |  2 +-
 q2_feature_table/tests/test_subsample.py         | 16 ++++++++--------
 5 files changed, 16 insertions(+), 16 deletions(-)
 rename q2_feature_table/{_subsample.py => _subsample_ids.py} (88%)

diff --git a/q2_feature_table/__init__.py b/q2_feature_table/__init__.py
index 4df9c71..d038d73 100644
--- a/q2_feature_table/__init__.py
+++ b/q2_feature_table/__init__.py
@@ -7,7 +7,7 @@
 # ----------------------------------------------------------------------------
 
 from ._normalize import rarefy
-from ._subsample import subsample
+from ._subsample_ids import subsample_ids
 from ._transform import (presence_absence, relative_frequency, transpose)
 from ._summarize import (summarize, tabulate_seqs, tabulate_sample_frequencies,
                          tabulate_feature_frequencies, summarize_plus)
@@ -28,7 +28,7 @@
            'summarize', 'merge', 'merge_seqs', 'filter_samples',
            'filter_features', 'merge_taxa', 'tabulate_seqs', 'overlap_methods',
            'core_features', 'group', 'heatmap', 'heatmap_choices',
-           'filter_seqs', 'subsample', 'rename_ids',
+           'filter_seqs', 'subsample_ids', 'rename_ids',
            'filter_features_conditionally', 'split',
            'tabulate_feature_frequencies', 'tabulate_sample_frequencies',
            'summarize_plus']
diff --git a/q2_feature_table/_normalize.py b/q2_feature_table/_normalize.py
index d9c2935..80758c7 100644
--- a/q2_feature_table/_normalize.py
+++ b/q2_feature_table/_normalize.py
@@ -14,8 +14,8 @@ def rarefy(table: biom.Table, sampling_depth: int,
     if with_replacement:
         table = table.filter(lambda v, i, m: v.sum() >= sampling_depth,
                              inplace=False, axis='sample')
-    table = table.subsample(sampling_depth, axis='sample', by_id=False,
-                            with_replacement=with_replacement)
+    table = table.subsample_ids(sampling_depth, axis='sample', by_id=False,
+                                with_replacement=with_replacement)
 
     if table.is_empty():
         raise ValueError('The rarefied table contains no samples or features. '
diff --git a/q2_feature_table/_subsample.py b/q2_feature_table/_subsample_ids.py
similarity index 88%
rename from q2_feature_table/_subsample.py
rename to q2_feature_table/_subsample_ids.py
index 7a29293..b7e4837 100644
--- a/q2_feature_table/_subsample.py
+++ b/q2_feature_table/_subsample_ids.py
@@ -9,8 +9,8 @@
 import biom
 
 
-def subsample(table: biom.Table, subsampling_depth: int,
-              axis: str) -> biom.Table:
+def subsample_ids(table: biom.Table, subsampling_depth: int,
+                  axis: str) -> biom.Table:
     if axis == 'feature':
         # we are transposing the table due to biocore/biom-format#759
         table = table.transpose()
@@ -21,7 +21,7 @@ def subsample(table: biom.Table, subsampling_depth: int,
                          'is: %d.' % len(table.ids()))
 
     # the axis is always 'sample' due to the above transpose
-    table = table.subsample(subsampling_depth, axis='sample', by_id=True)
+    table = table.subsample_ids(subsampling_depth, axis='sample', by_id=True)
 
     # the inverted axis is always observation due to the above transpose
     invaxis = 'observation'
diff --git a/q2_feature_table/plugin_setup.py b/q2_feature_table/plugin_setup.py
index e7025c1..569b4d3 100644
--- a/q2_feature_table/plugin_setup.py
+++ b/q2_feature_table/plugin_setup.py
@@ -58,7 +58,7 @@
 )
 
 plugin.methods.register_function(
-    function=q2_feature_table.subsample,
+    function=q2_feature_table.subsample_ids,
     inputs={'table': FeatureTable[Frequency]},
     parameters={'subsampling_depth': Int % Range(1, None),
                 'axis': Str % Choices(['sample', 'feature'])},
diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py
index e8ce177..f0e0c98 100644
--- a/q2_feature_table/tests/test_subsample.py
+++ b/q2_feature_table/tests/test_subsample.py
@@ -12,16 +12,16 @@
 import numpy.testing as npt
 from biom.table import Table
 
-from q2_feature_table import subsample
+from q2_feature_table import subsample_ids
 
 
-class SubsampleTests(TestCase):
+class SubsampleIDsTests(TestCase):
 
     def test_subsample_samples(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
-        a = subsample(t, 2, 'sample')
+        a = subsample_ids(t, 2, 'sample')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='sample'))
@@ -38,7 +38,7 @@ def test_subsample_features(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
-        a = subsample(t, 2, 'feature')
+        a = subsample_ids(t, 2, 'feature')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='observation'))
@@ -56,28 +56,28 @@ def test_subsample_samples_oversample(self):
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'sample')
+            subsample_ids(t, 10, 'sample')
 
     def test_subsample_features_oversample(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'feature')
+            subsample_ids(t, 10, 'feature')
 
     def test_subsample_samples_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'sample')
+            subsample_ids(t, 2, 'sample')
 
     def test_subsample_features_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'feature')
+            subsample_ids(t, 2, 'feature')
 
 
 if __name__ == "__main__":

From 841df17490193191bb3e113ef23ce4cb6db526eb Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Tue, 2 Jan 2024 11:45:07 -0700
Subject: [PATCH 3/7] removed name changes from biom table

---
 q2_feature_table/tests/test_subsample.py | 33 +++++++++++++++++++-----
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py
index f0e0c98..174a686 100644
--- a/q2_feature_table/tests/test_subsample.py
+++ b/q2_feature_table/tests/test_subsample.py
@@ -11,17 +11,18 @@
 import numpy as np
 import numpy.testing as npt
 from biom.table import Table
+from biom.table import subsample
 
 from q2_feature_table import subsample_ids
 
 
-class SubsampleIDsTests(TestCase):
+class SubsampleTests(TestCase):
 
     def test_subsample_samples(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
-        a = subsample_ids(t, 2, 'sample')
+        a = subsample(t, 2, 'sample')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='sample'))
@@ -38,7 +39,7 @@ def test_subsample_features(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
-        a = subsample_ids(t, 2, 'feature')
+        a = subsample(t, 2, 'feature')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='observation'))
@@ -56,28 +57,46 @@ def test_subsample_samples_oversample(self):
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample_ids(t, 10, 'sample')
+            subsample(t, 10, 'sample')
 
     def test_subsample_features_oversample(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample_ids(t, 10, 'feature')
+            subsample(t, 10, 'feature')
 
     def test_subsample_samples_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample_ids(t, 2, 'sample')
+            subsample(t, 2, 'sample')
 
     def test_subsample_features_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample_ids(t, 2, 'feature')
+            subsample(t, 2, 'feature')
+
+
+class SubsampleIdsTests(TestCase):
+
+    def test_subsample_ids(self):
+        t = np.array([[0, 1, 3], [1, 1, 2]]),['O1', 'O2'],['S1', 'S2', 'S3']
+        a = subsample_ids(t, 2, 'sample')
+        self.assertEqual(a.shape, (2, 2))
+
+        sample_ids = frozenset(a.ids(axis='sample'))
+        self.assertIn(sample_ids, set([frozenset(['S1', 'S2']),
+                                       frozenset(['S1', 'S3']),
+                                       frozenset(['S2', 'S3'])]))
+        self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2']))
+
+        for i in a.ids(axis='sample'):
+            npt.assert_equal(t.data(i, axis='sample'),
+                             a.data(i, axis='sample'))
 
 
 if __name__ == "__main__":

From 11d2fd2f4ecf6cb089e41d31a2afb2536c541587 Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Tue, 2 Jan 2024 11:48:47 -0700
Subject: [PATCH 4/7] fix flake8 error: missing whitespace after commas

---
 q2_feature_table/tests/test_subsample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py
index 174a686..93aeed3 100644
--- a/q2_feature_table/tests/test_subsample.py
+++ b/q2_feature_table/tests/test_subsample.py
@@ -84,7 +84,7 @@ def test_subsample_features_empty(self):
 class SubsampleIdsTests(TestCase):
 
     def test_subsample_ids(self):
-        t = np.array([[0, 1, 3], [1, 1, 2]]),['O1', 'O2'],['S1', 'S2', 'S3']
+        t = np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3']
         a = subsample_ids(t, 2, 'sample')
         self.assertEqual(a.shape, (2, 2))
 

From 7f006703d7eae52fad0dc243b81fec2d46c63501 Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Tue, 2 Jan 2024 12:02:15 -0700
Subject: [PATCH 5/7] tweaks to subsample_ids placements

---
 q2_feature_table/_normalize.py     | 4 ++--
 q2_feature_table/_subsample_ids.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/q2_feature_table/_normalize.py b/q2_feature_table/_normalize.py
index 80758c7..d9c2935 100644
--- a/q2_feature_table/_normalize.py
+++ b/q2_feature_table/_normalize.py
@@ -14,8 +14,8 @@ def rarefy(table: biom.Table, sampling_depth: int,
     if with_replacement:
         table = table.filter(lambda v, i, m: v.sum() >= sampling_depth,
                              inplace=False, axis='sample')
-    table = table.subsample_ids(sampling_depth, axis='sample', by_id=False,
-                                with_replacement=with_replacement)
+    table = table.subsample(sampling_depth, axis='sample', by_id=False,
+                            with_replacement=with_replacement)
 
     if table.is_empty():
         raise ValueError('The rarefied table contains no samples or features. '
diff --git a/q2_feature_table/_subsample_ids.py b/q2_feature_table/_subsample_ids.py
index b7e4837..6cb8ca5 100644
--- a/q2_feature_table/_subsample_ids.py
+++ b/q2_feature_table/_subsample_ids.py
@@ -21,7 +21,7 @@ def subsample_ids(table: biom.Table, subsampling_depth: int,
                          'is: %d.' % len(table.ids()))
 
     # the axis is always 'sample' due to the above transpose
-    table = table.subsample_ids(subsampling_depth, axis='sample', by_id=True)
+    table = table.subsample(subsampling_depth, axis='sample', by_id=True)
 
     # the inverted axis is always observation due to the above transpose
     invaxis = 'observation'

From ad91ea02f56c77b9b65229f3a3f46931667a6dbd Mon Sep 17 00:00:00 2001
From: Johannah Hagen <hagenjohannah@gmail.com>
Date: Wed, 3 Jan 2024 09:57:29 -0700
Subject: [PATCH 6/7] removed broken subsample import

---
 q2_feature_table/tests/test_subsample.py | 31 +++++-------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py
index 93aeed3..a5c6549 100644
--- a/q2_feature_table/tests/test_subsample.py
+++ b/q2_feature_table/tests/test_subsample.py
@@ -11,7 +11,6 @@
 import numpy as np
 import numpy.testing as npt
 from biom.table import Table
-from biom.table import subsample
 
 from q2_feature_table import subsample_ids
 
@@ -22,7 +21,7 @@ def test_subsample_samples(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
-        a = subsample(t, 2, 'sample')
+        a = subsample_ids(t, 2, 'sample')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='sample'))
@@ -39,7 +38,7 @@ def test_subsample_features(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
-        a = subsample(t, 2, 'feature')
+        a = subsample_ids(t, 2, 'feature')
         self.assertEqual(a.shape, (2, 2))
 
         sample_ids = frozenset(a.ids(axis='observation'))
@@ -57,46 +56,28 @@ def test_subsample_samples_oversample(self):
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'sample')
+            subsample_ids(t, 10, 'sample')
 
     def test_subsample_features_oversample(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "depth exceeds"):
-            subsample(t, 10, 'feature')
+            subsample_ids(t, 10, 'feature')
 
     def test_subsample_samples_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'sample')
+            subsample_ids(t, 2, 'sample')
 
     def test_subsample_features_empty(self):
         t = Table(np.array([[0, 0, 0], [0, 0, 0]]).T,
                   ['O1', 'O2', 'O3'],
                   ['S1', 'S2'])
         with self.assertRaisesRegex(ValueError, "contains no"):
-            subsample(t, 2, 'feature')
-
-
-class SubsampleIdsTests(TestCase):
-
-    def test_subsample_ids(self):
-        t = np.array([[0, 1, 3], [1, 1, 2]]), ['O1', 'O2'], ['S1', 'S2', 'S3']
-        a = subsample_ids(t, 2, 'sample')
-        self.assertEqual(a.shape, (2, 2))
-
-        sample_ids = frozenset(a.ids(axis='sample'))
-        self.assertIn(sample_ids, set([frozenset(['S1', 'S2']),
-                                       frozenset(['S1', 'S3']),
-                                       frozenset(['S2', 'S3'])]))
-        self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2']))
-
-        for i in a.ids(axis='sample'):
-            npt.assert_equal(t.data(i, axis='sample'),
-                             a.data(i, axis='sample'))
+            subsample_ids(t, 2, 'feature')
 
 
 if __name__ == "__main__":

From 91702a16d57f678c2cf8bbf92756ca6047f09519 Mon Sep 17 00:00:00 2001
From: Hannah Hagen <125509369+hagenjp@users.noreply.github.com>
Date: Thu, 4 Jan 2024 15:19:15 -0700
Subject: [PATCH 7/7] Update q2_feature_table/tests/test_subsample.py

Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
---
 q2_feature_table/tests/test_subsample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_feature_table/tests/test_subsample.py b/q2_feature_table/tests/test_subsample.py
index a5c6549..f0e0c98 100644
--- a/q2_feature_table/tests/test_subsample.py
+++ b/q2_feature_table/tests/test_subsample.py
@@ -15,7 +15,7 @@
 from q2_feature_table import subsample_ids
 
 
-class SubsampleTests(TestCase):
+class SubsampleIDsTests(TestCase):
 
     def test_subsample_samples(self):
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),