Skip to content

Commit de36ea3

Browse files
ENH: Adds seed parameter to rarefy (qiime2#321)
Co-authored-by: Greg Caporaso <[email protected]>
1 parent: c038913 · commit: de36ea3

File tree

3 files changed: 45 additions (+45) and 11 deletions (-11).

q2_feature_table/_normalize.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
#
66
# The full license is in the file LICENSE, distributed with this software.
77
# ----------------------------------------------------------------------------
8-
98
import biom
109

1110
import os
@@ -15,13 +14,19 @@
1514
from rnanorm import CPM, CTF, CUF, FPKM, TMM, TPM, UQ
1615

1716

18-
def rarefy(table: biom.Table, sampling_depth: int,
19-
with_replacement: bool = False) -> biom.Table:
17+
def rarefy(table: biom.Table,
18+
sampling_depth: int,
19+
with_replacement: bool = False,
20+
random_seed: int = None
21+
) -> biom.Table:
22+
2023
if with_replacement:
2124
table = table.filter(lambda v, i, m: v.sum() >= sampling_depth,
2225
inplace=False, axis='sample')
26+
2327
table = table.subsample(sampling_depth, axis='sample', by_id=False,
24-
with_replacement=with_replacement)
28+
with_replacement=with_replacement,
29+
seed=random_seed)
2530

2631
if table.is_empty():
2732
raise ValueError('The rarefied table contains no samples or features. '

q2_feature_table/plugin_setup.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@
3737
function=q2_feature_table.rarefy,
3838
inputs={'table': FeatureTable[Frequency]},
3939
parameters={'sampling_depth': Int % Range(1, None),
40-
'with_replacement': Bool},
40+
'with_replacement': Bool,
41+
'random_seed': Int % Range(0, None)},
4142
outputs=[('rarefied_table', FeatureTable[Frequency])],
4243
input_descriptions={'table': 'The feature table to be rarefied.'},
4344
parameter_descriptions={
@@ -47,7 +48,10 @@
4748
'included in the resulting table.'),
4849
'with_replacement': ('Rarefy with replacement by sampling from the '
4950
'multinomial distribution instead of rarefying '
50-
'without replacement.')
51+
'without replacement.'),
52+
'random_seed': ('Set the seed for the subsampling. Using the same '
53+
'seed with the same table will always lead to the '
54+
'same result. Defaults to a random seed.')
5155
},
5256
output_descriptions={
5357
'rarefied_table': 'The resulting rarefied feature table.'

q2_feature_table/tests/test_normalize.py

Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,15 +23,40 @@
2323

2424
class RarefyTests(TestCase):
2525

26-
def test_rarefy(self):
26+
def test_rarefy_random_seed_is_randomized(self):
2727
t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
2828
['O1', 'O2'],
2929
['S1', 'S2', 'S3'])
3030
a = rarefy(t, 2)
31-
self.assertEqual(a.shape, (2, 2))
32-
self.assertEqual(set(a.ids(axis='sample')), set(['S2', 'S3']))
33-
self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2']))
34-
npt.assert_array_equal(a.sum(axis='sample'), np.array([2., 2.]))
31+
a_eq_b = []
32+
n_iterations = 100
33+
for i in range(n_iterations):
34+
b = rarefy(t, 2)
35+
self.assertEqual(b.shape, (2, 2))
36+
self.assertEqual(set(b.ids(axis='sample')), set(['S2', 'S3']))
37+
self.assertEqual(set(b.ids(axis='observation')), set(['O1', 'O2']))
38+
npt.assert_array_equal(b.sum(axis='sample'), np.array([2., 2.]))
39+
a_eq_b.append(a == b)
40+
self.assertTrue(False in a_eq_b,
41+
f"After {n_iterations} iterations, all resulting "
42+
"feature tables are identical. It therefore seems "
43+
"that a randomized seed is not being used.")
44+
45+
def test_rarefy_seed_is_not_randomized(self):
46+
t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
47+
['O1', 'O2'],
48+
['S1', 'S2', 'S3'])
49+
a = rarefy(t, 2, random_seed=1)
50+
a_eq_b = []
51+
n_iterations = 100
52+
for i in range(n_iterations):
53+
b = rarefy(t, 2, random_seed=1)
54+
a_eq_b.append(a == b)
55+
self.assertFalse(False in a_eq_b,
56+
f"After {n_iterations} iterations, at least one "
57+
"feature table differed from the others. It "
58+
"therefore seems that a randomized seed is being "
59+
"used.")
3560

3661
def test_rarefy_replacement(self):
3762
t = Table(np.array([[0, 10, 30], [10, 10, 20]]),

0 commit comments

Comments
 (0)