|
5 | 5 | # |
6 | 6 | # The full license is in the file LICENSE, distributed with this software. |
7 | 7 | # ---------------------------------------------------------------------------- |
8 | | - |
| 8 | +import os |
9 | 9 | from unittest import TestCase, main |
| 10 | +from unittest.mock import MagicMock, patch |
10 | 11 |
|
11 | 12 | import numpy as np |
12 | 13 | import numpy.testing as npt |
| 14 | +import pandas as pd |
13 | 15 | from biom.table import Table |
| 16 | +from pandas._testing import assert_series_equal |
| 17 | +from q2_types.feature_data import SequenceCharacteristicsDirectoryFormat |
14 | 18 |
|
15 | 19 | from q2_feature_table import rarefy |
| 20 | +from q2_feature_table._normalize import (_validate_parameters, |
| 21 | + _convert_lengths, normalize) |
16 | 22 |
|
17 | 23 |
|
18 | 24 | class RarefyTests(TestCase): |
@@ -78,5 +84,107 @@ def test_rarefy_depth_error(self): |
78 | 84 | rarefy(t, 50) |
79 | 85 |
|
80 | 86 |
|
class NormalizeTests(TestCase):
    """Tests for ``_validate_parameters``, ``_convert_lengths``, ``normalize``.

    The shared fixtures are a two-sample, two-gene table and the gene
    lengths expected back from ``_convert_lengths`` for that table.
    """

    @classmethod
    def setUpClass(cls):
        """Create the table and expected lengths shared by all tests."""
        cls.lengths = pd.Series(
            {
                "ARO1": 1356.0,
                "ARO2": 1173.0,
            },
            name="values",
        )
        cls.lengths.index.name = "index"
        cls.table = pd.DataFrame({
            'ID': ['sample1', 'sample2'],
            'ARO1': [2.0, 2.0],
            'ARO2': [0.0, 0.0]
        }).set_index('ID')

    @staticmethod
    def _write_gene_lengths(content):
        """Return a sequence-characteristics directory holding ``content``.

        ``content`` is written verbatim to ``sequence_characteristics.tsv``
        inside a newly created ``SequenceCharacteristicsDirectoryFormat``.
        """
        gene_length = SequenceCharacteristicsDirectoryFormat()
        path = os.path.join(str(gene_length), "sequence_characteristics.tsv")
        with open(path, 'w') as file:
            file.write(content)
        return gene_length

    def test_validate_parameters_uq_with_m_a_trim(self):
        # Test Error raised if m-trim and a-trim are given with UQ method
        with self.assertRaisesRegex(
            ValueError,
            "Parameters m-trim and a-trim can only "
            "be used with methods TMM and CTF.",
        ):
            _validate_parameters("uq", 0.2, 0.05, None)

    def test_validate_parameters_tpm_missing_gene_length(self):
        # Test Error raised if gene-length is missing with TPM method
        with self.assertRaisesRegex(
                ValueError, "gene-length input is missing."):
            _validate_parameters("tpm", None, None, None)

    def test_validate_parameters_tmm_gene_length(self):
        # Test Error raised if gene-length is given with TMM method
        with self.assertRaisesRegex(
            ValueError,
            "gene-length input can only be used with FPKM and "
            "TPM methods."
        ):
            _validate_parameters(
                "tmm", None, None, gene_length=MagicMock())

    def test_validate_parameters_default_m_a_trim(self):
        # Test if m_trim and a_trim get set to default values if None
        m_trim, a_trim = _validate_parameters("tmm", None, None, None)
        self.assertEqual(m_trim, 0.3)
        self.assertEqual(a_trim, 0.05)

    def test_validate_parameters_m_a_trim(self):
        # Test if m_trim and a_trim are not modified if not None
        m_trim, a_trim = _validate_parameters("tmm", 0.1, 0.06, None)
        self.assertEqual(m_trim, 0.1)
        self.assertEqual(a_trim, 0.06)

    def test_convert_lengths_gene_length(self):
        # Test _convert_lengths returns the lengths of the table's genes
        gene_length = self._write_gene_lengths(
            "id\tlength\nARO1\t1356.0\nARO2\t1173.0")

        obs = _convert_lengths(self.table, gene_length=gene_length)
        assert_series_equal(obs, self.lengths)

    def test_convert_lengths_short_gene_length(self):
        # Test Error raised if gene-length is missing genes
        # (ARO2 is in the table but deliberately left out of the file)
        gene_length = self._write_gene_lengths("id\tlength\nARO1\t1356.0")
        with self.assertRaisesRegex(
            ValueError,
            "There are genes present in the FeatureTable that are "
            "not present in the gene-length input. Missing lengths "
            "for genes: {'ARO2'}",
        ):
            _convert_lengths(self.table, gene_length=gene_length)

    @patch("q2_feature_table._normalize.TPM")
    def test_tpm_fpkm_with_valid_inputs(self, mock_tpm):
        # Test valid inputs for TPM method
        # NOTE(review): only checks that no exception is raised; nothing is
        # asserted on mock_tpm or the result, and "fpkm" is not exercised
        # despite the test name.
        gene_length = self._write_gene_lengths(
            "id\tlength\nARO1\t1356.0\nARO2\t1173.0")
        normalize(table=self.table, gene_length=gene_length, method="tpm")

    @patch("q2_feature_table._normalize.TMM")
    def test_tmm_uq_cuf_ctf_with_valid_inputs(self, mock_tmm):
        # Test valid inputs for TMM method
        # No gene-length fixture is built here: it is not passed to
        # normalize for this method.
        # NOTE(review): only checks that no exception is raised; nothing is
        # asserted on mock_tmm, and "uq"/"cuf"/"ctf" are not exercised
        # despite the test name.
        normalize(table=self.table, method="tmm", a_trim=0.06, m_trim=0.4)
| 187 | + |
| 188 | + |
# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments