Skip to content

Commit 9533a76

Browse files
jeromekellehermergify[bot]
authored andcommitted
Implement span_normalise for divmat
Closes #2818
1 parent 7190c2d commit 9533a76

File tree

6 files changed

+162
-51
lines changed

6 files changed

+162
-51
lines changed

c/tests/test_stats.c

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,10 @@ verify_mean_descendants(tsk_treeseq_t *ts)
263263
}
264264

265265
/* Check the divergence matrix by running against the stats API equivalent
266-
* code. NOTE: this will not always be equal in site mode, because of a slightly
267-
* different definition wrt to multiple mutations at a site.
266+
* code.
268267
*/
269268
static void
270-
verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t mode)
269+
verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
271270
{
272271
int ret;
273272
const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
@@ -285,10 +284,10 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t mode)
285284
}
286285
}
287286
ret = tsk_treeseq_divergence(
288-
ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, mode, D1);
287+
ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);
289288
CU_ASSERT_EQUAL_FATAL(ret, 0);
290289

291-
ret = tsk_treeseq_divergence_matrix(ts, 0, NULL, 0, NULL, mode, D2);
290+
ret = tsk_treeseq_divergence_matrix(ts, 0, NULL, 0, NULL, options, D2);
292291
CU_ASSERT_EQUAL_FATAL(ret, 0);
293292

294293
for (j = 0; j < n; j++) {
@@ -1072,7 +1071,9 @@ test_single_tree_divergence_matrix(void)
10721071
assert_arrays_almost_equal(16, result, D_site);
10731072

10741073
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
1074+
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
10751075
verify_divergence_matrix(&ts, TSK_STAT_SITE);
1076+
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
10761077

10771078
tsk_treeseq_free(&ts);
10781079
}
@@ -1120,7 +1121,9 @@ test_single_tree_divergence_matrix_internal_samples(void)
11201121
assert_arrays_almost_equal(16, result, D);
11211122

11221123
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
1124+
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
11231125
verify_divergence_matrix(&ts, TSK_STAT_SITE);
1126+
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
11241127

11251128
tsk_treeseq_free(&ts);
11261129
}
@@ -1165,8 +1168,10 @@ test_single_tree_divergence_matrix_multi_root(void)
11651168
CU_ASSERT_EQUAL_FATAL(ret, 0);
11661169
assert_arrays_almost_equal(16, result, D_branch);
11671170

1168-
verify_divergence_matrix(&ts, TSK_STAT_SITE);
11691171
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
1172+
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
1173+
verify_divergence_matrix(&ts, TSK_STAT_SITE);
1174+
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
11701175

11711176
tsk_treeseq_free(&ts);
11721177
}
@@ -1839,7 +1844,9 @@ test_paper_ex_divergence_matrix(void)
18391844
paper_ex_mutations, paper_ex_individuals, NULL, 0);
18401845

18411846
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
1847+
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
18421848
verify_divergence_matrix(&ts, TSK_STAT_SITE);
1849+
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
18431850

18441851
tsk_treeseq_free(&ts);
18451852
}
@@ -1999,10 +2006,20 @@ test_simplest_divergence_matrix(void)
19992006
CU_ASSERT_EQUAL_FATAL(ret, 0);
20002007
assert_arrays_almost_equal(4, D_branch, result);
20012008

2009+
ret = tsk_treeseq_divergence_matrix(
2010+
&ts, 2, sample_ids, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
2011+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2012+
assert_arrays_almost_equal(4, D_branch, result);
2013+
20022014
ret = tsk_treeseq_divergence_matrix(&ts, 2, sample_ids, 0, NULL, 0, result);
20032015
CU_ASSERT_EQUAL_FATAL(ret, 0);
20042016
assert_arrays_almost_equal(4, D_site, result);
20052017

2018+
ret = tsk_treeseq_divergence_matrix(
2019+
&ts, 2, sample_ids, 0, NULL, TSK_STAT_SPAN_NORMALISE, result);
2020+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2021+
assert_arrays_almost_equal(4, D_site, result);
2022+
20062023
ret = tsk_treeseq_divergence_matrix(
20072024
&ts, 2, sample_ids, 0, NULL, TSK_STAT_SITE, result);
20082025
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -2019,10 +2036,6 @@ test_simplest_divergence_matrix(void)
20192036
ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
20202037
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
20212038

2022-
ret = tsk_treeseq_divergence_matrix(
2023-
&ts, 0, NULL, 0, NULL, TSK_STAT_SPAN_NORMALISE, result);
2024-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED);
2025-
20262039
ret = tsk_treeseq_divergence_matrix(
20272040
&ts, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
20282041
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);
@@ -2146,7 +2159,9 @@ test_multiroot_divergence_matrix(void)
21462159
multiroot_ex_sites, multiroot_ex_mutations, NULL, NULL, 0);
21472160

21482161
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
2162+
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
21492163
verify_divergence_matrix(&ts, TSK_STAT_SITE);
2164+
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
21502165

21512166
tsk_treeseq_free(&ts);
21522167
}

c/tskit/trees.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6809,10 +6809,6 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_samples,
68096809
ret = TSK_ERR_STAT_POLARISED_UNSUPPORTED;
68106810
goto out;
68116811
}
6812-
if (options & TSK_STAT_SPAN_NORMALISE) {
6813-
ret = TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED;
6814-
goto out;
6815-
}
68166812

68176813
if (windows == NULL) {
68186814
num_windows = 1;
@@ -6855,6 +6851,9 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_samples,
68556851
}
68566852
fill_lower_triangle(result, n, num_windows);
68576853

6854+
if (options & TSK_STAT_SPAN_NORMALISE) {
6855+
span_normalise(num_windows, windows, n * n, result);
6856+
}
68586857
out:
68596858
tsk_safe_free(sample_index_map);
68606859
return ret;

python/_tskitmodule.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9737,7 +9737,7 @@ static PyObject *
97379737
TreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)
97389738
{
97399739
PyObject *ret = NULL;
9740-
static char *kwlist[] = { "windows", "samples", "mode", NULL };
9740+
static char *kwlist[] = { "windows", "samples", "mode", "span_normalise", NULL };
97419741
PyArrayObject *result_array = NULL;
97429742
PyObject *windows = NULL;
97439743
PyObject *py_samples = Py_None;
@@ -9748,13 +9748,14 @@ TreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwd
97489748
npy_intp *shape, dims[3];
97499749
tsk_size_t num_samples, num_windows;
97509750
tsk_id_t *samples = NULL;
9751+
int span_normalise = 0;
97519752
int err;
97529753

97539754
if (TreeSequence_check_state(self) != 0) {
97549755
goto out;
97559756
}
9756-
if (!PyArg_ParseTupleAndKeywords(
9757-
args, kwds, "O|Os", kwlist, &windows, &py_samples, &mode)) {
9757+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Osi", kwlist, &windows, &py_samples,
9758+
&mode, &span_normalise)) {
97589759
goto out;
97599760
}
97609761
num_samples = tsk_treeseq_get_num_samples(self->tree_sequence);
@@ -9778,9 +9779,14 @@ TreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwd
97789779
if (result_array == NULL) {
97799780
goto out;
97809781
}
9782+
97819783
if (parse_stats_mode(mode, &options) != 0) {
97829784
goto out;
97839785
}
9786+
if (span_normalise) {
9787+
options |= TSK_STAT_SPAN_NORMALISE;
9788+
}
9789+
97849790
// clang-format off
97859791
Py_BEGIN_ALLOW_THREADS
97869792
err = tsk_treeseq_divergence_matrix(

0 commit comments

Comments
 (0)