Skip to content

Commit 9955a69

Browse files
benjeffery, mergify[bot]
authored and committed
Change genotypes to 32bit
1 parent 2e21a1b commit 9955a69

14 files changed

+267
-498
lines changed

c/CHANGELOG.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22
[0.99.16] - 2022-0X-XX
33
----------------------
44

5-
- Make dumping of tables and tree seqences to disk a zero-copy operation.
5+
**Breaking changes**
6+
7+
- Change the type of genotypes to ``int32_t``, removing the TSK_16_BIT_GENOTYPES flag option.
8+
(:user:`benjeffery`, :issue:`463`, :pr:`2108`)
9+
10+
**Features**
11+
12+
- Make dumping of tables and tree sequences to disk a zero-copy operation.
613
(:user:`benjeffery`, :issue:`2111`, :pr:`2124`)
714

815
----------------------

c/tests/test_genotypes.c

+123-247
Large diffs are not rendered by default.

c/tests/test_haplotype_matching.c

+10-10
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ tsk_ls_hmm_next_probability_test(tsk_ls_hmm_t *TSK_UNUSED(self),
4949
}
5050

5151
static int
52-
run_test_hmm(tsk_ls_hmm_t *hmm, int8_t *haplotype, tsk_compressed_matrix_t *output)
52+
run_test_hmm(tsk_ls_hmm_t *hmm, int32_t *haplotype, tsk_compressed_matrix_t *output)
5353
{
5454
int ret = 0;
5555

@@ -79,7 +79,7 @@ test_single_tree_missing_alleles(void)
7979

8080
double rho[] = { 0, 0.25, 0.25 };
8181
double mu[] = { 0.125, 0.125, 0.125 };
82-
int8_t h[] = { 0, 0, 0, 0 };
82+
int32_t h[] = { 0, 0, 0, 0 };
8383

8484
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
8585
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
@@ -108,7 +108,7 @@ test_single_tree_exact_match(void)
108108

109109
double rho[] = { 0.0, 0.25, 0.25 };
110110
double mu[] = { 0, 0, 0 };
111-
int8_t h[] = { 1, 1, 1 };
111+
int32_t h[] = { 1, 1, 1 };
112112
tsk_id_t path[3];
113113
double decoded_compressed_matrix[12];
114114
unsigned int precision;
@@ -167,7 +167,7 @@ test_single_tree_missing_haplotype_data(void)
167167

168168
double rho[] = { 0.0, 0.25, 0.25 };
169169
double mu[] = { 0, 0, 0 };
170-
int8_t h[] = { 1, TSK_MISSING_DATA, 1 };
170+
int32_t h[] = { 1, TSK_MISSING_DATA, 1 };
171171
tsk_id_t path[3];
172172
double decoded_compressed_matrix[12];
173173

@@ -211,7 +211,7 @@ test_single_tree_match_impossible(void)
211211
double rho[] = { 0.0, 0.25, 0.25 };
212212
double mu[] = { 0, 0, 0 };
213213
/* This haplotype can't happen with a mutation rate of 0 */
214-
int8_t h[] = { 0, 0, 0 };
214+
int32_t h[] = { 0, 0, 0 };
215215

216216
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
217217
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
@@ -247,7 +247,7 @@ test_single_tree_errors(void)
247247

248248
double rho[] = { 0.0, 0.25, 0.25 };
249249
double mu[] = { 0, 0, 0 };
250-
int8_t h[] = { 0, 0, 0 };
250+
int32_t h[] = { 0, 0, 0 };
251251

252252
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
253253
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
@@ -309,7 +309,7 @@ test_single_tree_compressed_matrix(void)
309309

310310
double rho[] = { 0.0, 0.25, 0.25 };
311311
double mu[] = { 0.1, 0.1, 0.1 };
312-
int8_t h[] = { 0, 0, 0 };
312+
int32_t h[] = { 0, 0, 0 };
313313

314314
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
315315
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
@@ -374,7 +374,7 @@ test_single_tree_viterbi_matrix(void)
374374
tsk_ls_hmm_t ls_hmm;
375375
double rho[] = { 0.0, 0.25, 0.25 };
376376
double mu[] = { 0, 0, 0 };
377-
int8_t h[] = { 1, 1, 1 };
377+
int32_t h[] = { 1, 1, 1 };
378378
tsk_id_t path[3];
379379
tsk_value_transition_t T[2];
380380
int j;
@@ -448,7 +448,7 @@ test_multi_tree_exact_match(void)
448448

449449
double rho[] = { 0.0, 0.25, 0.25 };
450450
double mu[] = { 0, 0, 0 };
451-
int8_t h[] = { 1, 1, 1 };
451+
int32_t h[] = { 1, 1, 1 };
452452
tsk_id_t path[3];
453453
double decoded_compressed_matrix[12];
454454
unsigned int precision;
@@ -530,7 +530,7 @@ test_caterpillar_tree_many_values(void)
530530
tsk_ls_hmm_t ls_hmm;
531531
tsk_compressed_matrix_t matrix;
532532
double unused[] = { 0, 0, 0, 0, 0 };
533-
int8_t h[] = { 0, 0, 0, 0, 0 };
533+
int32_t h[] = { 0, 0, 0, 0, 0 };
534534
tsk_size_t n[] = {
535535
8,
536536
16,

c/tests/test_trees.c

+32-32
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ verify_simplify_genotypes(tsk_treeseq_t *ts, tsk_treeseq_t *subset,
486486
tsk_vargen_t vargen, subset_vargen;
487487
tsk_variant_t *variant, *subset_variant;
488488
tsk_size_t j, k;
489-
int8_t a1, a2;
489+
int32_t a1, a2;
490490
const tsk_id_t *sample_index_map;
491491

492492
sample_index_map = tsk_treeseq_get_sample_index_map(ts);
@@ -511,8 +511,8 @@ verify_simplify_genotypes(tsk_treeseq_t *ts, tsk_treeseq_t *subset,
511511
CU_ASSERT_EQUAL(variant->site->position, subset_variant->site->position);
512512
for (k = 0; k < num_samples; k++) {
513513
CU_ASSERT_FATAL(sample_index_map[samples[k]] < (tsk_id_t) ts->num_samples);
514-
a1 = variant->genotypes.i8[sample_index_map[samples[k]]];
515-
a2 = subset_variant->genotypes.i8[k];
514+
a1 = variant->genotypes[sample_index_map[samples[k]]];
515+
a2 = subset_variant->genotypes[k];
516516
/* printf("a1 = %d, a2 = %d\n", a1, a2); */
517517
/* printf("k = %d original node = %d " */
518518
/* "original_index = %d a1=%.*s a2=%.*s\n", */
@@ -1330,29 +1330,29 @@ test_simplest_non_sample_leaf_records(void)
13301330
CU_ASSERT_EQUAL_FATAL(ret, 1);
13311331
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
13321332
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
1333-
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
1334-
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
1333+
CU_ASSERT_EQUAL(var->genotypes[0], 1);
1334+
CU_ASSERT_EQUAL(var->genotypes[1], 0);
13351335

13361336
ret = tsk_vargen_next(&vargen, &var);
13371337
CU_ASSERT_EQUAL_FATAL(ret, 1);
13381338
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
13391339
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
1340-
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
1341-
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
1340+
CU_ASSERT_EQUAL(var->genotypes[0], 0);
1341+
CU_ASSERT_EQUAL(var->genotypes[1], 1);
13421342

13431343
ret = tsk_vargen_next(&vargen, &var);
13441344
CU_ASSERT_EQUAL_FATAL(ret, 1);
13451345
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
13461346
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
1347-
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
1348-
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
1347+
CU_ASSERT_EQUAL(var->genotypes[0], 0);
1348+
CU_ASSERT_EQUAL(var->genotypes[1], 0);
13491349

13501350
ret = tsk_vargen_next(&vargen, &var);
13511351
CU_ASSERT_EQUAL_FATAL(ret, 1);
13521352
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
13531353
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
1354-
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
1355-
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
1354+
CU_ASSERT_EQUAL(var->genotypes[0], 0);
1355+
CU_ASSERT_EQUAL(var->genotypes[1], 0);
13561356

13571357
ret = tsk_vargen_next(&vargen, &var);
13581358
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -1659,9 +1659,9 @@ test_simplest_back_mutations(void)
16591659
CU_ASSERT_EQUAL(var->num_alleles, 2);
16601660
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
16611661
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
1662-
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
1663-
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
1664-
CU_ASSERT_EQUAL(var->genotypes.i8[2], 0);
1662+
CU_ASSERT_EQUAL(var->genotypes[0], 0);
1663+
CU_ASSERT_EQUAL(var->genotypes[1], 1);
1664+
CU_ASSERT_EQUAL(var->genotypes[2], 0);
16651665
CU_ASSERT_EQUAL(var->site->id, 0);
16661666
CU_ASSERT_EQUAL(var->site->mutations_length, 2);
16671667
tsk_vargen_free(&vargen);
@@ -3069,10 +3069,10 @@ test_simplest_map_mutations(void)
30693069
const char *edges = "0 1 2 0,1\n";
30703070
tsk_treeseq_t ts;
30713071
tsk_tree_t t;
3072-
int8_t genotypes[] = { 0, 0 };
3072+
int32_t genotypes[] = { 0, 0 };
30733073
tsk_size_t num_transitions;
30743074
tsk_state_transition_t *transitions;
3075-
int8_t ancestral_state;
3075+
int32_t ancestral_state;
30763076
int ret;
30773077

30783078
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3151,10 +3151,10 @@ test_simplest_nonbinary_map_mutations(void)
31513151
const char *edges = "0 1 4 0,1,2,3\n";
31523152
tsk_treeseq_t ts;
31533153
tsk_tree_t t;
3154-
int8_t genotypes[] = { 0, 0, 0, 0 };
3154+
int32_t genotypes[] = { 0, 0, 0, 0 };
31553155
tsk_size_t num_transitions;
31563156
tsk_state_transition_t *transitions;
3157-
int8_t ancestral_state;
3157+
int32_t ancestral_state;
31583158
int ret;
31593159

31603160
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3197,10 +3197,10 @@ test_simplest_unary_map_mutations(void)
31973197
"0 1 4 2,3\n";
31983198
tsk_treeseq_t ts;
31993199
tsk_tree_t t;
3200-
int8_t genotypes[] = { 0, 0 };
3200+
int32_t genotypes[] = { 0, 0 };
32013201
tsk_size_t num_transitions;
32023202
tsk_state_transition_t *transitions;
3203-
int8_t ancestral_state;
3203+
int32_t ancestral_state;
32043204
int ret;
32053205

32063206
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3241,10 +3241,10 @@ test_simplest_non_sample_leaf_map_mutations(void)
32413241
const char *edges = "0 1 2 0,1,3,4\n";
32423242
tsk_treeseq_t ts;
32433243
tsk_tree_t t;
3244-
int8_t genotypes[] = { 0, 0 };
3244+
int32_t genotypes[] = { 0, 0 };
32453245
tsk_size_t num_transitions;
32463246
tsk_state_transition_t *transitions;
3247-
int8_t ancestral_state;
3247+
int32_t ancestral_state;
32483248
int ret;
32493249

32503250
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3283,10 +3283,10 @@ test_simplest_internal_sample_map_mutations(void)
32833283
const char *edges = "0 1 2 0,1\n";
32843284
tsk_treeseq_t ts;
32853285
tsk_tree_t t;
3286-
int8_t genotypes[] = { 0, 0, 0 };
3286+
int32_t genotypes[] = { 0, 0, 0 };
32873287
tsk_size_t num_transitions;
32883288
tsk_state_transition_t *transitions;
3289-
int8_t ancestral_state;
3289+
int32_t ancestral_state;
32903290
int ret;
32913291

32923292
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3340,10 +3340,10 @@ test_simplest_multiple_root_map_mutations(void)
33403340
"0 1 5 2,3\n";
33413341
tsk_treeseq_t ts;
33423342
tsk_tree_t t;
3343-
int8_t genotypes[] = { 0, 0, 0, 0 };
3343+
int32_t genotypes[] = { 0, 0, 0, 0 };
33443344
tsk_size_t num_transitions;
33453345
tsk_state_transition_t *transitions;
3346-
int8_t ancestral_state;
3346+
int32_t ancestral_state;
33473347
int ret;
33483348

33493349
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -3397,10 +3397,10 @@ test_simplest_chained_map_mutations(void)
33973397
"0 1 4 2,3\n";
33983398
tsk_treeseq_t ts;
33993399
tsk_tree_t t;
3400-
int8_t genotypes[] = { 0, 0, 0, 0 };
3400+
int32_t genotypes[] = { 0, 0, 0, 0 };
34013401
tsk_size_t num_transitions;
34023402
tsk_state_transition_t *transitions;
3403-
int8_t ancestral_state;
3403+
int32_t ancestral_state;
34043404
int ret;
34053405

34063406
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
@@ -4543,11 +4543,11 @@ test_single_tree_map_mutations(void)
45434543
{
45444544
tsk_treeseq_t ts;
45454545
tsk_tree_t t;
4546-
int8_t genotypes[] = { 0, 1, 1, 1 };
4546+
int32_t genotypes[] = { 0, 1, 1, 1 };
45474547
int ret = 0;
45484548
tsk_size_t num_transitions;
45494549
tsk_state_transition_t *transitions;
4550-
int8_t ancestral_state, j;
4550+
int32_t ancestral_state, j;
45514551

45524552
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
45534553
NULL, NULL, NULL, 0);
@@ -4689,11 +4689,11 @@ test_single_tree_map_mutations_internal_samples(void)
46894689
"0.00000000 1.00000000 8 7\n";
46904690
tsk_treeseq_t ts;
46914691
tsk_tree_t t;
4692-
int8_t genotypes[] = { 0, 2, 2, 1, 0 };
4692+
int32_t genotypes[] = { 0, 2, 2, 1, 0 };
46934693
int ret = 0;
46944694
tsk_size_t num_transitions;
46954695
tsk_state_transition_t *transitions;
4696-
int8_t ancestral_state;
4696+
int32_t ancestral_state;
46974697

46984698
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
46994699
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 5);

0 commit comments

Comments
 (0)