Skip to content

Commit 57f9209

Browse files
Port version of tskit's blk_alloc using c11 atomics
Minimal changes to use C11. Update setup.py for Windows C11.
1 parent d575342 commit 57f9209

9 files changed

+169
-31
lines changed

Makefile

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
CC?=gcc
2-
CFLAGS=-std=c99 -g -O3 -march=native -funroll-loops -ffast-math \
2+
CFLAGS=-std=c11 -g -O3 -march=native -funroll-loops -ffast-math \
33
# -ftree-vectorize \
44
# -ftree-vectorizer-verbose=6 \
55
# -fopt-info-vec-missed
66

7-
all: _tsinfer.cpython-34m.so
7+
all: _tsinfer.cpython-34m.so
88

9-
_tsinfer.cpython-34m.so: _tsinfermodule.c
9+
_tsinfer.cpython-34m.so: _tsinfermodule.c
1010
CC="${CC}" CFLAGS="${CFLAGS}" python3 setup.py build_ext --inplace
1111

1212
ctags:

lib/ancestor_builder.c

+11-11
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out)
115115
}
116116
fprintf(out, "\n");
117117
}
118-
tsk_blkalloc_print_state(&self->allocator, out);
118+
tsi_blkalloc_print_state(&self->allocator, out);
119119
ancestor_builder_check_state(self);
120120
return 0;
121121
}
@@ -144,12 +144,12 @@ ancestor_builder_alloc(
144144
goto out;
145145
}
146146
/* Pre-calculate the maximum sizes asked for in other methods when calling
147-
* tsk_blkalloc_get(&self->allocator, ...) */
147+
* tsi_blkalloc_get(&self->allocator, ...) */
148148
max_size = TSK_MAX(self->num_samples * sizeof(allele_t), max_size);
149149
/* NB: using self->max_sites below is probably overkill: the real number should be
150150
* the maximum number of focal sites in a single ancestor, usually << max_sites */
151151
max_size = TSK_MAX(self->max_sites * sizeof(tsk_id_t), max_size);
152-
ret = tsk_blkalloc_init(&self->allocator, max_size);
152+
ret = tsi_blkalloc_init(&self->allocator, max_size);
153153
if (ret != 0) {
154154
goto out;
155155
}
@@ -163,7 +163,7 @@ ancestor_builder_free(ancestor_builder_t *self)
163163
{
164164
tsi_safe_free(self->sites);
165165
tsi_safe_free(self->descriptors);
166-
tsk_blkalloc_free(&self->allocator);
166+
tsi_blkalloc_free(&self->allocator);
167167
return 0;
168168
}
169169

@@ -177,8 +177,8 @@ ancestor_builder_get_time_map(ancestor_builder_t *self, double time)
177177
search.time = time;
178178
avl_node = avl_search(&self->time_map, &search);
179179
if (avl_node == NULL) {
180-
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(*avl_node));
181-
time_map = tsk_blkalloc_get(&self->allocator, sizeof(*time_map));
180+
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(*avl_node));
181+
time_map = tsi_blkalloc_get(&self->allocator, sizeof(*time_map));
182182
if (avl_node == NULL || time_map == NULL) {
183183
goto out;
184184
}
@@ -439,10 +439,10 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
439439
search.num_samples = self->num_samples;
440440
avl_node = avl_search(pattern_map, &search);
441441
if (avl_node == NULL) {
442-
avl_node = tsk_blkalloc_get(&self->allocator, sizeof(avl_node_t));
443-
map_elem = tsk_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
442+
avl_node = tsi_blkalloc_get(&self->allocator, sizeof(avl_node_t));
443+
map_elem = tsi_blkalloc_get(&self->allocator, sizeof(pattern_map_t));
444444
site->genotypes
445-
= tsk_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
445+
= tsi_blkalloc_get(&self->allocator, self->num_samples * sizeof(allele_t));
446446
if (avl_node == NULL || map_elem == NULL || site->genotypes == NULL) {
447447
ret = TSI_ERR_NO_MEMORY;
448448
goto out;
@@ -465,7 +465,7 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
465465
}
466466
map_elem->num_sites++;
467467

468-
list_node = tsk_blkalloc_get(&self->allocator, sizeof(site_list_t));
468+
list_node = tsi_blkalloc_get(&self->allocator, sizeof(site_list_t));
469469
if (list_node == NULL) {
470470
ret = TSI_ERR_NO_MEMORY;
471471
goto out;
@@ -538,7 +538,7 @@ ancestor_builder_finalise(ancestor_builder_t *self)
538538
descriptor = self->descriptors + self->num_ancestors;
539539
self->num_ancestors++;
540540
descriptor->time = time_map->time;
541-
focal_sites = tsk_blkalloc_get(
541+
focal_sites = tsi_blkalloc_get(
542542
&self->allocator, pattern_map->num_sites * sizeof(tsk_id_t));
543543
if (focal_sites == NULL) {
544544
ret = TSI_ERR_NO_MEMORY;

lib/ancestor_matcher.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ ancestor_matcher_print_state(ancestor_matcher_t *self, FILE *out)
9494
}
9595
fprintf(out, "\n");
9696
}
97-
tsk_blkalloc_print_state(&self->traceback_allocator, out);
97+
tsi_blkalloc_print_state(&self->traceback_allocator, out);
9898

9999
/* ancestor_matcher_check_state(self); */
100100
return 0;
@@ -132,7 +132,7 @@ ancestor_matcher_alloc(ancestor_matcher_t *self,
132132
ret = TSI_ERR_NO_MEMORY;
133133
goto out;
134134
}
135-
ret = tsk_blkalloc_init(&self->traceback_allocator, traceback_block_size);
135+
ret = tsi_blkalloc_init(&self->traceback_allocator, traceback_block_size);
136136
if (ret != 0) {
137137
goto out;
138138
}
@@ -165,7 +165,7 @@ ancestor_matcher_free(ancestor_matcher_t *self)
165165
tsi_safe_free(self->output.left);
166166
tsi_safe_free(self->output.right);
167167
tsi_safe_free(self->output.parent);
168-
tsk_blkalloc_free(&self->traceback_allocator);
168+
tsi_blkalloc_free(&self->traceback_allocator);
169169
return 0;
170170
}
171171

@@ -229,9 +229,9 @@ ancestor_matcher_store_traceback(ancestor_matcher_t *self, const tsk_id_t site_i
229229
T[site_id].node = T[site_id - 1].node;
230230
T[site_id].recombination_required = T[site_id - 1].recombination_required;
231231
} else {
232-
list_node = tsk_blkalloc_get(&self->traceback_allocator,
232+
list_node = tsi_blkalloc_get(&self->traceback_allocator,
233233
(size_t) num_likelihood_nodes * sizeof(tsk_id_t));
234-
list_R = tsk_blkalloc_get(
234+
list_R = tsi_blkalloc_get(
235235
&self->traceback_allocator, (size_t) num_likelihood_nodes * sizeof(int8_t));
236236
if (list_node == NULL || list_R == NULL) {
237237
ret = TSI_ERR_NO_MEMORY;
@@ -554,7 +554,7 @@ ancestor_matcher_reset(ancestor_matcher_t *self)
554554
assert(self->num_nodes <= self->max_nodes);
555555

556556
memset(self->allelic_state, 0xff, self->num_nodes * sizeof(*self->allelic_state));
557-
ret = tsk_blkalloc_reset(&self->traceback_allocator);
557+
ret = tsi_blkalloc_reset(&self->traceback_allocator);
558558
if (ret != 0) {
559559
goto out;
560560
}

lib/err.c

+108
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,111 @@ tsi_strerror(int err)
105105
}
106106
return ret;
107107
}
108+
109+
/* Temporary hack. See notes in err.h for why this code is here. */
110+
111+
#include <stdlib.h>
112+
#include <stdio.h>
113+
114+
void
115+
tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out)
116+
{
117+
fprintf(out, "Block allocator%p::\n", (void *) self);
118+
fprintf(out, "\ttop = %lld\n", (long long) self->top);
119+
fprintf(out, "\tchunk_size = %lld\n", (long long) self->chunk_size);
120+
fprintf(out, "\tnum_chunks = %lld\n", (long long) self->num_chunks);
121+
fprintf(out, "\ttotal_allocated = %lld\n", (long long) self->total_allocated);
122+
fprintf(out, "\ttotal_size = %lld\n", (long long) self->total_size);
123+
}
124+
125+
int TSK_WARN_UNUSED
126+
tsi_blkalloc_reset(tsi_blkalloc_t *self)
127+
{
128+
int ret = 0;
129+
130+
self->top = 0;
131+
self->current_chunk = 0;
132+
self->total_allocated = 0;
133+
return ret;
134+
}
135+
136+
int TSK_WARN_UNUSED
137+
tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size)
138+
{
139+
int ret = 0;
140+
141+
tsk_memset(self, 0, sizeof(tsi_blkalloc_t));
142+
if (chunk_size < 1) {
143+
ret = TSK_ERR_BAD_PARAM_VALUE;
144+
goto out;
145+
}
146+
self->chunk_size = chunk_size;
147+
self->top = 0;
148+
self->current_chunk = 0;
149+
self->total_allocated = 0;
150+
self->total_size = 0;
151+
self->num_chunks = 0;
152+
self->mem_chunks = malloc(sizeof(char *));
153+
if (self->mem_chunks == NULL) {
154+
ret = TSK_ERR_NO_MEMORY;
155+
goto out;
156+
}
157+
self->mem_chunks[0] = malloc(chunk_size);
158+
if (self->mem_chunks[0] == NULL) {
159+
ret = TSK_ERR_NO_MEMORY;
160+
goto out;
161+
}
162+
self->num_chunks = 1;
163+
self->total_size = chunk_size + sizeof(void *);
164+
out:
165+
return ret;
166+
}
167+
168+
void *TSK_WARN_UNUSED
169+
tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size)
170+
{
171+
void *ret = NULL;
172+
void *p;
173+
174+
if (size > self->chunk_size) {
175+
goto out;
176+
}
177+
if ((self->top + size) > self->chunk_size) {
178+
if (self->current_chunk == (self->num_chunks - 1)) {
179+
p = realloc(self->mem_chunks, (self->num_chunks + 1) * sizeof(void *));
180+
if (p == NULL) {
181+
goto out;
182+
}
183+
self->mem_chunks = p;
184+
p = malloc(self->chunk_size);
185+
if (p == NULL) {
186+
goto out;
187+
}
188+
self->mem_chunks[self->num_chunks] = p;
189+
self->num_chunks++;
190+
self->total_size += self->chunk_size + sizeof(void *);
191+
}
192+
self->current_chunk++;
193+
self->top = 0;
194+
}
195+
ret = self->mem_chunks[self->current_chunk] + self->top;
196+
self->top += size;
197+
self->total_allocated += size;
198+
out:
199+
return ret;
200+
}
201+
202+
void
203+
tsi_blkalloc_free(tsi_blkalloc_t *self)
204+
{
205+
size_t j;
206+
207+
for (j = 0; j < self->num_chunks; j++) {
208+
if (self->mem_chunks[j] != NULL) {
209+
free(self->mem_chunks[j]);
210+
}
211+
}
212+
if (self->mem_chunks != NULL) {
213+
free(self->mem_chunks);
214+
}
215+
}

lib/err.h

+25
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,29 @@
4141

4242
const char *tsi_strerror(int err);
4343

44+
/* FIXME! Including a custom version of the tsk_blkalloc struct here so that
45+
* we can use c11 atomics on the total_size attribute. Including it in this
46+
* file and err.c as this is the least noisy place to put it, for now.
47+
* See https://github.com/jeromekelleher/sc2ts/issues/381 for reasoning.
48+
*/
49+
50+
#include "tskit.h"
51+
#include <stdatomic.h>
52+
53+
/*
 * Bookkeeping for a chunked block allocator: memory is handed out
 * sequentially from fixed-size chunks held in mem_chunks.
 */
typedef struct {
    /* Number of bytes per chunk. */
    size_t chunk_size;
    /* Offset of the next available byte in the current chunk. */
    size_t top;
    /* Index of the chunk currently being used. */
    size_t current_chunk;
    /* Total number of bytes allocated plus overhead; declared _Atomic. */
    _Atomic size_t total_size;
    /* Total number of bytes allocated. */
    size_t total_allocated;
    /* Number of memory chunks. */
    size_t num_chunks;
    /* The memory chunks. */
    char **mem_chunks;
} tsi_blkalloc_t;
62+
63+
/* Print the allocator's bookkeeping fields to `out` for debugging. */
extern void tsi_blkalloc_print_state(tsi_blkalloc_t *self, FILE *out);

/* Rewind the allocator to the start of its first chunk; returns 0. */
extern int tsi_blkalloc_reset(tsi_blkalloc_t *self);

/* Set up `self` with chunks of `chunk_size` bytes; returns 0 or an error code. */
extern int tsi_blkalloc_init(tsi_blkalloc_t *self, size_t chunk_size);

/* Return `size` bytes from the allocator, or NULL on failure. */
extern void *tsi_blkalloc_get(tsi_blkalloc_t *self, size_t size);

/* Release all chunks and the chunk table owned by `self`. */
extern void tsi_blkalloc_free(tsi_blkalloc_t *self);
68+
4469
#endif /*__ERR_H__*/

lib/meson.build

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ m_dep = cc.find_library('m', required : false)
88
cunit_dep = dependency('cunit')
99

1010
extra_c_args = [
11-
'-std=c99', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',
11+
'-std=c11', '-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',
1212
'-Wmissing-prototypes', '-Wstrict-prototypes',
1313
'-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',
1414
'-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',

lib/tree_sequence_builder.c

+5-5
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@ tree_sequence_builder_print_state(tree_sequence_builder_t *self, FILE *out)
194194
out, "%d\t%d\t%d\t%d\n", edge->left, edge->right, edge->parent, edge->child);
195195
}
196196

197-
fprintf(out, "tsk_blkalloc = \n");
198-
tsk_blkalloc_print_state(&self->tsk_blkalloc, out);
197+
fprintf(out, "tsi_blkalloc = \n");
198+
tsi_blkalloc_print_state(&self->tsi_blkalloc, out);
199199
fprintf(out, "avl_node_heap = \n");
200200
object_heap_print_state(&self->avl_node_heap, out);
201201
fprintf(out, "edge_heap = \n");
@@ -244,7 +244,7 @@ tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
244244
if (ret != 0) {
245245
goto out;
246246
}
247-
ret = tsk_blkalloc_init(&self->tsk_blkalloc,
247+
ret = tsi_blkalloc_init(&self->tsi_blkalloc,
248248
TSK_MAX(8192, num_sites * sizeof(mutation_list_node_t) / 4));
249249
if (ret != 0) {
250250
goto out;
@@ -278,7 +278,7 @@ tree_sequence_builder_free(tree_sequence_builder_t *self)
278278
tsi_safe_free(self->sites.num_alleles);
279279
tsi_safe_free(self->left_index_edges);
280280
tsi_safe_free(self->right_index_edges);
281-
tsk_blkalloc_free(&self->tsk_blkalloc);
281+
tsi_blkalloc_free(&self->tsi_blkalloc);
282282
object_heap_free(&self->avl_node_heap);
283283
object_heap_free(&self->edge_heap);
284284
return 0;
@@ -418,7 +418,7 @@ tree_sequence_builder_add_mutation(
418418
}
419419
}
420420

421-
list_node = tsk_blkalloc_get(&self->tsk_blkalloc, sizeof(mutation_list_node_t));
421+
list_node = tsi_blkalloc_get(&self->tsi_blkalloc, sizeof(mutation_list_node_t));
422422
if (list_node == NULL) {
423423
ret = TSI_ERR_NO_MEMORY;
424424
goto out;

lib/tsinfer.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ typedef struct {
108108
int flags;
109109
site_t *sites;
110110
avl_tree_t time_map;
111-
tsk_blkalloc_t allocator;
111+
tsi_blkalloc_t allocator;
112112
ancestor_descriptor_t *descriptors;
113113
} ancestor_builder_t;
114114

@@ -141,7 +141,7 @@ typedef struct {
141141
size_t num_nodes;
142142
size_t num_match_nodes;
143143
size_t num_mutations;
144-
tsk_blkalloc_t tsk_blkalloc;
144+
tsi_blkalloc_t tsi_blkalloc;
145145
object_heap_t avl_node_heap;
146146
object_heap_t edge_heap;
147147
/* Dynamic edge indexes used for tree generation and path compression. The
@@ -184,7 +184,7 @@ typedef struct {
184184
tsk_id_t *likelihood_nodes_tmp;
185185
tsk_id_t *likelihood_nodes;
186186
node_state_list_t *traceback;
187-
tsk_blkalloc_t traceback_allocator;
187+
tsi_blkalloc_t traceback_allocator;
188188
size_t total_traceback_size;
189189
struct {
190190
tsk_id_t *left;

setup.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,17 @@
3131
+ [os.path.join(kasdir, f) for f in kas_source_files]
3232
)
3333

34-
libraries = ["Advapi32"] if IS_WINDOWS else []
34+
if IS_WINDOWS:
35+
libraries = ["Advapi32"]
36+
extra_compile_args = ["/std:c11"]
37+
else:
38+
libraries = []
39+
extra_compile_args = ["-std=c11"]
3540

3641
_tsinfer_module = Extension(
3742
"_tsinfer",
3843
sources=sources,
39-
extra_compile_args=["-std=c99"],
44+
extra_compile_args=extra_compile_args,
4045
libraries=libraries,
4146
undef_macros=["NDEBUG"],
4247
include_dirs=includes + [numpy.get_include()],

0 commit comments

Comments
 (0)