@@ -198,9 +198,13 @@ def get_example_historical_sampled_ts(
198
198
return tables .tree_sequence ()
199
199
200
200
201
- EXAMPLE_SCHEMA = tskit .MetadataSchema (
202
- {"codec" : "json" , "properties" : {"foo" : {"type" : "integer" }}}
203
- )
201
def example_schema(default):
    """Build a JSON-codec metadata schema with one defaulted string property.

    The schema declares a single property, ``default_prop``, of type
    ``string`` whose ``default`` entry is the value passed in.

    :param default: Value stored under the ``default`` key of ``default_prop``.
    :return: A ``tskit.MetadataSchema`` wrapping the schema dictionary.
    """
    default_prop_spec = {"type": "string", "default": default}
    schema_dict = {
        "codec": "json",
        "properties": {"default_prop": default_prop_spec},
    }
    return tskit.MetadataSchema(schema_dict)
204
208
205
209
206
210
def add_array_to_dataset (name , array , zarr_path , dims = None ):
@@ -227,22 +231,23 @@ def make_ts_and_zarr(path, add_optional=False, shuffle_alleles=True):
227
231
)
228
232
ts = msprime .sim_mutations (ts , rate = 0.025 , model = msprime .JC69 (), random_seed = 42 )
229
233
tables = ts .dump_tables ()
230
- tables .metadata_schema = EXAMPLE_SCHEMA
234
+ tables .metadata_schema = example_schema ("example" )
235
+ tables .metadata = {"foo" : "bar" }
231
236
sites_copy = tables .sites .copy ()
232
237
tables .sites .clear ()
233
- tables .sites .metadata_schema = EXAMPLE_SCHEMA
238
+ tables .sites .metadata_schema = example_schema ( "sites" )
234
239
for i , site in enumerate (sites_copy ):
235
240
tables .sites .append (site .replace (metadata = {"id_site" : i }))
236
241
237
242
pops_copy = tables .populations .copy ()
238
243
tables .populations .clear ()
239
- tables .populations .metadata_schema = EXAMPLE_SCHEMA
244
+ tables .populations .metadata_schema = example_schema ( "populations" )
240
245
for i , pop in enumerate (pops_copy ):
241
246
tables .populations .append (pop .replace (metadata = {"id_pop" : i }))
242
247
243
248
indiv_copy = tables .individuals .copy ()
244
249
tables .individuals .clear ()
245
- tables .individuals .metadata_schema = EXAMPLE_SCHEMA
250
+ tables .individuals .metadata_schema = example_schema ( "individuals" )
246
251
for i , ind in enumerate (indiv_copy ):
247
252
tables .individuals .append (ind .replace (metadata = {"id_indiv" : i }))
248
253
@@ -320,12 +325,14 @@ def make_ts_and_zarr(path, add_optional=False, shuffle_alleles=True):
320
325
ts .sequence_length + 1337 ,
321
326
path / "data.zarr" ,
322
327
)
328
+ sites_md = tables .sites .metadata
329
+ sites_md_offset = tables .sites .metadata_offset
323
330
add_array_to_dataset (
324
331
"sites_metadata" ,
325
332
np .array (
326
333
[
327
- tables . sites . metadata_schema . encode_row ( site . metadata )
328
- for site in ts .sites ( )
334
+ sites_md [ sites_md_offset [ i ] : sites_md_offset [ i + 1 ]]. tobytes ( )
335
+ for i in range ( ts .num_sites )
329
336
]
330
337
),
331
338
path / "data.zarr" ,
@@ -347,6 +354,11 @@ def make_ts_and_zarr(path, add_optional=False, shuffle_alleles=True):
347
354
repr (tables .metadata_schema ),
348
355
path / "data.zarr" ,
349
356
)
357
+ add_attribute_to_dataset (
358
+ "metadata" ,
359
+ tables .metadata_bytes .decode (),
360
+ path / "data.zarr" ,
361
+ )
350
362
add_array_to_dataset (
351
363
"provenances_timestamp" ,
352
364
["2021-01-01T00:00:00" , "2021-01-02T00:00:00" ],
@@ -364,12 +376,16 @@ def make_ts_and_zarr(path, add_optional=False, shuffle_alleles=True):
364
376
repr (tables .populations .metadata_schema ),
365
377
path / "data.zarr" ,
366
378
)
379
+ populations_md = tables .populations .metadata
380
+ populations_md_offset = tables .populations .metadata_offset
367
381
add_array_to_dataset (
368
382
"populations_metadata" ,
369
383
np .array (
370
384
[
371
- tables .populations .metadata_schema .encode_row (population .metadata )
372
- for population in ts .populations ()
385
+ populations_md [
386
+ populations_md_offset [i ] : populations_md_offset [i + 1 ]
387
+ ].tobytes ()
388
+ for i in range (ts .num_populations )
373
389
]
374
390
),
375
391
path / "data.zarr" ,
@@ -381,13 +397,15 @@ def make_ts_and_zarr(path, add_optional=False, shuffle_alleles=True):
381
397
path / "data.zarr" ,
382
398
["samples" ],
383
399
)
400
+ indiv_md = tables .individuals .metadata
401
+ indiv_md_offset = tables .individuals .metadata_offset
384
402
add_array_to_dataset (
385
403
"individuals_metadata" ,
386
404
np .array (
387
405
[
388
- tables . individuals . metadata_schema . encode_row ( individual . metadata )
389
- for individual in ts .individuals ( )
390
- ]
406
+ indiv_md [ indiv_md_offset [ i ] : indiv_md_offset [ i + 1 ]]. tobytes ( )
407
+ for i in range ( ts .num_individuals )
408
+ ],
391
409
),
392
410
path / "data.zarr" ,
393
411
["samples" ],
0 commit comments