@@ -80,14 +80,14 @@ def test_sgkit_dataset_roundtrip(tmp_path):
80
80
inf_ts = tsinfer .infer (samples )
81
81
ds = sgkit .load_dataset (zarr_path )
82
82
83
- assert ts .num_individuals == inf_ts .num_individuals == ds .dims ["samples" ]
83
+ assert ts .num_individuals == inf_ts .num_individuals == ds .sizes ["samples" ]
84
84
for ts_ind , sample_id in zip (inf_ts .individuals (), ds ["sample_id" ].values ):
85
85
assert ts_ind .metadata ["variant_data_sample_id" ] == sample_id
86
86
87
87
assert (
88
- ts .num_samples == inf_ts .num_samples == ds .dims ["samples" ] * ds .dims ["ploidy" ]
88
+ ts .num_samples == inf_ts .num_samples == ds .sizes ["samples" ] * ds .sizes ["ploidy" ]
89
89
)
90
- assert ts .num_sites == inf_ts .num_sites == ds .dims ["variants" ]
90
+ assert ts .num_sites == inf_ts .num_sites == ds .sizes ["variants" ]
91
91
assert ts .sequence_length == inf_ts .sequence_length == ds .attrs ["contig_lengths" ][0 ]
92
92
for (
93
93
v ,
@@ -122,7 +122,7 @@ def test_sgkit_individual_metadata_not_clobbered(tmp_path):
122
122
inf_ts = tsinfer .infer (samples )
123
123
ds = sgkit .load_dataset (zarr_path )
124
124
125
- assert ts .num_individuals == inf_ts .num_individuals == ds .dims ["samples" ]
125
+ assert ts .num_individuals == inf_ts .num_individuals == ds .sizes ["samples" ]
126
126
for i , (ts_ind , sample_id ) in enumerate (
127
127
zip (inf_ts .individuals (), ds ["sample_id" ].values )
128
128
):
@@ -694,23 +694,15 @@ def test_phased(self, tmp_path):
694
694
ds ["call_genotype" ].dims ,
695
695
np .ones (ds ["call_genotype" ].shape , dtype = bool ),
696
696
)
697
- ds ["variant_ancestral_allele" ] = (
698
- ds ["variant_position" ].dims ,
699
- np .array (["A" , "C" , "G" ], dtype = "S1" ),
700
- )
701
697
sgkit .save_dataset (ds , path )
702
- tsinfer .VariantData (path , "variant_ancestral_allele" )
698
+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
703
699
704
700
def test_ploidy1_missing_phase (self , tmp_path ):
705
701
path = tmp_path / "data.zarr"
706
702
# Ploidy==1 is always ok
707
703
ds = sgkit .simulate_genotype_call_dataset (n_variant = 3 , n_sample = 3 , n_ploidy = 1 )
708
- ds ["variant_ancestral_allele" ] = (
709
- ds ["variant_position" ].dims ,
710
- np .array (["A" , "C" , "G" ], dtype = "S1" ),
711
- )
712
704
sgkit .save_dataset (ds , path )
713
- tsinfer .VariantData (path , "variant_ancestral_allele" )
705
+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
714
706
715
707
def test_ploidy1_unphased (self , tmp_path ):
716
708
path = tmp_path / "data.zarr"
@@ -719,12 +711,8 @@ def test_ploidy1_unphased(self, tmp_path):
719
711
ds ["call_genotype" ].dims ,
720
712
np .zeros (ds ["call_genotype" ].shape , dtype = bool ),
721
713
)
722
- ds ["variant_ancestral_allele" ] = (
723
- ds ["variant_position" ].dims ,
724
- np .array (["A" , "C" , "G" ], dtype = "S1" ),
725
- )
726
714
sgkit .save_dataset (ds , path )
727
- tsinfer .VariantData (path , "variant_ancestral_allele" )
715
+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
728
716
729
717
def test_duplicate_positions (self , tmp_path ):
730
718
path = tmp_path / "data.zarr"
@@ -749,14 +737,10 @@ def test_empty_alleles_not_at_end(self, tmp_path):
749
737
ds ["variant_allele" ].dims ,
750
738
np .array ([["" , "A" , "C" ], ["A" , "C" , "" ], ["A" , "C" , "" ]], dtype = "S1" ),
751
739
)
752
- ds ["variant_ancestral_allele" ] = (
753
- ["variants" ],
754
- np .array (["C" , "A" , "A" ], dtype = "S1" ),
755
- )
756
740
sgkit .save_dataset (ds , path )
757
- samples = tsinfer .VariantData (path , "variant_ancestral_allele" )
741
+ vdata = tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
758
742
with pytest .raises (ValueError , match = "Empty alleles must be at the end" ):
759
- tsinfer .infer (samples )
743
+ tsinfer .infer (vdata )
760
744
761
745
def test_unimplemented_from_tree_sequence (self ):
762
746
# NB we should reimplement something like this functionality.
0 commit comments