@@ -80,14 +80,14 @@ def test_sgkit_dataset_roundtrip(tmp_path):
8080 inf_ts = tsinfer .infer (samples )
8181 ds = sgkit .load_dataset (zarr_path )
8282
83- assert ts .num_individuals == inf_ts .num_individuals == ds .dims ["samples" ]
83+ assert ts .num_individuals == inf_ts .num_individuals == ds .sizes ["samples" ]
8484 for ts_ind , sample_id in zip (inf_ts .individuals (), ds ["sample_id" ].values ):
8585 assert ts_ind .metadata ["variant_data_sample_id" ] == sample_id
8686
8787 assert (
88- ts .num_samples == inf_ts .num_samples == ds .dims ["samples" ] * ds .dims ["ploidy" ]
88+ ts .num_samples == inf_ts .num_samples == ds .sizes ["samples" ] * ds .sizes ["ploidy" ]
8989 )
90- assert ts .num_sites == inf_ts .num_sites == ds .dims ["variants" ]
90+ assert ts .num_sites == inf_ts .num_sites == ds .sizes ["variants" ]
9191 assert ts .sequence_length == inf_ts .sequence_length == ds .attrs ["contig_lengths" ][0 ]
9292 for (
9393 v ,
@@ -122,7 +122,7 @@ def test_sgkit_individual_metadata_not_clobbered(tmp_path):
122122 inf_ts = tsinfer .infer (samples )
123123 ds = sgkit .load_dataset (zarr_path )
124124
125- assert ts .num_individuals == inf_ts .num_individuals == ds .dims ["samples" ]
125+ assert ts .num_individuals == inf_ts .num_individuals == ds .sizes ["samples" ]
126126 for i , (ts_ind , sample_id ) in enumerate (
127127 zip (inf_ts .individuals (), ds ["sample_id" ].values )
128128 ):
@@ -694,23 +694,15 @@ def test_phased(self, tmp_path):
694694 ds ["call_genotype" ].dims ,
695695 np .ones (ds ["call_genotype" ].shape , dtype = bool ),
696696 )
697- ds ["variant_ancestral_allele" ] = (
698- ds ["variant_position" ].dims ,
699- np .array (["A" , "C" , "G" ], dtype = "S1" ),
700- )
701697 sgkit .save_dataset (ds , path )
702- tsinfer .VariantData (path , "variant_ancestral_allele" )
698+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
703699
704700 def test_ploidy1_missing_phase (self , tmp_path ):
705701 path = tmp_path / "data.zarr"
706702 # Ploidy==1 is always ok
707703 ds = sgkit .simulate_genotype_call_dataset (n_variant = 3 , n_sample = 3 , n_ploidy = 1 )
708- ds ["variant_ancestral_allele" ] = (
709- ds ["variant_position" ].dims ,
710- np .array (["A" , "C" , "G" ], dtype = "S1" ),
711- )
712704 sgkit .save_dataset (ds , path )
713- tsinfer .VariantData (path , "variant_ancestral_allele" )
705+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
714706
715707 def test_ploidy1_unphased (self , tmp_path ):
716708 path = tmp_path / "data.zarr"
@@ -719,12 +711,8 @@ def test_ploidy1_unphased(self, tmp_path):
719711 ds ["call_genotype" ].dims ,
720712 np .zeros (ds ["call_genotype" ].shape , dtype = bool ),
721713 )
722- ds ["variant_ancestral_allele" ] = (
723- ds ["variant_position" ].dims ,
724- np .array (["A" , "C" , "G" ], dtype = "S1" ),
725- )
726714 sgkit .save_dataset (ds , path )
727- tsinfer .VariantData (path , "variant_ancestral_allele" )
715+ tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
728716
729717 def test_duplicate_positions (self , tmp_path ):
730718 path = tmp_path / "data.zarr"
@@ -749,14 +737,10 @@ def test_empty_alleles_not_at_end(self, tmp_path):
749737 ds ["variant_allele" ].dims ,
750738 np .array ([["" , "A" , "C" ], ["A" , "C" , "" ], ["A" , "C" , "" ]], dtype = "S1" ),
751739 )
752- ds ["variant_ancestral_allele" ] = (
753- ["variants" ],
754- np .array (["C" , "A" , "A" ], dtype = "S1" ),
755- )
756740 sgkit .save_dataset (ds , path )
757- samples = tsinfer .VariantData (path , "variant_ancestral_allele" )
741+ vdata = tsinfer .VariantData (path , ds [ "variant_allele" ][:, 0 ]. values . astype ( str ) )
758742 with pytest .raises (ValueError , match = "Empty alleles must be at the end" ):
759- tsinfer .infer (samples )
743+ tsinfer .infer (vdata )
760744
761745 def test_unimplemented_from_tree_sequence (self ):
762746 # NB we should reimplement something like this functionality.
0 commit comments