Skip to content

Commit 8235f6f

Browse files
committed
Warn when using defaults that aren't explicit and not metadata
1 parent e3b2155 commit 8235f6f

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

tests/test_variantdata.py

+31-12
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,11 @@ def test_variantdata_accessors_defaults(tmp_path, in_mem):
218218
ds = data if in_mem else sgkit.load_dataset(data)
219219

220220
default_schema = tskit.MetadataSchema.permissive_json().schema
221-
assert vdata.sequence_length == ts.sequence_length
221+
with pytest.warns(
222+
UserWarning,
223+
match="`sequence_length` was not found as an attribute in the dataset",
224+
):
225+
assert vdata.sequence_length == ts.sequence_length
222226
assert vdata.sites_metadata_schema == default_schema
223227
assert vdata.sites_metadata == [{} for _ in range(ts.num_sites)]
224228
for time in vdata.sites_time:
@@ -234,17 +238,32 @@ def test_variantdata_accessors_defaults(tmp_path, in_mem):
234238
assert vdata.individuals_metadata == [
235239
{"variant_data_sample_id": sample_id} for sample_id in ds.sample_id[:]
236240
]
237-
for time in vdata.individuals_time:
238-
assert tskit.is_unknown_time(time)
239-
assert np.array_equal(
240-
vdata.individuals_location, np.array([[]] * ts.num_individuals, dtype=float)
241-
)
242-
assert np.array_equal(
243-
vdata.individuals_population, np.full(ts.num_individuals, tskit.NULL)
244-
)
245-
assert np.array_equal(
246-
vdata.individuals_flags, np.zeros(ts.num_individuals, dtype=int)
247-
)
241+
with pytest.warns(
242+
UserWarning, match="`individuals_time` was not found as an array in the dataset"
243+
):
244+
for time in vdata.individuals_time:
245+
assert tskit.is_unknown_time(time)
246+
with pytest.warns(
247+
UserWarning,
248+
match="`individuals_location` was not found as an array in the dataset",
249+
):
250+
assert np.array_equal(
251+
vdata.individuals_location, np.array([[]] * ts.num_individuals, dtype=float)
252+
)
253+
with pytest.warns(
254+
UserWarning,
255+
match="`individuals_population` was not found as an array in the dataset",
256+
):
257+
assert np.array_equal(
258+
vdata.individuals_population, np.full(ts.num_individuals, tskit.NULL)
259+
)
260+
with pytest.warns(
261+
UserWarning,
262+
match="`individuals_flags` was not found as an array in the dataset",
263+
):
264+
assert np.array_equal(
265+
vdata.individuals_flags, np.zeros(ts.num_individuals, dtype=int)
266+
)
248267

249268

250269
@pytest.mark.skipif(sys.platform == "win32", reason="No cyvcf2 on windows")

tsinfer/formats.py

+27
Original file line numberDiff line numberDiff line change
@@ -2520,6 +2520,12 @@ def sequence_length(self):
25202520
try:
25212521
return self.data.attrs["sequence_length"]
25222522
except KeyError:
2523+
warnings.warn(
2524+
"`sequence_length` was not found as an attribute in the dataset, so"
2525+
" the largest position has been used. It can be set with"
2526+
" ds.attrs['sequence_length'] = 1337; ds.to_zarr('path/to/store',"
2527+
" mode='a')"
2528+
)
25232529
return int(np.max(self.data["variant_position"])) + 1
25242530

25252531
@property
@@ -2653,6 +2659,12 @@ def individuals_time(self):
26532659
try:
26542660
return self.data["individuals_time"][:][self.individuals_select]
26552661
except KeyError:
2662+
warnings.warn(
2663+
"`individuals_time` was not found as an array in the dataset, so "
2664+
"tskit.UNKNOWN_TIME has been used. It can be apppended to the dataset "
2665+
"with data_array.to_zarr('path/to/store', append_dim='samples', "
2666+
"mode='a')"
2667+
)
26562668
return np.full(self.num_individuals, tskit.UNKNOWN_TIME)
26572669

26582670
@functools.cached_property
@@ -2696,20 +2708,35 @@ def individuals_location(self):
26962708
try:
26972709
return self.data["individuals_location"][:][self.individuals_select]
26982710
except KeyError:
2711+
warnings.warn(
2712+
"`individuals_location` was not found as an array in the dataset, "
2713+
"so [] has been used. It can be apppended to the dataset with "
2714+
"data_array.to_zarr('path/to/store', append_dim='samples', mode='a')"
2715+
)
26992716
return np.array([[]] * self.num_individuals, dtype=float)
27002717

27012718
@functools.cached_property
27022719
def individuals_population(self):
27032720
try:
27042721
return self.data["individuals_population"][:][self.individuals_select]
27052722
except KeyError:
2723+
warnings.warn(
2724+
"`individuals_population` was not found as an array in the dataset, "
2725+
"so tskit.NULL has been used. It can be apppended to the dataset with "
2726+
"data_array.to_zarr('path/to/store', append_dim='samples', mode='a')"
2727+
)
27062728
return np.full((self.num_individuals), tskit.NULL, dtype=np.int32)
27072729

27082730
@functools.cached_property
27092731
def individuals_flags(self):
27102732
try:
27112733
return self.data["individuals_flags"][:][self.individuals_select]
27122734
except KeyError:
2735+
warnings.warn(
2736+
"`individuals_flags` was not found as an array in the dataset, so 0 "
2737+
"has been used. It can be apppended to the dataset with "
2738+
"data_array.to_zarr('path/to/store', append_dim='samples', mode='a')"
2739+
)
27132740
return np.full((self.num_individuals), 0, dtype=np.int32)
27142741

27152742
@staticmethod

0 commit comments

Comments
 (0)