Skip to content

Commit bbe861e

Browse files
amontanez24 and fealho authored
Fixing tests for rdt 1.13.2 (#2320)
Co-authored-by: Felipe <[email protected]>
1 parent 84cf401 commit bbe861e

File tree

5 files changed: +33 -26 lines changed

pyproject.toml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ dependencies = [
3535
'copulas>=0.12.0',
3636
'ctgan>=0.10.2',
3737
'deepecho>=0.6.1',
38-
'rdt>=1.13.1',
38+
'rdt>=1.13.2.dev0',
3939
'sdmetrics>=0.17.0',
4040
'platformdirs>=4.0',
4141
'pyyaml>=6.0.1',

sdv/constraints/tabular.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,7 @@ def _fit(self, table_data):
299299
self._uuids_to_combinations = {}
300300
for combination in self._combinations.itertuples(index=False, name=None):
301301
mappable_combination = get_mappable_combination(combination)
302-
uuid_str = str(uuid.uuid4())
302+
uuid_str = str(uuid.uuid5(uuid.NAMESPACE_DNS, str(mappable_combination)))
303303
self._combinations_to_uuids[mappable_combination] = uuid_str
304304
self._uuids_to_combinations[uuid_str] = mappable_combination
305305

tests/integration/constraints/test_tabular.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,10 @@ def test_fixed_combinations_with_nans():
6969
assert len(synthetic_data) == 1000
7070
pd.testing.assert_frame_equal(
7171
synthetic_data.drop_duplicates(ignore_index=True),
72-
data.drop_duplicates(ignore_index=True),
72+
pd.DataFrame({
73+
'A': [1, np.nan, 2],
74+
'B': [10, 30, 20],
75+
}).drop_duplicates(ignore_index=True),
7376
check_like=True,
7477
)
7578

tests/integration/single_table/test_base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ def test_sample_keys_are_scrambled():
212212

213213
# Assert
214214
ids = sampled['guest_email'].head()
215-
expected_keys = pd.Series(['ARH', 'BBH', 'BAP', 'AIF', 'AQP'], name='guest_email')
215+
expected_keys = pd.Series(['AQQ', 'BBI', 'AET', 'AZM', 'AHZ'], name='guest_email')
216216
pd.testing.assert_series_equal(ids, expected_keys)
217217

218218

tests/integration/single_table/test_constraints.py

+26 -22
Original file line number | Diff line number | Diff line change
@@ -468,28 +468,29 @@ def test_inequality_constraint_with_datetimes_and_nones():
468468
sampled = synth.sample(10)
469469

470470
# Assert
471+
synth.validate(sampled)
471472
expected_sampled = pd.DataFrame({
472473
'A': [
473474
'2020-01-02',
474475
'2020-01-02',
476+
np.nan,
477+
np.nan,
475478
'2020-01-02',
479+
np.nan,
476480
'2020-01-02',
477-
'2020-01-02',
478-
'2020-01-02',
479-
'2020-01-02',
480-
'2020-01-02',
481+
np.nan,
481482
'2020-01-02',
482483
np.nan,
483484
],
484485
'B': [
485-
np.nan,
486486
'2021-12-30',
487487
'2021-12-30',
488488
'2021-12-30',
489-
np.nan,
490489
'2021-12-30',
491490
np.nan,
492491
'2021-12-30',
492+
'2021-12-30',
493+
'2021-12-30',
493494
np.nan,
494495
'2021-12-30',
495496
],
@@ -529,19 +530,20 @@ def test_scalar_inequality_constraint_with_datetimes_and_nones():
529530
sampled = synth.sample(5)
530531

531532
# Assert
533+
synth.validate(sampled)
532534
expected_sampled = pd.DataFrame({
533535
'A': {
534536
0: np.nan,
535537
1: '2020-01-19',
536538
2: np.nan,
537-
3: '2020-01-29',
539+
3: np.nan,
538540
4: '2020-01-31',
539541
},
540542
'B': {
541-
0: '2021-07-28',
542-
1: '2021-07-14',
543-
2: '2021-07-26',
544-
3: '2021-07-02',
543+
0: np.nan,
544+
1: np.nan,
545+
2: np.nan,
546+
3: np.nan,
545547
4: '2021-06-06',
546548
},
547549
})
@@ -585,27 +587,28 @@ def test_scalar_range_constraint_with_datetimes_and_nones():
585587
sampled = synth.sample(10)
586588

587589
# Assert
590+
synth.validate(sampled)
588591
expected_sampled = pd.DataFrame({
589592
'A': {
590-
0: '2020-02-04',
593+
0: np.nan,
591594
1: np.nan,
592595
2: '2020-02-07',
593596
3: np.nan,
594-
4: np.nan,
597+
4: '2020-02-29',
595598
5: '2020-02-29',
596599
6: np.nan,
597600
7: np.nan,
598-
8: np.nan,
601+
8: '2020-01-26',
599602
9: '2020-02-02',
600603
},
601604
'B': {
602605
0: np.nan,
603606
1: np.nan,
604607
2: np.nan,
605608
3: np.nan,
606-
4: np.nan,
607-
5: '2021-11-22',
608-
6: np.nan,
609+
4: '2021-06-21',
610+
5: np.nan,
611+
6: '2021-09-28',
609612
7: '2021-06-19',
610613
8: np.nan,
611614
9: np.nan,
@@ -657,14 +660,14 @@ def test_range_constraint_with_datetimes_and_nones():
657660
'A': [
658661
'2020-01-02',
659662
'2020-01-02',
663+
np.nan,
660664
'2020-01-02',
661665
'2020-01-02',
666+
np.nan,
662667
'2020-01-02',
663668
'2020-01-02',
664669
'2020-01-02',
665-
'2020-01-02',
666-
'2020-01-02',
667-
'2020-01-02',
670+
np.nan,
668671
],
669672
'B': [
670673
np.nan,
@@ -674,7 +677,7 @@ def test_range_constraint_with_datetimes_and_nones():
674677
np.nan,
675678
'2021-12-30',
676679
np.nan,
677-
np.nan,
680+
'2021-12-30',
678681
np.nan,
679682
'2021-12-30',
680683
],
@@ -686,12 +689,13 @@ def test_range_constraint_with_datetimes_and_nones():
686689
np.nan,
687690
'2022-12-30',
688691
np.nan,
689-
np.nan,
692+
'2022-12-30',
690693
np.nan,
691694
'2022-12-30',
692695
],
693696
})
694697
pd.testing.assert_frame_equal(expected_sampled, sampled)
698+
synth.validate(sampled)
695699

696700

697701
def test_inequality_constraint_all_possible_nans_configurations():

0 commit comments

Comments
 (0)