Skip to content

Commit d683d42

Browse files
committed
make release-tag: Merge branch 'main' into stable
2 parents 7ce9da1 + b474ec8 commit d683d42

File tree

8 files changed

+87
-27
lines changed

8 files changed

+87
-27
lines changed

Diff for: HISTORY.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# Release Notes
22

3-
### v1.16.0 - 2024-08-22
3+
## v1.16.1 - 2024-08-27
4+
5+
### Internal
6+
7+
* [dtypes] `FixedIncrements` Fails with New Numerical Data Types - Issue [#2157](https://github.com/sdv-dev/SDV/issues/2157) by @R-Palazzo
8+
9+
## v1.16.0 - 2024-08-22
410

511
This release enables the `HMASynthesizer` and other utility functions to work with null foreign key values! It also adds an `anonymization` method to the metadata classes. Additionally, it patches a bug that lets SDV work with more Pandas data types.
612

Diff for: latest_requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cloudpickle==3.0.0
2-
copulas==0.11.0
2+
copulas==0.11.1
33
ctgan==0.10.1
44
deepecho==0.6.0
55
graphviz==0.20.3

Diff for: pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ namespaces = false
132132
version = {attr = 'sdv.__version__'}
133133

134134
[tool.bumpversion]
135-
current_version = "1.16.0"
135+
current_version = "1.16.1.dev1"
136136
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
137137
serialize = [
138138
'{major}.{minor}.{patch}.{release}{candidate}',

Diff for: sdv/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
__author__ = 'DataCebo, Inc.'
88
__email__ = 'info@sdv.dev'
9-
__version__ = '1.16.0'
9+
__version__ = '1.16.1.dev1'
1010

1111

1212
import sys

Diff for: sdv/constraints/tabular.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,9 @@ def _transform(self, table_data):
13391339
pandas.DataFrame:
13401340
Data divided by increment.
13411341
"""
1342-
table_data[self.column_name] = table_data[self.column_name] / self.increment_value
1342+
table_data[self.column_name] = (table_data[self.column_name] / self.increment_value).astype(
1343+
self._dtype
1344+
)
13431345
return table_data
13441346

13451347
def _reverse_transform(self, table_data):

Diff for: static_code_analysis.txt

+22-22
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Run started:2024-07-11 19:44:16.993309
1+
Run started:2024-08-23 00:37:57.536879
22

33
Test results:
44
>> Issue: [B105:hardcoded_password_string] Possible hardcoded password: '# Release Notes
@@ -17,21 +17,21 @@ Test results:
1717
Severity: Low Confidence: High
1818
CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html)
1919
More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b110_try_except_pass.html
20-
Location: ./sdv/_utils.py:320:8
21-
319
22-
320 except Exception:
23-
321 pass
24-
322
20+
Location: ./sdv/_utils.py:326:8
21+
325
22+
326 except Exception:
23+
327 pass
24+
328
2525

2626
--------------------------------------------------
2727
>> Issue: [B105:hardcoded_password_string] Possible hardcoded password: '#'
2828
Severity: Low Confidence: Medium
2929
CWE: CWE-259 (https://cwe.mitre.org/data/definitions/259.html)
3030
More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b105_hardcoded_password_string.html
31-
Location: ./sdv/constraints/tabular.py:1125:16
32-
1124 def _get_diff_column_name(self, table_data):
33-
1125 token = '#'
34-
1126 columns = [self._column_name, self._low_value, self._high_value]
31+
Location: ./sdv/constraints/tabular.py:1128:16
32+
1127 def _get_diff_column_name(self, table_data):
33+
1128 token = '#'
34+
1129 columns = [self._column_name, self._low_value, self._high_value]
3535

3636
--------------------------------------------------
3737
>> Issue: [B101:assert_used] Use of assert detected. The enclosed code will be removed when compiling to optimised byte code.
@@ -48,28 +48,28 @@ Test results:
4848
Severity: Low Confidence: High
4949
CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html)
5050
More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b110_try_except_pass.html
51-
Location: ./sdv/metadata/single_table.py:536:12
52-
535
53-
536 except Exception:
54-
537 pass
55-
538
51+
Location: ./sdv/metadata/single_table.py:538:12
52+
537
53+
538 except Exception:
54+
539 pass
55+
540
5656

5757
--------------------------------------------------
5858
>> Issue: [B110:try_except_pass] Try, Except, Pass detected.
5959
Severity: Low Confidence: High
6060
CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html)
6161
More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b110_try_except_pass.html
62-
Location: ./sdv/multi_table/hma.py:340:12
63-
339 index.append(foreign_key_value)
64-
340 except Exception:
65-
341 # Skip children rows subsets that fail
66-
342 pass
67-
343
62+
Location: ./sdv/multi_table/hma.py:355:12
63+
354 index.append(foreign_key_value)
64+
355 except Exception:
65+
356 # Skip children rows subsets that fail
66+
357 pass
67+
358
6868

6969
--------------------------------------------------
7070

7171
Code scanned:
72-
Total lines of code: 12224
72+
Total lines of code: 12451
7373
Total lines skipped (#nosec): 0
7474
Total potential issues skipped due to specifically being disabled (e.g., #nosec BXXX): 0
7575

Diff for: tests/integration/constraints/test_tabular.py

+37
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,40 @@ def test_fixed_combinations_with_nans():
7070
data.drop_duplicates(ignore_index=True),
7171
check_like=True,
7272
)
73+
74+
75+
def test_fixedincrements_with_nullable_pandas_dtypes():
    """Test that FixedIncrements constraint works with nullable pandas dtypes.

    Every column is an unsigned nullable integer column holding multiples of 10
    plus one missing value. A ``FixedIncrements`` constraint with an increment of
    10 is added for each column, the synthesizer is fit and sampled, and the
    sampled data is expected to keep the input dtypes and to contain only
    multiples of 10.
    """
    # Setup
    data = pd.DataFrame({
        'UInt8': pd.Series([1, pd.NA, 3], dtype='UInt8') * 10,
        'UInt16': pd.Series([1, pd.NA, 4], dtype='UInt16') * 10,
        'UInt32': pd.Series([1, pd.NA, 5], dtype='UInt32') * 10,
        'UInt64': pd.Series([1, pd.NA, 6], dtype='UInt64') * 10,
    })
    metadata = SingleTableMetadata().load_from_dict({
        'columns': {
            'UInt8': {'sdtype': 'numerical', 'computer_representation': 'UInt8'},
            'UInt16': {'sdtype': 'numerical', 'computer_representation': 'UInt16'},
            'UInt32': {'sdtype': 'numerical', 'computer_representation': 'UInt32'},
            'UInt64': {'sdtype': 'numerical', 'computer_representation': 'UInt64'},
        }
    })
    gcs = GaussianCopulaSynthesizer(metadata)
    my_constraints = [
        {
            'constraint_class': 'FixedIncrements',
            'constraint_parameters': {'column_name': column, 'increment_value': 10},
        }
        for column in data.columns
    ]
    gcs.add_constraints(my_constraints)

    # Run
    gcs.fit(data)
    synthetic_data = gcs.sample(10)

    # Assert
    # The dtype comparison must be asserted; as a bare expression its result is
    # discarded and a dtype regression would go unnoticed.
    assert synthetic_data.dtypes.to_dict() == data.dtypes.to_dict()
    for column in data.columns:
        assert np.all(synthetic_data[column] % 10 == 0)

Diff for: tests/unit/constraints/test_tabular.py

+15
Original file line numberDiff line numberDiff line change
@@ -4583,6 +4583,21 @@ def test__transform(self):
45834583
expected = pd.DataFrame({'column': [1, 2, np.nan, 5]})
45844584
pd.testing.assert_frame_equal(transformed, expected)
45854585

4586+
@pytest.mark.parametrize('dtype', ['int16', 'Int16', 'int64', 'Int64'])
def test__transform_with_integer_columns(self, dtype):
    """Check that ``_transform`` returns the column in the given integer dtype.

    The input column holds multiples of 7 and the constraint uses an increment
    of 7, so the expected output is ``[1, 2, 3, 4]`` stored with the same
    ``dtype`` that was assigned to the constraint's ``_dtype`` attribute.
    """
    # Setup
    constraint = FixedIncrements(column_name='column', increment_value=7)
    constraint._dtype = dtype
    multiples = pd.Series([7, 14, 21, 28], dtype=dtype)
    table = pd.DataFrame({'column': multiples})

    # Run
    result = constraint._transform(table)

    # Assert
    quotients = pd.Series([1, 2, 3, 4], dtype=dtype)
    pd.testing.assert_frame_equal(result, pd.DataFrame({'column': quotients}))
4600+
45864601
def test_reverse_transform(self):
45874602
"""Test the ``FixedIncrements.reverse_transform`` method.
45884603

0 commit comments

Comments
 (0)