Skip to content

Commit d6f6b5c

Browse files
committed
def
1 parent ca7892c commit d6f6b5c

File tree

2 files changed

+37
-19
lines changed

2 files changed

+37
-19
lines changed

sdv/metadata/multi_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ def detect_table_from_dataframe(self, table_name, data):
544544
"""
545545
self._validate_table_not_detected(table_name)
546546
table = SingleTableMetadata()
547-
table._detect_columns(data)
547+
table._detect_columns(data, table_name)
548548
self.tables[table_name] = table
549549
self._log_detected_table(table)
550550

sdv/metadata/single_table.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -595,35 +595,53 @@ def _detect_primary_key(self, data):
595595

596596
return None
597597

598-
def _detect_columns(self, data):
598+
def _detect_columns(self, data, table_name=None):
599599
"""Detect the columns' sdtypes from the data.
600600
601601
Args:
602602
data (pandas.DataFrame):
603603
The data to be analyzed.
604+
table_name (str):
605+
The name of the table to be analyzed. Defaults to ``None``.
604606
"""
605607
old_columns = data.columns
606608
data.columns = data.columns.astype(str)
607609
for field in data:
608-
column_data = data[field]
609-
clean_data = column_data.dropna()
610-
dtype = clean_data.infer_objects().dtype.kind
611-
612-
sdtype = self._detect_pii_column(field)
613-
if sdtype is None:
614-
if dtype in self._DTYPES_TO_SDTYPES:
615-
sdtype = self._DTYPES_TO_SDTYPES[dtype]
616-
elif dtype in ['i', 'f', 'u']:
617-
sdtype = self._determine_sdtype_for_numbers(column_data)
618-
619-
elif dtype == 'O':
620-
sdtype = self._determine_sdtype_for_objects(column_data)
610+
try:
611+
column_data = data[field]
612+
clean_data = column_data.dropna()
613+
dtype = clean_data.infer_objects().dtype.kind
621614

615+
sdtype = self._detect_pii_column(field)
622616
if sdtype is None:
623-
raise InvalidMetadataError(
624-
f"Unsupported data type for column '{field}' (kind: {dtype})."
625-
"The valid data types are: 'object', 'int', 'float', 'datetime', 'bool'."
626-
)
617+
if dtype in self._DTYPES_TO_SDTYPES:
618+
sdtype = self._DTYPES_TO_SDTYPES[dtype]
619+
elif dtype in ['i', 'f', 'u']:
620+
sdtype = self._determine_sdtype_for_numbers(column_data)
621+
622+
elif dtype == 'O':
623+
sdtype = self._determine_sdtype_for_objects(column_data)
624+
625+
if sdtype is None:
626+
table_str = f"table '{table_name}' " if table_name else ''
627+
error_message = (
628+
f"Unsupported data type for {table_str}column '{field}' (kind: {dtype}"
629+
"). The valid data types are: 'object', 'int', 'float', 'datetime',"
630+
" 'bool'."
631+
)
632+
raise InvalidMetadataError(error_message)
633+
634+
except Exception as e:
635+
error_type = type(e).__name__
636+
if error_type == 'InvalidMetadataError':
637+
raise e
638+
639+
table_str = f"table '{table_name}' " if table_name else ''
640+
error_message = (
641+
f"Unable to detect metadata for {table_str}column '{field}' due to an invalid "
642+
f'data format.\n {error_type}: {e}'
643+
)
644+
raise InvalidMetadataError(error_message) from e
627645

628646
column_dict = {'sdtype': sdtype}
629647
sdtype_in_reference = sdtype in self._REFERENCE_TO_SDTYPE.values()

0 commit comments

Comments
 (0)