@@ -61,8 +61,18 @@ def load_from_dict(cls, metadata_dict, single_table_name=None):
61
61
instance ._set_metadata_dict (metadata_dict , single_table_name )
62
62
return instance
63
63
64
@staticmethod
def _validate_infer_sdtypes_and_keys(infer_sdtypes, infer_keys):
    """Validate the ``infer_sdtypes`` and ``infer_keys`` arguments.

    Args:
        infer_sdtypes (bool):
            Must be a boolean value.
        infer_keys (str or None):
            Must be ``'primary_and_foreign'``, ``'primary_only'`` or ``None``.

    Raises:
        ValueError:
            If ``infer_sdtypes`` is not a boolean, or if ``infer_keys`` is not one
            of the accepted options.
    """
    accepted_key_options = ['primary_and_foreign', 'primary_only', None]

    sdtypes_flag_is_bool = isinstance(infer_sdtypes, bool)
    if not sdtypes_flag_is_bool:
        raise ValueError("'infer_sdtypes' must be a boolean value.")

    # Guard clause: nothing more to check once the option is recognised.
    if infer_keys in accepted_key_options:
        return

    raise ValueError(
        "'infer_keys' must be one of: 'primary_and_foreign', 'primary_only', None."
    )
73
+
64
74
@classmethod
def detect_from_dataframes(cls, data, infer_sdtypes=True, infer_keys='primary_and_foreign'):
    """Detect the metadata for all tables in a dictionary of dataframes.

    This method automatically detects the ``sdtypes`` for the given ``pandas.DataFrames``.

    Args:
        data (dict):
            Dictionary of table names to dataframes.
        infer_sdtypes (bool):
            A boolean describing whether to infer the sdtypes of each column.
            If True it infers the sdtypes based on the data.
            If False it does not infer the sdtypes and all columns are marked as unknown.
            Defaults to True.
        infer_keys (str or None):
            A string describing whether to infer the primary and/or foreign keys. Options are:
                - 'primary_and_foreign': Infer the primary keys in each table,
                  and the foreign keys in other tables that refer to them
                - 'primary_only': Infer only the primary keys of each table
                - None: Do not infer any keys
            Defaults to 'primary_and_foreign'.

    Returns:
        Metadata:
            A new metadata object with the sdtypes detected from the data.

    Raises:
        ValueError:
            If ``data`` is empty or any of its values is not a ``pandas.DataFrame``,
            if ``infer_sdtypes`` is not a boolean, or if ``infer_keys`` is not one of
            the accepted options.
    """
    if not data or not all(isinstance(df, pd.DataFrame) for df in data.values()):
        raise ValueError('The provided dictionary must contain only pandas DataFrame objects.')

    # Shared validator for the inference options (also used by ``detect_from_dataframe``).
    cls._validate_infer_sdtypes_and_keys(infer_sdtypes, infer_keys)

    metadata = Metadata()
    for table_name, dataframe in data.items():
        metadata.detect_table_from_dataframe(table_name, dataframe, infer_sdtypes, infer_keys)

    # Relationships are foreign keys, so only detect them when they were requested.
    if infer_keys == 'primary_and_foreign':
        metadata._detect_relationships(data)

    return metadata
88
113
89
114
@classmethod
90
- def detect_from_dataframe (cls , data , table_name = DEFAULT_SINGLE_TABLE_NAME ):
115
+ def detect_from_dataframe (cls , data , table_name = DEFAULT_SINGLE_TABLE_NAME , infer_sdtypes = True , infer_keys = 'primary_and_foreign' ):
91
116
"""Detect the metadata for a DataFrame.
92
117
93
118
This method automatically detects the ``sdtypes`` for the given ``pandas.DataFrame``.
@@ -96,13 +121,26 @@ def detect_from_dataframe(cls, data, table_name=DEFAULT_SINGLE_TABLE_NAME):
96
121
Args:
97
122
data (pandas.DataFrame):
98
123
Dictionary of table names to dataframes.
124
+ infer_sdtypes (bool):
125
+ A boolean describing whether to infer the sdtypes of each column.
126
+ If True it infers the sdtypes based on the data.
127
+ If False it does not infer the sdtypes and all columns are marked as unknown.
128
+ Defaults to True.
129
+ infer_keys (str):
130
+ A string describing whether to infer the primary and/or foreign keys. Options are:
131
+ - 'primary_and_foreign': Infer the primary keys in each table,
132
+ and the foreign keys in other tables that refer to them
133
+ - 'primary_only': Infer only the primary keys of each table
134
+ - None: Do not infer any keys
135
+ Defaults to 'primary_and_foreign'.
99
136
100
137
Returns:
101
138
Metadata:
102
139
A new metadata object with the sdtypes detected from the data.
103
140
"""
104
141
if not isinstance (data , pd .DataFrame ):
105
142
raise ValueError ('The provided data must be a pandas DataFrame object.' )
143
+ cls ._validate_infer_sdtypes_and_keys (infer_sdtypes , infer_keys )
106
144
107
145
metadata = Metadata ()
108
146
metadata .detect_table_from_dataframe (table_name , data )
0 commit comments