You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
- An optional wanted_values parameter has been added to the OneHotEncodingTransformer, which allows users to specify which levels in a column they wish to encode. `#384 <https://github.com/azukds/tubular/issues/384>`_
50
+
- Created unit tests to check if the values provided for wanted_values are as expected and if the output is as expected.
Test to verify that OneHotEncodingTransformer.transform zero-fills levels from the user-specified "wanted_values" and encodes only those listed in "wanted_values".
Copy file name to clipboardexpand all lines: tubular/nominal.py
+83-10
Original file line number
Diff line number
Diff line change
@@ -1134,6 +1134,9 @@ class OneHotEncodingTransformer(
1134
1134
Names of columns to transform. If the default of None is supplied all object and category
1135
1135
columns in X are used.
1136
1136
1137
+
wanted_values: dict[str, list[str]] or None, default = None
1138
+
Optional parameter to select specific column levels to be transformed. If it is None, all levels in the categorical column will be encoded. It will take the format {col1: [level_1, level_2, ...]}.
1139
+
1137
1140
separator : str
1138
1141
Used to create dummy column names, the name will take
1139
1142
the format [categorical feature][separator][category level]
@@ -1170,6 +1173,7 @@ class attribute, indicates whether transformer has been converted to polars/pand
1170
1173
def__init__(
1171
1174
self,
1172
1175
columns: str|list[str] |None=None,
1176
+
wanted_values: dict[str, list[str]] |None=None,
1173
1177
separator: str="_",
1174
1178
drop_original: bool=False,
1175
1179
copy: bool|None=None,
@@ -1184,6 +1188,29 @@ def __init__(
1184
1188
**kwargs,
1185
1189
)
1186
1190
1191
+
ifwanted_valuesisnotNone:
1192
+
ifnotisinstance(wanted_values, dict):
1193
+
msg=f"{self.classname()}: wanted_values should be a dictionary"
1194
+
raiseTypeError(msg)
1195
+
1196
+
forkey, val_listinwanted_values.items():
1197
+
# check key is a string
1198
+
ifnotisinstance(key, str):
1199
+
msg=f"{self.classname()}: Key in 'wanted_values' should be a string"
1200
+
raiseTypeError(msg)
1201
+
1202
+
# check value is a list
1203
+
ifnotisinstance(val_list, list):
1204
+
msg=f"{self.classname()}: Values in the 'wanted_values' dictionary should be a list"
1205
+
raiseTypeError(msg)
1206
+
1207
+
# check if each value within the list is a string
1208
+
forvalinval_list:
1209
+
ifnotisinstance(val, str):
1210
+
msg=f"{self.classname()}: Entries in 'wanted_values' list should be a string"
0 commit comments