@@ -38,7 +38,7 @@ def test_wanted_values_is_dict(self, values, minimal_attribute_dict):
38
38
39
39
with pytest .raises (
40
40
TypeError ,
41
- match = "OneHotEncodingTransformer: Wanted_values should be a dictionary" ,
41
+ match = "OneHotEncodingTransformer: wanted_values should be a dictionary" ,
42
42
):
43
43
OneHotEncodingTransformer (** args )
44
44
@@ -131,7 +131,8 @@ def test_fit_missing_levels_warning(self, library):
131
131
df = d .create_df_1 (library = library )
132
132
133
133
transformer = OneHotEncodingTransformer (
134
- columns = ["b" ], wanted_values = {"b" : ["f" , "g" ]}
134
+ columns = ["b" ],
135
+ wanted_values = {"b" : ["f" , "g" ]},
135
136
)
136
137
137
138
with pytest .warns (
@@ -160,6 +161,21 @@ def test_fields_with_over_100_levels_error(self, library):
160
161
):
161
162
transformer .fit (df )
162
163
164
@pytest.mark.parametrize(
    "library",
    ["pandas", "polars"],
)
def test_fit_no_warning_if_all_wanted_values_present(self, library):
    """Test that OneHotEncodingTransformer.fit does NOT raise a warning when all levels in wanted_values are present in the data."""
    # Local import keeps this test self-contained.
    import warnings

    df = d.create_df_1(library=library)

    transformer = OneHotEncodingTransformer(
        columns=["b"],
        wanted_values={"b": ["a", "b", "c", "d", "e", "f"]},
    )

    # pytest.warns(None) merely recorded warnings and never failed the test
    # (and newer pytest versions reject it), so escalate every warning to an
    # exception instead: fit() emitting any warning now fails the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        transformer.fit(df)
178
+
163
179
164
180
class TestTransform (
165
181
DropOriginalTransformMixinTests ,
@@ -378,7 +394,8 @@ def test_transform_missing_levels_warning(self, library):
378
394
df_test = d .create_df_8 (library = library )
379
395
380
396
transformer = OneHotEncodingTransformer (
381
- columns = ["b" ], wanted_values = {"b" : ["v" , "x" , "z" ]}
397
+ columns = ["b" ],
398
+ wanted_values = {"b" : ["v" , "x" , "z" ]},
382
399
)
383
400
384
401
transformer .fit (df_train )
@@ -433,42 +450,64 @@ def test_unseen_categories_encoded_as_all_zeroes(self, library):
433
450
df_expected_row ,
434
451
)
435
452
436
-
437
453
@pytest.mark.parametrize(
    "library",
    ["pandas", "polars"],
)
def test_transform_output_with_wanted_values_arg(self, library):
    """Check the transform output when "wanted_values" is supplied by the user.

    Only the levels listed in "wanted_values" get an encoded column, and a
    listed level that is absent from the transformed data yields an
    all-zero column.
    """
    train_frame = d.create_df_7(library=library)
    test_frame = d.create_df_8(library=library)

    encoder = OneHotEncodingTransformer(
        columns=["b"],
        wanted_values={"b": ["v", "x", "z"]},
    )
    encoder.fit(train_frame)
    actual = encoder.transform(test_frame)

    # Build the expected frame natively, then wrap it so the casts below are
    # expressed once for both backends.
    expected = nw.from_native(
        dataframe_init_dispatch(
            library=library,
            dataframe_dict={
                "a": [1, 5, 2, 3, 3],
                "b": ["w", "w", "z", "y", "x"],
                "c": ["a", "a", "c", "b", "a"],
                "b_v": [0, 0, 0, 0, 0],
                "b_x": [0, 0, 0, 0, 1],
                "b_z": [0, 0, 1, 0, 0],
            },
        ),
    )

    # The encoded columns are compared as booleans and "c" as a categorical.
    encoded_names = ("b_v", "b_x", "b_z")
    expected = expected.with_columns(
        *(nw.col(name).cast(nw.Boolean) for name in encoded_names),
    )
    expected = expected.with_columns(nw.col("c").cast(nw.Categorical))

    assert_frame_equal_dispatch(actual, expected.to_native())
474
-
497
+
498
@pytest.mark.parametrize(
    "library",
    ["pandas", "polars"],
)
def test_transform_no_warning_if_all_wanted_values_present(self, library):
    """Test that OneHotEncodingTransformer.transform does NOT raise a warning when all levels in wanted_values are present in the data."""
    # Local import keeps this test self-contained.
    import warnings

    df_train = d.create_df_7(library=library)
    df_test = d.create_df_8(library=library)

    transformer = OneHotEncodingTransformer(
        columns=["b"],
        wanted_values={"b": ["x", "z", "y"]},
    )
    # fit() stays outside the strict block: only transform() is under test.
    transformer.fit(df_train)

    # pytest.warns(None) merely recorded warnings and never failed the test
    # (and newer pytest versions reject it), so escalate every warning to an
    # exception instead: transform() emitting any warning now fails the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        transformer.transform(df_test)
0 commit comments