Skip to content

Commit d49ebbf

Browse files
committed
Remove columns with duplicated names
This is a safety net for the case where an encoding produces duplicated column names (most likely due to a bug in the encoding computation).
1 parent 755f38a commit d49ebbf

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

baybe/parameters/substance.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Substance parameters."""
22

33
import gc
4+
import warnings
45
from functools import cached_property
56
from typing import Any, ClassVar
67

@@ -140,8 +141,17 @@ def comp_df(self) -> pd.DataFrame:
140141
kwargs_fingerprint=self.kwargs_fingerprint,
141142
)
142143

143-
# Drop NaN and constant columns
144+
# Drop NaN, constant columns and columns with duplicated names
144145
comp_df = comp_df.loc[:, ~comp_df.isna().any(axis=0)]
146+
mask_duplicated_columns = comp_df.columns.duplicated()
147+
if any(mask_duplicated_columns):
148+
warnings.warn(
149+
f"There were duplicated column names for the substance parameter "
150+
f"{self.name} with encoding {self.encoding.name}. This could indicate "
151+
f"bugs with the encoding computation. The duplicated columns will be "
152+
f"dropped."
153+
)
154+
comp_df = comp_df.loc[:, ~mask_duplicated_columns]
145155
comp_df = df_drop_single_value_columns(comp_df)
146156

147157
# Label the rows with the molecule names

0 commit comments

Comments
 (0)