We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e8d0c8e, commit a408f0a (Copy full SHA for a408f0a)
modnet/featurizers/utils.py
@@ -3,20 +3,22 @@
3
# Names exported by a star-import of this module.
__all__ = ("clean_df",)
4
5
6
def clean_df(df, drop_allnan: bool = True):
    """Clean a dataframe so that it only holds numerical data, with NaN
    as the single marker for missing or non-finite values.

    Non-numerical columns are discarded and positive/negative infinities
    are replaced by NaN. Optionally, features that carry no usable value
    at all are removed.

    Args:
        df (pd.DataFrame): the dataframe to clean.
        drop_allnan: if True, clean_df will remove features that are fully
            NaNs. Infinities are converted to NaN beforehand, so a feature
            made up only of NaN and/or infinite values is removed as well.
            If False, every numerical column is kept.

    Returns:
        pandas.DataFrame: the cleaned dataframe.

    """
    df = df.select_dtypes(include="number")

    # Convert infinities *before* dropping: otherwise a column containing
    # only +/-inf would turn into an all-NaN column after the drop step
    # and slip through even though drop_allnan was requested.
    df = df.replace([np.inf, -np.inf, np.nan], np.nan)

    if drop_allnan:
        df = df.dropna(axis=1, how="all")

    return df
0 commit comments