diff --git a/deepecho/models/base.py b/deepecho/models/base.py
index 545a3e7..2c30243 100644
--- a/deepecho/models/base.py
+++ b/deepecho/models/base.py
@@ -54,12 +54,12 @@ def fit_sequences(self, sequences, context_types, data_types):
                 For example, a sequence might look something like::
 
                     {
-                        "context": [1],
-                        "data": [
+                        'context': [1],
+                        'data': [
                             [1, 3, 4, 5, 11, 3, 4],
-                            [2, 2, 3, 4, 5, 1, 2],
-                            [1, 3, 4, 5, 2, 3, 1]
-                        ]
+                            [2, 2, 3, 4, 5, 1, 2],
+                            [1, 3, 4, 5, 2, 3, 1],
+                        ],
                     }
 
                 The "context" attribute maps to a list of variables which
diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py
index c6beadb..4cdeb6e 100644
--- a/deepecho/models/basic_gan.py
+++ b/deepecho/models/basic_gan.py
@@ -271,7 +271,7 @@ def _analyze_data(self, sequences, context_types, data_types):
     def _normalize(tensor, value, properties):
         """Normalize the value between 0 and 1 and flag nans."""
         value_idx, missing_idx = properties['indices']
-        if pd.isnull(value):
+        if pd.isna(value):
             tensor[value_idx] = 0.0
             tensor[missing_idx] = 1.0
         else:
@@ -493,12 +493,12 @@ def fit_sequences(self, sequences, context_types, data_types):
                 For example, a sequence might look something like::
 
                     {
-                        "context": [1],
-                        "data": [
+                        'context': [1],
+                        'data': [
                             [1, 3, 4, 5, 11, 3, 4],
-                            [2, 2, 3, 4, 5, 1, 2],
-                            [1, 3, 4, 5, 2, 3, 1]
-                        ]
+                            [2, 2, 3, 4, 5, 1, 2],
+                            [1, 3, 4, 5, 2, 3, 1],
+                        ],
                     }
 
                 The "context" attribute maps to a list of variables which
diff --git a/deepecho/models/par.py b/deepecho/models/par.py
index 57e2a68..46996a4 100644
--- a/deepecho/models/par.py
+++ b/deepecho/models/par.py
@@ -131,7 +131,7 @@ def _idx_map(self, x, t):
                     'type': t,
                     'mu': np.nanmean(x[i]),
                     'std': np.nanstd(x[i]),
-                    'nulls': pd.isnull(x[i]).any(),
+                    'nulls': pd.isna(x[i]).any(),
                     'indices': (idx, idx + 1, idx + 2),
                 }
                 idx += 3
@@ -141,7 +141,7 @@ def _idx_map(self, x, t):
                     'type': t,
                     'min': np.nanmin(x[i]),
                     'range': np.nanmax(x[i]) - np.nanmin(x[i]),
-                    'nulls': pd.isnull(x[i]).any(),
+                    'nulls': pd.isna(x[i]).any(),
                     'indices': (idx, idx + 1, idx + 2),
                 }
                 idx += 3
@@ -150,7 +150,7 @@ def _idx_map(self, x, t):
                 idx_map[i] = {'type': t, 'indices': {}}
                 idx += 1
                 for v in set(x[i]):
-                    if pd.isnull(v):
+                    if pd.isna(v):
                         v = None
 
                     idx_map[i]['indices'][v] = idx
@@ -210,30 +210,30 @@ def _data_to_tensor(self, data):
 
                 elif props['type'] in ['continuous', 'timestamp']:
                     mu_idx, sigma_idx, missing_idx = props['indices']
-                    if pd.isnull(data[key][i]) or props['std'] == 0:
+                    if pd.isna(data[key][i]) or props['std'] == 0:
                         x[mu_idx] = 0.0
                     else:
                         x[mu_idx] = (data[key][i] - props['mu']) / props['std']
 
                     x[sigma_idx] = 0.0
-                    x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0
+                    x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0
 
                 elif props['type'] in ['count']:
                     r_idx, p_idx, missing_idx = props['indices']
-                    if pd.isnull(data[key][i]) or props['range'] == 0:
+                    if pd.isna(data[key][i]) or props['range'] == 0:
                         x[r_idx] = 0.0
                     else:
                         x[r_idx] = (data[key][i] - props['min']) / props['range']
 
                     x[p_idx] = 0.0
-                    x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0
+                    x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0
 
                 elif props['type'] in [
                     'categorical',
                     'ordinal',
                 ]:  # categorical
                     value = data[key][i]
-                    if pd.isnull(value):
+                    if pd.isna(value):
                         value = None
 
                     x[props['indices'][value]] = 1.0
@@ -258,25 +258,25 @@ def _context_to_tensor(self, context):
                 mu_idx, sigma_idx, missing_idx = props['indices']
                 x[mu_idx] = (
                     0.0
-                    if (pd.isnull(context[key]) or props['std'] == 0)
+                    if (pd.isna(context[key]) or props['std'] == 0)
                     else (context[key] - props['mu']) / props['std']
                 )
                 x[sigma_idx] = 0.0
-                x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0
+                x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0
 
             elif props['type'] in ['count']:
                 r_idx, p_idx, missing_idx = props['indices']
                 x[r_idx] = (
                     0.0
-                    if (pd.isnull(context[key]) or props['range'] == 0)
+                    if (pd.isna(context[key]) or props['range'] == 0)
                     else (context[key] - props['min']) / props['range']
                 )
                 x[p_idx] = 0.0
-                x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0
+                x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0
 
             elif props['type'] in ['categorical', 'ordinal']:
                 value = context[key]
-                if pd.isnull(value):
+                if pd.isna(value):
                     value = None
 
                 x[props['indices'][value]] = 1.0
@@ -295,12 +295,12 @@ def fit_sequences(self, sequences, context_types, data_types):
                 For example, a sequence might look something like::
 
                     {
-                        "context": [1],
-                        "data": [
+                        'context': [1],
+                        'data': [
                             [1, 3, 4, 5, 11, 3, 4],
-                            [2, 2, 3, 4, 5, 1, 2],
-                            [1, 3, 4, 5, 2, 3, 1]
-                        ]
+                            [2, 2, 3, 4, 5, 1, 2],
+                            [1, 3, 4, 5, 2, 3, 1],
+                        ],
                     }
 
                 The "context" attribute maps to a list of variables which
@@ -406,9 +406,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len):
                     p_true = X_padded[: seq_len[i], i, missing_idx]
                     p_pred = missing[: seq_len[i], i]
                     log_likelihood += torch.sum(p_true * p_pred)
-                    log_likelihood += torch.sum(
-                        (1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))
-                    )
+                    log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)))
 
             elif props['type'] in ['count']:
                 r_idx, p_idx, missing_idx = props['indices']
@@ -428,9 +426,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len):
                     p_true = X_padded[: seq_len[i], i, missing_idx]
                     p_pred = missing[: seq_len[i], i]
                     log_likelihood += torch.sum(p_true * p_pred)
-                    log_likelihood += torch.sum(
-                        (1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))
-                    )
+                    log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)))
 
             elif props['type'] in ['categorical', 'ordinal']:
                 idx = list(props['indices'].values())
diff --git a/deepecho/sequences.py b/deepecho/sequences.py
index 847c511..8f0a11f 100644
--- a/deepecho/sequences.py
+++ b/deepecho/sequences.py
@@ -59,7 +59,7 @@ def segment_by_time(sequence, segment_size, sequence_index):
     while start <= max_time:
         end = start + segment_size
         selected = (start <= sequence_index) & (sequence_index < end)
-        sequences.append(sequence[selected.values].reset_index(drop=True))
+        sequences.append(sequence[selected.to_numpy()].reset_index(drop=True))
         start = end
 
     return sequences
@@ -112,7 +112,7 @@ def _convert_to_dicts(segments, context_columns):
             if len(context.drop_duplicates()) > 1:
                 raise ValueError('Context columns are not constant within each segment.')
 
-            context = context.iloc[0].values
+            context = context.iloc[0].to_numpy()
             segment = segment.drop(context_columns, axis=1)
         else:
             context = []
@@ -180,7 +180,7 @@ def assemble_sequences(
         segments = []
         groupby_columns = entity_columns[0] if len(entity_columns) == 1 else entity_columns
         for _, sequence in data.groupby(groupby_columns):
-            sequence.drop(entity_columns, axis=1, inplace=True)
+            sequence = sequence.drop(entity_columns, axis=1)
            if context_columns:
                if len(sequence[context_columns].drop_duplicates()) > 1:
                    raise ValueError('Context columns are not constant within each entity.')
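
Reviewer note (not part of the patch): every substantive hunk above is a mechanical pandas API migration, while the remaining hunks (docstring quote style, trailing commas, re-joined log-likelihood lines) only change formatting. A minimal sketch of the pandas behaviors the migration relies on, using a small hypothetical frame, looks something like::

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, 3.0])

    # pd.isnull is an alias of pd.isna; both flag NaN/None identically,
    # so the rename is behavior-preserving.
    assert pd.isna(s).equals(pd.isnull(s))

    # Series.to_numpy() is the documented replacement for the .values
    # attribute when a plain ndarray is needed for boolean indexing.
    mask = (s > 1.0).to_numpy()
    print(s[mask])  # keeps only the rows where the mask is True

    # The frames yielded by groupby iteration are copies, so mutating them
    # with drop(..., inplace=True) never updates the source frame and can
    # trigger SettingWithCopyWarning; rebinding the result of drop() is
    # equivalent inside the loop and warning-free.
    df = pd.DataFrame({'entity': ['a', 'a', 'b'], 'value': [1, 2, 3]})
    for _, group in df.groupby('entity'):
        group = group.drop('entity', axis=1)
        print(group)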