# add_reward.py

import numpy as np
import pandas as pd
from cache_em_all import Cachable
from load_data import load_data

# Utility function from the PhysioNet competition:
# https://github.com/physionetchallenges/evaluation-2019/blob/master/evaluate_sepsis_score.py
def compute_prediction_utility(labels, predictions, dt_early=-12, dt_optimal=-6, dt_late=3.0, max_u_tp=1, min_u_fn=-2, u_fp=-0.05, u_tn=0, check_errors=True):
    '''
    Score binary sepsis predictions for one patient, hour by hour.

    Inputs:
    labels: Actual sepsis labels for each hour, a sequence of 0 or 1.
    predictions: Output from model for each hour, also 0 or 1.
    dt_early, dt_optimal, dt_late: Offsets relative to t_sepsis, where
        t_sepsis is the index of the first positive label minus dt_optimal.
        A positive prediction starts at zero utility (before the u_fp floor)
        at t_sepsis + dt_early, peaks at t_sepsis + dt_optimal and falls
        back to zero at t_sepsis + dt_late.
    max_u_tp: Utility of a positive prediction exactly at
        t_sepsis + dt_optimal for a septic patient.
    min_u_fn: Utility of a negative prediction at t_sepsis + dt_late for a
        septic patient.
    u_fp: Utility of a positive prediction for a non-septic patient; also
        the floor applied to very early positive predictions.
    u_tn: Utility of a negative prediction for a non-septic patient.
    check_errors: When true, validate the inputs before scoring.

    Output:
    Numpy array with the utility at each hour (same length as labels).
    Hours later than t_sepsis + dt_late keep a utility of 0.

    Raises:
    Exception if check_errors is set and the inputs differ in length,
    contain values other than 0/1, or the dt_* offsets are not strictly
    increasing.
    '''
    if check_errors:
        if len(predictions) != len(labels):
            raise Exception('Numbers of predictions and labels must be the same.')
        if any(label not in (0, 1) for label in labels):
            raise Exception('Labels must satisfy label == 0 or label == 1.')
        if any(prediction not in (0, 1) for prediction in predictions):
            raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.')
        if dt_early >= dt_optimal:
            raise Exception('The earliest beneficial time for predictions must be before the optimal time.')
        if dt_optimal >= dt_late:
            raise Exception('The optimal time for predictions must be before the latest beneficial time.')

    # A patient is septic if any hour carries a positive label; t_sepsis is
    # placed so that the first positive label sits at t_sepsis + dt_optimal.
    is_septic = bool(np.any(labels))
    t_sepsis = np.argmax(labels) - dt_optimal if is_septic else float('inf')

    num_hours = len(labels)

    # Three linear pieces u = slope * (t - t_sepsis) + intercept:
    # rising true positive, falling true positive, falling false negative.
    tp_rise_slope = float(max_u_tp) / float(dt_optimal - dt_early)
    tp_rise_intercept = -tp_rise_slope * dt_early
    tp_fall_slope = float(-max_u_tp) / float(dt_late - dt_optimal)
    tp_fall_intercept = -tp_fall_slope * dt_late
    fn_slope = float(min_u_fn) / float(dt_late - dt_optimal)
    fn_intercept = -fn_slope * dt_optimal

    # These bounds do not change inside the loop.
    optimal_cutoff = t_sepsis + dt_optimal
    late_cutoff = t_sepsis + dt_late

    utility = np.zeros(num_hours)
    for hour in range(num_hours):
        if not hour <= late_cutoff:
            # Past the latest beneficial time: utility stays at 0.
            continue

        predicted_positive = predictions[hour]

        if not is_septic:
            # FP when the model fires, TN otherwise.
            utility[hour] = u_fp if predicted_positive else u_tn
            continue

        offset = hour - t_sepsis
        up_to_optimal = hour <= optimal_cutoff
        if predicted_positive:
            # TP: ramp up (never below u_fp) until the optimal time, then
            # ramp down until the latest beneficial time.
            if up_to_optimal:
                utility[hour] = max(tp_rise_slope * offset + tp_rise_intercept, u_fp)
            else:
                utility[hour] = tp_fall_slope * offset + tp_fall_intercept
        else:
            # FN: free until the optimal time, increasingly penalised after.
            if up_to_optimal:
                utility[hour] = 0
            else:
                utility[hour] = fn_slope * offset + fn_intercept

    # Per-hour utilities are returned rather than their sum.
    return utility

def add_reward(labels, prediction=None):
    '''
    Helper function to be used in the pandas apply.

    Computes the per-hour utility of a constant prediction against the
    given labels.

    Inputs:
    labels: Sepsis labels (0 or 1) for each hour of one patient.
    prediction: 'zero' to score an all-zeros prediction, 'one' to score an
        all-ones prediction. Any other value, including the default None,
        is rejected.

    Output:
    A new pandas Series holding the utility at each hour.

    Raises:
    ValueError if prediction is neither 'zero' nor 'one'.
    '''
    if prediction == 'zero':
        preds = np.zeros(len(labels))
    elif prediction == 'one':
        preds = np.ones(len(labels))
    else:
        # Previously this fell through with `preds` unbound and failed with
        # an UnboundLocalError; fail early with a clear message instead.
        raise ValueError(
            "prediction must be 'zero' or 'one', got {!r}".format(prediction)
        )
    return pd.Series(compute_prediction_utility(labels, preds))

def add_reward_df(df):
    '''
    Add the reward columns for the two constant policies to the dataframe.

    Groups df by 'patient' and applies add_reward to each patient's
    SepsisLabel values, once with an all-zeros prediction and once with an
    all-ones prediction. The results are written into df in place as
    'zeros_reward' and 'ones_reward', and the same df is returned.

    NOTE(review): the assignment aligns the applied result with df on its
    index; confirm the index produced by the groupby-apply matches df's.
    '''
    by_patient = df.groupby('patient')
    reward_columns = (
        ('zeros_reward', 'zero'),
        ('ones_reward', 'one'),
    )
    for column, constant_prediction in reward_columns:
        rewards = by_patient['SepsisLabel'].apply(add_reward, prediction=constant_prediction)
        df[column] = rewards
    return df

def add_end_episode(df):
    '''
    Helper function to signify end of a patient's observations.

    Inputs:
    df: Observations of a single patient; only its number of rows
        (df.shape[0]) is used.

    Output:
    A new pandas Series of that length which is 0 everywhere except for a 1
    in the last position. An input with no rows yields an empty Series.
    '''
    n = df.shape[0]
    end_episode = np.zeros(n)
    # With no rows there is no last position to flag; indexing [-1] on an
    # empty array would raise IndexError.
    if n > 0:
        end_episode[-1] = 1
    return pd.Series(end_episode)

@Cachable('training_setA_rewards.csv')
def add_end_episode_df(df):
    '''
    Mark the end of each patient's observations, to be used in an
    OpenAI Gym environment to signal the end of an episode.

    Groups df by 'patient', applies add_end_episode to each patient's
    'hours' values and stores the result in df in place as 'end_episode'.
    The same df is returned.

    NOTE(review): presumably the Cachable decorator stores the returned
    frame under 'training_setA_rewards.csv' -- confirm against the
    cache_em_all documentation.
    '''
    hours_by_patient = df.groupby('patient')['hours']
    df['end_episode'] = hours_by_patient.apply(add_end_episode)
    return df
    

if __name__ == "__main__":
    # Script entry point: load the data, attach the reward columns, then
    # flag the last observation of every patient. add_reward_df returns the
    # very frame it was given, so its result can be bound directly.
    df = add_reward_df(load_data())
    add_end_episode_df(df)