Merge pull request #103 from fact-project/remove_source_dependent

kbruegge · web-flow · commit 30a266f76498 · 2018-03-05T10:29:02.000+01:00
Remove functions for source-dependent separation, fixes #100
diff --git a/fact/VERSION b/fact/VERSION
@@ -1 +1 @@
-0.17.0
+0.18.0
diff --git a/fact/analysis/__init__.py b/fact/analysis/__init__.py
@@ -2,7 +2,6 @@
 from .binning import ontime_binning, qla_binning, groupby_observation_blocks, bin_runs
 
 from .core import calc_run_summary_source_independent, split_on_off_source_independent
-from .core import calc_run_summary_source_dependent, split_on_off_source_dependent
 
 from .source import (
     calc_theta_equatorial,
@@ -18,8 +17,6 @@
     'groupby_observation_blocks',
     'bin_runs',
     'calc_run_summary_source_independent',
-    'calc_run_summary_source_dependent',
-    'split_on_off_source_dependent',
     'split_on_off_source_independent',
     'calc_theta_equatorial',
     'calc_theta_camera',
diff --git a/fact/analysis/core.py b/fact/analysis/core.py
@@ -123,108 +123,6 @@ def split_on_off_source_independent(
     return on_data, off_data
 
 
-def calc_run_summary_source_dependent(
-        events, runs,
-        prediction_threshold,
-        on_prediction_key='gamma_prediction',
-        off_prediction_keys=default_prediction_off_keys,
-        ):
-    '''
-    Calculate run summaries for the given signal prediction cuts.
-    This function needs to be used, if source dependent features like
-    Theta were used for the classification.
-
-    Parameters
-    ----------
-    events: pd.DataFrame
-        DataFrame with event data, needs to contain the columns
-        `'night'`, `'run'`, `theta_key` and the `theta_off_keys`
-    prediction_threshold: float
-        Threshold for the signalness prediction
-    on_prediction_key: str
-        Key to the classifier prediction for the on region
-    off_prediction_keys: list[str]
-        Iterable of keys to the classifier predictions for the off regions
-    '''
-
-    runs = runs.set_index(['night', 'run_id'])
-    runs.sort_index(inplace=True)
-
-    on_data, off_data = split_on_off_source_dependent(
-        events, prediction_threshold, on_prediction_key, off_prediction_keys
-    )
-
-    alpha = 1 / len(off_prediction_keys)
-
-    runs['n_on'] = on_data.groupby(['night', 'run_id']).size()
-    runs['n_on'].fillna(0, inplace=True)
-
-    runs['n_off'] = off_data.groupby(['night', 'run_id']).size()
-    runs['n_off'].fillna(0, inplace=True)
-
-    runs['significance'] = li_ma_significance(
-        runs['n_on'], runs['n_off'], alpha
-    )
-
-    runs['n_excess'] = runs['n_on'] - alpha * runs['n_off']
-    runs['n_excess_err'] = np.sqrt(runs['n_on'] + alpha**2 * runs['n_off'])
-
-    runs['excess_rate_per_h'] = runs['n_excess'] / runs['ontime'] / 3600
-    runs['excess_rate_per_h_err'] = runs['n_excess_err'] / runs['ontime'] / 3600
-
-    runs.reset_index(inplace=True)
-
-    return runs
-
-
-def split_on_off_source_dependent(
-        events,
-        prediction_threshold,
-        on_prediction_key='gamma_prediction',
-        off_prediction_keys=default_prediction_off_keys,
-        ):
-    '''
-    Split events dataframe into on and off region
-    For the off regions, keys are renamed to their "on" equivalents
-    and the "off" keys are dropped.
-
-    Parameters
-    ----------
-    events: pd.DataFrame
-        DataFrame containing event information, required are
-        `theta_key` and `theta_off_keys`.
-    prediction_threshold: float
-        Threshold for the signalness prediction
-    on_prediction_key: str
-        Key to the classifier prediction for the on region
-    off_prediction_keys: list[str]
-        Iterable of keys to the classifier predictions for the off regions
-    '''
-    on_data = events.query('{} >= {}'.format(
-        on_prediction_key, prediction_threshold)
-    ).copy()
-
-    off_dfs = []
-    for region, off_key in enumerate(off_prediction_keys, start=1):
-        off_df = events.query('{} >= {}'.format(
-            off_key, prediction_threshold)
-        ).copy()
-
-        off_df['off_region'] = region
-
-        off_df.drop(on_prediction_key, axis=1, inplace=True)
-        off_df[on_prediction_key] = off_df[off_key]
-        off_df.drop(off_key, axis=1, inplace=True)
-
-        drop_off_columns(off_df, region, inplace=True)
-
-        off_dfs.append(off_df)
-
-    off_data = pd.concat(off_dfs)
-
-    return on_data, off_data
-
-
 def drop_off_columns(df, off_region, inplace=False):
     '''
     Replace the "On" column with the column