Merge pull request #33 from FAST-HEP/BK_add_unstack_stage_postproc

benkrikler · web-flow · commit 21e45e506a18 · 2020-04-29T11:10:23.000+02:00
Add stage to postproc to unstack weights
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.3] - 2020-04-29
+### Added
+- Add GenericPandas and UnstackWeights stages, PR #33 [@benkrikler](https://github.com/benkrikler)
+
 ## [0.6.2] - 2020-04-21
 ### Fixed
 - Fix `split` function in postproc module to work with numbers, PR #32 [@benkrikler](github.com/benkrikler)
diff --git a/fast_plotter/postproc/functions.py b/fast_plotter/postproc/functions.py
@@ -267,8 +267,12 @@ def stack_weights(df, drop_n_col=False):
     logger.info("Stacking weights")
     if drop_n_col:
         df.drop("n", inplace=True, axis="columns")
+    else:
+        df.set_index("n", append=True, inplace=True)
     df.columns = pd.MultiIndex.from_tuples([c.split(":") for c in df.columns], names=["systematic", ""])
     out = df.stack(0, dropna=False)
+    if not drop_n_col:
+        out.reset_index(level="n", inplace=True)
     return out
 
 
@@ -279,15 +283,32 @@ def to_datacard_inputs(df, select_data, rename_syst_vars=False):
     logger.info("Converting to datacard inputs")
     if rename_syst_vars:
         df.columns = [n.replace("_up:", "_Up:").replace("_down:", "_Down:") for n in df.columns]
-    df.set_index("n", append=True, inplace=True)
     df = stack_weights(df)
-    df.reset_index(level="n", inplace=True)
     data_mask = df.eval(select_data)
     df["content"] = df.n
     df["content"][~data_mask] = df.sumw
     df["error"] = df.content / np.sqrt(df.n)
 
-    df.drop(["n", "sumw", "sumw2"], inplace=True, axis="columns")
+
+def generic_pandas(df, func, *args, **kwargs):
+    """
+    Call an arbitrary method of the input object, selected by name.
+
+    Looks up the attribute called ``func`` on ``df`` and calls it with the
+    remaining positional and keyword arguments, returning whatever that
+    call returns.
+
+    Parameters:
+        df: object whose attribute is looked up (presumably a pandas
+            DataFrame -- confirm against the calling stage)
+        func (str): name of the attribute of ``df`` to call
+        *args, **kwargs: forwarded unchanged to the looked-up attribute
+
+    Raises:
+        AttributeError: if ``df`` has no attribute called ``func``
+    """
+    logger.info("Apply generic pandas function")
+    # Dispatch by name so the caller chooses which method of ``df`` is run
+    return getattr(df, func)(*args, **kwargs)
+
+
+def unstack_weights(df, weight_name="systematic", includes_counts=True):
+    """
+    The inverse of stack_weights
+
+    Moves the index level called ``weight_name`` into the columns and then
+    flattens the resulting column labels into plain strings.
+
+    Parameters:
+        df: input table (presumably a pandas DataFrame whose index contains
+            a level named ``weight_name`` -- confirm against the caller)
+        weight_name (str): name of the index level to unstack
+        includes_counts (bool): if True, the "n" column is appended to the
+            index before unstacking and turned back into a column afterwards
+
+    Returns:
+        The unstacked table with flattened column names.
+    """
+    logger.info("Unstacking systematics")
+    if includes_counts:
+        # Keep "n" out of the columns while unstacking (presumably so it is
+        # not repeated once per weight -- confirm intent)
+        df = df.set_index("n", append=True)
+    df = df.unstack(weight_name)
+    # Each column label c is indexed as a pair; assumes c[0] is the original
+    # column name and c[1] the unstacked weight value (TODO confirm). Labels
+    # become "<c[1]>:<c[0]>", or just c[0] when c[1] is empty/falsy.
+    df.columns = ["{1}:{0}".format(*c) if c[1] else c[0] for c in df.columns]
+    if includes_counts:
+        # Restore "n" as an ordinary column
+        df = df.reset_index(level="n")
     return df
 
 
diff --git a/fast_plotter/postproc/stages.py b/fast_plotter/postproc/stages.py
@@ -131,6 +131,16 @@ class NormaliseGroup(BaseManipulator):
     func = "normalise_group"
 
 
+class GenericPandas(BaseManipulator):
+    # Stage declaration only: names the "generic_pandas" function and declares
+    # a "one-to-one" cardinality. How BaseManipulator consumes these two
+    # attributes is not visible in this diff.
+    cardinality = "one-to-one"
+    func = "generic_pandas"
+
+
+class UnstackWeights(BaseManipulator):
+    # Stage declaration only: names the "unstack_weights" function and declares
+    # a "one-to-one" cardinality. How BaseManipulator consumes these two
+    # attributes is not visible in this diff.
+    cardinality = "one-to-one"
+    func = "unstack_weights"
+
+
 class OpenMany(BaseManipulator):
     cardinality = "none-to-many"
     func = "open_many"
diff --git a/fast_plotter/version.py b/fast_plotter/version.py
@@ -12,5 +12,5 @@ def split_version(version):
     return tuple(result)
 
 
-__version__ = '0.6.2'
+__version__ = '0.6.3'
 version_info = split_version(__version__) # noqa
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.6.2
+current_version = 0.6.3
 commit = True
 tag = False