Skip to content

Commit

Permalink
Merge pull request #33 from FAST-HEP/BK_add_unstack_stage_postproc
Browse files Browse the repository at this point in the history
Add stage to postproc to unstack weights
  • Loading branch information
benkrikler authored Apr 29, 2020
2 parents a641a67 + 134d9ca commit 21e45e5
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.6.3] - 2020-04-29
### Added
- Add GenericPandas and UnstackWeights stages, PR #33 [@benkrikler](https://github.com/benkrikler)

## [0.6.2] - 2020-04-21
### Fixed
- Fix `split` function in postproc module to work with numbers, PR #32 [@benkrikler](https://github.com/benkrikler)
Expand Down
27 changes: 24 additions & 3 deletions fast_plotter/postproc/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,12 @@ def stack_weights(df, drop_n_col=False):
logger.info("Stacking weights")
if drop_n_col:
df.drop("n", inplace=True, axis="columns")
else:
df.set_index("n", append=True, inplace=True)
df.columns = pd.MultiIndex.from_tuples([c.split(":") for c in df.columns], names=["systematic", ""])
out = df.stack(0, dropna=False)
if not drop_n_col:
out.reset_index(level="n", inplace=True)
return out


Expand All @@ -279,15 +283,32 @@ def to_datacard_inputs(df, select_data, rename_syst_vars=False):
logger.info("Converting to datacard inputs")
if rename_syst_vars:
df.columns = [n.replace("_up:", "_Up:").replace("_down:", "_Down:") for n in df.columns]
df.set_index("n", append=True, inplace=True)
df = stack_weights(df)
df.reset_index(level="n", inplace=True)
data_mask = df.eval(select_data)
df["content"] = df.n
df["content"][~data_mask] = df.sumw
df["error"] = df.content / np.sqrt(df.n)

df.drop(["n", "sumw", "sumw2"], inplace=True, axis="columns")

def generic_pandas(df, func, *args, **kwargs):
    """
    Call the method of ``df`` named by ``func`` and return its result.

    Parameters:
        df: the object to operate on (looked up with ``getattr``).
        func: name of the attribute on ``df`` to call.
        *args, **kwargs: forwarded unchanged to that attribute.

    Returns:
        Whatever the looked-up attribute returns when called.
    """
    logger.info("Apply generic pandas function")
    # Resolve the attribute by name first, then invoke it with the caller's arguments
    method = getattr(df, func)
    result = method(*args, **kwargs)
    return result


def unstack_weights(df, weight_name="systematic", includes_counts=True):
    """
    The inverse to stack_weights: move an index level back into the columns.

    Parameters:
        df: dataframe whose index contains the level ``weight_name``.
        weight_name: index level to unstack into the columns.
        includes_counts: if true, the "n" column is moved into the index
            before unstacking and restored as a column afterwards.

    Returns:
        A new dataframe with flat column names.  Each two-level column is
        renamed to "<second level>:<first level>", or to the first level
        alone when the second level entry is empty.
    """
    logger.info("Unstacking systematics")
    if includes_counts:
        # Keep "n" in the index so it is not spread across the unstacked level
        df = df.set_index("n", append=True)

    wide = df.unstack(weight_name)

    # Flatten the two-level column labels back into single strings
    flat_names = []
    for column in wide.columns:
        if column[1]:
            flat_names.append("{1}:{0}".format(*column))
        else:
            flat_names.append(column[0])
    wide.columns = flat_names

    if not includes_counts:
        return wide
    return wide.reset_index(level="n")


Expand Down
10 changes: 10 additions & 0 deletions fast_plotter/postproc/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ class NormaliseGroup(BaseManipulator):
func = "normalise_group"


# Post-processing stage added alongside the generic_pandas function.
# NOTE(review): how `cardinality` and `func` are consumed is decided by
# BaseManipulator, which is not visible here -- confirm against it.
class GenericPandas(BaseManipulator):
# Presumably means one output per input -- TODO confirm.
cardinality = "one-to-one"
# Matches the name of the generic_pandas function in postproc/functions.py.
func = "generic_pandas"


# Post-processing stage added alongside the unstack_weights function.
# NOTE(review): how `cardinality` and `func` are consumed is decided by
# BaseManipulator, which is not visible here -- confirm against it.
class UnstackWeights(BaseManipulator):
# Presumably means one output per input -- TODO confirm.
cardinality = "one-to-one"
# Matches the name of the unstack_weights function in postproc/functions.py.
func = "unstack_weights"


class OpenMany(BaseManipulator):
cardinality = "none-to-many"
func = "open_many"
Expand Down
2 changes: 1 addition & 1 deletion fast_plotter/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def split_version(version):
return tuple(result)


__version__ = '0.6.2'
__version__ = '0.6.3'
version_info = split_version(__version__) # noqa
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.2
current_version = 0.6.3
commit = True
tag = False

Expand Down

0 comments on commit 21e45e5

Please sign in to comment.