Skip to content

Commit

Permalink
Merge pull request #61 from FAST-HEP/BK_fix_multiple_cutflows
Browse files Browse the repository at this point in the history
Make array(s) methods consistent for masked uproot trees.  Fixes #58 and #25.
  • Loading branch information
benkrikler authored Jul 27, 2019
2 parents b6a807d + 43c48a3 commit 0f537ab
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
34 changes: 34 additions & 0 deletions fast_carpenter/masked_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,40 @@ def df(self, *args, **kwargs):
def pandas(self):
return MaskedUprootTree.PandasWrap(self)

def unmasked_array(self, *args, **kwargs):
    """Read a branch straight from the wrapped tree, without applying the mask.

    All positional and keyword arguments are forwarded unchanged to
    ``self.tree.array`` and its return value is handed back as-is.
    """
    read_branch = self.tree.array
    return read_branch(*args, **kwargs)

def unmasked_arrays(self, *args, **kwargs):
    """Read several branches straight from the wrapped tree, without masking.

    All positional and keyword arguments are forwarded unchanged to
    ``self.tree.arrays`` and its return value is handed back as-is.
    """
    read_branches = self.tree.arrays
    return read_branches(*args, **kwargs)

def array(self, *args, **kwargs):
    """Read a branch from the wrapped tree and apply the current mask.

    Arguments are forwarded unchanged to ``self.tree.array``.  When no mask
    is set (``self._mask is None``) the result is returned untouched;
    otherwise it is indexed with the mask.
    """
    values = self.tree.array(*args, **kwargs)
    selection = self._mask
    return values if selection is None else values[selection]

def arrays(self, *args, **kwargs):
    """Read several branches from the wrapped tree and apply the current mask.

    Arguments are forwarded unchanged to ``self.tree.arrays``.  With no mask
    set the result is returned untouched.  Otherwise the mask is applied
    according to the container type that came back:

    * ``dict``  -- every value is indexed with the mask, keys are kept;
    * ``tuple`` / ``list`` -- every element is indexed with the mask;
    * ``pd.DataFrame`` -- delegated to ``mask_df`` together with
      ``self.event_ranger.start_entry``;
    * ``np.ndarray`` -- 1-D arrays are indexed directly, 2-D arrays are
      indexed along the second axis when that axis is as long as
      ``self.tree``;
    * anything else is indexed directly with the mask.

    Raises:
        NotImplementedError: for a numpy array that is neither 1-D nor a
            2-D array whose second axis matches ``len(self.tree)``.
    """
    result = self.tree.arrays(*args, **kwargs)
    selection = self._mask
    if selection is None:
        return result

    if isinstance(result, dict):
        return {name: column[selection] for name, column in result.items()}
    if isinstance(result, tuple):
        return tuple(column[selection] for column in result)
    if isinstance(result, list):
        return [column[selection] for column in result]
    if isinstance(result, pd.DataFrame):
        return mask_df(result, selection, self.event_ranger.start_entry)

    if isinstance(result, np.ndarray):
        if result.ndim == 1:
            return result[selection]
        if result.ndim == 2 and result.shape[1] == len(self.tree):
            return result[:, selection]
        # NOTE(review): assumes every remaining ndarray shape is rejected
        # here rather than falling through to the generic indexing below --
        # confirm against the original indentation.
        raise NotImplementedError(
            "Unexpected numpy array for mask, shape:%s, mask length: %s"
            % (result.shape, len(self))
        )

    return result[selection]

@property
def mask(self):
    """Expose the mask currently stored on this tree (read-only view of ``_mask``)."""
    current_mask = self._mask
    return current_mask
Expand Down
9 changes: 9 additions & 0 deletions tests/selection/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,12 @@ def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range, multi_chu
assert output.loc[("test_mc", 0, "All"), ("totals_incl", "unweighted")] == 4580 * 2
assert output.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
assert output.loc[("test_mc", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2


def test_sequential_stages(cutflow_1, select_2, infile, full_event_range, tmpdir):
    """Run two cut-flow stages one after the other on the same chunk.

    After ``cutflow_1`` and a freshly built ``cutflow_2`` have both processed
    the chunk, the chunk's tree is expected to have length 2.
    """
    cutflow_2 = stage.CutFlow(
        "cutflow_2",
        str(tmpdir),
        selection=select_2,
        weights="EventWeight",
    )
    masked_tree = MaskedUprootTree(infile, event_ranger=full_event_range)
    chunk = FakeBEEvent(masked_tree, "data")

    # Both stages see the very same chunk object, in this order.
    for cutflow in (cutflow_1, cutflow_2):
        cutflow.event(chunk)

    assert len(chunk.tree) == 2
43 changes: 43 additions & 0 deletions tests/test_masked_tree.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import division
import pytest
import numpy as np
import pandas as pd
import fast_carpenter.masked_tree as m_tree


Expand Down Expand Up @@ -46,3 +47,45 @@ def test_w_mask_int(tree_w_mask_int, infile):
assert len(tree_w_mask_int) == 25
df = tree_w_mask_int.pandas.df("Muon_Px")
assert len(df.index.unique(0)) == 25


def test_array(tree_w_mask_int, infile):
    """``array`` returns one entry per event that survives the applied mask."""
    assert len(tree_w_mask_int) == 50

    # Keep every second event out of those currently in the tree.
    every_other = np.arange(0, len(tree_w_mask_int), 2)
    tree_w_mask_int.apply_mask(every_other)
    assert len(tree_w_mask_int) == 25

    muon_px = tree_w_mask_int.array("Muon_Px")
    assert len(muon_px) == 25


def test_arrays(tree_w_mask_int, infile):
    """``arrays`` applies the mask consistently for every supported output type.

    Covers dict, list, tuple, stacked numpy arrays (one and two branches) and
    pandas DataFrame outputs; each must hold 25 entries per branch once every
    second event has been masked out.
    """

    def stack(*columns):
        # Output type that packs the requested branches into one ndarray.
        return np.array(columns)

    assert len(tree_w_mask_int) == 50
    every_other = np.arange(0, len(tree_w_mask_int), 2)
    tree_w_mask_int.apply_mask(every_other)
    assert len(tree_w_mask_int) == 25

    # Mapping output: one masked array per requested branch.
    as_dict = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=dict)
    assert isinstance(as_dict, dict)
    assert len(as_dict) == 2
    assert list(map(len, as_dict.values())) == [25, 25]

    # Sequence outputs keep their container type.
    for container in (list, tuple):
        as_seq = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=container)
        assert isinstance(as_seq, container)
        assert len(as_seq) == 2
        first, second = as_seq[0], as_seq[1]
        assert len(first) == 25
        assert len(second) == 25

    # Stacked numpy outputs are masked along the event axis.
    stacked_pair = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=stack)
    assert isinstance(stacked_pair, np.ndarray)
    assert stacked_pair.shape == (2, 25)

    stacked_single = tree_w_mask_int.arrays(["Muon_Px"], outputtype=stack)
    assert isinstance(stacked_single, np.ndarray)
    assert stacked_single.shape == (1, 25)

    # DataFrame output: one row per surviving event, one column per branch.
    frame = tree_w_mask_int.arrays(["Muon_Px", "Muon_Py"], outputtype=pd.DataFrame)
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 25
    assert len(frame.columns) == 2

0 comments on commit 0f537ab

Please sign in to comment.