Skip to content

Commit

Permalink
Merge pull request #122 from FAST-HEP/BK_broadcast_expressions
Browse files Browse the repository at this point in the history
Add variable broadcasting for expressions
  • Loading branch information
benkrikler authored Jun 16, 2020
2 parents 072cc2f + 4e8fe7d commit 7f887b9
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 21 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.18.0] - 2020-06-17
### Added
- Add broadcasting between variables of different jaggedness in expressions, PR #122 [@BenKrikler](https://github.com/benkrikler)

### Removed
- Testing against Python <= 3.5, PR #124

Expand Down
65 changes: 50 additions & 15 deletions fast_carpenter/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import numexpr
import tokenize
import awkward
import logging
try:
from StringIO import StringIO
except ImportError:
from io import StringIO

logger = logging.getLogger(__name__)


__all__ = ["get_branches", "evaluate"]

Expand Down Expand Up @@ -58,35 +61,60 @@ class TreeToDictAdaptor():
"""
Make an uproot tree look like a dict for numexpr
"""
def __init__(self, tree, alias_dict):
def __init__(self, tree, alias_dict, needed_variables):
self.tree = tree
self.counts = None
self.aliases = alias_dict
self.vars, self.counts = self.broadcast_variables(needed_variables)

def broadcast_variables(self, variables):
    """
    Read the requested variables and broadcast them to a common jaggedness.

    Each variable is fetched with ``get_raw`` and split by
    ``deconstruct_jaggedness`` into its contents and a list of counts.
    The variable with the longest counts list defines the target
    structure.  Every other variable must have counts equal to the leading
    entries of that target; its contents are then repeated with
    ``np.repeat`` for each remaining entry of the target.

    Parameters:
        variables: iterable of variable names appearing in the expression.
            Names found in ``constants`` are skipped.

    Returns:
        A pair ``(broadcast_vars, broadcast_to)``.  ``broadcast_vars`` maps
        each variable name to ``(contents, counts, raw_array)`` and
        ``broadcast_to`` is the counts list of the most jagged variable.
        If there is nothing to read (no variables, or only constants) the
        result is ``({}, None)``.

    Raises:
        ValueError: if a variable's counts differ from the target's counts
            on the entries they share.
    """
    arrays = {}
    deepest_var, deepest_depth = None, -1
    for var in variables:
        if var in constants:
            continue
        array = self.get_raw(var)
        contents, counts = deconstruct_jaggedness(array, counts=[])
        arrays[var] = (contents, counts, array)
        # Strict '>' keeps the first variable seen at a given depth.
        if len(counts) > deepest_depth:
            deepest_var, deepest_depth = var, len(counts)

    if not arrays:
        # Nothing to broadcast; previously this fell through to
        # ``arrays[None]`` and raised a KeyError.  ``None`` is the value
        # apply_jaggedness treats as "no jaggedness to restore".
        return {}, None

    broadcast_to = arrays[deepest_var][1]
    broadcast_vars = {deepest_var: arrays[deepest_var]}
    for var, (contents, counts, raw) in arrays.items():
        if var == deepest_var:
            continue

        # The shared leading levels must match exactly to be broadcastable.
        for left, right in zip(broadcast_to, counts):
            if not np.array_equal(left, right):
                raise ValueError("Unable to broadcast all values")
        # Expand through every level this variable is missing.
        for copies in broadcast_to[len(counts):]:
            contents = np.repeat(contents, copies)

        broadcast_vars[var] = (contents, broadcast_to, raw)
    return broadcast_vars, broadcast_to

def __getitem__(self, item):
    """
    Return the value for ``item``: the entry from ``constants`` when the
    name is a known constant, otherwise the first element of the tuple
    stored for that variable in ``self.vars``.
    """
    if item not in constants:
        contents = self.vars[item][0]
        return contents
    return constants[item]

def get_raw(self, item):
if item in constants:
return constants[item]
full_item = self.aliases.get(item, item)
array = self.tree.array(full_item)
array = self.strip_jaggedness(array)
return array

def __contains__(self, item):
return item in self.tree or item in self.aliases
return item in self.vars

def __iter__(self):
for i in self.tree:
for i in self.vars:
yield i

def strip_jaggedness(self, array):
array, new_counts = deconstruct_jaggedness(array, counts=[])
if self.counts is not None:
if not all(np.array_equal(c, n) for c, n in zip(self.counts, new_counts)):
raise RuntimeError("Operation using arrays with different jaggedness")
else:
self.counts = new_counts
return array

def apply_jaggedness(self, array):
if self.counts is None:
return array
Expand All @@ -111,7 +139,14 @@ def preprocess_expression(expression):

def evaluate(tree, expression):
cleaned_expression, alias_dict = preprocess_expression(expression)
adaptor = TreeToDictAdaptor(tree, alias_dict)
context = numexpr.necompiler.getContext({}, frame_depth=1)
variables = numexpr.necompiler.getExprNames(cleaned_expression, context)[0]
try:
adaptor = TreeToDictAdaptor(tree, alias_dict, variables)
except ValueError:
msg = "Cannot broadcast all variables in expression: %s" % expression
logger.error(msg)
raise ValueError(msg)
result = numexpr.evaluate(cleaned_expression, local_dict=adaptor)
result = adaptor.apply_jaggedness(result)
return result
2 changes: 1 addition & 1 deletion fast_carpenter/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def split_version(version):
return tuple(result)


__version__ = '0.17.5'
__version__ = '0.18.0'
version_info = split_version(__version__) # noqa
3 changes: 1 addition & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.17.5
current_version = 0.18.0
commit = True
tag = False

Expand All @@ -18,4 +18,3 @@ test = pytest

[tool:pytest]
collect_ignore = ['setup.py']

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def get_version():
return _globals["__version__"]


requirements = ['atuproot==0.1.13', 'atsge==0.2.1', 'mantichora==0.9.7',
requirements = ['atuproot==0.1.13', 'atsge==0.2.1', 'atpbar==1.0.8', 'mantichora==0.9.7',
'fast-flow', 'fast-curator', 'awkward',
'pandas', 'numpy', 'numba', 'numexpr', 'uproot>=3']
repositories = []
Expand Down
11 changes: 9 additions & 2 deletions tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def test_3D_jagged(wrapped_tree):
fake_3d_2 = JaggedArray.fromiter(fake_3d_2)
wrapped_tree.new_variable("SecondFake3D", fake_3d_2)

with pytest.raises(RuntimeError) as e:
with pytest.raises(ValueError) as e:
expressions.evaluate(wrapped_tree, "SecondFake3D + Fake3D")
assert "different jaggedness" in str(e)
assert "Cannot broadcast" in str(e)


@pytest.mark.parametrize('input, expected', [
Expand All @@ -107,3 +107,10 @@ def test_preprocess_expression(input, expected):
clean_expr, alias_dict = expressions.preprocess_expression(input)
assert clean_expr == expected[0]
assert alias_dict == expected[1]


def test_broadcast(wrapped_tree):
    """
    An expression combining NJet/NElectron with Jet_Py/Jet_Px must evaluate
    without raising, while combining Jet_Py with Muon_Px must raise
    ValueError.
    """
    compatible = "NJet * Jet_Py + NElectron * Jet_Px"
    expressions.evaluate(wrapped_tree, compatible)

    incompatible = "Jet_Py + Muon_Px"
    with pytest.raises(ValueError):
        expressions.evaluate(wrapped_tree, incompatible)

0 comments on commit 7f887b9

Please sign in to comment.