Skip to content

Commit 072cc2f

Browse files
authored
Merge pull request #124 from FAST-HEP/BK_fix_binned_df_empty
Handle empty data chunks in binned df explode
2 parents 8fa33c0 + 7f69cd5 commit 072cc2f

File tree

4 files changed: 23 additions (+23) and 5 deletions (-5).

4 files changed

+23
-5
lines changed

.travis.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ dist: xenial
22
language: python
33

44
python:
5-
- "2.7"
6-
- "3.5"
75
- "3.6"
86
- "3.7"
7+
- "3.8"
98

109
install:
1110
- pip install -r .requirements_dev.txt
@@ -32,4 +31,4 @@ deploy:
3231
on:
3332
tags: true
3433
repo: FAST-HEP/fast-carpenter
35-
condition: "$TRAVIS_PYTHON_VERSION == 3.6 && $TRAVIS_TAG =~ ^v[0-9]+[.][0-9]+[.][0-9]+(-rc[0-9]+|[.]dev[0-9]+)?$"
34+
condition: "$TRAVIS_PYTHON_VERSION == 3.7 && $TRAVIS_TAG =~ ^v[0-9]+[.][0-9]+[.][0-9]+(-rc[0-9]+|[.]dev[0-9]+)?$"

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

77
## [Unreleased]
8+
### Removed
9+
- Testing against Python <= 3.5, PR #124
10+
11+
### Fixed
12+
- Fix handling of empty data chunks in BinnedDataframe stage, PR #124 [@BenKrikler](https://github.com/benkrikler)
813

914
## [0.17.5] - 2020-04-03
1015
### Added

fast_carpenter/summary/binned_dataframe.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,18 @@ def _merge_dataframes(dataset_readers_list):
7070
for dataset, readers in dataset_readers_list:
7171
dataset_df = readers[0]
7272
for df in readers[1:]:
73-
if df is None:
73+
if df is None or df.empty:
7474
continue
7575
dataset_df = dataset_df.add(df, fill_value=0.)
76+
if dataset_df is None or dataset_df.empty:
77+
continue
7678
all_dfs.append(dataset_df)
7779
keys.append(dataset)
78-
final_df = pd.concat(all_dfs, keys=keys, names=['dataset'], sort=True)
80+
if all_dfs:
81+
final_df = pd.concat(all_dfs, keys=keys, names=['dataset'], sort=True)
82+
else:
83+
final_df = pd.DataFrame()
84+
7985
return final_df
8086

8187

@@ -204,6 +210,8 @@ def event(self, chunk):
204210

205211
data = chunk.tree.pandas.df(all_inputs, flatten=False)
206212
data = explode(data)
213+
if data is None or data.empty:
214+
return True
207215

208216
binned_values = _bin_values(data, dimensions=self._bin_dims,
209217
binnings=self._binnings,
@@ -279,6 +287,9 @@ def explode(df):
279287
https://stackoverflow.com/questions/12680754/split-explode-pandas\
280288
-dataframe-string-entry-to-separate-rows/40449726#40449726
281289
"""
290+
if df is None or df.empty:
291+
return df
292+
282293
# get the list columns
283294
lst_cols = [col for col, dtype in df.dtypes.items() if is_object_dtype(dtype)]
284295
# Be more specific about which objects are ok

tests/summary/test_binned_dataframe.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ def test_explode():
258258
assert len(exploded) == 1 + 8 + 3
259259
assert np.array_equal(exploded.list, [0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2])
260260

261+
exploded = bdf.explode(pd.DataFrame(columns=["one", "two", "3"]))
262+
assert exploded.empty is True
263+
261264

262265
def test_densify_dataframe_integers():
263266
index = [("one", 1), ("one", 3), ("two", 2), ("three", 1), ("three", 2)]

0 commit comments

Comments
 (0)