From e6a9f5ca7ea10da638d411a58df911637f17ae7e Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 31 Oct 2019 08:41:35 +0100 Subject: [PATCH 1/3] Add test that fails for 3D jagged arrays currently --- tests/define/test_reductions.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/define/test_reductions.py b/tests/define/test_reductions.py index 8d4a7ac..4af4654 100644 --- a/tests/define/test_reductions.py +++ b/tests/define/test_reductions.py @@ -20,6 +20,20 @@ def test_jagged_nth(jagged_1): assert np.isnan(reduced[5]) +def test_jagged_nth_3D(jagged_1): + fake_3d = [[np.arange(i + 1) + j + for i in range(j % 3)] + for j in range(5)] + get_second = reductions.JaggedNth(1, np.nan) + reduced = get_second(fake_3d) + assert reduced[0] == 1.1 + assert reduced[1] == 4.4 + assert np.isnan(reduced[2]) + assert reduced[3] == 7.7 + assert reduced[4] == 10.0 + assert np.isnan(reduced[5]) + + def test_jagged_nth_negative(jagged_1): get_first_second = reductions.JaggedNth(-1, np.nan) reduced = get_first_second(jagged_1) From 9b35a109689806d345570233df34d6a545d306ba Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 31 Oct 2019 09:54:04 +0100 Subject: [PATCH 2/3] Reimplement JaggedNth for arbitrary jaggedness --- fast_carpenter/define/reductions.py | 35 +++++++++++++++++++++-------- tests/define/test_reductions.py | 28 ++++++++++++++++++----- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/fast_carpenter/define/reductions.py b/fast_carpenter/define/reductions.py index 7af8dc5..c732b65 100644 --- a/fast_carpenter/define/reductions.py +++ b/fast_carpenter/define/reductions.py @@ -1,5 +1,6 @@ import numpy as np import six +from ..expressions import deconstruct_jaggedness, reconstruct_jaggedness __all__ = ["get_pandas_reduction"] @@ -14,17 +15,33 @@ def __init__(self, index, fill_missing, force_float=True): self.index = index self.fill_missing = fill_missing self.dtype = None - if force_float and isinstance(fill_missing, int): - if 
fill_missing is True or fill_missing is False: - self.dtype = bool - else: - self.dtype = float + if fill_missing is True or fill_missing is False: + self.dtype = bool + elif force_float or isinstance(fill_missing, float): + self.dtype = float + else: + self.dtype = int def __call__(self, array): - mask = array.counts > abs(self.index) - int(self.index < 0) - output = np.full(len(array), self.fill_missing, dtype=self.dtype) - output[mask] = array[mask, self.index] - return output + # The next two lines ought to be enough + # result = array.pad(abs(self.index) + int(self.index >= 0)) + # result = result[..., self.index] + + # Flatten out the first K-1 dimensions: + flat, counts = deconstruct_jaggedness(array, []) + result = reconstruct_jaggedness(flat, counts[:1]) + + # Now get the Nth item on the last dimension + result = result.pad(abs(self.index) + int(self.index >= 0)) + result = result[..., self.index] + + # Now replay the remaining dimensions on this + result = reconstruct_jaggedness(result, counts[1:]) + + if self.dtype is not None: + result = result.astype(self.dtype) + result = result.fillna(self.fill_missing) + return result class JaggedMethod(object): diff --git a/tests/define/test_reductions.py b/tests/define/test_reductions.py index 4af4654..2804792 100644 --- a/tests/define/test_reductions.py +++ b/tests/define/test_reductions.py @@ -24,14 +24,30 @@ def test_jagged_nth_3D(jagged_1): fake_3d = [[np.arange(i + 1) + j for i in range(j % 3)] for j in range(5)] + fake_3d = JaggedArray.fromiter(fake_3d) get_second = reductions.JaggedNth(1, np.nan) reduced = get_second(fake_3d) - assert reduced[0] == 1.1 - assert reduced[1] == 4.4 - assert np.isnan(reduced[2]) - assert reduced[3] == 7.7 - assert reduced[4] == 10.0 - assert np.isnan(reduced[5]) + assert len(reduced[0]) == 0 + assert len(reduced[1]) == 1 + assert np.isnan(reduced[1]) + assert len(reduced[2]) == 2 + assert np.isnan(reduced[2][0]) + assert reduced[2][1] == 3 + assert len(reduced[3]) == 0 + assert 
len(reduced[4]) == 1 + assert np.isnan(reduced[4]) + + get_first = reductions.JaggedNth(0, np.nan) + reduced = get_first(fake_3d) + assert len(reduced[0]) == 0 + assert len(reduced[1]) == 1 + assert reduced[1][0] == 1 + assert len(reduced[2]) == 2 + assert reduced[2][0] == 2 + assert reduced[2][1] == 2 + assert len(reduced[3]) == 0 + assert len(reduced[4]) == 1 + assert reduced[4] == 4 def test_jagged_nth_negative(jagged_1): From 923e1777a66c73ab52914759c60d18aa5649d32b Mon Sep 17 00:00:00 2001 From: Ben Krikler Date: Thu, 31 Oct 2019 09:55:42 +0100 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f07ec3d..d286a46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## unreleased +## Unreleased +### Added ### Changed - Added support for variables with multiple dots in the name (nested branches). Issue #95, PR #97 [@kreczko](https://github.com/kreczko) +- Fix JaggedNth to work with arbitrary-depth jagged arrays. Issue #87 [@benkrikler](https://github.com/benkrikler) ## [0.15.0] - 2019-10-27