Skip to content

Commit 2e7e3e2

Browse files
authored
PERF: BlockPlacement construction (#40227)
1 parent 154026c commit 2e7e3e2

File tree

7 files changed

+45
-26
lines changed

7 files changed

+45
-26
lines changed

pandas/_libs/internals.pyx

+9-7
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,24 @@ cimport numpy as cnp
1515
from numpy cimport (
1616
NPY_INT64,
1717
int64_t,
18+
ndarray,
1819
)
1920

2021
cnp.import_array()
2122

2223
from pandas._libs.algos import ensure_int64
24+
from pandas._libs.util cimport is_integer_object
2325

2426

2527
@cython.final
2628
cdef class BlockPlacement:
2729
# __slots__ = '_as_slice', '_as_array', '_len'
2830
cdef:
2931
slice _as_slice
30-
object _as_array
32+
ndarray _as_array # Note: this still allows `None`
3133
bint _has_slice, _has_array, _is_known_slice_like
3234

33-
def __init__(self, val):
35+
def __cinit__(self, val):
3436
cdef:
3537
slice slc
3638

@@ -39,7 +41,7 @@ cdef class BlockPlacement:
3941
self._has_slice = False
4042
self._has_array = False
4143

42-
if isinstance(val, int):
44+
if is_integer_object(val):
4345
slc = slice(val, val + 1, 1)
4446
self._as_slice = slc
4547
self._has_slice = True
@@ -160,12 +162,12 @@ cdef class BlockPlacement:
160162
np.concatenate([self.as_array] + [o.as_array for o in others])
161163
)
162164

163-
cdef iadd(self, other):
165+
cdef BlockPlacement iadd(self, other):
164166
cdef:
165167
slice s = self._ensure_has_slice()
166168
Py_ssize_t other_int, start, stop, step, l
167169

168-
if isinstance(other, int) and s is not None:
170+
if is_integer_object(other) and s is not None:
169171
other_int = <Py_ssize_t>other
170172

171173
if other_int == 0:
@@ -438,13 +440,13 @@ def get_blkno_placements(blknos, group: bool = True):
438440
"""
439441
Parameters
440442
----------
441-
blknos : array of int64
443+
blknos : np.ndarray[int64]
442444
group : bool, default True
443445
444446
Returns
445447
-------
446448
iterator
447-
yield (BlockPlacement, blkno)
449+
yield (blkno, BlockPlacement)
448450
"""
449451
blknos = ensure_int64(blknos)
450452

pandas/core/indexes/base.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -4534,7 +4534,6 @@ def __getitem__(self, key):
45344534
# There's no custom logic to be implemented in __getslice__, so it's
45354535
# not overloaded intentionally.
45364536
getitem = self._data.__getitem__
4537-
promote = self._shallow_copy
45384537

45394538
if is_scalar(key):
45404539
key = com.cast_scalar_indexer(key, warn_float=True)
@@ -4543,7 +4542,9 @@ def __getitem__(self, key):
45434542
if isinstance(key, slice):
45444543
# This case is separated from the conditional above to avoid
45454544
# pessimization of basic indexing.
4546-
return promote(getitem(key))
4545+
result = getitem(key)
4546+
# Going through simple_new for performance.
4547+
return type(self)._simple_new(result, name=self.name)
45474548

45484549
if com.is_bool_indexer(key):
45494550
key = np.asarray(key, dtype=bool)
@@ -4553,7 +4554,9 @@ def __getitem__(self, key):
45534554
if np.ndim(result) > 1:
45544555
deprecate_ndim_indexing(result)
45554556
return result
4556-
return promote(result)
4557+
# NB: Using _constructor._simple_new would break if MultiIndex
4558+
# didn't override __getitem__
4559+
return self._constructor._simple_new(result, name=self.name)
45574560
else:
45584561
return result
45594562

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None):
231231
result = object.__new__(cls)
232232

233233
result._data = values
234-
result.name = name
234+
result._name = name
235235
result._cache = {}
236236

237237
result._reset_identity()

pandas/core/indexes/multi.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -2076,15 +2076,16 @@ def __getitem__(self, key):
20762076

20772077
return tuple(retval)
20782078
else:
2079+
# in general cannot be sure whether the result will be sorted
2080+
sortorder = None
20792081
if com.is_bool_indexer(key):
20802082
key = np.asarray(key, dtype=bool)
20812083
sortorder = self.sortorder
2082-
else:
2083-
# cannot be sure whether the result will be sorted
2084-
sortorder = None
2085-
2086-
if isinstance(key, Index):
2087-
key = np.asarray(key)
2084+
elif isinstance(key, slice):
2085+
if key.step is None or key.step > 0:
2086+
sortorder = self.sortorder
2087+
elif isinstance(key, Index):
2088+
key = np.asarray(key)
20882089

20892090
new_codes = [level_codes[key] for level_codes in self.codes]
20902091

pandas/core/indexes/range.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
164164
assert isinstance(values, range)
165165

166166
result._range = values
167-
result.name = name
167+
result._name = name
168168
result._cache = {}
169169
result._reset_identity()
170170
return result

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def _split_op_result(self, result) -> List[Block]:
449449
nbs = []
450450
for i, loc in enumerate(self.mgr_locs):
451451
vals = result[i]
452-
block = self.make_block(values=vals, placement=[loc])
452+
block = self.make_block(values=vals, placement=loc)
453453
nbs.append(block)
454454
return nbs
455455

pandas/core/internals/managers.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ class BlockManager(DataManager):
149149
_blknos: np.ndarray
150150
_blklocs: np.ndarray
151151

152+
# Non-trivially faster than a property
153+
ndim = 2 # overridden by SingleBlockManager
154+
152155
def __init__(
153156
self,
154157
blocks: Sequence[Block],
@@ -173,6 +176,21 @@ def __init__(
173176
self._blknos = None
174177
self._blklocs = None
175178

179+
@classmethod
180+
def _simple_new(cls, blocks: Tuple[Block, ...], axes: List[Index]):
181+
"""
182+
Fastpath constructor; does NO validation.
183+
"""
184+
obj = cls.__new__(cls)
185+
obj.axes = axes
186+
obj.blocks = blocks
187+
188+
# Populate known_consolidated, blknos, and blklocs lazily
189+
obj._known_consolidated = False
190+
obj._blknos = None
191+
obj._blklocs = None
192+
return obj
193+
176194
@classmethod
177195
def from_blocks(cls, blocks: List[Block], axes: List[Index]):
178196
"""
@@ -233,10 +251,6 @@ def __nonzero__(self) -> bool:
233251
def shape(self) -> Shape:
234252
return tuple(len(ax) for ax in self.axes)
235253

236-
@property
237-
def ndim(self) -> int:
238-
return len(self.axes)
239-
240254
def _normalize_axis(self, axis):
241255
# switch axis to follow BlockManager logic
242256
if self.ndim == 2:
@@ -800,8 +814,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
800814
new_axes = list(self.axes)
801815
new_axes[axis] = new_axes[axis][slobj]
802816

803-
bm = type(self)(new_blocks, new_axes, verify_integrity=False)
804-
return bm
817+
return type(self)._simple_new(tuple(new_blocks), new_axes)
805818

806819
@property
807820
def nblocks(self) -> int:
@@ -1322,7 +1335,7 @@ def reindex_indexer(
13221335

13231336
def _slice_take_blocks_ax0(
13241337
self, slice_or_indexer, fill_value=lib.no_default, only_slice: bool = False
1325-
):
1338+
) -> List[Block]:
13261339
"""
13271340
Slice/take blocks along axis=0.
13281341

0 commit comments

Comments
 (0)