-
-
Notifications
You must be signed in to change notification settings - Fork 38
Expand file tree
/
Copy pathdelete.py
More file actions
76 lines (60 loc) · 1.9 KB
/
delete.py
File metadata and controls
76 lines (60 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
# Benchmark for measuring delete() performance with different index types:
# int, slice, and list — with varying sizes.
from dataclasses import dataclass
from time import perf_counter as time
import numpy as np
import blosc2
@dataclass
class Row:
    """Row schema for the benchmark table.

    Each attribute pairs a Python annotation with a ``blosc2.field`` built
    from a blosc2 type spec. The class is handed to ``blosc2.CTable`` by the
    benchmark loop further down in this file.
    """

    # 64-bit integer column; the spec is declared with ge=0
    # (presumably a lower-bound constraint -- confirm against blosc2 docs).
    id: int = blosc2.field(
        blosc2.int64(ge=0),
    )
    # complex128 column, defaulting to 0j.
    c_val: complex = blosc2.field(
        blosc2.complex128(),
        default=0j,
    )
    # float64 column declared with ge=0 and le=100, defaulting to 0.0.
    score: float = blosc2.field(
        blosc2.float64(ge=0, le=100),
        default=0.0,
    )
    # Boolean column, defaulting to True.
    active: bool = blosc2.field(
        blosc2.bool(),
        default=True,
    )
N = 1_000_000
print(f"delete() benchmark | N = {N:,}\n")

# Build base data once; every benchmark case loads a fresh table from it.
np_dtype = np.dtype(
    [
        ("id", np.int64),
        ("c_val", np.complex128),
        ("score", np.float64),
        ("active", np.bool_),
    ]
)

# Fill the structured array column by column with vectorized expressions
# instead of materializing N Python tuples first. Each expression applies the
# same float64 operations, in the same order, as the per-row formula
#   (i, complex(i * 0.1, i * 0.01), 10.0 + (i % 100) * 0.4, i % 3 == 0)
# so the resulting values are identical.
_ids = np.arange(N, dtype=np.int64)

# Assemble the complex column in a contiguous scratch array so the real and
# imaginary parts can be written independently.
_c_val = np.empty(N, dtype=np.complex128)
_c_val.real = _ids * 0.1
_c_val.imag = _ids * 0.01

DATA = np.empty(N, dtype=np_dtype)
DATA["id"] = _ids
DATA["c_val"] = _c_val
DATA["score"] = 10.0 + (_ids % 100) * 0.4
DATA["active"] = _ids % 3 == 0

# Drop the scratch arrays: they are not part of the script's namespace
# contract and would otherwise keep ~24 MB alive for the whole run.
del _ids, _c_val
# Benchmark matrix as (label, key) pairs: one single-int key, then slice and
# explicit index-list keys that each cover the first 100, the first 100_000,
# and all N positions.
_SMALL = 100
_LARGE = 100_000
delete_cases = [
    ("int", 0),
    ("slice small", slice(0, _SMALL)),
    ("slice large", slice(0, _LARGE)),
    ("slice full", slice(0, N)),
    ("list small", [*range(_SMALL)]),
    ("list large", [*range(_LARGE)]),
    ("list full", [*range(N)]),
]
def _count_targeted_rows(key, total):
    """Return how many rows ``key`` addresses in a table of ``total`` rows."""
    if isinstance(key, slice):
        # Resolve the slice against the table length and count its positions.
        return len(range(*key.indices(total)))
    if isinstance(key, int):
        return 1
    # Any other key is treated as a sized collection of row indices.
    return len(key)


# Report header.
print("=" * 60)
print(f"{'CASE':<20} {'ROWS DELETED':>14} {'TIME (s)':>12}")
print("-" * 60)

for label, key in delete_cases:
    # Rebuild the table from the shared base data for every case, so that a
    # previous case's deletions never affect the next measurement.
    table = blosc2.CTable(Row, expected_size=N)
    table.extend(DATA)

    # Row count implied by the key itself (not a value reported by delete());
    # computed before the clock starts so it stays out of the timing.
    n_deleted = _count_targeted_rows(key, N)

    # Only the delete() call is inside the timed window.
    started = time()
    table.delete(key)
    t_delete = time() - started

    print(f"{label:<20} {n_deleted:>14,} {t_delete:>12.6f}")

print("-" * 60)