Skip to content

Commit c1bc4a2

Browse files
committed
Add ability to handle CTable indexes in Index and remove CTableIndex
1 parent f12be86 commit c1bc4a2

6 files changed

Lines changed: 211 additions & 255 deletions

File tree

doc/getting_started/tutorials/15.indexing-ctables.ipynb

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
"id": "b23746ca",
3737
"metadata": {
3838
"ExecuteTime": {
39-
"end_time": "2026-04-19T09:26:14.791545Z",
40-
"start_time": "2026-04-19T09:26:11.855205Z"
39+
"end_time": "2026-05-06T07:51:01.883922Z",
40+
"start_time": "2026-05-06T07:51:00.823273Z"
4141
}
4242
},
4343
"outputs": [
@@ -98,8 +98,8 @@
9898
"id": "d9411a66072ed12c",
9999
"metadata": {
100100
"ExecuteTime": {
101-
"end_time": "2026-04-19T09:26:14.825272Z",
102-
"start_time": "2026-04-19T09:26:14.793048Z"
101+
"end_time": "2026-05-06T07:51:01.930426Z",
102+
"start_time": "2026-05-06T07:51:01.897273Z"
103103
}
104104
},
105105
"outputs": [
@@ -132,7 +132,7 @@
132132
"\n",
133133
"Call `create_index(col_name)` to build a bucket index on a column. Pass `kind=...` to choose another index kind, including `blosc2.IndexKind.OPSI` for tunable iterative ordering or `blosc2.IndexKind.FULL` for globally sorted indexes that can also support ordered reuse. OPSI is a separate exact-filtering index kind, not a slower way to build a `FULL`/CSI index; its build effort is controlled by `optlevel` or the explicit `opsi_max_cycles` keyword.\n",
134134
"\n",
135-
"The returned `CTableIndex` handle shows the column name, kind, and whether the index is stale.\n"
135+
"The returned `Index` handle shows the column name, kind, and whether the index is stale. Use `storage_stats()` to inspect the total `(nbytes, cbytes, cratio)` for the index payload.\n"
136136
]
137137
},
138138
{
@@ -141,25 +141,27 @@
141141
"id": "2ac1f281",
142142
"metadata": {
143143
"ExecuteTime": {
144-
"end_time": "2026-04-19T09:26:14.869481Z",
145-
"start_time": "2026-04-19T09:26:14.826650Z"
144+
"end_time": "2026-05-06T07:51:01.981307Z",
145+
"start_time": "2026-05-06T07:51:01.934008Z"
146146
}
147147
},
148148
"outputs": [
149149
{
150150
"name": "stdout",
151151
"output_type": "stream",
152152
"text": [
153-
"<CTableIndex col='sensor_id' kind='bucket' name='__self__'>\n",
153+
"Index(kind='bucket', col_name='sensor_id', name='__self__', stale=False)\n",
154154
"stale? False\n",
155-
"all indexes: [<CTableIndex col='sensor_id' kind='bucket' name='__self__'>]\n"
155+
"storage stats: (6292289, 6333, 993.5716090320543)\n",
156+
"all indexes: [Index(kind='bucket', col_name='sensor_id', name='__self__', stale=False)]\n"
156157
]
157158
}
158159
],
159160
"source": [
160161
"idx = t.create_index(\"sensor_id\")\n",
161162
"print(idx)\n",
162163
"print(\"stale?\", idx.stale)\n",
164+
"print(\"storage stats:\", idx.storage_stats())\n",
163165
"print(\"all indexes:\", t.indexes)"
164166
]
165167
},
@@ -180,8 +182,8 @@
180182
"id": "dcc2dc87",
181183
"metadata": {
182184
"ExecuteTime": {
183-
"end_time": "2026-04-19T09:26:14.945997Z",
184-
"start_time": "2026-04-19T09:26:14.870232Z"
185+
"end_time": "2026-05-06T07:51:02.066579Z",
186+
"start_time": "2026-05-06T07:51:01.986232Z"
185187
}
186188
},
187189
"outputs": [
@@ -218,8 +220,8 @@
218220
"id": "b0132381",
219221
"metadata": {
220222
"ExecuteTime": {
221-
"end_time": "2026-04-19T09:26:14.973150Z",
222-
"start_time": "2026-04-19T09:26:14.946925Z"
223+
"end_time": "2026-05-06T07:51:02.100252Z",
224+
"start_time": "2026-05-06T07:51:02.076491Z"
223225
}
224226
},
225227
"outputs": [
@@ -261,8 +263,8 @@
261263
"id": "dc4d2897",
262264
"metadata": {
263265
"ExecuteTime": {
264-
"end_time": "2026-04-19T09:26:15.018004Z",
265-
"start_time": "2026-04-19T09:26:14.975125Z"
266+
"end_time": "2026-05-06T07:51:02.173294Z",
267+
"start_time": "2026-05-06T07:51:02.110489Z"
266268
}
267269
},
268270
"outputs": [
@@ -299,8 +301,8 @@
299301
"id": "e1583b4f",
300302
"metadata": {
301303
"ExecuteTime": {
302-
"end_time": "2026-04-19T09:26:15.034449Z",
303-
"start_time": "2026-04-19T09:26:15.018681Z"
304+
"end_time": "2026-05-06T07:51:02.210048Z",
305+
"start_time": "2026-05-06T07:51:02.187084Z"
304306
}
305307
},
306308
"outputs": [
@@ -333,17 +335,17 @@
333335
"id": "85d42133",
334336
"metadata": {
335337
"ExecuteTime": {
336-
"end_time": "2026-04-19T09:26:16.491396Z",
337-
"start_time": "2026-04-19T09:26:15.035139Z"
338+
"end_time": "2026-05-06T07:51:03.134956Z",
339+
"start_time": "2026-05-06T07:51:02.213213Z"
338340
}
339341
},
340342
"outputs": [
341343
{
342344
"name": "stdout",
343345
"output_type": "stream",
344346
"text": [
345-
"Created: <CTableIndex col='sensor_id' kind='bucket' name='__self__'>\n",
346-
"Sidecar files: 7\n",
347+
"Created: Index(kind='bucket', col_name='sensor_id', name='__self__', stale=False)\n",
348+
"Storage stats: (12583745, 7788, 1615.7864663585003)\n",
347349
"Rows > 280 (before close): 19\n"
348350
]
349351
}
@@ -365,9 +367,8 @@
365367
"pidx = pt.create_index(\"sensor_id\")\n",
366368
"print(\"Created:\", pidx)\n",
367369
"\n",
368-
"# Sidecar files\n",
369-
"index_dir = Path(path) / \"_indexes\" / \"sensor_id\"\n",
370-
"print(\"Sidecar files:\", len(list(index_dir.glob(\"**/*.b2nd\"))))\n",
370+
"# Storage usage for all index sidecars\n",
371+
"print(\"Storage stats:\", pidx.storage_stats())\n",
371372
"\n",
372373
"# Query before close\n",
373374
"r1 = pt.where(pt[\"sensor_id\"] > 280)\n",
@@ -380,35 +381,29 @@
380381
"id": "149ddba5",
381382
"metadata": {
382383
"ExecuteTime": {
383-
"end_time": "2026-04-19T09:26:16.626568Z",
384-
"start_time": "2026-04-19T09:26:16.511857Z"
384+
"end_time": "2026-05-06T07:51:03.222744Z",
385+
"start_time": "2026-05-06T07:51:03.167571Z"
385386
}
386387
},
387388
"outputs": [
388389
{
389390
"name": "stdout",
390391
"output_type": "stream",
391392
"text": [
392-
"Indexes after reopen: [<CTableIndex col='sensor_id' kind='bucket' name='__self__'>]\n",
393+
"Indexes after reopen: [Index(kind='bucket', col_name='sensor_id', name='__self__', stale=False)]\n",
394+
"Storage stats after reopen: (12583745, 7788, 1615.7864663585003)\n",
393395
"Rows > 280 (after reopen): 19\n",
394396
"Results match ✓\n"
395397
]
396-
},
397-
{
398-
"name": "stderr",
399-
"output_type": "stream",
400-
"text": [
401-
"/var/folders/r3/bycghmsx079bmglqt_2xmlt00000gn/T/ipykernel_81567/2229258138.py:3: FutureWarning: blosc2.open() currently defaults to mode='a', but this will change to mode='r' in a future release. Pass mode='a' explicitly to keep writable behavior, or mode='r' for read-only access.\n",
402-
" pt2 = blosc2.open(path)\n"
403-
]
404398
}
405399
],
406400
"source": [
407401
"# Close and reopen — catalog is preserved\n",
408402
"del pt\n",
409-
"pt2 = blosc2.open(path)\n",
403+
"pt2 = blosc2.open(path, mode=\"r\")\n",
410404
"\n",
411405
"print(\"Indexes after reopen:\", pt2.indexes)\n",
406+
"print(\"Storage stats after reopen:\", pt2.index(\"sensor_id\").storage_stats())\n",
412407
"\n",
413408
"r2 = pt2.where(pt2[\"sensor_id\"] > 280)\n",
414409
"print(\"Rows > 280 (after reopen):\", len(r2))\n",
@@ -438,8 +433,8 @@
438433
"id": "83db418b",
439434
"metadata": {
440435
"ExecuteTime": {
441-
"end_time": "2026-04-19T09:26:16.914246Z",
442-
"start_time": "2026-04-19T09:26:16.629760Z"
436+
"end_time": "2026-05-06T07:51:03.352544Z",
437+
"start_time": "2026-05-06T07:51:03.226079Z"
443438
}
444439
},
445440
"outputs": [
@@ -505,8 +500,8 @@
505500
"id": "363827fec805190a",
506501
"metadata": {
507502
"ExecuteTime": {
508-
"end_time": "2026-04-19T09:26:16.924330Z",
509-
"start_time": "2026-04-19T09:26:16.915076Z"
503+
"end_time": "2026-05-06T07:51:03.366566Z",
504+
"start_time": "2026-05-06T07:51:03.353982Z"
510505
}
511506
},
512507
"outputs": [],

doc/reference/ctable.rst

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,11 @@ Indexes
265265
-------
266266

267267
CTable indexes are created with :meth:`CTable.create_index` and returned as
268-
:class:`blosc2.ctable.CTableIndex` handles. ``CTableIndex`` has the same
269-
user-facing role as :class:`blosc2.Index`, but for tables instead of arrays:
270-
it refers to an index stored in the table index catalog, and delegates
271-
maintenance operations such as ``drop()``, ``rebuild()``, and ``compact()``
272-
back to the owning table. Users normally only receive these handles from the
273-
CTable API; they do not instantiate them directly.
268+
:class:`blosc2.Index` handles. For tables, ``Index`` refers to an entry stored
269+
in the table index catalog and delegates maintenance operations such as
270+
``drop()``, ``rebuild()``, and ``compact()`` back to the owning table. Users
271+
normally only receive these handles from the CTable API; they do not instantiate
272+
them directly.
274273

275274
Indexes can target stored columns or **direct expressions** over stored columns
276275
via ``create_index(expression=...)``. This lets queries reuse indexes for
@@ -298,19 +297,7 @@ globally sorted ordered reuse is required.
298297
.. automethod:: CTable.rebuild_index
299298
.. automethod:: CTable.compact_index
300299

301-
.. autoclass:: blosc2.ctable.CTableIndex
302-
303-
.. autoattribute:: blosc2.ctable.CTableIndex.col_name
304-
.. autoattribute:: blosc2.ctable.CTableIndex.kind
305-
.. autoattribute:: blosc2.ctable.CTableIndex.stale
306-
.. autoattribute:: blosc2.ctable.CTableIndex.name
307-
.. autoattribute:: blosc2.ctable.CTableIndex.nbytes
308-
.. autoattribute:: blosc2.ctable.CTableIndex.cbytes
309-
.. autoattribute:: blosc2.ctable.CTableIndex.cratio
310-
.. automethod:: blosc2.ctable.CTableIndex.storage_stats
311-
.. automethod:: blosc2.ctable.CTableIndex.drop
312-
.. automethod:: blosc2.ctable.CTableIndex.rebuild
313-
.. automethod:: blosc2.ctable.CTableIndex.compact
300+
See :class:`blosc2.Index` for the attributes and methods of the returned handle.
314301

315302

316303
Persistence

doc/reference/index_class.rst

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,22 @@
33
Index
44
=====
55

6-
Handle for an index attached to a :class:`~blosc2.NDArray`.
6+
Handle for an index attached to a :class:`~blosc2.NDArray` or :class:`~blosc2.CTable`.
77

88
``Index`` objects are returned by NDArray indexing APIs such as
99
:meth:`blosc2.NDArray.create_index`, :meth:`blosc2.NDArray.index`, and
10-
:attr:`blosc2.NDArray.indexes`. Use this handle to inspect index metadata and
11-
storage usage, or to drop, rebuild, and compact the index. Users normally do
12-
not instantiate ``Index`` directly.
13-
14-
For table indexes, :class:`blosc2.ctable.CTableIndex` plays the same
15-
user-facing role for :class:`~blosc2.CTable` objects. It is documented in the
16-
:ref:`CTable` reference because table indexes can target columns and table
17-
expressions.
10+
:attr:`blosc2.NDArray.indexes`, and by the equivalent :class:`~blosc2.CTable`
11+
indexing APIs. Use this handle to inspect index metadata and storage usage, or
12+
to drop, rebuild, and compact the index. Users normally do not instantiate
13+
``Index`` directly.
1814

1915
.. currentmodule:: blosc2
2016

2117
.. autoclass:: Index
2218

2319
.. autoattribute:: Index.descriptor
2420
.. autoattribute:: Index.kind
21+
.. autoattribute:: Index.col_name
2522
.. autoattribute:: Index.field
2623
.. autoattribute:: Index.name
2724
.. autoattribute:: Index.target
@@ -30,6 +27,10 @@ expressions.
3027
.. autoattribute:: Index.nbytes
3128
.. autoattribute:: Index.cbytes
3229
.. autoattribute:: Index.cratio
30+
.. automethod:: Index.storage_stats
31+
.. automethod:: Index.__getitem__
32+
.. automethod:: Index.__iter__
33+
.. automethod:: Index.__len__
3334
.. automethod:: Index.drop
3435
.. automethod:: Index.rebuild
3536
.. automethod:: Index.compact

examples/ctable/indexing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ def load_rows(table: blosc2.CTable, nrows: int = 240) -> None:
5151
idx_active = pt.create_index("active")
5252
print("Indexes created:", pt.indexes)
5353
print("sensor_id stale?", idx_sensor.stale)
54+
print("sensor_id storage stats (nbytes, cbytes, cratio):", idx_sensor.storage_stats())
5455
print("active stale?", idx_active.stale)
56+
print("active storage stats (nbytes, cbytes, cratio):", idx_active.storage_stats())
5557

5658
# Queries can combine indexed and non-indexed predicates.
5759
recent_active = pt.where((pt.sensor_id >= 180) & pt.active & (pt.region == "north"))
@@ -77,6 +79,7 @@ def load_rows(table: blosc2.CTable, nrows: int = 240) -> None:
7779
packed = blosc2.open(str(bundle_path), mode="r")
7880
print("Reopened object type:", type(packed).__name__)
7981
print("Indexes after reopen from .b2z:", packed.indexes)
82+
print("sensor_id storage stats after reopen:", packed.index("sensor_id").storage_stats())
8083

8184
# Query directly against the .b2z bundle; no unpack step is needed.
8285
warm_active = packed.where(packed.active & (packed.status == "warm") & (packed.sensor_id > 100))

0 commit comments

Comments
 (0)