Skip to content

Commit a1a7137

Browse files
authored
Revert "Revert 1933 agis/sc 43316/add wrapping for ls recursive (#1964)" (#1968)
This reverts commit 75e32f9.
1 parent 997b328 commit a1a7137

File tree

5 files changed

+157
-5
lines changed

5 files changed

+157
-5
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
## Improvements
66

7+
* Add wrapping for ls_recursive by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1933
78
* Migrate away from deprecated TileDB C++ APIs by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1958
89
* Pybind11 Config should honor prefix for iter by @Shelnutt2 in https://github.com/TileDB-Inc/TileDB-Py/pull/1962
910
* Fix test skipping by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1957

tiledb/cc/vfs.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include <tiledb/tiledb>
2+
#include <tiledb/tiledb_experimental>
23

4+
#include <pybind11/functional.h>
35
#include <pybind11/numpy.h>
46
#include <pybind11/pybind11.h>
57
#include <pybind11/pytypes.h>
@@ -41,6 +43,34 @@ void init_vfs(py::module &m) {
4143
.def("_copy_file", &VFS::copy_file)
4244

4345
.def("_ls", &VFS::ls)
46+
.def(
47+
"_ls_recursive",
48+
// 1. the user provides a callback, the user callback is passed to the
49+
// C++ function. Return an empty vector.
50+
// 2. no callback is passed and a default callback is used. The
51+
// default callback just appends each path gathered. Return the paths.
52+
[](VFS &vfs, std::string uri, py::object callback) {
53+
tiledb::Context ctx;
54+
if (callback.is_none()) {
55+
std::vector<std::string> paths;
56+
tiledb::VFSExperimental::ls_recursive(
57+
ctx, vfs, uri,
58+
[&](const std::string_view path,
59+
uint64_t object_size) -> bool {
60+
paths.push_back(std::string(path));
61+
return true;
62+
});
63+
return paths;
64+
} else {
65+
tiledb::VFSExperimental::ls_recursive(
66+
ctx, vfs, uri,
67+
[&](const std::string_view path,
68+
uint64_t object_size) -> bool {
69+
return callback(path, object_size).cast<bool>();
70+
});
71+
return std::vector<std::string>();
72+
}
73+
})
4474
.def("_touch", &VFS::touch);
4575
}
4676

tiledb/libtiledb.pxd

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,8 +1053,7 @@ cdef extern from "tiledb/tiledb.h":
10531053
tiledb_ctx_t * ctx,
10541054
tiledb_vfs_t * vfs,
10551055
const char * path,
1056-
int (*callback)(const char *, void *) noexcept,
1057-
void * data)
1056+
void * data) nogil
10581057

10591058
int tiledb_vfs_move_file(
10601059
tiledb_ctx_t* ctx,

tiledb/tests/test_vfs.py

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def test_ls(self):
270270
basepath = self.path("test_vfs_ls")
271271
self.vfs.create_dir(basepath)
272272
for id in (1, 2, 3):
273-
dir = os.path.join(basepath, "dir" + str(id))
273+
dir = os.path.join(basepath, f"dir{id}")
274274
self.vfs.create_dir(dir)
275275
fname = os.path.join(basepath, "file_" + str(id))
276276
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
@@ -291,6 +291,106 @@ def test_ls(self):
291291
),
292292
)
293293

294+
@pytest.mark.skipif(
295+
pytest.tiledb_vfs not in ["s3", "file"], reason="Only test on S3 and local"
296+
)
297+
def test_ls_recursive(self):
298+
# Create a nested directory structure to test recursive listing
299+
basepath = self.path("test_vfs_ls_recursive")
300+
self.vfs.create_dir(basepath)
301+
302+
dir = os.path.join(basepath, "dir1")
303+
self.vfs.create_dir(dir)
304+
305+
fname = os.path.join(dir, "file_1")
306+
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
307+
fio.write(b"")
308+
309+
fname = os.path.join(dir, "file_2")
310+
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
311+
fio.write(b"")
312+
313+
dir = os.path.join(basepath, "dir2")
314+
self.vfs.create_dir(dir)
315+
316+
dir2 = os.path.join(dir, "dir2_1")
317+
self.vfs.create_dir(dir2)
318+
319+
fname = os.path.join(dir2, "file_1")
320+
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
321+
fio.write(b"")
322+
fname = os.path.join(dir2, "file_2")
323+
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
324+
fio.write(b"")
325+
326+
dir2 = os.path.join(dir, "dir2_2")
327+
self.vfs.create_dir(dir2)
328+
329+
fname = os.path.join(dir2, "file_1")
330+
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
331+
fio.write(b"")
332+
333+
expected = [
334+
"dir1",
335+
"dir1/file_1",
336+
"dir1/file_2",
337+
"dir2",
338+
"dir2/dir2_1",
339+
"dir2/dir2_1/file_1",
340+
"dir2/dir2_1/file_2",
341+
"dir2/dir2_2",
342+
"dir2/dir2_2/file_1",
343+
]
344+
345+
self.assertSetEqual(
346+
set(expected),
347+
set(
348+
map(
349+
# # Keep only the paths after the basepath and normalize them to work on all platforms
350+
lambda x: os.path.normpath(
351+
x.split("test_vfs_ls_recursive/")[1]
352+
).replace("\\", "/"),
353+
self.vfs.ls_recursive(basepath),
354+
)
355+
),
356+
)
357+
358+
# Check with user provided callback
359+
callback_results = []
360+
361+
def callback(uri, _): # we don't use the second argument 'is_dir'
362+
callback_results.append(uri)
363+
return True
364+
365+
self.vfs.ls_recursive(basepath, callback)
366+
367+
self.assertSetEqual(
368+
set(expected),
369+
set(
370+
map(
371+
# Keep only the paths after the basepath and normalize them to work on all platforms
372+
lambda x: os.path.normpath(
373+
x.split("test_vfs_ls_recursive/")[1]
374+
).replace("\\", "/"),
375+
callback_results,
376+
)
377+
),
378+
)
379+
380+
# Can also be called by calling ls with recursive=True
381+
self.assertSetEqual(
382+
set(expected),
383+
set(
384+
map(
385+
# Keep only the paths after the basepath and normalize them to work on all platforms
386+
lambda x: os.path.normpath(
387+
x.split("test_vfs_ls_recursive/")[1]
388+
).replace("\\", "/"),
389+
self.vfs.ls(basepath, recursive=True),
390+
)
391+
),
392+
)
393+
294394
def test_dir_size(self):
295395
vfs = tiledb.VFS()
296396

tiledb/vfs.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import io
22
import os
33
from types import TracebackType
4-
from typing import List, Optional, Type, Union
4+
from typing import Callable, List, Optional, Type, Union
55

66
import numpy as np
77

@@ -287,17 +287,39 @@ def copy_file(self, old_uri: _AnyPath, new_uri: _AnyPath):
287287
"""
288288
return self._copy_file(_to_path_str(old_uri), _to_path_str(new_uri))
289289

290-
def ls(self, uri: _AnyPath) -> List[str]:
290+
def ls(self, uri: _AnyPath, recursive: bool = False) -> List[str]:
291291
"""Retrieves the children in directory `uri`. This function is
292292
non-recursive, i.e., it focuses in one level below `uri`.
293293
294294
:param str uri: Input URI of the directory
295+
:param bool recursive: If True, recursively list all children in the directory
295296
:rtype: List[str]
296297
:return: The children in directory `uri`
297298
298299
"""
300+
if recursive:
301+
return VFS._ls_recursive(self, _to_path_str(uri), None)
302+
299303
return self._ls(_to_path_str(uri))
300304

305+
def ls_recursive(
306+
self, uri: _AnyPath, callback: Optional[Callable[[str, int], bool]] = None
307+
):
308+
"""Recursively lists objects at the input URI, invoking the provided callback
309+
on each entry gathered. The callback is passed the data pointer provided
310+
on each invocation and is responsible for writing the collected results
311+
into this structure. If the callback returns True, the walk will continue.
312+
If False, the walk will stop. If an error is thrown, the walk will stop and
313+
the error will be propagated to the caller using std::throw_with_nested.
314+
315+
Currently only S3 is supported, and the `path` must be a valid S3 URI.
316+
317+
:param str uri: Input URI of the directory
318+
:param callback: Callback function to invoke on each entry
319+
320+
"""
321+
return VFS._ls_recursive(self, _to_path_str(uri), callback)
322+
301323
def touch(self, uri: _AnyPath):
302324
"""Touches a file with the input URI, i.e., creates a new empty file.
303325

0 commit comments

Comments
 (0)