Skip to content

Commit a139d88

Browse files
committed
fix: fix an issue where pagination on list files did not work properly
1 parent 42cccdc commit a139d88

File tree

3 files changed

+142
-6
lines changed

3 files changed

+142
-6
lines changed

libs/foundry-dev-tools/src/foundry_dev_tools/clients/catalog.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def list_dataset_files(
2727
page_size: int = 1000,
2828
logical_path: api_types.PathInDataset | None = None,
2929
page_start_logical_path: api_types.PathInDataset | None = None,
30+
start_transaction_rid: api_types.TransactionRid | None = None,
3031
include_open_exclusive_transaction: bool = False,
3132
exclude_hidden_files: bool = False,
3233
temporary_credentials_auth_token: str | None = None,
@@ -43,6 +44,8 @@ def list_dataset_files(
4344
(a slash is added to the end of logicalPath if necessary and a prefix-match is performed)
4445
page_start_logical_path: if specified page starts at the given path,
4546
otherwise at the beginning of the file list
47+
start_transaction_rid: if a startTransactionRid is given, the view starting at the startTransactionRid
48+
and ending at the endRef is returned
4649
include_open_exclusive_transaction: if files added in open transaction should be returned
4750
as well in the response
4851
exclude_hidden_files: if hidden files should be excluded (e.g. _log files)
@@ -62,7 +65,7 @@ def list_dataset_files(
6265
]
6366
"""
6467

65-
def _inner_get(next_page_token: str | None = None) -> dict:
68+
def _inner_get(page_start_logical_path: str | None = None) -> dict:
6669
return self.api_get_dataset_view_files3(
6770
dataset_rid=dataset_rid,
6871
end_ref=end_ref,
@@ -71,14 +74,17 @@ def _inner_get(next_page_token: str | None = None) -> dict:
7174
page_start_logical_path=page_start_logical_path,
7275
include_open_exclusive_transaction=include_open_exclusive_transaction,
7376
exclude_hidden_files=exclude_hidden_files,
74-
start_transaction_rid=next_page_token,
77+
start_transaction_rid=start_transaction_rid,
7578
temporary_credentials_auth_token=temporary_credentials_auth_token,
7679
).json()
7780

7881
result: list[dict] = []
79-
batch_result = {"nextPageToken": ""}
80-
while batch_result["nextPageToken"] is not None:
81-
batch_result = _inner_get(next_page_token=batch_result["nextPageToken"])
82+
first_result = _inner_get(page_start_logical_path=page_start_logical_path)
83+
result.extend(first_result["values"])
84+
next_page_token = first_result.get("nextPageToken", None)
85+
while next_page_token is not None:
86+
batch_result = _inner_get(page_start_logical_path=next_page_token)
87+
next_page_token = batch_result.get("nextPageToken", None)
8288
result.extend(batch_result["values"]) # type: ignore[arg-type]
8389
return result
8490

@@ -110,7 +116,8 @@ def api_get_dataset_view_files3(
110116
include_open_exclusive_transaction: if files added in open transaction should be returned
111117
as well in the response
112118
exclude_hidden_files: if hidden files should be excluded (e.g. _log files)
113-
start_transaction_rid: start transaction rid
119+
start_transaction_rid: if a startTransactionRid is given, the view starting at the startTransactionRid
120+
and ending at the endRef is returned
114121
temporary_credentials_auth_token: to generate temporary credentials for presigned URLs
115122
**kwargs: gets passed to :py:meth:`APIClient.api_request`
116123

tests/integration/test_general_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ def test_monster_integration_test(): # noqa: PLR0915
112112

113113
TEST_SINGLETON.v1_client.commit_transaction(ds["rid"], transaction_rid)
114114

115+
files = TEST_SINGLETON.ctx.catalog.list_dataset_files(dataset_rid=ds["rid"], end_ref=branch, page_size=1)
116+
117+
assert len(files) == 3
118+
115119
schema = {
116120
"fieldSchemaList": [
117121
{"type": "INTEGER", "name": "col1", "customMetadata": {}},

tests/unit/clients/test_catalog.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from foundry_dev_tools.utils.clients import build_api_url
2+
from tests.unit.mocks import TEST_HOST
3+
4+
5+
def test_list_dataset_files(test_context_mock):
    """Check that ``list_dataset_files`` keeps requesting pages until ``nextPageToken`` is ``None``.

    Three single-file responses are queued for the ``files3`` endpoint of dataset ``rid`` on
    view ``master``. The first two carry a ``nextPageToken``; the last one sets it to ``None``.
    The call is expected to return all three files and to hit the endpoint exactly three times.
    """

    def _page(logical_path, physical_path, next_page_token):
        # One queued mock response: a single file entry plus the token for the following page.
        return {
            "json": {
                "values": [
                    {
                        "logicalPath": logical_path,
                        "physicalPath": physical_path,
                        "physicalUri": "https://s3.eu-central-1.amazonaws.com/",
                        "transactionRid": "ri.foundry.main.transaction.000002c3-6680-ad68-8d6b-500ef09cbd46",
                        "fileMetadata": {"length": 523},
                        "isOpen": False,
                        "timeModified": "2024-12-05T14:36:18.413Z",
                    }
                ],
                "nextPageToken": next_page_token,
            }
        }

    files3_url = build_api_url(TEST_HOST.url, "foundry-catalog", "catalog/datasets/rid/views/master/files3")
    mocked_pages = [
        _page("dummy_file_0.parquet", "1234", "dummy_file_1.parquet"),
        _page("dummy_file_1.parquet", "2345", "dummy_file_2.parquet"),
        # Explicit ``None`` token marks the final page.
        _page("dummy_file_2.parquet", "234234", None),
    ]
    test_context_mock.mock_adapter.register_uri("PUT", files3_url, response_list=mocked_pages)

    files = test_context_mock.catalog.list_dataset_files(dataset_rid="rid", page_size=1)

    assert len(files) == 3
    assert test_context_mock.mock_adapter.call_count == 3
65+
66+
67+
def test_list_dataset_files_nextpagetoken_not_present(test_context_mock):
    """Check that ``list_dataset_files`` stops paging when ``nextPageToken`` is absent.

    Three single-file responses are queued for the ``files3`` endpoint of dataset ``rid`` on
    view ``master``. The first two carry a ``nextPageToken``; the last response has no such key
    at all. The call is expected to return all three files and to hit the endpoint exactly
    three times.
    """
    # (logicalPath, physicalPath) of the single file contained in each mocked page, in order.
    file_specs = [
        ("dummy_file_0.parquet", "1234"),
        ("dummy_file_1.parquet", "2345"),
        ("dummy_file_2.parquet", "234234"),
    ]

    mocked_pages = []
    for position, (logical_path, physical_path) in enumerate(file_specs):
        payload = {
            "values": [
                {
                    "logicalPath": logical_path,
                    "physicalPath": physical_path,
                    "physicalUri": "https://s3.eu-central-1.amazonaws.com/",
                    "transactionRid": "ri.foundry.main.transaction.000002c3-6680-ad68-8d6b-500ef09cbd46",
                    "fileMetadata": {"length": 523},
                    "isOpen": False,
                    "timeModified": "2024-12-05T14:36:18.413Z",
                }
            ]
        }
        following = position + 1
        if following < len(file_specs):
            # Every page but the last points at the logical path of the next file;
            # the last page deliberately omits the key instead of setting it to None.
            payload["nextPageToken"] = file_specs[following][0]
        mocked_pages.append({"json": payload})

    files3_url = build_api_url(TEST_HOST.url, "foundry-catalog", "catalog/datasets/rid/views/master/files3")
    test_context_mock.mock_adapter.register_uri("PUT", files3_url, response_list=mocked_pages)

    files = test_context_mock.catalog.list_dataset_files(dataset_rid="rid", page_size=1)

    assert len(files) == 3
    assert test_context_mock.mock_adapter.call_count == 3

0 commit comments

Comments
 (0)