Skip to content

Commit 959542a

Browse files
Add ListObjectsV2 support
fix: use bucket_list_v2_streaming when listobjectsv2 enabled
1 parent 22949ba commit 959542a

File tree

5 files changed

+157
-11
lines changed

5 files changed

+157
-11
lines changed

Diff for: .ci.s3cfg

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ limit = -1
4040
limitrate = 0
4141
list_md5 = False
4242
list_allow_unordered = False
43+
enable_list_objects_v2 = False
4344
log_target_prefix =
4445
long_listing = False
4546
max_delete = -1

Diff for: S3/Config.py

+5
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,11 @@ class Config(object):
255255
# This may be faster when listing very large buckets.
256256
list_allow_unordered = False
257257
# Maximum attempts of re-issuing failed requests
258+
259+
# Switch bucket listings to the ListObjectsV2 API when True.
# See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
260+
enable_list_objects_v2 = False
261+
262+
258263
max_retries = 5
259264

260265
## Creating a singleton

Diff for: S3/FileLists.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,15 @@ def _get_filelist_remote(remote_uri, recursive = True):
438438

439439
total_size = 0
440440

441-
s3 = S3(Config())
442-
response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(),
443-
recursive = recursive, uri_params = uri_params)
441+
cfg = Config()
442+
s3 = S3(cfg)
443+
response = s3.bucket_list(
444+
remote_uri.bucket(),
445+
prefix=remote_uri.object(),
446+
recursive=recursive,
447+
uri_params=uri_params,
448+
list_objects_v2=cfg.enable_list_objects_v2
449+
)
444450

445451
rem_base_original = rem_base = remote_uri.object()
446452
remote_uri_original = remote_uri

Diff for: S3/S3.py

+134-7
Original file line numberDiff line numberDiff line change
@@ -317,19 +317,148 @@ def list_all_buckets(self):
317317
response["list"] = getListFromXml(response["data"], "Bucket")
318318
return response
319319

320-
def bucket_list(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
320+
def bucket_list(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
    list_objects_v2=False
):
    """Collect a whole bucket listing into a single response dict.

    Drains either ``bucket_list_v2_streaming`` (when ``list_objects_v2`` is
    true, or when ``uri_params`` already carries ``list_type == "v2"``) or
    ``bucket_list_streaming`` and concatenates every page.

    Parameters:
        bucket: name of the bucket to list.
        prefix: optional key prefix, forwarded to the streaming lister.
        recursive: forwarded unchanged to the streaming lister.
        uri_params: optional dict of extra query parameters. It is copied,
            never modified.
        limit: maximum number of entries to fetch, ``-1`` for no limit;
            forwarded unchanged to the streaming lister.
        list_objects_v2: select the V2 streaming lister.

    Returns:
        dict with keys ``'list'`` (all objects), ``'common_prefixes'`` (all
        prefixes) and ``'truncated'`` (flag reported with the last page).
    """
    # Work on a private copy: the V2 path adds a key below and the caller's
    # dict must not change as a side effect of listing.
    uri_params = dict(uri_params) if uri_params else {}
    if uri_params.get("list_type") == "v2":
        list_objects_v2 = True

    if list_objects_v2:
        # NOTE(review): AWS documents the V2 selector as the query parameter
        # "list-type=2". Presumably create_request or the target endpoint
        # understands "list_type"="v2" -- confirm before relying on it.
        uri_params["list_type"] = "v2"
        lister = self.bucket_list_v2_streaming
    else:
        lister = self.bucket_list_streaming

    item_list = []
    prefixes = []
    # Defined up front so an empty generator cannot leave it unbound.
    truncated = False
    for truncated, dirs, objects in lister(bucket, prefix, recursive, uri_params, limit):
        item_list.extend(objects)
        prefixes.extend(dirs)

    response = {}
    response['list'] = item_list
    response['common_prefixes'] = prefixes
    response['truncated'] = truncated
    return response
332363

364+
def bucket_list_v2_streaming(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
):
    """Generator yielding one ``(truncated, common_prefixes, contents)`` tuple per page.

    Pages are fetched through ``bucket_list_v2_noparse`` and chained with the
    ``NextContinuationToken`` value found in each response.

    Parameters:
        bucket: name of the bucket to list.
        prefix: optional key prefix, forwarded to ``bucket_list_v2_noparse``.
        recursive: forwarded unchanged to ``bucket_list_v2_noparse``.
        uri_params: optional dict of extra query parameters. It is copied,
            never modified.
        limit: maximum number of entries (objects plus prefixes) to fetch,
            ``-1`` for no limit.

    Yields:
        ``(truncated, common_prefixes, contents)``. ``truncated`` is true when
        the response was marked truncated, except that an empty page marked
        truncated is reported as not truncated.
    """
    def _list_truncated(data):
        # <IsTruncated> can either be "true" or "false" or be missing completely
        is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
        return is_truncated.lower() != "false"

    def _get_contents(data):
        return getListFromXml(data, "Contents")

    def _get_common_prefixes(data):
        return getListFromXml(data, "CommonPrefixes")

    def _get_next_continuation_token(data):
        return getTextFromXml(data, "NextContinuationToken")

    base_params = dict(uri_params) if uri_params else {}

    num_objects = 0
    num_prefixes = 0
    max_keys = limit
    next_continuation_token = None
    while True:
        # bucket_list_v2_noparse writes into the dict it receives, so hand it
        # a fresh copy per page; otherwise a stale "continuation-token" could
        # survive into a later request.
        response = self.bucket_list_v2_noparse(
            bucket,
            prefix,
            recursive,
            dict(base_params),
            max_keys,
            next_continuation_token
        )
        data = response["data"]
        current_list = _get_contents(data)
        current_prefixes = _get_common_prefixes(data)
        num_objects += len(current_list)
        num_prefixes += len(current_prefixes)
        fetched = num_objects + num_prefixes
        if limit > fetched:
            # Only ask for what is still missing on the next page.
            max_keys = limit - fetched

        truncated = _list_truncated(data)
        if not truncated:
            yield False, current_prefixes, current_list
            return

        if limit != -1 and fetched >= limit:
            # Limit reached: more keys exist but the caller did not ask for them.
            yield truncated, current_prefixes, current_list
            return

        if not (current_list or current_prefixes):
            # Unexpectedly, the server lied, and so the previous
            # response was not truncated. So, no new key to get.
            yield False, current_prefixes, current_list
            return

        next_continuation_token = _get_next_continuation_token(data)
        if not next_continuation_token:
            # Marked truncated but no token to continue from: stop here
            # instead of requesting the same page forever.
            yield truncated, current_prefixes, current_list
            return

        yield truncated, current_prefixes, current_list
434+
435+
def bucket_list_v2_noparse(
436+
self,
437+
bucket,
438+
prefix=None,
439+
recursive=None,
440+
uri_params={},
441+
max_keys=-1,
442+
continuation_token=None
443+
):
444+
if prefix:
445+
uri_params['prefix'] = prefix
446+
if not self.config.recursive and not recursive:
447+
uri_params['delimiter'] = "/"
448+
if max_keys != -1:
449+
uri_params['max-keys'] = str(max_keys)
450+
if self.config.list_allow_unordered:
451+
uri_params['allow-unordered'] = "true"
452+
if continuation_token:
453+
uri_params["continuation-token"] = continuation_token
454+
request = self.create_request(
455+
"BUCKET_LIST",
456+
bucket=bucket,
457+
uri_params=uri_params
458+
)
459+
response = self.send_request(request)
460+
return response
461+
333462
def bucket_list_streaming(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
334463
""" Generator that produces <dir_list>, <object_list> pairs of groups of content of a specified bucket. """
335464
def _list_truncated(data):
@@ -383,9 +512,7 @@ def _get_next_marker(data, current_elts, key):
383512

384513
yield truncated, current_prefixes, current_list
385514

386-
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = None, max_keys = -1):
387-
if uri_params is None:
388-
uri_params = {}
515+
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}, max_keys = -1):
389516
if prefix:
390517
uri_params['prefix'] = prefix
391518
if not self.config.recursive and not recursive:

Diff for: s3cmd

+8-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,12 @@ def subcmd_bucket_list(s3, uri, limit):
202202
if prefix.endswith('*'):
203203
prefix = prefix[:-1]
204204
try:
205-
response = s3.bucket_list(bucket, prefix = prefix, limit = limit)
205+
response = s3.bucket_list(
206+
bucket,
207+
prefix=prefix,
208+
limit=limit,
209+
list_objects_v2=cfg.enable_list_objects_v2
210+
)
206211
except S3Error as e:
207212
if e.info["Code"] in S3.codes:
208213
error(S3.codes[e.info["Code"]] % bucket)
@@ -3234,6 +3239,8 @@ def main():
32343239

32353240
optparser.add_option( "--list-allow-unordered", dest="list_allow_unordered", action="store_true", help="Not an AWS standard. Allow the listing results to be returned in unsorted order. This may be faster when listing very large buckets.")
32363241

3242+
optparser.add_option( "--enable_list_objects_v2", dest="enable_list_objects_v2", action="store_true", help="Switches list API to ListObjectsV2")
3243+
32373244
optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
32383245

32393246
optparser.add_option( "--ws-index", dest="website_index", action="store", help="Name of index-document (only for [ws-create] command)")

0 commit comments

Comments
 (0)