Skip to content

Commit 79c5782

Browse files
committed
sort list of files
1 parent 2d04683 commit 79c5782

File tree

1 file changed

+4
-1
lines changed
  • src/datatrove/pipeline/dedup/fast_mh3/src

1 file changed

+4
-1
lines changed

src/datatrove/pipeline/dedup/fast_mh3/src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,13 +232,16 @@ async fn list_s3_files(client: &Client, s3_path: &S3Path, total_files: usize) ->
old  new
232  232    .context("Failed to list S3 objects")
233  233    }).await?;
234  234
235       - let files: Vec<String> = resp
     235  + let mut files: Vec<String> = resp
236  236    .contents()
237  237    .iter()
238  238    .filter_map(|obj| obj.key()
239  239    .map(|key| format!("s3://{}/{}", s3_path.bucket, key)))
240  240    .collect();
241  241
     242  + // Sort files lexicographically
     243  + files.sort();
     244  +
242  245    if files.len() != total_files {
243  246    anyhow::bail!(
244  247    "Expected {} files, found {} in s3://{}/{}",

0 commit comments

Comments
 (0)