10
10
11
11
import logging
12
12
import os
13
- import shutil
14
13
import sys
15
14
from concurrent .futures import ThreadPoolExecutor
16
15
from concurrent .futures import as_completed
17
16
18
17
import tqdm
19
- from anemoi .utils .s3 import download
20
- from anemoi .utils .s3 import upload
18
+ from anemoi .utils .remote import Transfer
19
+ from anemoi .utils .remote import TransferMethodNotImplementedError
21
20
22
21
from . import Command
23
22
29
28
isatty = False
30
29
31
30
32
- class S3Downloader :
33
- def __init__ (self , source , target , transfers , overwrite , resume , verbosity , ** kwargs ):
34
- self .source = source
35
- self .target = target
36
- self .transfers = transfers
37
- self .overwrite = overwrite
38
- self .resume = resume
39
- self .verbosity = verbosity
40
-
41
- def run (self ):
42
- if self .target == "." :
43
- self .target = os .path .basename (self .source )
44
-
45
- if self .overwrite and os .path .exists (self .target ):
46
- LOG .info (f"Deleting { self .target } " )
47
- shutil .rmtree (self .target )
48
-
49
- download (
50
- self .source + "/" if not self .source .endswith ("/" ) else self .source ,
51
- self .target ,
52
- overwrite = self .overwrite ,
53
- resume = self .resume ,
54
- verbosity = self .verbosity ,
55
- threads = self .transfers ,
56
- )
57
-
58
-
59
- class S3Uploader :
60
- def __init__ (self , source , target , transfers , overwrite , resume , verbosity , ** kwargs ):
61
- self .source = source
62
- self .target = target
63
- self .transfers = transfers
64
- self .overwrite = overwrite
65
- self .resume = resume
66
- self .verbosity = verbosity
67
-
68
- def run (self ):
69
- upload (
70
- self .source ,
71
- self .target ,
72
- overwrite = self .overwrite ,
73
- resume = self .resume ,
74
- verbosity = self .verbosity ,
75
- threads = self .transfers ,
76
- )
77
-
78
-
79
- class DefaultCopier :
31
+ class ZarrCopier :
80
32
def __init__ (self , source , target , transfers , block_size , overwrite , resume , verbosity , nested , rechunk , ** kwargs ):
81
33
self .source = source
82
34
self .target = target
@@ -90,6 +42,14 @@ def __init__(self, source, target, transfers, block_size, overwrite, resume, ver
90
42
91
43
self .rechunking = rechunk .split ("," ) if rechunk else []
92
44
45
+ source_is_ssh = self .source .startswith ("ssh://" )
46
+ target_is_ssh = self .target .startswith ("ssh://" )
47
+
48
+ if source_is_ssh or target_is_ssh :
49
+ if self .rechunk :
50
+ raise NotImplementedError ("Rechunking with SSH not implemented." )
51
+ assert NotImplementedError ("SSH not implemented." )
52
+
93
53
def _store (self , path , nested = False ):
94
54
if nested :
95
55
import zarr
@@ -337,26 +297,33 @@ def run(self, args):
337
297
if args .source == args .target :
338
298
raise ValueError ("Source and target are the same." )
339
299
340
- kwargs = vars (args )
341
-
342
300
if args .overwrite and args .resume :
343
301
raise ValueError ("Cannot use --overwrite and --resume together." )
344
302
345
- source_in_s3 = args .source .startswith ("s3://" )
346
- target_in_s3 = args .target .startswith ("s3://" )
347
-
348
- copier = None
349
-
350
- if args .rechunk or (source_in_s3 and target_in_s3 ):
351
- copier = DefaultCopier (** kwargs )
352
- else :
353
- if source_in_s3 :
354
- copier = S3Downloader (** kwargs )
355
-
356
- if target_in_s3 :
357
- copier = S3Uploader (** kwargs )
358
-
303
+ if not args .rechunk :
304
+ # rechunking is only supported for ZARR datasets, it is implemented in this package
305
+ try :
306
+ if args .source .startswith ("s3://" ) and not args .source .endswith ("/" ):
307
+ args .source = args .source + "/"
308
+ copier = Transfer (
309
+ args .source ,
310
+ args .target ,
311
+ overwrite = args .overwrite ,
312
+ resume = args .resume ,
313
+ verbosity = args .verbosity ,
314
+ threads = args .transfers ,
315
+ )
316
+ copier .run ()
317
+ return
318
+ except TransferMethodNotImplementedError :
319
+ # Transfer comes from anemoi-utils, which is agnostic to the source and target format:
320
+ # it transfers files and folders, ignoring that they contain zarr data.
321
+ # If the transfer method is not implemented, we fall back to the ZarrCopier.
322
+ pass
323
+
324
+ copier = ZarrCopier (** vars (args ))
359
325
copier .run ()
326
+ return
360
327
361
328
362
329
class Copy (CopyMixin , Command ):
0 commit comments