4
4
import os
5
5
import re
6
6
import threading
7
+ from glob import has_magic
7
8
8
9
from .spec import AbstractFileSystem
9
10
from .utils import is_exception , other_paths
12
13
lock = threading .Lock ()
13
14
14
15
15
- def _run_until_done (loop , coro ):
16
- """execute coroutine, when already in the event loop"""
17
- # raise Nested
18
- with lock :
19
- task = asyncio .current_task (loop = loop )
20
- if task :
21
- asyncio .tasks ._unregister_task (task )
22
- asyncio .tasks ._current_tasks .pop (loop , None )
23
- runner = loop .create_task (coro )
24
- try :
25
- while not runner .done ():
26
- try :
27
- loop ._run_once ()
28
- except (IndexError , RuntimeError ):
29
- pass
30
- finally :
31
- if task :
32
- with lock :
33
- asyncio .tasks ._current_tasks [loop ] = task
34
- return runner .result ()
35
-
36
-
37
16
def sync(loop, func, *args, callback_timeout=None, **kwargs):
    """
    Make loop run coroutine until it returns. Runs in this thread

    Parameters
    ----------
    loop: event loop
        Must not already be running.
    func: callable
        Called as ``func(*args, **kwargs)``; the result is handed to
        ``loop.run_until_complete``.
    callback_timeout:
        Accepted in the signature but not used by this function.

    Returns
    -------
    Whatever ``loop.run_until_complete`` returns.

    Raises
    ------
    NotImplementedError
        If ``loop`` is already running (nested execution is not supported).
    """
    if loop.is_running():
        # Check before calling ``func``: otherwise a coroutine object would be
        # created and then dropped without ever being awaited.
        raise NotImplementedError
    return loop.run_until_complete(func(*args, **kwargs))
@@ -60,12 +39,8 @@ def maybe_sync(func, self, *args, **kwargs):
60
39
except RuntimeError :
61
40
loop0 = None
62
41
if loop0 is not None and loop0 .is_running ():
63
- if inspect .iscoroutinefunction (func ):
64
- # run coroutine while pausing this one (because we are within async)
65
- return _run_until_done (loop , func (* args , ** kwargs ))
66
- else :
67
- # make awaitable which then calls the blocking function
68
- raise NotImplementedError ()
42
+ # TEMPORARY - to be removed
43
+ raise NotImplementedError ()
69
44
else :
70
45
if inspect .iscoroutinefunction (func ):
71
46
# run the awaitable on the loop
@@ -119,11 +94,6 @@ def get_loop():
119
94
"_rm_file" ,
120
95
"_cp_file" ,
121
96
"_pipe_file" ,
122
- ]
123
- # these methods could be overridden, but have default sync versions which rely on _ls
124
- # the sync methods below all call expand_path, which in turn may call walk or glob
125
- # (if passed paths with glob characters, or for recursive=True, respectively)
126
- default_async_methods = [
127
97
"_expand_path" ,
128
98
"_info" ,
129
99
"_isfile" ,
@@ -133,6 +103,7 @@ def get_loop():
133
103
"_glob" ,
134
104
"_find" ,
135
105
"_du" ,
106
+ "_size" ,
136
107
]
137
108
138
109
@@ -167,6 +138,9 @@ def loop(self):
167
138
self ._loop .loop = get_loop ()
168
139
return self ._loop .loop
169
140
141
async def _rm_file(self, path, **kwargs):
    """Remove one file (placeholder).

    This default does nothing but raise ``NotImplementedError``; presumably
    concrete filesystems supply the real implementation.
    """
    raise NotImplementedError
143
+
170
144
async def _rm (self , path , recursive = False , ** kwargs ):
171
145
await asyncio .gather (* [self ._rm_file (p , ** kwargs ) for p in path ])
172
146
@@ -269,6 +243,210 @@ def get(self, rpath, lpath, recursive=False, **kwargs):
269
243
[os .makedirs (os .path .dirname (lp ), exist_ok = True ) for lp in lpaths ]
270
244
return sync (self .loop , self ._get , rpaths , lpaths )
271
245
246
async def _info(self, path):
    """Return the details record for ``path`` (placeholder).

    This default only raises ``NotImplementedError``. Sibling helpers read
    the keys ``"type"``, ``"name"`` and ``"size"`` from the returned mapping.
    """
    raise NotImplementedError
248
+
249
async def _isfile(self, path):
    """Return True if ``path`` is reported as a file, False otherwise.

    Any ordinary error from ``self._info`` (or a record without a ``"type"``
    key) is treated as "not a file".
    """
    try:
        info = await self._info(path)
        return info["type"] == "file"
    except Exception:
        # Deliberately broad, but not a bare ``except``: cancellation,
        # KeyboardInterrupt and SystemExit must still propagate.
        return False
254
+
255
async def _isdir(self, path):
    """Return True if ``path`` is reported as a directory.

    An ``IOError`` while looking the path up yields False; other exceptions
    propagate.
    """
    try:
        details = await self._info(path)
        is_directory = details["type"] == "directory"
    except IOError:
        is_directory = False
    return is_directory
260
+
261
async def _size(self, path):
    """Return the ``"size"`` entry of the info record for ``path``, or None if absent."""
    details = await self._info(path)
    return details.get("size")
263
+
264
async def _exists(self, path):
    """Return whether ``self._info(path)`` succeeds.

    Only ``FileNotFoundError`` is read as "does not exist"; any other
    exception propagates.
    """
    try:
        await self._info(path)
    except FileNotFoundError:
        return False
    else:
        return True
270
+
271
async def _ls(self, path, **kwargs):
    """List the entries under ``path`` (placeholder).

    This default only raises ``NotImplementedError``. The tree walker calls
    it with ``detail=True`` and reads ``"name"`` and ``"type"`` from each
    returned entry.
    """
    raise NotImplementedError
273
+
274
async def _walk(self, path, maxdepth=None, **kwargs):
    """Asynchronously walk the tree below ``path``, top-down.

    Async generator yielding ``(path, dirs, files)`` for ``path`` and then
    for each sub-directory found.

    Parameters
    ----------
    path: str
        Root of the walk; passed through ``self._strip_protocol`` first.
    maxdepth: int or None
        Number of levels to list; ``None`` means no limit.
    kwargs:
        ``detail`` (default False) is consumed here: when true, ``dirs`` and
        ``files`` are dicts of ``{basename: info}``, otherwise lists of
        basenames. Everything else is forwarded to ``self._ls``.

    If ``path`` cannot be listed (``FileNotFoundError``/``IOError``), a single
    record with empty ``dirs``/``files`` is yielded and the walk stops.
    """
    path = self._strip_protocol(path)
    detail = kwargs.pop("detail", False)

    try:
        listing = await self._ls(path, detail=True, **kwargs)
    except (FileNotFoundError, IOError):
        # Keep the same record shape as the normal case (path first, dict or
        # list containers according to ``detail``) so that consumers can
        # treat every yielded item uniformly.
        if detail:
            yield path, {}, {}
        else:
            yield path, [], []
        return

    full_dirs = {}
    dirs = {}
    files = {}
    for info in listing:
        # each info name must be at least [path]/part , but here
        # we check also for names like [path]/part/
        pathname = info["name"].rstrip("/")
        name = pathname.rsplit("/", 1)[-1]
        if info["type"] == "directory" and pathname != path:
            # do not include "self" path
            full_dirs[pathname] = info
            dirs[name] = info
        elif pathname == path:
            # file-like with same name as the given path
            files[""] = info
        else:
            files[name] = info

    if detail:
        yield path, dirs, files
    else:
        yield path, list(dirs), list(files)

    if maxdepth is not None:
        maxdepth -= 1
        if maxdepth < 1:
            return

    for d in full_dirs:
        async for _ in self._walk(d, maxdepth=maxdepth, detail=detail, **kwargs):
            yield _
315
+
316
async def _glob(self, path, **kwargs):
    """Find paths matching a glob pattern.

    ``*`` matches within one path component, ``**`` matches across
    components, ``?`` matches a single character; ``[`` is passed through to
    the regular expression unchanged.

    Parameters
    ----------
    path: str
        Pattern (or plain path); passed through ``self._strip_protocol``.
        A plain path ending in ``/`` is treated as ``path/*``.
    kwargs:
        ``detail`` (default False) is consumed here: when true a dict of
        ``{path: info}`` is returned, otherwise a list of paths. Everything
        else is forwarded to ``self._find``.

    A plain path with no trailing ``/`` returns just itself if it exists,
    or an empty result if it does not.
    """
    ends = path.endswith("/")
    path = self._strip_protocol(path)
    # index of the first glob metacharacter, or len(path) when there is none
    ind = min((i for i in map(path.find, "*?[") if i >= 0), default=len(path))

    detail = kwargs.pop("detail", False)

    if not has_magic(path):
        root = path
        depth = 1
        if ends:
            path += "/*"
        elif await self._exists(path):
            if not detail:
                return [path]
            else:
                return {path: await self._info(path)}
        else:
            if not detail:
                return []  # glob of non-existent returns empty
            else:
                return {}
    elif "/" in path[:ind]:
        # search from the deepest directory that contains no wildcard
        ind2 = path[:ind].rindex("/")
        root = path[: ind2 + 1]
        depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1
    else:
        root = ""
        depth = None if "**" in path else path[ind + 1 :].count("/") + 1

    allpaths = await self._find(
        root, maxdepth=depth, withdirs=True, detail=True, **kwargs
    )
    # Escape characters special to python regex, leaving our supported
    # special characters in place.
    # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
    # for shell globbing details.
    pattern = (
        "^"
        + (
            # the backslash must be escaped first, before the replacements
            # below introduce backslashes of their own
            path.replace("\\", r"\\")
            .replace(".", r"\.")
            .replace("+", r"\+")
            .replace("//", "/")
            .replace("(", r"\(")
            .replace(")", r"\)")
            .replace("|", r"\|")
            .replace("^", r"\^")
            .replace("$", r"\$")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .rstrip("/")
            .replace("?", ".")
        )
        + "$"
    )
    # protect "**" while single "*" is rewritten, then expand it
    pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern)
    pattern = re.sub("[*]", "[^/]*", pattern)
    pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*"))
    out = {
        p: allpaths[p]
        for p in sorted(allpaths)
        if pattern.match(p.replace("//", "/").rstrip("/"))
    }
    if detail:
        return out
    else:
        return list(out)
390
+
391
async def _du(self, path, total=True, maxdepth=None, **kwargs):
    """Report the space used by the entries found under ``path``.

    Each name returned by ``self._find`` is looked up with ``self._info``
    and its ``"size"`` recorded under its ``"name"``.

    Returns the summed size when ``total`` is true, otherwise the
    ``{name: size}`` dict. ``maxdepth`` and ``kwargs`` go to ``self._find``.
    """
    usage = {}
    # lookups are awaited one at a time, in the order _find returned them
    for found in await self._find(path, maxdepth=maxdepth, **kwargs):
        details = await self._info(found)
        usage[details["name"]] = details["size"]
    return sum(usage.values()) if total else usage
401
+
402
async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
    """List everything below ``path``, keyed by each entry's ``"name"``.

    Parameters
    ----------
    path: str
        Root of the search; passed through ``self._strip_protocol``.
    maxdepth: int or None
        Forwarded to ``self._walk``.
    withdirs: bool
        Include directories as well as files.
    kwargs:
        ``detail`` (default False) is consumed here: when true a dict of
        ``{name: info}`` is returned, otherwise a sorted list of names.
        Everything else is forwarded to ``self._walk``.
    """
    path = self._strip_protocol(path)
    detail = kwargs.pop("detail", False)
    out = {}
    async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
        # Work on a copy instead of mutating what the walker yielded. dict()
        # also accepts empty lists, which a walker may yield for a path it
        # could not list, so that case contributes nothing instead of failing.
        entries = dict(files)
        if withdirs:
            entries.update(dirs)
        out.update({info["name"]: info for info in entries.values()})
    if (await self._isfile(path)) and path not in out:
        # walk works on directories, but find should also return [path]
        # when path happens to be a file
        out[path] = {}
    names = sorted(out)
    if not detail:
        return names
    return {name: out[name] for name in names}
420
+
421
async def _expand_path(self, path, recursive=False, maxdepth=None):
    """Turn one path or a list of paths into a sorted list of concrete paths.

    Glob patterns are expanded with ``self._glob``; with ``recursive`` the
    contents found by ``self._find`` (directories included) are added too.
    A plain path is kept as given when not recursive, and only if it exists
    when recursive.

    Raises ``FileNotFoundError`` if nothing at all was produced.
    """
    if isinstance(path, str):
        # a lone string is handled as a one-element list
        return await self._expand_path([path], recursive, maxdepth)

    # reduce depth on each recursion level unless None or 0
    if maxdepth:
        maxdepth -= 1
    path = [self._strip_protocol(p) for p in path]
    found = set()
    for candidate in path:  # can gather here
        if has_magic(candidate):
            matches = set(await self._glob(candidate))
            found.update(matches)
            if recursive:
                nested = await self._expand_path(
                    list(matches), recursive=recursive, maxdepth=maxdepth
                )
                found.update(nested)
            continue
        if recursive:
            found.update(
                await self._find(candidate, maxdepth=maxdepth, withdirs=True)
            )
        if candidate not in found and (
            recursive is False or (await self._exists(candidate))
        ):
            # should only check once, for the root
            found.add(candidate)
    if not found:
        raise FileNotFoundError(path)
    return sorted(found)
449
+
272
450
273
451
def mirror_sync_methods (obj ):
274
452
"""Populate sync and async methods for obj
@@ -285,7 +463,7 @@ def mirror_sync_methods(obj):
285
463
"""
286
464
from fsspec import AbstractFileSystem
287
465
288
- for method in async_methods + default_async_methods + dir (AsyncFileSystem ):
466
+ for method in async_methods + dir (AsyncFileSystem ):
289
467
if not method .startswith ("_" ):
290
468
continue
291
469
smethod = method [1 :]
0 commit comments