@@ -50,14 +50,35 @@ class MMapCache(BaseCache):
50
50
Ensure there is enough disc space in the temporary location.
51
51
52
52
This cache method might only work on POSIX systems.
53
+
54
+ Parameters
55
+ ----------
56
+ blocksize: int
57
+ How far to read ahead, in number of bytes
58
+ fetcher: func
59
+ Function of the form f(start, end) which gets bytes from remote as
60
+ specified
61
+ size: int
62
+ How big this file is
63
+ location: str
64
+ Where to create the temporary file. If None, a temporary file is
65
+ created using tempfile.TemporaryFile().
66
+ blocks: set
67
+ Set of block numbers that have already been fetched. If None, an empty
68
+ set is created.
69
+ multi_fetcher: func
70
+ Function of the form f([(start, end), ...]) which gets bytes from remote
71
+ as specified, one result per range, in order. Used to fetch multiple blocks
72
+ at once. If not specified, the fetcher function is used instead.
53
73
"""
54
74
55
75
name = "mmap"
56
76
57
def __init__(
    self,
    blocksize,
    fetcher,
    size,
    location=None,
    blocks=None,
    multi_fetcher=None,
):
    """Set up the cache state and create the backing cache object.

    Parameters
    ----------
    blocksize, fetcher, size
        Forwarded unchanged to the base-class constructor.
    location
        Stored on the instance as ``self.location``.
    blocks
        Stored as ``self.blocks``; a fresh empty set is used when None.
    multi_fetcher
        Stored on the instance as ``self.multi_fetcher``; may be None.
    """
    super().__init__(blocksize, fetcher, size)

    # Every attribute is assigned before _makefile() runs, so the helper
    # sees a fully initialised instance.
    self.location = location
    self.multi_fetcher = multi_fetcher

    if blocks is None:
        blocks = set()
    self.blocks = blocks

    self.cache = self._makefile()
62
83
63
84
def _makefile (self ):
@@ -93,16 +114,30 @@ def _fetch(self, start, end):
93
114
start_block = start // self .blocksize
94
115
end_block = end // self .blocksize
95
116
need = [i for i in range (start_block , end_block + 1 ) if i not in self .blocks ]
117
+ ranges = []
96
118
while need :
97
119
# TODO: not a for loop so we can consolidate blocks later to
98
- # make fewer fetch calls; this could be parallel
120
+ # make fewer fetch calls
99
121
i = need .pop (0 )
100
122
sstart = i * self .blocksize
101
123
send = min (sstart + self .blocksize , self .size )
102
- logger .debug (f"MMap get block #{ i } ({ sstart } -{ send } " )
103
- self .cache [sstart :send ] = self .fetcher (sstart , send )
124
+ ranges .append ((sstart , send ))
104
125
self .blocks .add (i )
105
126
127
+ if not ranges :
128
+ return self .cache [start :end ]
129
+
130
+ if self .multi_fetcher :
131
+ logger .debug (f"MMap get blocks { ranges } " )
132
+ for idx , r in enumerate (self .multi_fetcher (ranges )):
133
+ (sstart , send ) = ranges [idx ]
134
+ logger .debug (f"MMap get block ({ sstart } -{ send } " )
135
+ self .cache [sstart :send ] = r
136
+ else :
137
+ for (sstart , send ) in ranges :
138
+ logger .debug (f"MMap get block ({ sstart } -{ send } " )
139
+ self .cache [sstart :send ] = self .fetcher (sstart , send )
140
+
106
141
return self .cache [start :end ]
107
142
108
143
def __getstate__ (self ):
0 commit comments