@@ -163,6 +163,58 @@ FileHandle::FileHandle(const std::string& file_path,
163
163
}
164
164
}
165
165
166
+ FileHandle::FileHandle (FileHandle&& o) noexcept
167
+ : _fd_direct_on{std::exchange (o._fd_direct_on , -1 )},
168
+ _fd_direct_off{std::exchange (o._fd_direct_off , -1 )},
169
+ _initialized{std::exchange (o._initialized , false )},
170
+ _compat_mode{std::exchange (o._compat_mode , CompatMode::AUTO)},
171
+ _nbytes{std::exchange (o._nbytes , 0 )},
172
+ _handle{std::exchange (o._handle , CUfileHandle_t{})}
173
+ {
174
+ }
175
+
176
+ FileHandle& FileHandle::operator =(FileHandle&& o) noexcept
177
+ {
178
+ _fd_direct_on = std::exchange (o._fd_direct_on , -1 );
179
+ _fd_direct_off = std::exchange (o._fd_direct_off , -1 );
180
+ _initialized = std::exchange (o._initialized , false );
181
+ _compat_mode = std::exchange (o._compat_mode , CompatMode::AUTO);
182
+ _nbytes = std::exchange (o._nbytes , 0 );
183
+ _handle = std::exchange (o._handle , CUfileHandle_t{});
184
+ return *this ;
185
+ }
186
+
187
+ FileHandle::~FileHandle () noexcept { close (); }
188
+
189
+ bool FileHandle::closed () const noexcept { return !_initialized; }
190
+
191
+ void FileHandle::close () noexcept
192
+ {
193
+ try {
194
+ if (closed ()) { return ; }
195
+
196
+ if (!is_compat_mode_preferred ()) { cuFileAPI::instance ().HandleDeregister (_handle); }
197
+ _compat_mode = CompatMode::AUTO;
198
+ ::close (_fd_direct_off);
199
+ if (_fd_direct_on != -1 ) { ::close (_fd_direct_on); }
200
+ _fd_direct_on = -1 ;
201
+ _fd_direct_off = -1 ;
202
+ _initialized = false ;
203
+ } catch (...) {
204
+ }
205
+ }
206
+
207
+ CUfileHandle_t FileHandle::handle ()
208
+ {
209
+ if (closed ()) { throw CUfileException (" File handle is closed" ); }
210
+ if (is_compat_mode_preferred ()) {
211
+ throw CUfileException (" The underlying cuFile handle isn't available in compatibility mode" );
212
+ }
213
+ return _handle;
214
+ }
215
+
216
+ int FileHandle::fd () const noexcept { return _fd_direct_off; }
217
+
166
218
// Returns the result of open_flags() for the descriptor stored in `_fd_direct_off`.
int FileHandle::fd_open_flags() const
{
  return open_flags(_fd_direct_off);
}
167
219
168
220
std::size_t FileHandle::nbytes () const
@@ -172,4 +224,232 @@ std::size_t FileHandle::nbytes() const
172
224
return _nbytes;
173
225
}
174
226
227
+ std::size_t FileHandle::read (void * devPtr_base,
228
+ std::size_t size,
229
+ std::size_t file_offset,
230
+ std::size_t devPtr_offset,
231
+ bool sync_default_stream)
232
+ {
233
+ if (is_compat_mode_preferred ()) {
234
+ return detail::posix_device_read (_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
235
+ }
236
+ if (sync_default_stream) { CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (nullptr )); }
237
+
238
+ KVIKIO_NVTX_SCOPED_RANGE (" cufileRead()" , size);
239
+ ssize_t ret = cuFileAPI::instance ().Read (
240
+ _handle, devPtr_base, size, convert_size2off (file_offset), convert_size2off (devPtr_offset));
241
+ CUFILE_CHECK_BYTES_DONE (ret);
242
+ return ret;
243
+ }
244
+
245
+ std::size_t FileHandle::write (const void * devPtr_base,
246
+ std::size_t size,
247
+ std::size_t file_offset,
248
+ std::size_t devPtr_offset,
249
+ bool sync_default_stream)
250
+ {
251
+ _nbytes = 0 ; // Invalidate the computed file size
252
+
253
+ if (is_compat_mode_preferred ()) {
254
+ return detail::posix_device_write (
255
+ _fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
256
+ }
257
+ if (sync_default_stream) { CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (nullptr )); }
258
+
259
+ KVIKIO_NVTX_SCOPED_RANGE (" cufileWrite()" , size);
260
+ ssize_t ret = cuFileAPI::instance ().Write (
261
+ _handle, devPtr_base, size, convert_size2off (file_offset), convert_size2off (devPtr_offset));
262
+ if (ret == -1 ) {
263
+ throw std::system_error (errno, std::generic_category (), " Unable to write file" );
264
+ }
265
+ if (ret < -1 ) {
266
+ throw CUfileException (std::string{" cuFile error at: " } + __FILE__ + " :" +
267
+ KVIKIO_STRINGIFY (__LINE__) + " : " + CUFILE_ERRSTR (ret));
268
+ }
269
+ return ret;
270
+ }
271
+
272
+ std::future<std::size_t > FileHandle::pread (void * buf,
273
+ std::size_t size,
274
+ std::size_t file_offset,
275
+ std::size_t task_size,
276
+ std::size_t gds_threshold,
277
+ bool sync_default_stream)
278
+ {
279
+ KVIKIO_NVTX_MARKER (" FileHandle::pread()" , size);
280
+ if (is_host_memory (buf)) {
281
+ auto op = [this ](void * hostPtr_base,
282
+ std::size_t size,
283
+ std::size_t file_offset,
284
+ std::size_t hostPtr_offset) -> std::size_t {
285
+ char * buf = static_cast <char *>(hostPtr_base) + hostPtr_offset;
286
+ return detail::posix_host_read<detail::PartialIO::NO>(_fd_direct_off, buf, size, file_offset);
287
+ };
288
+
289
+ return parallel_io (op, buf, size, file_offset, task_size, 0 );
290
+ }
291
+
292
+ CUcontext ctx = get_context_from_pointer (buf);
293
+
294
+ // Shortcut that circumvent the threadpool and use the POSIX backend directly.
295
+ if (size < gds_threshold) {
296
+ auto task = [this , ctx, buf, size, file_offset]() -> std::size_t {
297
+ PushAndPopContext c (ctx);
298
+ return detail::posix_device_read (_fd_direct_off, buf, size, file_offset, 0 );
299
+ };
300
+ return std::async (std::launch::deferred, task);
301
+ }
302
+
303
+ // Let's synchronize once instead of in each task.
304
+ if (sync_default_stream && !is_compat_mode_preferred ()) {
305
+ PushAndPopContext c (ctx);
306
+ CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (nullptr ));
307
+ }
308
+
309
+ // Regular case that use the threadpool and run the tasks in parallel
310
+ auto task = [this , ctx](void * devPtr_base,
311
+ std::size_t size,
312
+ std::size_t file_offset,
313
+ std::size_t devPtr_offset) -> std::size_t {
314
+ PushAndPopContext c (ctx);
315
+ return read (devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false );
316
+ };
317
+ auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info (buf, &ctx);
318
+ return parallel_io (task, devPtr_base, size, file_offset, task_size, devPtr_offset);
319
+ }
320
+
321
+ std::future<std::size_t > FileHandle::pwrite (const void * buf,
322
+ std::size_t size,
323
+ std::size_t file_offset,
324
+ std::size_t task_size,
325
+ std::size_t gds_threshold,
326
+ bool sync_default_stream)
327
+ {
328
+ KVIKIO_NVTX_MARKER (" FileHandle::pwrite()" , size);
329
+ if (is_host_memory (buf)) {
330
+ auto op = [this ](const void * hostPtr_base,
331
+ std::size_t size,
332
+ std::size_t file_offset,
333
+ std::size_t hostPtr_offset) -> std::size_t {
334
+ const char * buf = static_cast <const char *>(hostPtr_base) + hostPtr_offset;
335
+ return detail::posix_host_write<detail::PartialIO::NO>(
336
+ _fd_direct_off, buf, size, file_offset);
337
+ };
338
+
339
+ return parallel_io (op, buf, size, file_offset, task_size, 0 );
340
+ }
341
+
342
+ CUcontext ctx = get_context_from_pointer (buf);
343
+
344
+ // Shortcut that circumvent the threadpool and use the POSIX backend directly.
345
+ if (size < gds_threshold) {
346
+ auto task = [this , ctx, buf, size, file_offset]() -> std::size_t {
347
+ PushAndPopContext c (ctx);
348
+ return detail::posix_device_write (_fd_direct_off, buf, size, file_offset, 0 );
349
+ };
350
+ return std::async (std::launch::deferred, task);
351
+ }
352
+
353
+ // Let's synchronize once instead of in each task.
354
+ if (sync_default_stream && !is_compat_mode_preferred ()) {
355
+ PushAndPopContext c (ctx);
356
+ CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (nullptr ));
357
+ }
358
+
359
+ // Regular case that use the threadpool and run the tasks in parallel
360
+ auto op = [this , ctx](const void * devPtr_base,
361
+ std::size_t size,
362
+ std::size_t file_offset,
363
+ std::size_t devPtr_offset) -> std::size_t {
364
+ PushAndPopContext c (ctx);
365
+ return write (devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false );
366
+ };
367
+ auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info (buf, &ctx);
368
+ return parallel_io (op, devPtr_base, size, file_offset, task_size, devPtr_offset);
369
+ }
370
+
371
+ void FileHandle::read_async (void * devPtr_base,
372
+ std::size_t * size_p,
373
+ off_t * file_offset_p,
374
+ off_t * devPtr_offset_p,
375
+ ssize_t * bytes_read_p,
376
+ CUstream stream)
377
+ {
378
+ if (is_compat_mode_preferred_for_async (_compat_mode)) {
379
+ CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (stream));
380
+ *bytes_read_p =
381
+ static_cast <ssize_t >(read (devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p));
382
+ } else {
383
+ CUFILE_TRY (cuFileAPI::instance ().ReadAsync (
384
+ _handle, devPtr_base, size_p, file_offset_p, devPtr_offset_p, bytes_read_p, stream));
385
+ }
386
+ }
387
+
388
+ StreamFuture FileHandle::read_async (
389
+ void * devPtr_base, std::size_t size, off_t file_offset, off_t devPtr_offset, CUstream stream)
390
+ {
391
+ StreamFuture ret (devPtr_base, size, file_offset, devPtr_offset, stream);
392
+ auto [devPtr_base_, size_p, file_offset_p, devPtr_offset_p, bytes_read_p, stream_] =
393
+ ret.get_args ();
394
+ read_async (devPtr_base_, size_p, file_offset_p, devPtr_offset_p, bytes_read_p, stream_);
395
+ return ret;
396
+ }
397
+
398
+ void FileHandle::write_async (void * devPtr_base,
399
+ std::size_t * size_p,
400
+ off_t * file_offset_p,
401
+ off_t * devPtr_offset_p,
402
+ ssize_t * bytes_written_p,
403
+ CUstream stream)
404
+ {
405
+ if (is_compat_mode_preferred_for_async (_compat_mode)) {
406
+ CUDA_DRIVER_TRY (cudaAPI::instance ().StreamSynchronize (stream));
407
+ *bytes_written_p =
408
+ static_cast <ssize_t >(write (devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p));
409
+ } else {
410
+ CUFILE_TRY (cuFileAPI::instance ().WriteAsync (
411
+ _handle, devPtr_base, size_p, file_offset_p, devPtr_offset_p, bytes_written_p, stream));
412
+ }
413
+ }
414
+
415
+ StreamFuture FileHandle::write_async (
416
+ void * devPtr_base, std::size_t size, off_t file_offset, off_t devPtr_offset, CUstream stream)
417
+ {
418
+ StreamFuture ret (devPtr_base, size, file_offset, devPtr_offset, stream);
419
+ auto [devPtr_base_, size_p, file_offset_p, devPtr_offset_p, bytes_written_p, stream_] =
420
+ ret.get_args ();
421
+ write_async (devPtr_base_, size_p, file_offset_p, devPtr_offset_p, bytes_written_p, stream_);
422
+ return ret;
423
+ }
424
+
425
+ bool FileHandle::is_compat_mode_preferred () const noexcept
426
+ {
427
+ return defaults::is_compat_mode_preferred (_compat_mode);
428
+ }
429
+
430
+ bool FileHandle::is_compat_mode_preferred_for_async () const noexcept
431
+ {
432
+ static bool is_extra_symbol_available = is_stream_api_available ();
433
+ static bool is_config_path_empty = config_path ().empty ();
434
+ return is_compat_mode_preferred () || !is_extra_symbol_available || is_config_path_empty;
435
+ }
436
+
437
+ bool FileHandle::is_compat_mode_preferred_for_async (CompatMode requested_compat_mode)
438
+ {
439
+ if (defaults::is_compat_mode_preferred (requested_compat_mode)) { return true ; }
440
+
441
+ if (!is_stream_api_available ()) {
442
+ if (requested_compat_mode == CompatMode::AUTO) { return true ; }
443
+ throw std::runtime_error (" Missing the cuFile stream api." );
444
+ }
445
+
446
+ // When checking for availability, we also check if cuFile's config file exists. This is
447
+ // because even when the stream API is available, it doesn't work if no config file exists.
448
+ if (config_path ().empty ()) {
449
+ if (requested_compat_mode == CompatMode::AUTO) { return true ; }
450
+ throw std::runtime_error (" Missing cuFile configuration file." );
451
+ }
452
+ return false ;
453
+ }
454
+
175
455
} // namespace kvikio
0 commit comments