Skip to content

Commit 716a99f

Browse files
Separate more interface and definition. Add comments on std::future. Mark noexcept to compat mode-related functions (#588)
This PR makes the following three improvements: - Separates interface and definition for `file_handle.hpp`, which was missed in the previous PR #581. - To help avoid UB (e.g. program crash) for downstream applications, adds the following qualifying remark to the returned future object of `pread/pwrite`: >The `std::future` object's `wait()` or `get()` should not be called after the lifetime of the FileHandle object ends. Otherwise, the behavior is undefined. - Adds the `noexcept` specifier to compatibility mode-related functions. Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Lawrence Mitchell (https://github.com/wence-) URL: #588
1 parent e5888f8 commit 716a99f

File tree

8 files changed

+335
-259
lines changed

8 files changed

+335
-259
lines changed

cpp/include/kvikio/defaults.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ class defaults {
139139
* (`ON`/`OFF`/`AUTO`) to two (`ON`/`OFF`) so as to determine the actual I/O path. This function
140140
* is lightweight as the inferred result is cached.
141141
*/
142-
static CompatMode infer_compat_mode_if_auto(CompatMode compat_mode);
142+
static CompatMode infer_compat_mode_if_auto(CompatMode compat_mode) noexcept;
143143

144144
/**
145145
* @brief Given a requested compatibility mode, whether it is expected to reduce to `ON`.
@@ -156,7 +156,7 @@ class defaults {
156156
* @param compat_mode Compatibility mode.
157157
* @return Boolean answer.
158158
*/
159-
static bool is_compat_mode_preferred(CompatMode compat_mode);
159+
static bool is_compat_mode_preferred(CompatMode compat_mode) noexcept;
160160

161161
/**
162162
* @brief Whether the global compatibility mode from class defaults is expected to be `ON`.

cpp/include/kvikio/file_handle.hpp

Lines changed: 25 additions & 233 deletions
Large diffs are not rendered by default.

cpp/include/kvikio/shim/cufile.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ class cuFileAPI {
102102
* @return The boolean answer
103103
*/
104104
#ifdef KVIKIO_CUFILE_FOUND
105-
bool is_cufile_library_available();
105+
bool is_cufile_library_available() noexcept;
106106
#else
107-
constexpr bool is_cufile_library_available() { return false; }
107+
constexpr bool is_cufile_library_available() noexcept { return false; }
108108
#endif
109109

110110
/**
@@ -115,7 +115,7 @@ constexpr bool is_cufile_library_available() { return false; }
115115
*
116116
* @return The boolean answer
117117
*/
118-
bool is_cufile_available();
118+
bool is_cufile_available() noexcept;
119119

120120
/**
121121
* @brief Get cufile version (or zero if older than v1.8).
@@ -129,9 +129,9 @@ bool is_cufile_available();
129129
* @return The version (1000*major + 10*minor) or zero if older than 1080.
130130
*/
131131
#ifdef KVIKIO_CUFILE_FOUND
132-
int cufile_version();
132+
int cufile_version() noexcept;
133133
#else
134-
constexpr int cufile_version() { return 0; }
134+
constexpr int cufile_version() noexcept { return 0; }
135135
#endif
136136

137137
/**

cpp/include/kvikio/shim/utils.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ void get_symbol(T& handle, void* lib, const char* name)
8080
*
8181
* @return The boolean answer
8282
*/
83-
[[nodiscard]] bool is_running_in_wsl();
83+
[[nodiscard]] bool is_running_in_wsl() noexcept;
8484

8585
/**
8686
* @brief Check if `/run/udev` is readable
@@ -91,6 +91,6 @@ void get_symbol(T& handle, void* lib, const char* name)
9191
*
9292
* @return The boolean answer
9393
*/
94-
[[nodiscard]] bool run_udev_readable();
94+
[[nodiscard]] bool run_udev_readable() noexcept;
9595

9696
} // namespace kvikio

cpp/src/defaults.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ CompatMode defaults::compat_mode() { return instance()->_compat_mode; }
143143

144144
void defaults::compat_mode_reset(CompatMode compat_mode) { instance()->_compat_mode = compat_mode; }
145145

146-
CompatMode defaults::infer_compat_mode_if_auto(CompatMode compat_mode)
146+
CompatMode defaults::infer_compat_mode_if_auto(CompatMode compat_mode) noexcept
147147
{
148148
if (compat_mode == CompatMode::AUTO) {
149149
static auto inferred_compat_mode_for_auto = []() -> CompatMode {
@@ -154,7 +154,7 @@ CompatMode defaults::infer_compat_mode_if_auto(CompatMode compat_mode)
154154
return compat_mode;
155155
}
156156

157-
bool defaults::is_compat_mode_preferred(CompatMode compat_mode)
157+
bool defaults::is_compat_mode_preferred(CompatMode compat_mode) noexcept
158158
{
159159
return compat_mode == CompatMode::ON ||
160160
(compat_mode == CompatMode::AUTO &&

cpp/src/file_handle.cpp

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,58 @@ FileHandle::FileHandle(const std::string& file_path,
163163
}
164164
}
165165

166+
FileHandle::FileHandle(FileHandle&& o) noexcept
167+
: _fd_direct_on{std::exchange(o._fd_direct_on, -1)},
168+
_fd_direct_off{std::exchange(o._fd_direct_off, -1)},
169+
_initialized{std::exchange(o._initialized, false)},
170+
_compat_mode{std::exchange(o._compat_mode, CompatMode::AUTO)},
171+
_nbytes{std::exchange(o._nbytes, 0)},
172+
_handle{std::exchange(o._handle, CUfileHandle_t{})}
173+
{
174+
}
175+
176+
/**
 * @brief Move assignment: release what `*this` currently holds, then take over `o`.
 *
 * The resources currently held by `*this` are released through `close()` before the
 * members are overwritten; otherwise the previously held file descriptors (and the cuFile
 * handle registration, when not in compatibility mode) would be lost without being closed.
 * Self-assignment is a no-op, since closing first would destroy the state being moved.
 *
 * After the call `o` is left in the closed state (descriptors `-1`, not initialized,
 * `CompatMode::AUTO`, size `0`, value-initialized cuFile handle).
 *
 * @param o The handle to move from.
 * @return Reference to `*this`.
 */
FileHandle& FileHandle::operator=(FileHandle&& o) noexcept
{
  if (this != &o) {
    // close() is noexcept and returns immediately when this handle is already closed.
    close();

    _fd_direct_on  = std::exchange(o._fd_direct_on, -1);
    _fd_direct_off = std::exchange(o._fd_direct_off, -1);
    _initialized   = std::exchange(o._initialized, false);
    _compat_mode   = std::exchange(o._compat_mode, CompatMode::AUTO);
    _nbytes        = std::exchange(o._nbytes, 0);
    _handle        = std::exchange(o._handle, CUfileHandle_t{});
  }
  return *this;
}
186+
187+
/**
 * @brief Destructor: closes the file handle via `close()`.
 */
FileHandle::~FileHandle() noexcept
{
  close();
}
188+
189+
/**
 * @brief Whether the file handle is closed.
 *
 * @return `true` when the handle is not initialized, `false` otherwise.
 */
bool FileHandle::closed() const noexcept
{
  const bool is_open = _initialized;
  return !is_open;
}
190+
191+
void FileHandle::close() noexcept
192+
{
193+
try {
194+
if (closed()) { return; }
195+
196+
if (!is_compat_mode_preferred()) { cuFileAPI::instance().HandleDeregister(_handle); }
197+
_compat_mode = CompatMode::AUTO;
198+
::close(_fd_direct_off);
199+
if (_fd_direct_on != -1) { ::close(_fd_direct_on); }
200+
_fd_direct_on = -1;
201+
_fd_direct_off = -1;
202+
_initialized = false;
203+
} catch (...) {
204+
}
205+
}
206+
207+
/**
 * @brief Get the underlying cuFile handle.
 *
 * @return The cuFile handle stored in this object.
 * @throws CUfileException if the file handle is closed, or if compatibility mode is
 * preferred (in which case no cuFile handle is available).
 */
CUfileHandle_t FileHandle::handle()
{
  const bool is_closed = closed();
  if (is_closed) { throw CUfileException("File handle is closed"); }

  const bool compat_preferred = is_compat_mode_preferred();
  if (!compat_preferred) { return _handle; }

  throw CUfileException("The underlying cuFile handle isn't available in compatibility mode");
}
215+
216+
/**
 * @brief Get the file descriptor stored in `_fd_direct_off`.
 *
 * @return The descriptor value (`-1` once the handle has been closed or moved from).
 */
int FileHandle::fd() const noexcept
{
  return _fd_direct_off;
}
217+
166218
/**
 * @brief Get the open flags of the descriptor stored in `_fd_direct_off`.
 *
 * @return Whatever `open_flags()` reports for that descriptor.
 */
int FileHandle::fd_open_flags() const
{
  const int descriptor = _fd_direct_off;
  return open_flags(descriptor);
}
167219

168220
std::size_t FileHandle::nbytes() const
@@ -172,4 +224,232 @@ std::size_t FileHandle::nbytes() const
172224
return _nbytes;
173225
}
174226

227+
std::size_t FileHandle::read(void* devPtr_base,
228+
std::size_t size,
229+
std::size_t file_offset,
230+
std::size_t devPtr_offset,
231+
bool sync_default_stream)
232+
{
233+
if (is_compat_mode_preferred()) {
234+
return detail::posix_device_read(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
235+
}
236+
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }
237+
238+
KVIKIO_NVTX_SCOPED_RANGE("cufileRead()", size);
239+
ssize_t ret = cuFileAPI::instance().Read(
240+
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
241+
CUFILE_CHECK_BYTES_DONE(ret);
242+
return ret;
243+
}
244+
245+
std::size_t FileHandle::write(const void* devPtr_base,
246+
std::size_t size,
247+
std::size_t file_offset,
248+
std::size_t devPtr_offset,
249+
bool sync_default_stream)
250+
{
251+
_nbytes = 0; // Invalidate the computed file size
252+
253+
if (is_compat_mode_preferred()) {
254+
return detail::posix_device_write(
255+
_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset);
256+
}
257+
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }
258+
259+
KVIKIO_NVTX_SCOPED_RANGE("cufileWrite()", size);
260+
ssize_t ret = cuFileAPI::instance().Write(
261+
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
262+
if (ret == -1) {
263+
throw std::system_error(errno, std::generic_category(), "Unable to write file");
264+
}
265+
if (ret < -1) {
266+
throw CUfileException(std::string{"cuFile error at: "} + __FILE__ + ":" +
267+
KVIKIO_STRINGIFY(__LINE__) + ": " + CUFILE_ERRSTR(ret));
268+
}
269+
return ret;
270+
}
271+
272+
std::future<std::size_t> FileHandle::pread(void* buf,
273+
std::size_t size,
274+
std::size_t file_offset,
275+
std::size_t task_size,
276+
std::size_t gds_threshold,
277+
bool sync_default_stream)
278+
{
279+
KVIKIO_NVTX_MARKER("FileHandle::pread()", size);
280+
if (is_host_memory(buf)) {
281+
auto op = [this](void* hostPtr_base,
282+
std::size_t size,
283+
std::size_t file_offset,
284+
std::size_t hostPtr_offset) -> std::size_t {
285+
char* buf = static_cast<char*>(hostPtr_base) + hostPtr_offset;
286+
return detail::posix_host_read<detail::PartialIO::NO>(_fd_direct_off, buf, size, file_offset);
287+
};
288+
289+
return parallel_io(op, buf, size, file_offset, task_size, 0);
290+
}
291+
292+
CUcontext ctx = get_context_from_pointer(buf);
293+
294+
// Shortcut that circumvent the threadpool and use the POSIX backend directly.
295+
if (size < gds_threshold) {
296+
auto task = [this, ctx, buf, size, file_offset]() -> std::size_t {
297+
PushAndPopContext c(ctx);
298+
return detail::posix_device_read(_fd_direct_off, buf, size, file_offset, 0);
299+
};
300+
return std::async(std::launch::deferred, task);
301+
}
302+
303+
// Let's synchronize once instead of in each task.
304+
if (sync_default_stream && !is_compat_mode_preferred()) {
305+
PushAndPopContext c(ctx);
306+
CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr));
307+
}
308+
309+
// Regular case that use the threadpool and run the tasks in parallel
310+
auto task = [this, ctx](void* devPtr_base,
311+
std::size_t size,
312+
std::size_t file_offset,
313+
std::size_t devPtr_offset) -> std::size_t {
314+
PushAndPopContext c(ctx);
315+
return read(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false);
316+
};
317+
auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx);
318+
return parallel_io(task, devPtr_base, size, file_offset, task_size, devPtr_offset);
319+
}
320+
321+
std::future<std::size_t> FileHandle::pwrite(const void* buf,
322+
std::size_t size,
323+
std::size_t file_offset,
324+
std::size_t task_size,
325+
std::size_t gds_threshold,
326+
bool sync_default_stream)
327+
{
328+
KVIKIO_NVTX_MARKER("FileHandle::pwrite()", size);
329+
if (is_host_memory(buf)) {
330+
auto op = [this](const void* hostPtr_base,
331+
std::size_t size,
332+
std::size_t file_offset,
333+
std::size_t hostPtr_offset) -> std::size_t {
334+
const char* buf = static_cast<const char*>(hostPtr_base) + hostPtr_offset;
335+
return detail::posix_host_write<detail::PartialIO::NO>(
336+
_fd_direct_off, buf, size, file_offset);
337+
};
338+
339+
return parallel_io(op, buf, size, file_offset, task_size, 0);
340+
}
341+
342+
CUcontext ctx = get_context_from_pointer(buf);
343+
344+
// Shortcut that circumvent the threadpool and use the POSIX backend directly.
345+
if (size < gds_threshold) {
346+
auto task = [this, ctx, buf, size, file_offset]() -> std::size_t {
347+
PushAndPopContext c(ctx);
348+
return detail::posix_device_write(_fd_direct_off, buf, size, file_offset, 0);
349+
};
350+
return std::async(std::launch::deferred, task);
351+
}
352+
353+
// Let's synchronize once instead of in each task.
354+
if (sync_default_stream && !is_compat_mode_preferred()) {
355+
PushAndPopContext c(ctx);
356+
CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr));
357+
}
358+
359+
// Regular case that use the threadpool and run the tasks in parallel
360+
auto op = [this, ctx](const void* devPtr_base,
361+
std::size_t size,
362+
std::size_t file_offset,
363+
std::size_t devPtr_offset) -> std::size_t {
364+
PushAndPopContext c(ctx);
365+
return write(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false);
366+
};
367+
auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx);
368+
return parallel_io(op, devPtr_base, size, file_offset, task_size, devPtr_offset);
369+
}
370+
371+
void FileHandle::read_async(void* devPtr_base,
372+
std::size_t* size_p,
373+
off_t* file_offset_p,
374+
off_t* devPtr_offset_p,
375+
ssize_t* bytes_read_p,
376+
CUstream stream)
377+
{
378+
if (is_compat_mode_preferred_for_async(_compat_mode)) {
379+
CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream));
380+
*bytes_read_p =
381+
static_cast<ssize_t>(read(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p));
382+
} else {
383+
CUFILE_TRY(cuFileAPI::instance().ReadAsync(
384+
_handle, devPtr_base, size_p, file_offset_p, devPtr_offset_p, bytes_read_p, stream));
385+
}
386+
}
387+
388+
/**
 * @brief Read from the file into device memory on a CUDA stream, returning a StreamFuture.
 *
 * Builds a `StreamFuture` from the arguments, obtains its argument tuple via
 * `get_args()` and forwards those to the pointer-based `read_async()` overload.
 *
 * @param devPtr_base Base address of the destination buffer.
 * @param size Number of bytes to read.
 * @param file_offset Offset in the file to read from.
 * @param devPtr_offset Offset relative to `devPtr_base` to read into.
 * @param stream CUDA stream the operation is associated with.
 * @return The `StreamFuture` whose arguments were passed to the asynchronous read.
 */
StreamFuture FileHandle::read_async(
  void* devPtr_base, std::size_t size, off_t file_offset, off_t devPtr_offset, CUstream stream)
{
  StreamFuture future(devPtr_base, size, file_offset, devPtr_offset, stream);

  auto [base, nbytes_ptr, file_off_ptr, dev_off_ptr, done_ptr, future_stream] =
    future.get_args();
  read_async(base, nbytes_ptr, file_off_ptr, dev_off_ptr, done_ptr, future_stream);

  return future;
}
397+
398+
void FileHandle::write_async(void* devPtr_base,
399+
std::size_t* size_p,
400+
off_t* file_offset_p,
401+
off_t* devPtr_offset_p,
402+
ssize_t* bytes_written_p,
403+
CUstream stream)
404+
{
405+
if (is_compat_mode_preferred_for_async(_compat_mode)) {
406+
CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream));
407+
*bytes_written_p =
408+
static_cast<ssize_t>(write(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p));
409+
} else {
410+
CUFILE_TRY(cuFileAPI::instance().WriteAsync(
411+
_handle, devPtr_base, size_p, file_offset_p, devPtr_offset_p, bytes_written_p, stream));
412+
}
413+
}
414+
415+
/**
 * @brief Write from device memory to the file on a CUDA stream, returning a StreamFuture.
 *
 * Builds a `StreamFuture` from the arguments, obtains its argument tuple via
 * `get_args()` and forwards those to the pointer-based `write_async()` overload.
 *
 * @param devPtr_base Base address of the source buffer.
 * @param size Number of bytes to write.
 * @param file_offset Offset in the file to write to.
 * @param devPtr_offset Offset relative to `devPtr_base` to write from.
 * @param stream CUDA stream the operation is associated with.
 * @return The `StreamFuture` whose arguments were passed to the asynchronous write.
 */
StreamFuture FileHandle::write_async(
  void* devPtr_base, std::size_t size, off_t file_offset, off_t devPtr_offset, CUstream stream)
{
  StreamFuture future(devPtr_base, size, file_offset, devPtr_offset, stream);

  auto [base, nbytes_ptr, file_off_ptr, dev_off_ptr, done_ptr, future_stream] =
    future.get_args();
  write_async(base, nbytes_ptr, file_off_ptr, dev_off_ptr, done_ptr, future_stream);

  return future;
}
424+
425+
bool FileHandle::is_compat_mode_preferred() const noexcept
426+
{
427+
return defaults::is_compat_mode_preferred(_compat_mode);
428+
}
429+
430+
bool FileHandle::is_compat_mode_preferred_for_async() const noexcept
431+
{
432+
static bool is_extra_symbol_available = is_stream_api_available();
433+
static bool is_config_path_empty = config_path().empty();
434+
return is_compat_mode_preferred() || !is_extra_symbol_available || is_config_path_empty;
435+
}
436+
437+
/**
 * @brief Whether compatibility mode is preferred for asynchronous I/O, given a requested
 * compatibility mode.
 *
 * If the stream API or the cuFile config file is missing, `CompatMode::AUTO` falls back to
 * compatibility mode, whereas any other requested mode that reaches those checks is
 * treated as an error.
 *
 * @param requested_compat_mode The requested compatibility mode.
 * @return `true` if compatibility mode should be used for asynchronous I/O, `false`
 * otherwise.
 * @throws std::runtime_error if the stream API or the config file is missing and the
 * requested mode is not `CompatMode::AUTO`.
 */
bool FileHandle::is_compat_mode_preferred_for_async(CompatMode requested_compat_mode)
{
  if (defaults::is_compat_mode_preferred(requested_compat_mode)) { return true; }

  // Only AUTO is allowed to fall back silently when a prerequisite is missing.
  const bool may_fall_back = (requested_compat_mode == CompatMode::AUTO);

  if (!is_stream_api_available()) {
    if (!may_fall_back) { throw std::runtime_error("Missing the cuFile stream api."); }
    return true;
  }

  // The config file is checked as well: even when the stream API is available, it doesn't
  // work if no config file exists.
  if (config_path().empty()) {
    if (!may_fall_back) { throw std::runtime_error("Missing cuFile configuration file."); }
    return true;
  }

  return false;
}
454+
175455
} // namespace kvikio

0 commit comments

Comments
 (0)