|
3 | 3 | #include <cstddef>
|
4 | 4 | #include <cstdio>
|
5 | 5 | #include <cstdlib>
|
| 6 | +#include <cstdarg> |
6 | 7 | #include <cuda_runtime.h>
|
7 | 8 | #include <memory>
|
8 | 9 | #include <new>
|
|
11 | 12 | #include <utility>
|
12 | 13 | #include <vector>
|
13 | 14 |
|
14 |
| -namespace cupp { |
| 15 | +namespace cudapp { |
15 | 16 |
|
16 | 17 | std::error_category const &cudaErrorCategory() noexcept {
|
17 | 18 | static struct : std::error_category {
|
@@ -40,7 +41,7 @@ void throwCudaError(cudaError_t err, char const *file, int line) {
|
40 | 41 | do { \
|
41 | 42 | cudaError_t err = (expr); \
|
42 | 43 | if (err != cudaSuccess) [[unlikely]] { \
|
43 |
| - ::cupp::throwCudaError(err, __FILE__, __LINE__); \ |
| 44 | + ::cudapp::throwCudaError(err, __FILE__, __LINE__); \ |
44 | 45 | } \
|
45 | 46 | } while (0)
|
46 | 47 |
|
@@ -265,10 +266,22 @@ public:
|
265 | 266 | }
|
266 | 267 | };
|
267 | 268 |
|
268 |
| - void synchronize() const { |
| 269 | + void join() const { |
269 | 270 | CHECK_CUDA(cudaEventSynchronize(*this));
|
270 | 271 | }
|
271 | 272 |
|
| 273 | + bool joinReady() const { |
| 274 | + cudaError_t res = cudaEventQuery(*this); |
| 275 | + if (res == cudaSuccess) { |
| 276 | + return true; |
| 277 | + } |
| 278 | + if (res == cudaErrorNotReady) { |
| 279 | + return false; |
| 280 | + } |
| 281 | + CHECK_CUDA(res); |
| 282 | + return false; |
| 283 | + } |
| 284 | + |
272 | 285 | float elapsedMillis(CudaEvent const &event) const {
|
273 | 286 | float result;
|
274 | 287 | CHECK_CUDA(cudaEventElapsedTime(&result, *this, event));
|
@@ -315,10 +328,6 @@ public:
|
315 | 328 | return CudaStream(nullptr);
|
316 | 329 | }
|
317 | 330 |
|
318 |
| - void synchronize() const { |
319 |
| - CHECK_CUDA(cudaStreamSynchronize(*this)); |
320 |
| - } |
321 |
| - |
322 | 331 | void copy(void *dst, void *src, size_t size, cudaMemcpyKind kind) const {
|
323 | 332 | CHECK_CUDA(cudaMemcpyAsync(dst, src, size, kind, *this));
|
324 | 333 | }
|
@@ -348,23 +357,27 @@ public:
|
348 | 357 | CHECK_CUDA(cudaStreamWaitEvent(*this, event, flags));
|
349 | 358 | }
|
350 | 359 |
|
351 |
| - void asyncWait(cudaStreamCallback_t callback, void *userData) const { |
| 360 | + void join() const { |
| 361 | + CHECK_CUDA(cudaStreamSynchronize(*this)); |
| 362 | + } |
| 363 | + |
| 364 | + void joinAsync(cudaStreamCallback_t callback, void *userData) const { |
352 | 365 | CHECK_CUDA(cudaStreamAddCallback(*this, callback, userData, 0));
|
353 | 366 | }
|
354 | 367 |
|
355 | 368 | template <class Func>
|
356 |
| - void asyncWait(Func &&func) const { |
| 369 | + void joinAsync(Func &&func) const { |
357 | 370 | auto userData = std::make_unique<Func>();
|
358 | 371 | cudaStreamCallback_t callback = [](cudaStream_t stream,
|
359 | 372 | cudaError_t status, void *userData) {
|
360 | 373 | std::unique_ptr<Func> func(static_cast<Func *>(userData));
|
361 | 374 | (*func)(stream, status);
|
362 | 375 | };
|
363 |
| - asyncWait(callback, userData.get()); |
| 376 | + joinAsync(callback, userData.get()); |
364 | 377 | userData.release();
|
365 | 378 | }
|
366 | 379 |
|
367 |
| - bool pollWait() { |
| 380 | + bool joinReady() const { |
368 | 381 | cudaError_t res = cudaStreamQuery(*this);
|
369 | 382 | if (res == cudaSuccess) {
|
370 | 383 | return true;
|
@@ -418,7 +431,7 @@ struct CudaAllocator : private Arena {
|
418 | 431 | if (res == cudaErrorMemoryAllocation) [[unlikely]] {
|
419 | 432 | throw std::bad_alloc();
|
420 | 433 | }
|
421 |
| - CHECK_CUDA(("Arena::doMalloc", res)); |
| 434 | + CHECK_CUDA(res /* Arena::doMalloc */); |
422 | 435 | return static_cast<T *>(ptr);
|
423 | 436 | }
|
424 | 437 |
|
@@ -459,6 +472,21 @@ struct CudaAllocator : private Arena {
|
459 | 472 | template <class T>
|
460 | 473 | using CudaVector = std::vector<T, CudaAllocator<T>>;
|
461 | 474 |
|
| 475 | +#if defined(__clang__) && defined(__CUDACC__) && defined(__GLIBCXX__) |
| 476 | +__host__ __device__ static void printf(const char *fmt, ...) { |
| 477 | + va_list args; |
| 478 | + va_start(args, fmt); |
| 479 | +#if __CUDA_ARCH__ |
| 480 | + ::vprintf(fmt, (const char *)args); |
| 481 | +#else |
| 482 | + ::vprintf(fmt, args); |
| 483 | +#endif |
| 484 | + va_end(args); |
| 485 | +} |
| 486 | +#else |
| 487 | +using ::printf; |
| 488 | +#endif |
| 489 | + |
462 | 490 | // #if __cpp_lib_memory_resource
|
463 | 491 | // template <class Arena>
|
464 | 492 | // struct CudaResource : std::pmr::memory_resource, private Arena {
|
|
0 commit comments