Skip to content

Commit ebc7760

Browse files
committed
Add try catch in command_buffer.cpp
1 parent f5ac85b commit ebc7760

File tree

2 files changed

+87
-75
lines changed

2 files changed

+87
-75
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 45 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1361,49 +1361,55 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
13611361
return UR_RESULT_ERROR_INVALID_VALUE;
13621362
}
13631363

1364-
auto KernelCommandHandle = static_cast<kernel_command_handle *>(hCommand);
1364+
try {
1365+
auto KernelCommandHandle = static_cast<kernel_command_handle *>(hCommand);
13651366

1366-
UR_CHECK_ERROR(validateCommandDesc(KernelCommandHandle, pUpdateKernelLaunch));
1367-
UR_CHECK_ERROR(
1368-
updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch));
1369-
UR_CHECK_ERROR(updateCommand(KernelCommandHandle, pUpdateKernelLaunch));
1370-
1371-
// If no work-size is provided make sure we pass nullptr to setKernelParams so
1372-
// it can guess the local work size.
1373-
const bool ProvidedLocalSize = !KernelCommandHandle->isNullLocalSize();
1374-
size_t *LocalWorkSize =
1375-
ProvidedLocalSize ? KernelCommandHandle->LocalWorkSize : nullptr;
1376-
1377-
// Set the number of threads per block to the number of threads per warp
1378-
// by default unless user has provided a better number.
1379-
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
1380-
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
1381-
CUfunction CuFunc = KernelCommandHandle->Kernel->get();
1382-
auto Result = setKernelParams(
1383-
CommandBuffer->Context, CommandBuffer->Device,
1384-
KernelCommandHandle->WorkDim, KernelCommandHandle->GlobalWorkOffset,
1385-
KernelCommandHandle->GlobalWorkSize, LocalWorkSize,
1386-
KernelCommandHandle->Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid);
1387-
if (Result != UR_RESULT_SUCCESS) {
1388-
return Result;
1389-
}
1367+
UR_CHECK_ERROR(
1368+
validateCommandDesc(KernelCommandHandle, pUpdateKernelLaunch));
1369+
UR_CHECK_ERROR(
1370+
updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch));
1371+
UR_CHECK_ERROR(updateCommand(KernelCommandHandle, pUpdateKernelLaunch));
13901372

1391-
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;
1373+
// If no work-size is provided make sure we pass nullptr to setKernelParams
1374+
// so it can guess the local work size.
1375+
const bool ProvidedLocalSize = !KernelCommandHandle->isNullLocalSize();
1376+
size_t *LocalWorkSize =
1377+
ProvidedLocalSize ? KernelCommandHandle->LocalWorkSize : nullptr;
13921378

1393-
Params.func = CuFunc;
1394-
Params.gridDimX = BlocksPerGrid[0];
1395-
Params.gridDimY = BlocksPerGrid[1];
1396-
Params.gridDimZ = BlocksPerGrid[2];
1397-
Params.blockDimX = ThreadsPerBlock[0];
1398-
Params.blockDimY = ThreadsPerBlock[1];
1399-
Params.blockDimZ = ThreadsPerBlock[2];
1400-
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
1401-
Params.kernelParams =
1402-
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
1379+
// Set the number of threads per block to the number of threads per warp
1380+
// by default unless user has provided a better number.
1381+
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
1382+
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
1383+
CUfunction CuFunc = KernelCommandHandle->Kernel->get();
1384+
auto Result = setKernelParams(
1385+
CommandBuffer->Context, CommandBuffer->Device,
1386+
KernelCommandHandle->WorkDim, KernelCommandHandle->GlobalWorkOffset,
1387+
KernelCommandHandle->GlobalWorkSize, LocalWorkSize,
1388+
KernelCommandHandle->Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid);
1389+
if (Result != UR_RESULT_SUCCESS) {
1390+
return Result;
1391+
}
14031392

1404-
CUgraphNode Node = KernelCommandHandle->Node;
1405-
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;
1406-
UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params));
1393+
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;
1394+
1395+
Params.func = CuFunc;
1396+
Params.gridDimX = BlocksPerGrid[0];
1397+
Params.gridDimY = BlocksPerGrid[1];
1398+
Params.gridDimZ = BlocksPerGrid[2];
1399+
Params.blockDimX = ThreadsPerBlock[0];
1400+
Params.blockDimY = ThreadsPerBlock[1];
1401+
Params.blockDimZ = ThreadsPerBlock[2];
1402+
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
1403+
Params.kernelParams = const_cast<void **>(
1404+
KernelCommandHandle->Kernel->getArgIndices().data());
1405+
1406+
CUgraphNode Node = KernelCommandHandle->Node;
1407+
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;
1408+
UR_CHECK_ERROR(
1409+
cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params));
1410+
} catch (ur_result_t Err) {
1411+
return Err;
1412+
}
14071413
return UR_RESULT_SUCCESS;
14081414
}
14091415

source/adapters/hip/command_buffer.cpp

Lines changed: 42 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1066,42 +1066,48 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
10661066

10671067
ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer;
10681068

1069-
UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch));
1070-
UR_CHECK_ERROR(
1071-
updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch));
1072-
UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch));
1073-
1074-
// If no worksize is provided make sure we pass nullptr to setKernelParams
1075-
// so it can guess the local work size.
1076-
const bool ProvidedLocalSize = !hCommand->isNullLocalSize();
1077-
size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr;
1078-
1079-
// Set the number of threads per block to the number of threads per warp
1080-
// by default unless user has provided a better number
1081-
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
1082-
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
1083-
hipFunction_t HIPFunc = hCommand->Kernel->get();
1084-
UR_CHECK_ERROR(setKernelParams(
1085-
CommandBuffer->Device, hCommand->WorkDim, hCommand->GlobalWorkOffset,
1086-
hCommand->GlobalWorkSize, LocalWorkSize, hCommand->Kernel, HIPFunc,
1087-
ThreadsPerBlock, BlocksPerGrid));
1088-
1089-
hipKernelNodeParams &Params = hCommand->Params;
1090-
1091-
Params.func = HIPFunc;
1092-
Params.gridDim.x = BlocksPerGrid[0];
1093-
Params.gridDim.y = BlocksPerGrid[1];
1094-
Params.gridDim.z = BlocksPerGrid[2];
1095-
Params.blockDim.x = ThreadsPerBlock[0];
1096-
Params.blockDim.y = ThreadsPerBlock[1];
1097-
Params.blockDim.z = ThreadsPerBlock[2];
1098-
Params.sharedMemBytes = hCommand->Kernel->getLocalSize();
1099-
Params.kernelParams =
1100-
const_cast<void **>(hCommand->Kernel->getArgIndices().data());
1101-
1102-
hipGraphNode_t Node = hCommand->Node;
1103-
hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec;
1104-
UR_CHECK_ERROR(hipGraphExecKernelNodeSetParams(HipGraphExec, Node, &Params));
1069+
try {
1070+
UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch));
1071+
UR_CHECK_ERROR(
1072+
updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch));
1073+
UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch));
1074+
1075+
// If no worksize is provided make sure we pass nullptr to setKernelParams
1076+
// so it can guess the local work size.
1077+
const bool ProvidedLocalSize = !hCommand->isNullLocalSize();
1078+
size_t *LocalWorkSize =
1079+
ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr;
1080+
1081+
// Set the number of threads per block to the number of threads per warp
1082+
// by default unless user has provided a better number
1083+
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
1084+
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
1085+
hipFunction_t HIPFunc = hCommand->Kernel->get();
1086+
UR_CHECK_ERROR(setKernelParams(
1087+
CommandBuffer->Device, hCommand->WorkDim, hCommand->GlobalWorkOffset,
1088+
hCommand->GlobalWorkSize, LocalWorkSize, hCommand->Kernel, HIPFunc,
1089+
ThreadsPerBlock, BlocksPerGrid));
1090+
1091+
hipKernelNodeParams &Params = hCommand->Params;
1092+
1093+
Params.func = HIPFunc;
1094+
Params.gridDim.x = BlocksPerGrid[0];
1095+
Params.gridDim.y = BlocksPerGrid[1];
1096+
Params.gridDim.z = BlocksPerGrid[2];
1097+
Params.blockDim.x = ThreadsPerBlock[0];
1098+
Params.blockDim.y = ThreadsPerBlock[1];
1099+
Params.blockDim.z = ThreadsPerBlock[2];
1100+
Params.sharedMemBytes = hCommand->Kernel->getLocalSize();
1101+
Params.kernelParams =
1102+
const_cast<void **>(hCommand->Kernel->getArgIndices().data());
1103+
1104+
hipGraphNode_t Node = hCommand->Node;
1105+
hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec;
1106+
UR_CHECK_ERROR(
1107+
hipGraphExecKernelNodeSetParams(HipGraphExec, Node, &Params));
1108+
} catch (ur_result_t Err) {
1109+
return Err;
1110+
}
11051111
return UR_RESULT_SUCCESS;
11061112
}
11071113

0 commit comments

Comments
 (0)