Skip to content

Commit d62f773

Browse files
Fix stream not being set when calling hipMemsetAsync (#3244)
* Fix stream not being set when calling hipMemsetAsync
* Fix clang-format issue
* Fix missing handle for additional ZeroOutBuffer call in 6.2

Co-authored-by: Jun Liu <[email protected]>
1 parent e275d9d commit d62f773

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

src/include/miopen/solver/implicitgemm_ck_util.hpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -376,9 +376,10 @@ class TransposeInstance
376376
Run(handle, kernels, out_ptr, buf_handle.get());
377377
}
378378

379-
void ZeroOutBuffer()
379+
void ZeroOutBuffer(const Handle& handle)
380380
{
381-
[[maybe_unused]] auto status = hipMemset(buf_handle.get(), 0, tensor_sz);
381+
[[maybe_unused]] auto status =
382+
hipMemsetAsync(buf_handle.get(), 0, tensor_sz, handle.GetStream());
382383
assert(status == hipSuccess);
383384
}
384385

@@ -702,7 +703,7 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx,
702703
/// \todo: Will need SetTensor() to properly zero out non-packed tensors
703704
if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights)
704705
{
705-
output_tr_inst.ZeroOutBuffer();
706+
output_tr_inst.ZeroOutBuffer(handle);
706707
}
707708

708709
std::array<internal::TransposeInstanceTagged*, 3> tr_ptrs = {
@@ -845,7 +846,7 @@ ConvSolution InitInvokerFactoryWrwNCHW(const ExecutionContext& ctx,
845846
/// \todo: Will need SetTensor() to properly zero out non-packed tensors
846847
if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights)
847848
{
848-
output_tr_inst.ZeroOutBuffer();
849+
output_tr_inst.ZeroOutBuffer(handle);
849850
}
850851

851852
std::array<internal::TransposeInstanceTagged*, 3> tr_ptrs = {

0 commit comments

Comments
 (0)