@@ -364,9 +364,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
364
364
size_t padded_size = ggml_backend_buft_get_alloc_size (buffer->buft , tensor);
365
365
366
366
if (padded_size > original_size && tensor->view_src == nullptr ) {
367
- SYCL_CHECK (CHECK_TRY_ERROR (ctx->stream ->memset (
368
- (char *)tensor->data + original_size, 0 ,
369
- padded_size - original_size)));
367
+ SYCL_CHECK (CHECK_TRY_ERROR (
368
+ ctx->stream ->memset ((char *) tensor->data + original_size, 0 , padded_size - original_size)));
370
369
}
371
370
}
372
371
return GGML_STATUS_SUCCESS;
@@ -385,16 +384,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
385
384
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context ;
386
385
ggml_sycl_set_device (ctx->device );
387
386
auto stream = &(dpct::dev_mgr::instance ().get_device (ctx->device ).default_queue ());
388
- SYCL_CHECK (
389
- CHECK_TRY_ERROR ( dpct::dev_mgr::instance (). get_device (ctx-> device ). queues_wait_and_throw ()));
387
+ SYCL_CHECK (CHECK_TRY_ERROR ( dpct::dev_mgr::instance (). get_device (ctx-> device ). queues_wait_and_throw ()));
388
+ # ifndef _WIN32
390
389
// Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU.
391
390
// This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here.
392
- char * host_buf = (char *) malloc (size);
391
+ char * host_buf = (char *) malloc (size);
393
392
memcpy (host_buf, data, size);
394
- SYCL_CHECK (
395
- CHECK_TRY_ERROR ((*stream).memcpy ((char *)tensor->data + offset, host_buf, size)
396
- .wait ()));
393
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy ((char *) tensor->data + offset, host_buf, size).wait ()));
397
394
free (host_buf);
395
+ #else
396
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy ((char *) tensor->data + offset, data, size).wait ()));
397
+ #endif
398
398
}
399
399
catch (sycl::exception const &exc) {
400
400
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -498,9 +498,7 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
498
498
SYCL_CHECK (
499
499
CHECK_TRY_ERROR (dpct::get_current_device ().queues_wait_and_throw ()));
500
500
501
- SYCL_CHECK (CHECK_TRY_ERROR ((*stream)
502
- .memset (ctx->dev_ptr , value, buffer->size )
503
- ));
501
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memset (ctx->dev_ptr , value, buffer->size )));
504
502
}
505
503
catch (sycl::exception const &exc) {
506
504
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -840,10 +838,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
840
838
the error codes. The original code was commented out and a warning
841
839
string was inserted. You need to rewrite this code.
842
840
*/
843
- SYCL_CHECK (CHECK_TRY_ERROR (
844
- (*stream)
845
- .memset (buf + original_size, 0 , size - original_size)
846
- ));
841
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memset (buf + original_size, 0 , size - original_size)));
847
842
}
848
843
849
844
extra->data_device [i] = buf;
@@ -908,10 +903,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
908
903
*/
909
904
ggml_sycl_set_device (i);
910
905
const queue_ptr stream = ctx->streams [i];
911
- SYCL_CHECK (CHECK_TRY_ERROR (
912
- (*stream)
913
- .memcpy (extra->data_device [i], buf_host, original_size)
914
- ));
906
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy (extra->data_device [i], buf_host, original_size)));
915
907
}
916
908
}
917
909
catch (sycl::exception const &exc) {
@@ -961,10 +953,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
961
953
*/
962
954
ggml_sycl_set_device (i);
963
955
const queue_ptr stream = ctx->streams [i];
964
- SYCL_CHECK (CHECK_TRY_ERROR (
965
- (*stream)
966
- .memcpy (buf_host, extra->data_device [i], original_size)
967
- ));
956
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy (buf_host, extra->data_device [i], original_size)));
968
957
}
969
958
}
970
959
catch (sycl::exception const &exc) {
@@ -2501,10 +2490,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
2501
2490
if (i != ctx.device ) {
2502
2491
if (convert_src1_to_q8_1) {
2503
2492
char * src1_ddq_i_source = dev[ctx.device ].src1_ddq + src1_ddq_i_offset;
2504
- SYCL_CHECK (CHECK_TRY_ERROR (stream->memcpy (
2505
- src1_ddq_i, src1_ddq_i_source,
2506
- src1_ncols * src1_padded_col_size * q8_1_ts /
2507
- q8_1_bs)));
2493
+ SYCL_CHECK (CHECK_TRY_ERROR (stream->memcpy (
2494
+ src1_ddq_i, src1_ddq_i_source, src1_ncols * src1_padded_col_size * q8_1_ts / q8_1_bs)));
2508
2495
} else {
2509
2496
2510
2497
float * src1_ddf_i_source = (float *) src1_extra->data_device [ctx.device ];
@@ -2569,9 +2556,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
2569
2556
float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3);
2570
2557
GGML_ASSERT (dst->nb [1 ] == ne0*sizeof (float ));
2571
2558
dhf_dst_i += src1_col_0*ne0;
2572
- SYCL_CHECK (CHECK_TRY_ERROR (
2573
- stream->memcpy (dhf_dst_i, dst_dd_i,
2574
- src1_ncols * ne0 * sizeof (float ))));
2559
+ SYCL_CHECK (
2560
+ CHECK_TRY_ERROR (stream->memcpy (dhf_dst_i, dst_dd_i, src1_ncols * ne0 * sizeof (float ))));
2575
2561
}
2576
2562
}
2577
2563
@@ -3739,8 +3725,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
3739
3725
3740
3726
GGML_ASSERT (buf->buft == ggml_backend_sycl_buffer_type (sycl_ctx->device ) && " unsupported buffer type" );
3741
3727
const queue_ptr stream = sycl_ctx->stream (sycl_ctx->device , 0 );
3742
- SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (
3743
- data, (const char *)tensor->data + offset, size)));
3728
+ SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (data, (const char *) tensor->data + offset, size)));
3744
3729
}
3745
3730
catch (sycl::exception const &exc) {
3746
3731
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -3759,8 +3744,7 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
3759
3744
was inserted. You need to rewrite this code.
3760
3745
*/
3761
3746
const queue_ptr stream = sycl_ctx->stream (sycl_ctx->device , 0 );
3762
- SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (
3763
- dst->data , src->data , ggml_nbytes (dst))));
3747
+ SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (dst->data , src->data , ggml_nbytes (dst))));
3764
3748
return true ;
3765
3749
}
3766
3750
0 commit comments