Skip to content

Commit 384dcb0

Browse files
committed
Remove mmap workaround on windows
After some testing I found that mmap is supported on windows and for many GPUs on Linux. Therefore I remove the workaround for windows since it is not necessary.
1 parent 417a1de commit 384dcb0

File tree

1 file changed

+19
-35
lines changed

1 file changed

+19
-35
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

+19-35
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
364364
size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
365365

366366
if (padded_size > original_size && tensor->view_src == nullptr) {
367-
SYCL_CHECK(CHECK_TRY_ERROR(ctx->stream->memset(
368-
(char *)tensor->data + original_size, 0,
369-
padded_size - original_size)));
367+
SYCL_CHECK(CHECK_TRY_ERROR(
368+
ctx->stream->memset((char *) tensor->data + original_size, 0, padded_size - original_size)));
370369
}
371370
}
372371
return GGML_STATUS_SUCCESS;
@@ -385,16 +384,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
385384
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
386385
ggml_sycl_set_device(ctx->device);
387386
auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
388-
SYCL_CHECK(
389-
CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
387+
SYCL_CHECK(CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
388+
#ifndef _WIN32
390389
// Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU.
391390
// This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here.
392-
char* host_buf = (char*)malloc(size);
391+
char * host_buf = (char *) malloc(size);
393392
memcpy(host_buf, data, size);
394-
SYCL_CHECK(
395-
CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, host_buf, size)
396-
.wait()));
393+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, host_buf, size).wait()));
397394
free(host_buf);
395+
#else
396+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, data, size).wait()));
397+
#endif
398398
}
399399
catch (sycl::exception const &exc) {
400400
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -498,9 +498,7 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
498498
SYCL_CHECK(
499499
CHECK_TRY_ERROR(dpct::get_current_device().queues_wait_and_throw()));
500500

501-
SYCL_CHECK(CHECK_TRY_ERROR((*stream)
502-
.memset(ctx->dev_ptr, value, buffer->size)
503-
));
501+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memset(ctx->dev_ptr, value, buffer->size)));
504502
}
505503
catch (sycl::exception const &exc) {
506504
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -840,10 +838,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
840838
the error codes. The original code was commented out and a warning
841839
string was inserted. You need to rewrite this code.
842840
*/
843-
SYCL_CHECK(CHECK_TRY_ERROR(
844-
(*stream)
845-
.memset(buf + original_size, 0, size - original_size)
846-
));
841+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memset(buf + original_size, 0, size - original_size)));
847842
}
848843

849844
extra->data_device[i] = buf;
@@ -908,10 +903,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
908903
*/
909904
ggml_sycl_set_device(i);
910905
const queue_ptr stream = ctx->streams[i];
911-
SYCL_CHECK(CHECK_TRY_ERROR(
912-
(*stream)
913-
.memcpy(extra->data_device[i], buf_host, original_size)
914-
));
906+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy(extra->data_device[i], buf_host, original_size)));
915907
}
916908
}
917909
catch (sycl::exception const &exc) {
@@ -961,10 +953,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
961953
*/
962954
ggml_sycl_set_device(i);
963955
const queue_ptr stream = ctx->streams[i];
964-
SYCL_CHECK(CHECK_TRY_ERROR(
965-
(*stream)
966-
.memcpy(buf_host, extra->data_device[i], original_size)
967-
));
956+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy(buf_host, extra->data_device[i], original_size)));
968957
}
969958
}
970959
catch (sycl::exception const &exc) {
@@ -2501,10 +2490,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
25012490
if (i != ctx.device) {
25022491
if (convert_src1_to_q8_1) {
25032492
char * src1_ddq_i_source = dev[ctx.device].src1_ddq + src1_ddq_i_offset;
2504-
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
2505-
src1_ddq_i, src1_ddq_i_source,
2506-
src1_ncols * src1_padded_col_size * q8_1_ts /
2507-
q8_1_bs)));
2493+
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
2494+
src1_ddq_i, src1_ddq_i_source, src1_ncols * src1_padded_col_size * q8_1_ts / q8_1_bs)));
25082495
} else {
25092496

25102497
float * src1_ddf_i_source = (float *) src1_extra->data_device[ctx.device];
@@ -2569,9 +2556,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
25692556
float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3);
25702557
GGML_ASSERT(dst->nb[1] == ne0*sizeof(float));
25712558
dhf_dst_i += src1_col_0*ne0;
2572-
SYCL_CHECK(CHECK_TRY_ERROR(
2573-
stream->memcpy(dhf_dst_i, dst_dd_i,
2574-
src1_ncols * ne0 * sizeof(float))));
2559+
SYCL_CHECK(
2560+
CHECK_TRY_ERROR(stream->memcpy(dhf_dst_i, dst_dd_i, src1_ncols * ne0 * sizeof(float))));
25752561
}
25762562
}
25772563

@@ -3739,8 +3725,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
37393725

37403726
GGML_ASSERT(buf->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
37413727
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
3742-
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
3743-
data, (const char *)tensor->data + offset, size)));
3728+
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(data, (const char *) tensor->data + offset, size)));
37443729
}
37453730
catch (sycl::exception const &exc) {
37463731
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -3759,8 +3744,7 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
37593744
was inserted. You need to rewrite this code.
37603745
*/
37613746
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
3762-
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
3763-
dst->data, src->data, ggml_nbytes(dst))));
3747+
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(dst->data, src->data, ggml_nbytes(dst))));
37643748
return true;
37653749
}
37663750

0 commit comments

Comments
 (0)