Skip to content

Commit

Permalink
fix: Update vdo-opencl-filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
johan-olsson-work authored and pataxis committed May 6, 2024
1 parent c51f116 commit 660abf3
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 42 deletions.
28 changes: 14 additions & 14 deletions vdo-opencl-filtering/app/sobel_nv12.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/

__kernel void sobel_3x1(__global const unsigned char *In_y,
__global const unsigned char *Out_y,
__global const unsigned char *Out_cbcr,
__global unsigned char *Out_y,
__global unsigned char *Out_cbcr,
int width,
int height)
{
Expand All @@ -40,36 +40,36 @@ __kernel void sobel_3x1(__global const unsigned char *In_y,
uchar8 mag = (uchar8)0;

/* Previous row */
uchar16 temp = vload16(0, (uchar*)In_y + pix_id - width - 1);
uchar16 temp = vload16(0, &In_y[pix_id - width - 1]);
short8 middle = convert_short8(temp.s12345678);

gy += middle * (short8)(-2);

/* Current row */
temp = vload16(0, (uchar*) In_y + pix_id -1);
temp = vload16(0, &In_y[pix_id -1]);
short8 left = convert_short8(temp.s01234567);
short8 right = convert_short8(temp.s23456789);

gx += left * (short8)(-2);
gx += right * (short8)(2);

/* Next row */
temp = vload16(0, (uchar*) In_y + pix_id + width - 1);
temp = vload16(0, &In_y[pix_id + width - 1]);
middle = convert_short8(temp.s12345678);

gy += middle * (short8)(2);

mag = convert_uchar8(clamp(abs(gx) + abs(gy),1, 255));
vstore8(mag, 0, Out_y + pix_id);
vstore8(mag, 0, &Out_y[pix_id]);

/* Write cbcr data (128 for greyscale) */
uchar8 cbcr = (uchar8) 128;
vstore8(cbcr, 0, Out_cbcr + cbcr_id);
vstore8(cbcr, 0, &Out_cbcr[cbcr_id]);
}

__kernel void sobel_3x3(__global const unsigned char *In_y,
__global const unsigned char *Out_y,
__global const unsigned char *Out_cbcr,
__global unsigned char *Out_y,
__global unsigned char *Out_cbcr,
int width,
int height)
{
Expand All @@ -86,7 +86,7 @@ __kernel void sobel_3x3(__global const unsigned char *In_y,
uchar8 mag = (uchar8)0;

/* Previous row */
uchar16 temp = vload16(0, (uchar*)In_y + pix_id - width - 1);
uchar16 temp = vload16(0, &In_y[pix_id - width - 1]);
short8 left = convert_short8(temp.s01234567);
short8 middle = convert_short8(temp.s12345678);
short8 right = convert_short8(temp.s23456789);
Expand All @@ -99,15 +99,15 @@ __kernel void sobel_3x3(__global const unsigned char *In_y,
gy += right * (short8)(-1);

/* Current row */
temp = vload16(0, (uchar*) In_y + pix_id -1);
temp = vload16(0, &In_y[pix_id -1]);
left = convert_short8(temp.s01234567);
right = convert_short8(temp.s23456789);

gx += left * (short8)(-2);
gx += right * (short8)(2);

/* Next row */
temp = vload16(0, (uchar*) In_y + pix_id + width - 1);
temp = vload16(0, &In_y[pix_id + width - 1]);
left = convert_short8(temp.s01234567);
middle = convert_short8(temp.s12345678);
right = convert_short8(temp.s23456789);
Expand All @@ -120,9 +120,9 @@ __kernel void sobel_3x3(__global const unsigned char *In_y,
gy += right * (short8)(1);

mag = convert_uchar8(clamp(abs(gx) + abs(gy),1, 255));
vstore8(mag, 0, Out_y + pix_id);
vstore8(mag, 0, &Out_y[pix_id]);

/* Write cbcr data (128 for greyscale) */
uchar8 cbcr = (uchar8) 128;
vstore8(cbcr, 0, Out_cbcr + cbcr_id);
vstore8(cbcr, 0, &Out_cbcr[cbcr_id]);
}
87 changes: 59 additions & 28 deletions vdo-opencl-filtering/app/vdo_cl_filter_demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ cl_kernel kernel;
cl_command_queue command_queue;

/* OpenCL memory objects */
cl_mem out_image;
cl_mem out_image_y;
cl_mem out_image_cbcr;

Expand Down Expand Up @@ -254,7 +255,7 @@ setup_opencl(const char* kernel_name, enum render_area area, unsigned width, uns
* pixels directly in the kernel.
*/
static int do_opencl_filtering(cl_mem* in_image_y,
void* out_data,
cl_mem* out_image,
unsigned width,
unsigned height,
size_t image_y_size,
Expand All @@ -270,6 +271,16 @@ static int do_opencl_filtering(cl_mem* in_image_y,
size_t local_work_size[2] = {8, 4};
size_t offset[2] = {1, 0};

cl_buffer_region y_region = {
.origin = 0,
.size = image_y_size,
};

cl_buffer_region c_region = {
.origin = image_y_size,
.size = image_cbcr_size,
};

/*
* Since we use NV12 data we could also create a single memory object
* direcly with luma and chroma included. For simplicity we split them up.
Expand All @@ -278,16 +289,17 @@ static int do_opencl_filtering(cl_mem* in_image_y,
* memory, such that no unnecessary data has to be copied to GPU memory.
* This data may still however be cached in the GPU.
*/
out_image_y = clCreateBuffer(context,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
image_y_size,
out_data,
&ret);
out_image_cbcr = clCreateBuffer(context,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
image_cbcr_size,
(out_data + image_y_size),
out_image_y = clCreateSubBuffer(*out_image,
CL_MEM_WRITE_ONLY,
CL_BUFFER_CREATE_TYPE_REGION,
&y_region,
&ret);

out_image_cbcr = clCreateSubBuffer(*out_image,
CL_MEM_WRITE_ONLY,
CL_BUFFER_CREATE_TYPE_REGION,
&c_region,
&ret);
if (ret != CL_SUCCESS) {
syslog(LOG_ERR, "Unable to create cl memory objects");
return -1;
Expand Down Expand Up @@ -379,6 +391,7 @@ int main(int argc, char* argv[]) {
VdoMap* vdo_stream_info = NULL;
void* out_data = NULL;
cl_mem* in_images = NULL;
cl_int cl_ret = CL_SUCCESS;

const gchar* output_file_format = "yuv"; /* Also the VDO stream format */

Expand Down Expand Up @@ -457,21 +470,6 @@ int main(int argc, char* argv[]) {
/* Size of chroma data in the image buffer */
size_t image_cbcr_size = image_y_size / 2;

/*
* Allocate memory for output buffer. In this case it's more practical with
* a separate output buffer since we're performing a filtering operation.
*/
out_data = mmap(NULL,
(image_y_size + image_cbcr_size),
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0);
if (out_data == MAP_FAILED) {
syslog(LOG_ERR, "mmap failed: %d", errno);
goto exit;
}

/* Set up OpenCL */
if (setup_opencl(kernel_name, cur_render_area, image_width, image_height)) {
syslog(LOG_ERR, "Unable to setup OpenCL");
Expand All @@ -487,6 +485,37 @@ int main(int argc, char* argv[]) {
*/
in_images = (cl_mem*)malloc(sizeof(cl_mem) * buffer_count);

/*
* Allocate memory for output buffer. In this case it's more practical with
* a separate output buffer since we're performing a filtering operation.
*
* If possible, allocate the buffer using OpenCL, and then map up that memory
* to the CPU.
*/
out_image = clCreateBuffer(context,
CL_MEM_ALLOC_HOST_PTR,
image_y_size + image_cbcr_size,
NULL,
&cl_ret);
if (cl_ret != CL_SUCCESS) {
syslog(LOG_ERR, "Unable to create new cl out memory object: %d", cl_ret);
return -1;
}
out_data = clEnqueueMapBuffer(command_queue,
out_image,
CL_TRUE,
CL_MAP_READ,
0,
image_y_size + image_cbcr_size,
0,
NULL,
NULL,
&cl_ret);
if (cl_ret != CL_SUCCESS) {
syslog(LOG_ERR, "Unable to map cl out memory object: %d", cl_ret);
return -1;
}

/* Loop for the pre-determined number of frames */
for (guint n = 0; n < frames; n++) {
/* Lifetimes of buffer and frame are linked, no need to free frame */
Expand Down Expand Up @@ -530,7 +559,7 @@ int main(int argc, char* argv[]) {
goto exit;

if (do_opencl_filtering(&in_image_y,
out_data,
&out_image,
image_width,
image_height,
image_y_size,
Expand All @@ -553,8 +582,10 @@ int main(int argc, char* argv[]) {
if (vdo_error_is_expected(&error))
g_clear_error(&error);

if (munmap(out_data, image_y_size + image_cbcr_size)) {
syslog(LOG_ERR, "Unable to unmap image output buffer");
cl_ret = clEnqueueUnmapMemObject(command_queue, out_image, out_data, 0, NULL, NULL);
if (cl_ret != CL_SUCCESS) {
syslog(LOG_ERR, "Unable to unmap cl memory object: %d", cl_ret);
return -1;
}

hash_table_destroy();
Expand Down

0 comments on commit 660abf3

Please sign in to comment.