/* host.c - OpenCL host program that converts an image to grayscale and saves it as a JPEG. */
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 300
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#include <time.h>
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image.h"
#include "stb_image_write.h"
void print_devices() {
// Get available platforms and devices
cl_platform_id platforms[10];
cl_device_id devices[10];
cl_uint num_platforms, num_devices;
// Get available platforms
clGetPlatformIDs(10, platforms, &num_platforms);
printf("Number of Platforms: %d\n", num_platforms);
for (cl_uint i = 0; i < num_platforms; i++) {
char platform_name[1024];
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
printf("Platform %d: %s\n", i + 1, platform_name);
// Get devices available on this platform
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 10, devices, &num_devices);
for (cl_uint j = 0; j < num_devices; j++) {
char device_name[1024];
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
printf(" Device %d: %s\n", j + 1, device_name);
}
}
}
int main() {
// OpenCL variables
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_mem inputBuffer, outputBuffer;
cl_int err;
print_devices();
// Specify the input and output image paths
const char* inputPath = "/home/anasfarooq8/OpenCL-and-Docker/ISIC_2020_Test_Input/ISIC_0073502.jpg";
const char* outputPath = "/home/anasfarooq8/OpenCL-and-Docker/output/ISIC_0073502_GrayScale.JPG";
// Load the image
int width, height, channels;
unsigned char* image = stbi_load(inputPath, &width, &height, &channels, 0);
if (!image) {
printf("Failed to load image\n");
return -1;
}
printf("\nImage loaded successfully. Width: %d, Height: %d, Channels: %d\n", width, height, channels);
// Prepare a buffer for the grayscale output
unsigned char* grayscale = (unsigned char*)malloc(width * height * sizeof(unsigned char));
// Get the specified platform and device
err = clGetPlatformIDs(1, &platform_id, NULL);
if (err != CL_SUCCESS)
{
printf("Error getting platform. %d\n", err);
return -1;
}
err |= clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
if (err != CL_SUCCESS)
{
printf("Error getting device. %d\n", err);
return -1;
}
// Determine device type and print the device name
cl_device_type deviceType;
char deviceName[1024];
clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, NULL);
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
printf("Running on device: %s\n", deviceName);
// Create a context
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
if (err != CL_SUCCESS)
{
printf("Error creating context.\n");
return -1;
}
// Create a command queue
queue = clCreateCommandQueue(context, device_id, 0, &err);
if (err != CL_SUCCESS)
{
printf("Error creating command queue.\n");
return -1;
}
// Create the program
const char *kernelSource =
"__kernel void grayscale(__global unsigned char* input, __global unsigned char* output, int width, int height, int channels) {"
" int x = get_global_id(0);"
" int y = get_global_id(1);"
" if (x < width && y < height) {"
" int index = (y * width + x) * channels;"
" float gray = 0.299f * input[index] + 0.587f * input[index + 1] + 0.114f * input[index + 2];"
" output[y * width + x] = (unsigned char)gray;"
" }"
"}";
// Create the program
program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
if (err != CL_SUCCESS)
{
printf("Error creating program.\n");
return -1;
}
// Build the program
err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error building program.\n");
return -1;
}
// Create the kernel
kernel = clCreateKernel(program, "grayscale", &err);
if (err != CL_SUCCESS)
{
printf("Error creating kernel.\n");
return -1;
}
// Create memory buffers
inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, width * height * channels * sizeof(unsigned char), image, NULL);
outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(unsigned char), NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error creating memory buffers.\n");
return -1;
}
// Set kernel arguments
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
err |= clSetKernelArg(kernel, 2, sizeof(int), &width);
err |= clSetKernelArg(kernel, 3, sizeof(int), &height);
err |= clSetKernelArg(kernel, 4, sizeof(int), &channels);
if (err != CL_SUCCESS) {
printf("Error setting kernel arguments: %d\n", err);
return -1;
}
// Set global and local sizes
// size_t globalSize[2] = { (size_t)width, (size_t)height };
// Set global and local sizes
size_t globalSize[2] = { (size_t)((width + 15) / 16) * 16, (size_t)((height + 15) / 16) * 16 };
// Determine local size based on device type
size_t localSize[2];
if (deviceType == CL_DEVICE_TYPE_CPU) {
// CPU: Use local size of 1x1
localSize[0] = 1;
localSize[1] = 1;
} else {
// GPU: Use the maximum work-group size supported
size_t maxLocalSize, maxComputeUnits;
maxLocalSize = maxComputeUnits = 0;
clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxLocalSize, NULL);
clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &maxComputeUnits, NULL);
// Max work group size 256 => (Intel(R) UHD Graphics 620)
localSize[0] = 16;
localSize[1] = 16;
printf("Max Work Group Size: %zu\n", maxLocalSize);
printf("Max Compute Units: %zu\n", maxComputeUnits);
}
printf("Global Size X: %zu\n", globalSize[0]);
printf("Global Size Y: %zu\n", globalSize[1]);
printf("Local Size X: %zu\n", localSize[0]);
printf("Local Size Y: %zu\n", localSize[1]);
clock_t start = clock();
// Execute the kernel
err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, localSize, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf("Error executing kernel: %d\n", err);
return -1;
}
clock_t end = clock();
// Read the result
err = clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, width * height * sizeof(unsigned char), grayscale, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf("Error reading result: %d\n", err);
return -1;
}
// Clean up
if (inputBuffer)
clReleaseMemObject(inputBuffer);
if (outputBuffer)
clReleaseMemObject(outputBuffer);
if (kernel)
clReleaseKernel(kernel);
if (program)
clReleaseProgram(program);
if (queue)
clReleaseCommandQueue(queue);
if (context)
clReleaseContext(context);
// Save the Converted GrayScale Image
stbi_write_jpg(outputPath, width, height, 1, grayscale, 100);
free(grayscale);
stbi_image_free(image);
printf("Image Converted & Saved Successfully!\n");
double time_taken = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("Time taken for grayscale conversion: %f seconds\n", time_taken);
return 0;
}