Skip to content

diffs between multi_os and originals in the upstream repo #3

@smokhov

Description

@smokhov

Saving the diffs between the multi_os versions and the upstream originals here for reference before merging:

  • Tutorial 1:
--- "src/Tutorial 1.cpp"	2025-06-22 21:06:37.057787000 -0400
+++ "Tutorial 1.cpp"	2025-06-22 21:06:37.050783000 -0400
@@ -54,7 +54,7 @@
 			throw err;
 		}
 
-		//Part 4 - memory allocation
+		//Part 3 - memory allocation
 		//host - input
 		std::vector<int> A = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; //C++11 allows this type of initialisation
 		std::vector<int> B = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
@@ -70,13 +70,13 @@
 		cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, vector_size);
 		cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, vector_size);
 
-		//Part 5 - device operations
+		//Part 4 - device operations
 
-		//5.1 Copy arrays A and B to device memory
+		//4.1 Copy arrays A and B to device memory
 		queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, vector_size, &A[0]);
 		queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, vector_size, &B[0]);
 
-		//5.2 Setup and execute the kernel (i.e. device code)
+		//4.2 Setup and execute the kernel (i.e. device code)
 		cl::Kernel kernel_add = cl::Kernel(program, "add");
 		kernel_add.setArg(0, buffer_A);
 		kernel_add.setArg(1, buffer_B);
@@ -84,7 +84,7 @@
 
 		queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(vector_elements), cl::NullRange);
 
-		//5.3 Copy the result from device to host
+		//4.3 Copy the result from device to host
 		queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, vector_size, &C[0]);
 
 		std::cout << "A = " << A << std::endl;
  • Tutorial 2:
--- "src/Tutorial 2.cpp"	2025-06-22 21:06:37.068775000 -0400
+++ "Tutorial 2.cpp"	2025-06-22 21:06:37.061813000 -0400
@@ -82,12 +82,12 @@
 //		queue.enqueueWriteBuffer(dev_convolution_mask, CL_TRUE, 0, convolution_mask.size()*sizeof(float), &convolution_mask[0]);
 
 		//4.2 Setup and execute the kernel (i.e. device code)
-		cl::Kernel kernel = cl::Kernel(program, "identityND");
+		cl::Kernel kernel = cl::Kernel(program, "identity");
 		kernel.setArg(0, dev_image_input);
 		kernel.setArg(1, dev_image_output);
 //		kernel.setArg(2, dev_convolution_mask);
 
-		queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.width(), image_input.height(), image_input.spectrum()), cl::NullRange);
+		queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.size()), cl::NullRange);
 
 		vector<unsigned char> output_buffer(image_input.size());
 		//4.3 Copy the result from device to host
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl	2025-06-22 21:06:37.066781000 -0400
+++ src/kernels/my_kernels.cl	2025-06-22 21:06:37.070804000 -0400
@@ -9,7 +9,6 @@
 	int image_size = get_global_size(0)/3; //each image consists of 3 colour channels
 	int colour_channel = id / image_size; // 0 - red, 1 - green, 2 - blue
 
-	//this is just a copy operation, modify to filter out the individual colour channels
 	B[id] = A[id];
 }
 
@@ -30,7 +29,7 @@
 }
 
 //2D averaging filter
-kernel void avg_filterND(global const uchar* A, global uchar* B) {
+kernel void avg_filter2D(global const uchar* A, global uchar* B) {
 	int width = get_global_size(0); //image width in pixels
 	int height = get_global_size(1); //image height in pixels
 	int image_size = width*height; //image size in pixels
@@ -42,24 +41,19 @@
 
 	int id = x + y*width + c*image_size; //global id in 1D space
 
-	uint result = 0;
+	ushort result = 0;
 
-	//simple boundary handling - just copy the original pixel
-	if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
-		result = A[id];	
-	} else {
-		for (int i = (x-1); i <= (x+1); i++)
-		for (int j = (y-1); j <= (y+1); j++) 
-			result += A[i + j*width + c*image_size];
+	for (int i = (x-1); i <= (x+1); i++)
+	for (int j = (y-1); j <= (y+1); j++) 
+		result += A[i + j*width + c*image_size];
 
-		result /= 9;
-	}
+	result /= 9;
 
 	B[id] = (uchar)result;
 }
 
 //2D 3x3 convolution kernel
-kernel void convolutionND(global const uchar* A, global uchar* B, constant float* mask) {
+kernel void convolution2D(global const uchar* A, global uchar* B, constant float* mask) {
 	int width = get_global_size(0); //image width in pixels
 	int height = get_global_size(1); //image height in pixels
 	int image_size = width*height; //image size in pixels
@@ -71,16 +65,11 @@
 
 	int id = x + y*width + c*image_size; //global id in 1D space
 
-	float result = 0;
+	ushort result = 0;
 
-	//simple boundary handling - just copy the original pixel
-	if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
-		result = A[id];	
-	} else {
-		for (int i = (x-1); i <= (x+1); i++)
-		for (int j = (y-1); j <= (y+1); j++) 
-			result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
-	}
+	for (int i = (x-1); i <= (x+1); i++)
+	for (int j = (y-1); j <= (y+1); j++) 
+		result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
 
 	B[id] = (uchar)result;
 }
\ No newline at end of file
  • Tutorial 3:
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl	2025-06-22 21:06:37.110794000 -0400
+++ src/kernels/my_kernels.cl	2025-06-22 21:06:37.114780000 -0400
@@ -195,7 +195,7 @@
 kernel void scan_add_atomic(global int* A, global int* B) {
 	int id = get_global_id(0);
 	int N = get_global_size(0);
-	for (int i = id+1; i < N && id < N; i++)
+	for (int i = id+1; i < N; i++)
 		atomic_add(&B[i], A[id]);
 }

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions