Browse Source

filter implemented

valentin 6 years ago
parent
commit
2a46230182
2 changed files with 121 additions and 19 deletions
  1. 25 3
      ffthelp.cl
  2. 96 16
      src/prog.c

+ 25 - 3
ffthelp.cl

@@ -41,8 +41,6 @@ kernel void fftshift(int Cwidth, int Cheight, __global float* Celements, __globa
 			
 			data_out[row * Cwidth * 2 + col * 2 + Cwidth] = Celements[row * Cwidth * 2 + col * 2];
 			data_out[row * Cwidth * 2 + col * 2 + 1 + Cwidth] = Celements[row * Cwidth * 2 + col * 2 + 1];
-
-
 		}
 	}
 
@@ -58,4 +56,28 @@ kernel void fftshift(int Cwidth, int Cheight, __global float* Celements, __globa
 			data_out[(row + Cheight / 2) * Cwidth * 2 + col * 2 + 1] = Celements[row * Cwidth * 2 + col * 2 + 1];
 		}
 	}
-}
+}
+
+kernel void convolutionKernelInterleaved(int Cwidth, int Cheight, __global float* Celements, __global float *output, __global float* filter, int numCoeff)
+{
+
+	int row = get_global_id(0);
+	int col = get_global_id(1);
+
+	int index = 0;
+
+	float sum = 0.f;
+
+	int colOffset = 0;
+
+	for (int i = 0; i < numCoeff; i++)
+	{
+		colOffset = col - (numCoeff / 2) + i;
+		if (colOffset >= 0 && colOffset < Cwidth)
+		{
+			sum += Celements[row * 2 * Cwidth + 2 * (colOffset)] * filter[numCoeff - 1 - i];
+		}
+	}
+
+	Celements[row * 2 * Cwidth + col] =  sum;
+}

+ 96 - 16
src/prog.c

@@ -119,7 +119,7 @@ getDataFromSplitBinary (FILE *f, Matrix *ret)
 }
 
 static int
-readFilterFromFile (int isSmooth, int n)
+readFilterFromFile (int isSmooth, int n, float *filter, int *filterlength)
 {
     char filtname[64];
     if (n != 0)
@@ -143,10 +143,8 @@ readFilterFromFile (int isSmooth, int n)
 
     fread (filterCoefficients, sizeof(float), size, f);
 
-    for (int i = 0; i < size; ++i)
-    {
-        printf("%e\n", filterCoefficients[i]);
-    }
+    filter = filterCoefficients;
+    *filterlength = size;
 
     return 1;
 }
@@ -300,7 +298,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
     get_event_times (event, &wait, &execution);
     waitFFTShift[0] = wait;
     executionFFTShift[0] = execution;
-    printf("Swap x axis: \nwaited for device %d us:\n time to execute: %d us\n", wait, execution);
+    printf("Swap x axis: \nwaited for device %lu us:\n time to execute: %lu us\n", wait, execution);
 
     wallClock = g_timer_elapsed (timer, NULL);
 
@@ -331,7 +329,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
     get_event_times (event, &wait, &execution);
     waitFFTShift[1] = wait;
     executionFFTShift[1] = execution;
-    printf("Swap y axis: \n waited for device %d us:\n time to execute: %d us\n", wait, execution);
+    printf("Swap y axis: \n waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
 
     wallClock = g_timer_elapsed (timer, NULL);
 
@@ -357,7 +355,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
     g_timer_stop (timer);
     get_event_times (event, &wait, &execution);
     computeAbsSpectrum = execution;
-    printf("waited for device %d us:\n time to execute: %d us\n", wait, execution);
+    printf("waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
 
     wallClock = g_timer_elapsed (timer, NULL);
 
@@ -370,7 +368,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
     OCL_CHECK_ERROR (errcode);
     clWaitForEvents (1, &event);
     get_event_times (event, &wait, &execution);
-    printf("Read out result \n waited for device %d us:\n time to execute: %d us\n", wait, execution);
+    printf("Read out result \n waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
     readFromDevice = wait + execution;
 
     wallClock = g_timer_elapsed (timer, NULL);
@@ -379,11 +377,88 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
 
     *result = C;
 
+    g_timer_destroy (timer);
+
+
     clFFT_DestroyPlan (fftPlan);
+    OCL_CHECK_ERROR (clReleaseEvent (event));
     OCL_CHECK_ERROR (clReleaseMemObject (d_A.elements));
     OCL_CHECK_ERROR (clReleaseMemObject (d_C.elements));
 }
 
+static void
+filterData (int decimFactor, int isSmooth, Matrix * const input, Matrix *output,
+             const cl_context context,
+             const cl_kernel convKernel,
+             const cl_command_queue queue)
+{
+
+    cl_int errcode;
+    // measure event queue
+    cl_event event;
+    // times in micro seconds
+    unsigned long wait;
+    unsigned long execution;
+
+    GTimer *timer;
+    timer = g_timer_new ();
+    double wallClock = 0.0;
+
+    float *filter;
+    int filterlength = 0;
+    readFilterFromFile (isSmooth, decimFactor, filter, &filterlength);
+
+    int length = input->width * input->height;
+
+    cl_mem tempBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE,
+                                   length * sizeof(float) * 2, NULL, &errcode);
+    OCL_CHECK_ERROR (errcode);
+
+    GPUMatrix d_C;
+    d_C.width = input->width;
+    d_C.height = input->height;
+    d_C.elements = clCreateBuffer (context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                   length * sizeof(float) * 2, input->elements, &errcode);
+
+    OCL_CHECK_ERROR (errcode);
+
+
+    size_t globalWorkSize[] = { input->height, input->width };
+    cl_uint i = 0;
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.width), 
+                    (void*) &d_C.width));
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.height), 
+                    (void*) &d_C.height));
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.elements), 
+                    (void*) &d_C.elements));
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (tempBuffer), 
+                    (void*) &tempBuffer));
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (filter), 
+                    (void*) &filter));
+    OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (filterlength), 
+                    (void*) &filterlength));
+
+    g_timer_start (timer);
+    OCL_CHECK_ERROR (clEnqueueNDRangeKernel (queue, convKernel, 2, NULL, 
+                            globalWorkSize, NULL, 
+                            0, NULL, &event));
+
+    clWaitForEvents (1, &event);
+    g_timer_stop (timer);
+    get_event_times (event, &wait, &execution);
+    printf("\nwaited for device %lu us:\n time to execute: %lu us\n", wait, execution);
+
+    wallClock = g_timer_elapsed (timer, NULL);
+
+    printf("took %e on host\n", wallClock);
+
+    g_timer_destroy (timer);
+    OCL_CHECK_ERROR (clReleaseEvent (event));
+
+
+}
+
+
 int
 main (int argc, const char **argv)
 {
@@ -398,6 +473,7 @@ main (int argc, const char **argv)
     cl_program program;
     cl_kernel absKernel;
     cl_kernel fftshiftKernel;
+    cl_kernel convolutionKernelInterleaved;
 
     ocl = ocl_new_from_args (argc, argv, CL_QUEUE_PROFILING_ENABLE);
     context = ocl_get_context (ocl);
@@ -411,26 +487,29 @@ main (int argc, const char **argv)
     OCL_CHECK_ERROR (errcode);
     fftshiftKernel = clCreateKernel (program, "fftshift", &errcode);
     OCL_CHECK_ERROR (errcode);
+    convolutionKernelInterleaved = clCreateKernel (program, "convolutionKernelInterleaved", &errcode);
+    OCL_CHECK_ERROR (errcode);
 
     //create Matrices
-    Matrix kspace, absimg;
+    Matrix kSpace, decimkSpace, absimg;
     Matrix refImg;
     Matrix sqD;
 
-	FILE *bin = fopen ("ksd.bin", "r");
+    FILE *bin = fopen ("ksd.bin", "r");
     if (bin == NULL)
     {
         printf ("Error opening file!\n");
         exit (1);
     }
 
-    getDataFromSplitBinary (bin, &kspace);
+    getDataFromSplitBinary (bin, &kSpace);
     fclose (bin);
 
-    readFilterFromFile (1, 8);
+    filterData (8, 1, &kSpace, &decimkSpace, 
+                        context, convolutionKernelInterleaved, queues[0]);
 
-    tranformFromKSpace (&kspace, &absimg, MRI_CHANNELS, context,
-                        fftshiftKernel, absKernel, queues[0]);
+   /* tranformFromKSpace (&kSpace, &absimg, MRI_CHANNELS, context,
+                        fftshiftKernel, absKernel, queues[0]);*/
 
     MatrixSplit absimgSplit;
     convertInterleavedToSplitMatrix (&absimg, &absimgSplit);
@@ -442,7 +521,8 @@ main (int argc, const char **argv)
         exit(1);
     }
 
-    writeSplitMatrixToBinaryFile (f, &absimgSplit);
+    writeSplitMatrixToBinaryFile (f, &kSpace);
+    writeSplitMatrixToBinaryFile (f, &decimkSpace);
     fclose (f);
 
     ocl_free (ocl);