|
@@ -119,7 +119,7 @@ getDataFromSplitBinary (FILE *f, Matrix *ret)
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
-readFilterFromFile (int isSmooth, int n)
|
|
|
+readFilterFromFile (int isSmooth, int n, float *filter, int *filterlength)
|
|
|
{
|
|
|
char filtname[64];
|
|
|
if (n != 0)
|
|
@@ -143,10 +143,8 @@ readFilterFromFile (int isSmooth, int n)
|
|
|
|
|
|
fread (filterCoefficients, sizeof(float), size, f);
|
|
|
|
|
|
- for (int i = 0; i < size; ++i)
|
|
|
- {
|
|
|
- printf("%e\n", filterCoefficients[i]);
|
|
|
- }
|
|
|
+ filter = filterCoefficients;
|
|
|
+ *filterlength = size;
|
|
|
|
|
|
return 1;
|
|
|
}
|
|
@@ -300,7 +298,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
|
|
|
get_event_times (event, &wait, &execution);
|
|
|
waitFFTShift[0] = wait;
|
|
|
executionFFTShift[0] = execution;
|
|
|
- printf("Swap x axis: \nwaited for device %d us:\n time to execute: %d us\n", wait, execution);
|
|
|
+ printf("Swap x axis: \nwaited for device %lu us:\n time to execute: %lu us\n", wait, execution);
|
|
|
|
|
|
wallClock = g_timer_elapsed (timer, NULL);
|
|
|
|
|
@@ -331,7 +329,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
|
|
|
get_event_times (event, &wait, &execution);
|
|
|
waitFFTShift[1] = wait;
|
|
|
executionFFTShift[1] = execution;
|
|
|
- printf("Swap y axis: \n waited for device %d us:\n time to execute: %d us\n", wait, execution);
|
|
|
+ printf("Swap y axis: \n waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
|
|
|
|
|
|
wallClock = g_timer_elapsed (timer, NULL);
|
|
|
|
|
@@ -357,7 +355,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
|
|
|
g_timer_stop (timer);
|
|
|
get_event_times (event, &wait, &execution);
|
|
|
computeAbsSpectrum = execution;
|
|
|
- printf("waited for device %d us:\n time to execute: %d us\n", wait, execution);
|
|
|
+ printf("waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
|
|
|
|
|
|
wallClock = g_timer_elapsed (timer, NULL);
|
|
|
|
|
@@ -370,7 +368,7 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
|
|
|
OCL_CHECK_ERROR (errcode);
|
|
|
clWaitForEvents (1, &event);
|
|
|
get_event_times (event, &wait, &execution);
|
|
|
- printf("Read out result \n waited for device %d us:\n time to execute: %d us\n", wait, execution);
|
|
|
+ printf("Read out result \n waited for device %lu us:\n time to execute: %lu us\n", wait, execution);
|
|
|
readFromDevice = wait + execution;
|
|
|
|
|
|
wallClock = g_timer_elapsed (timer, NULL);
|
|
@@ -379,11 +377,88 @@ tranformFromKSpace (Matrix * const A, Matrix *result,
|
|
|
|
|
|
*result = C;
|
|
|
|
|
|
+ g_timer_destroy (timer);
|
|
|
+
|
|
|
+
|
|
|
clFFT_DestroyPlan (fftPlan);
|
|
|
+ OCL_CHECK_ERROR (clReleaseEvent (event));
|
|
|
OCL_CHECK_ERROR (clReleaseMemObject (d_A.elements));
|
|
|
OCL_CHECK_ERROR (clReleaseMemObject (d_C.elements));
|
|
|
}
|
|
|
|
|
|
+static void
|
|
|
+filterData (int decimFactor, int isSmooth, Matrix * const input, Matrix *output,
|
|
|
+ const cl_context context,
|
|
|
+ const cl_kernel convKernel,
|
|
|
+ const cl_command_queue queue)
|
|
|
+{
|
|
|
+
|
|
|
+ cl_int errcode;
|
|
|
+ // measure event queue
|
|
|
+ cl_event event;
|
|
|
+ // times in micro seconds
|
|
|
+ unsigned long wait;
|
|
|
+ unsigned long execution;
|
|
|
+
|
|
|
+ GTimer *timer;
|
|
|
+ timer = g_timer_new ();
|
|
|
+ double wallClock = 0.0;
|
|
|
+
|
|
|
+ float *filter;
|
|
|
+ int filterlength = 0;
|
|
|
+ readFilterFromFile (isSmooth, decimFactor, filter, &filterlength);
|
|
|
+
|
|
|
+ int length = input->width * input->height;
|
|
|
+
|
|
|
+ cl_mem tempBuffer = clCreateBuffer (context, CL_MEM_READ_WRITE,
|
|
|
+ length * sizeof(float) * 2, NULL, &errcode);
|
|
|
+ OCL_CHECK_ERROR (errcode);
|
|
|
+
|
|
|
+ GPUMatrix d_C;
|
|
|
+ d_C.width = input->width;
|
|
|
+ d_C.height = input->height;
|
|
|
+ d_C.elements = clCreateBuffer (context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
|
|
+ length * sizeof(float) * 2, input->elements, &errcode);
|
|
|
+
|
|
|
+ OCL_CHECK_ERROR (errcode);
|
|
|
+
|
|
|
+
|
|
|
+ size_t globalWorkSize[] = { input->height, input->width };
|
|
|
+ cl_uint i = 0;
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.width),
|
|
|
+ (void*) &d_C.width));
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.height),
|
|
|
+ (void*) &d_C.height));
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (d_C.elements),
|
|
|
+ (void*) &d_C.elements));
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (tempBuffer),
|
|
|
+ (void*) &tempBuffer));
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (filter),
|
|
|
+ (void*) &filter));
|
|
|
+ OCL_CHECK_ERROR (clSetKernelArg(convKernel, i++, sizeof (filterlength),
|
|
|
+ (void*) &filterlength));
|
|
|
+
|
|
|
+ g_timer_start (timer);
|
|
|
+ OCL_CHECK_ERROR (clEnqueueNDRangeKernel (queue, convKernel, 2, NULL,
|
|
|
+ globalWorkSize, NULL,
|
|
|
+ 0, NULL, &event));
|
|
|
+
|
|
|
+ clWaitForEvents (1, &event);
|
|
|
+ g_timer_stop (timer);
|
|
|
+ get_event_times (event, &wait, &execution);
|
|
|
+ printf("\nwaited for device %lu us:\n time to execute: %lu us\n", wait, execution);
|
|
|
+
|
|
|
+ wallClock = g_timer_elapsed (timer, NULL);
|
|
|
+
|
|
|
+ printf("took %e on host\n", wallClock);
|
|
|
+
|
|
|
+ g_timer_destroy (timer);
|
|
|
+ OCL_CHECK_ERROR (clReleaseEvent (event));
|
|
|
+
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
int
|
|
|
main (int argc, const char **argv)
|
|
|
{
|
|
@@ -398,6 +473,7 @@ main (int argc, const char **argv)
|
|
|
cl_program program;
|
|
|
cl_kernel absKernel;
|
|
|
cl_kernel fftshiftKernel;
|
|
|
+ cl_kernel convolutionKernelInterleaved;
|
|
|
|
|
|
ocl = ocl_new_from_args (argc, argv, CL_QUEUE_PROFILING_ENABLE);
|
|
|
context = ocl_get_context (ocl);
|
|
@@ -411,26 +487,29 @@ main (int argc, const char **argv)
|
|
|
OCL_CHECK_ERROR (errcode);
|
|
|
fftshiftKernel = clCreateKernel (program, "fftshift", &errcode);
|
|
|
OCL_CHECK_ERROR (errcode);
|
|
|
+ convolutionKernelInterleaved = clCreateKernel (program, "convolutionKernelInterleaved", &errcode);
|
|
|
+ OCL_CHECK_ERROR (errcode);
|
|
|
|
|
|
//create Matrices
|
|
|
- Matrix kspace, absimg;
|
|
|
+ Matrix kSpace, decimkSpace, absimg;
|
|
|
Matrix refImg;
|
|
|
Matrix sqD;
|
|
|
|
|
|
- FILE *bin = fopen ("ksd.bin", "r");
|
|
|
+ FILE *bin = fopen ("ksd.bin", "r");
|
|
|
if (bin == NULL)
|
|
|
{
|
|
|
printf ("Error opening file!\n");
|
|
|
exit (1);
|
|
|
}
|
|
|
|
|
|
- getDataFromSplitBinary (bin, &kspace);
|
|
|
+ getDataFromSplitBinary (bin, &kSpace);
|
|
|
fclose (bin);
|
|
|
|
|
|
- readFilterFromFile (1, 8);
|
|
|
+ filterData (8, 1, &kSpace, &decimkSpace,
|
|
|
+ context, convolutionKernelInterleaved, queues[0]);
|
|
|
|
|
|
- tranformFromKSpace (&kspace, &absimg, MRI_CHANNELS, context,
|
|
|
- fftshiftKernel, absKernel, queues[0]);
|
|
|
+ /* tranformFromKSpace (&kSpace, &absimg, MRI_CHANNELS, context,
|
|
|
+ fftshiftKernel, absKernel, queues[0]);*/
|
|
|
|
|
|
MatrixSplit absimgSplit;
|
|
|
convertInterleavedToSplitMatrix (&absimg, &absimgSplit);
|
|
@@ -442,7 +521,8 @@ main (int argc, const char **argv)
|
|
|
exit(1);
|
|
|
}
|
|
|
|
|
|
- writeSplitMatrixToBinaryFile (f, &absimgSplit);
|
|
|
+ writeSplitMatrixToBinaryFile (f, &kSpace);
|
|
|
+ writeSplitMatrixToBinaryFile (f, &decimkSpace);
|
|
|
fclose (f);
|
|
|
|
|
|
ocl_free (ocl);
|