#include #include #include "cpu_fft.h" void loop_data_fftw (OutputType outputType, TimeEntry *time_entry) { Timer *timer; timer = timer_new (); time_entry->dim_entries = (DimEntry *)malloc(N_DIMS * sizeof(DimEntry)); time_entry->lib_name = "CPU (FFTW)"; printf ("Device: %s\n", time_entry->lib_name); fflush (stdout); for (int k = 0; k < N_DIMS; k++) { int dim = DIMS[k]; int power_min = N_POWERS_INTERVALS[k][0]; int power_max = N_POWERS_INTERVALS[k][1]; int num_entries = power_max - power_min + 1; time_entry->dim_entries[k].n_dims = dim; time_entry->dim_entries[k].sizes = (unsigned int **)malloc(sizeof(unsigned int *) * num_entries); time_entry->dim_entries[k].times = (double *)malloc(sizeof(double) * num_entries); time_entry->dim_entries[k].errors = (double *)malloc(sizeof(double) * num_entries); PRINT_DIM (dim); fflush (stdout); for (int m = power_min, i = 0; m <= power_max; m++, i++) { fftw_complex *host_orig_mem; fftw_complex *host_result_mem; fftw_complex *host_immediate_mem; fftw_plan plan; fftw_plan inverse_plan; double time_sec; double sum = 0.0; size_t side_size = pow(2,m); size_t size = pow(side_size,dim); size_t size_bytes = sizeof (fftw_complex) * size; host_orig_mem = fftw_malloc (size_bytes); host_immediate_mem = fftw_malloc (size_bytes); host_result_mem = fftw_malloc (size_bytes); switch (dim) { case 1: plan = fftw_plan_dft_1d (side_size, host_orig_mem, host_immediate_mem, FFTW_FORWARD, FFTW_ESTIMATE); inverse_plan = fftw_plan_dft_1d (side_size, host_immediate_mem, host_result_mem, FFTW_BACKWARD, FFTW_ESTIMATE); break; case 2: plan = fftw_plan_dft_2d (side_size, side_size, host_orig_mem, host_immediate_mem, FFTW_FORWARD, FFTW_ESTIMATE); inverse_plan = fftw_plan_dft_2d (side_size, side_size, host_immediate_mem, host_result_mem, FFTW_BACKWARD, FFTW_ESTIMATE); break; case 3: plan = fftw_plan_dft_3d (side_size, side_size, side_size, host_orig_mem, host_immediate_mem, FFTW_FORWARD, FFTW_ESTIMATE); inverse_plan = fftw_plan_dft_3d (side_size, side_size, side_size, host_immediate_mem, host_result_mem, FFTW_BACKWARD, FFTW_ESTIMATE); break; default: fprintf (stderr, "Unknown FFT dimensions\n"); return; } for (int j = 0; j < size; j++) { host_orig_mem[j][0] = rand() / ((double) RAND_MAX); host_orig_mem[j][1] = rand() / ((double) RAND_MAX); } PRINT_DIMS(dim, side_size); fflush (stdout); printf ("."); fflush (stdout); timer_start (timer); for (int j = 0; j < N_RUNS; j++) { fftw_execute (plan); } timer_stop (timer); fftw_execute (inverse_plan); for (int j = 0; j < size; j++) { sum += fabs (host_result_mem[j][0] / size - host_orig_mem[j][0]); sum += fabs (host_result_mem[j][1] / size - host_orig_mem[j][1]); } time_sec = timer_get_seconds (timer) / N_RUNS; time_entry->dim_entries[k].sizes[i] = (unsigned int *)malloc(sizeof(unsigned int) * dim); for (int j = 0; j < dim; j++) { time_entry->dim_entries[k].sizes[i][j] = side_size; } time_entry->dim_entries[k].times[i] = get_measurements_with_format(outputType, size_bytes, time_sec); time_entry->dim_entries[k].errors[i] = sum / size; fftw_destroy_plan (inverse_plan); fftw_destroy_plan (plan); fftw_free (host_orig_mem); fftw_free (host_immediate_mem); fftw_free (host_result_mem); } printf ("\n"); fflush (stdout); } printf ("\n"); timer_destroy (timer); }