|
@@ -54,6 +54,7 @@ exit(EXIT_FAILURE); \
|
|
|
#endif
|
|
|
|
|
|
#include "timer.h"
|
|
|
+#include "time_entry.h"
|
|
|
|
|
|
#define N_DIMS 3
|
|
|
#define N_RUNS 4
|
|
@@ -63,7 +64,7 @@ const int N_ARRAYS = 6;
|
|
|
|
|
|
const int DIMS[N_DIMS] = {1, 2, 3};
|
|
|
const int N_DIM_ARRAYS[N_DIMS] = {5, 4, 4};
|
|
|
-const int N_POWERS_INTERVALS[N_DIMS][2] = {{5, 11}, {8, 12}, {7, 8}};
|
|
|
+const int N_POWERS_INTERVALS[N_DIMS][2] = {{5, 11}, {8, 11}, {7, 7}};
|
|
|
|
|
|
#define UPDATE_SIZE(size) size *= 8;
|
|
|
#define PRINT_DIM_SIZE(side_size,dim) { \
|
|
@@ -264,29 +265,37 @@ loop_data_cuda (const char *vendor,
|
|
|
|
|
|
#ifdef HAVE_FFTW
|
|
|
static void
|
|
|
-loop_data_fftw (double *times, double *errors, FILE *fp)
|
|
|
+loop_data_fftw (TimeEntry *time_entry)
|
|
|
{
|
|
|
Timer *timer;
|
|
|
|
|
|
timer = timer_new ();
|
|
|
- fprintf (fp, "FFTW_bw FFTW_err ");
|
|
|
+
|
|
|
+ time_entry->lib_name = "FFTW";
|
|
|
+ time_entry->dim_entries = (DimEntry *)malloc(N_DIMS * sizeof(DimEntry));
|
|
|
|
|
|
for (int k = 0; k < N_DIMS; k++) {
|
|
|
int dim = DIMS[k];
|
|
|
int power_min = N_POWERS_INTERVALS[k][0];
|
|
|
int power_max = N_POWERS_INTERVALS[k][1];
|
|
|
+ int num_entries = power_max - power_min + 1;
|
|
|
+
|
|
|
+ time_entry->dim_entries[k].n_dims = dim;
|
|
|
+ time_entry->dim_entries[k].sizes = (unsigned int **)malloc(sizeof(unsigned int *) * num_entries);
|
|
|
+ time_entry->dim_entries[k].times = (double *)malloc(sizeof(double) * num_entries);
|
|
|
+ time_entry->dim_entries[k].errors = (double *)malloc(sizeof(double) * num_entries);
|
|
|
|
|
|
printf ("%dD:", dim);
|
|
|
fflush (stdout);
|
|
|
|
|
|
- for (int m = power_min; m <= power_max; m++) {
|
|
|
+ for (int m = power_min, i = 0; m <= power_max; m++, i++) {
|
|
|
fftw_complex *host_orig_mem;
|
|
|
fftw_complex *host_result_mem;
|
|
|
fftw_complex *host_immediate_mem;
|
|
|
fftw_plan plan;
|
|
|
fftw_plan inverse_plan;
|
|
|
- //double time;
|
|
|
- //double mflops;
|
|
|
+ double time;
|
|
|
+ double mflops;
|
|
|
double sum = 0.0;
|
|
|
|
|
|
size_t side_size = pow(2,m);
|
|
@@ -332,66 +341,16 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
|
|
|
host_orig_mem[j][1] = rand() / ((double) RAND_MAX);
|
|
|
}
|
|
|
|
|
|
- if (dim == 1) printf (" %zu.", side_size);
|
|
|
- else if (dim == 2) printf (" %zux%zu.", side_size, side_size);
|
|
|
- else printf (" %zux%zux%zu.", side_size, side_size, side_size);
|
|
|
- fflush (stdout);
|
|
|
-
|
|
|
- timer_start (timer);
|
|
|
-
|
|
|
- for (int j = 0; j < N_RUNS; j++) {
|
|
|
- fftw_execute (plan);
|
|
|
+ if (dim == 1) {
|
|
|
+ printf (" %zu.", side_size);
|
|
|
}
|
|
|
-
|
|
|
- timer_stop (timer);
|
|
|
-
|
|
|
- /* Check precision */
|
|
|
- fftw_execute (inverse_plan);
|
|
|
-
|
|
|
- for (int j = 0; j < size; j++) {
|
|
|
- sum += fabs (host_result_mem[j][0] / size - host_orig_mem[j][0]);
|
|
|
- sum += fabs (host_result_mem[j][1] / size - host_orig_mem[j][1]);
|
|
|
+ else if (dim == 2) {
|
|
|
+ printf (" %zux%zu.", side_size, side_size);
|
|
|
}
|
|
|
-
|
|
|
- //time = timer_get_seconds (timer) / N_RUNS / 1000.0;
|
|
|
- //mflops = 5 * size * log (size) / log (2) / time;
|
|
|
- //times[i] = mflops;
|
|
|
- //errors[i] = sum / size;
|
|
|
-
|
|
|
- fftw_destroy_plan (inverse_plan);
|
|
|
- fftw_destroy_plan (plan);
|
|
|
- fftw_free (host_orig_mem);
|
|
|
- fftw_free (host_immediate_mem);
|
|
|
- fftw_free (host_result_mem);
|
|
|
- }
|
|
|
- printf ("\n");
|
|
|
- fflush (stdout);
|
|
|
- }
|
|
|
- /*
|
|
|
- for (int i = 0; i < N_ARRAYS; i++) {
|
|
|
- fftw_complex *host_orig_mem;
|
|
|
- fftw_complex *host_result_mem;
|
|
|
- fftw_complex *host_immediate_mem;
|
|
|
- fftw_plan plan;
|
|
|
- fftw_plan inverse_plan;
|
|
|
- double time;
|
|
|
- double mflops;
|
|
|
- double sum = 0.0;
|
|
|
-
|
|
|
- UPDATE_SIZE (size);
|
|
|
-
|
|
|
- host_orig_mem = fftw_malloc (sizeof (fftw_complex) * size);
|
|
|
- host_immediate_mem = fftw_malloc (sizeof (fftw_complex) * size);
|
|
|
- host_result_mem = fftw_malloc (sizeof (fftw_complex) * size);
|
|
|
-
|
|
|
- plan = fftw_plan_dft_1d (size, host_orig_mem, host_immediate_mem, FFTW_FORWARD, FFTW_ESTIMATE);
|
|
|
-
|
|
|
- for (int j = 0; j < size; j++) {
|
|
|
- host_orig_mem[j][0] = rand() / ((double) RAND_MAX);
|
|
|
- host_orig_mem[j][1] = rand() / ((double) RAND_MAX);
|
|
|
+ else {
|
|
|
+ printf (" %zux%zux%zu.", side_size, side_size, side_size);
|
|
|
}
|
|
|
|
|
|
- printf (" %zu.", size);
|
|
|
fflush (stdout);
|
|
|
|
|
|
timer_start (timer);
|
|
@@ -402,7 +361,7 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
|
|
|
|
|
|
timer_stop (timer);
|
|
|
|
|
|
- inverse_plan = fftw_plan_dft_1d (size, host_immediate_mem, host_result_mem, FFTW_BACKWARD, FFTW_ESTIMATE);
|
|
|
+ /* Check precision */
|
|
|
fftw_execute (inverse_plan);
|
|
|
|
|
|
for (int j = 0; j < size; j++) {
|
|
@@ -412,16 +371,25 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
|
|
|
|
|
|
time = timer_get_seconds (timer) / N_RUNS / 1000.0;
|
|
|
mflops = 5 * size * log (size) / log (2) / time;
|
|
|
- times[i] = mflops;
|
|
|
- errors[i] = sum / size;
|
|
|
|
|
|
+ time_entry->dim_entries[k].sizes[i] = (unsigned int *)malloc(sizeof(unsigned int) * dim);
|
|
|
+ for (int j = 0; j < dim; j++) {
|
|
|
+ time_entry->dim_entries[k].sizes[i][j] = side_size;
|
|
|
+ }
|
|
|
+
|
|
|
+ time_entry->dim_entries[k].times[i] = mflops;
|
|
|
+ time_entry->dim_entries[k].errors[i] = sum / size;
|
|
|
+
|
|
|
fftw_destroy_plan (inverse_plan);
|
|
|
fftw_destroy_plan (plan);
|
|
|
fftw_free (host_orig_mem);
|
|
|
fftw_free (host_immediate_mem);
|
|
|
fftw_free (host_result_mem);
|
|
|
+ }
|
|
|
+ printf ("\n");
|
|
|
+ fflush (stdout);
|
|
|
}
|
|
|
- */
|
|
|
+
|
|
|
printf ("\n");
|
|
|
timer_destroy (timer);
|
|
|
}
|
|
@@ -623,6 +591,8 @@ compute_cuda_fft (cufftComplex *dev_mem,
|
|
|
int
|
|
|
main (int argc, char **argv)
|
|
|
{
|
|
|
+ TimeEntry *time_entries = (TimeEntry *)malloc(sizeof(TimeEntry) * 1);
|
|
|
+
|
|
|
#ifdef HAVE_OPENCL
|
|
|
cl_platform_id platform;
|
|
|
cl_uint n_devices;
|
|
@@ -652,8 +622,8 @@ main (int argc, char **argv)
|
|
|
|
|
|
#ifdef HAVE_FFTW
|
|
|
static int with_fftw = 1;
|
|
|
- double *fftw_times;
|
|
|
- double *fftw_errors;
|
|
|
+ //double *fftw_times;
|
|
|
+ //double *fftw_errors;
|
|
|
#endif
|
|
|
|
|
|
static int show_help = 0;
|
|
@@ -678,6 +648,7 @@ main (int argc, char **argv)
|
|
|
|
|
|
size_t size = INITIAL_SIZE;
|
|
|
FILE *fp;
|
|
|
+ FILE *fp_new;
|
|
|
|
|
|
/* Parse options */
|
|
|
while (getopt_long (argc, argv, "", long_options, NULL) != -1)
|
|
@@ -694,8 +665,12 @@ main (int argc, char **argv)
|
|
|
|
|
|
/* Write header */
|
|
|
fp = fopen ("result.txt", "w");
|
|
|
+ fp_new = fopen ("result_new.txt", "w");
|
|
|
+
|
|
|
fprintf (fp, "# size ");
|
|
|
|
|
|
+ fprintf (fp_new, "# ");
|
|
|
+
|
|
|
#ifdef HAVE_OPENCL
|
|
|
OCL_CHECK_ERROR (clGetPlatformIDs (1, &platform, NULL));
|
|
|
|
|
@@ -745,15 +720,55 @@ main (int argc, char **argv)
|
|
|
#endif
|
|
|
|
|
|
#ifdef HAVE_FFTW
|
|
|
- fftw_times = malloc (N_ARRAYS * sizeof (double));
|
|
|
- fftw_errors = malloc (N_ARRAYS * sizeof (double));
|
|
|
+ //fftw_times = malloc (N_ARRAYS * sizeof (double));
|
|
|
+ //fftw_errors = malloc (N_ARRAYS * sizeof (double));
|
|
|
|
|
|
if (with_fftw) {
|
|
|
printf ("Testing FFTW3 ...\n");
|
|
|
- loop_data_fftw (fftw_times, fftw_errors, fp);
|
|
|
+ loop_data_fftw (&(time_entries[0]));
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+ for (int i = 0; i < N_DIMS; i++) {
|
|
|
+ int min_power = N_POWERS_INTERVALS[i][0];
|
|
|
+ int max_power = N_POWERS_INTERVALS[i][1];
|
|
|
+
|
|
|
+ for (int j = min_power; j <= max_power; j++) {
|
|
|
+ int side_size = pow(2,j);
|
|
|
+ switch (DIMS[i]) {
|
|
|
+ case 1:
|
|
|
+ fprintf (fp_new, "%d ", side_size);
|
|
|
+ fprintf (fp_new, "%d(Error) ", side_size);
|
|
|
+ break;
|
|
|
+ case 2:
|
|
|
+ fprintf (fp_new, "%dx%d ", side_size, side_size);
|
|
|
+ fprintf (fp_new, "%dx%d(Error) ", side_size, side_size);
|
|
|
+ break;
|
|
|
+ case 3:
|
|
|
+ fprintf (fp_new, "%dx%dx%d ", side_size, side_size, side_size);
|
|
|
+ fprintf (fp_new, "%dx%dx%d(Error) ", side_size, side_size, side_size);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ fprintf (fp_new, "\n");
|
|
|
+
|
|
|
+ for (int i = 0; i < 1; i++) { //loop over time entries
|
|
|
+ fprintf (fp_new, "%s ", time_entries[i].lib_name);
|
|
|
+
|
|
|
+ DimEntry *dim_entries = time_entries[i].dim_entries;
|
|
|
+
|
|
|
+ for (int dim = 0; dim < N_DIMS; dim++) {
|
|
|
+ DimEntry dim_entry = dim_entries[dim];
|
|
|
+
|
|
|
+ for (int j = 0; j < (N_POWERS_INTERVALS[dim][1] - N_POWERS_INTERVALS[dim][0] + 1); j++) {
|
|
|
+ fprintf (fp_new, "%f %f ", dim_entry.times[j], dim_entry.errors[j]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
for (int i = 0; i < N_ARRAYS; i++) {
|
|
|
UPDATE_SIZE (size);
|
|
|
|
|
@@ -774,13 +789,13 @@ main (int argc, char **argv)
|
|
|
}
|
|
|
#endif
|
|
|
}
|
|
|
-
|
|
|
+/*
|
|
|
#ifdef HAVE_FFTW
|
|
|
if (with_fftw) {
|
|
|
fprintf (fp, "%f %f ", fftw_times[i], fftw_errors[i]);
|
|
|
}
|
|
|
#endif
|
|
|
-
|
|
|
+*/
|
|
|
#ifdef HAVE_CUDA_FFT
|
|
|
if (with_cuda_fft) {
|
|
|
fprintf (fp, "%f %f ", cuda_times[i], cuda_times[i]);
|
|
@@ -801,5 +816,7 @@ main (int argc, char **argv)
|
|
|
#endif
|
|
|
|
|
|
fclose (fp);
|
|
|
+
|
|
|
+ fclose (fp_new);
|
|
|
return 0;
|
|
|
}
|