Browse Source

Add new profiling approach

Roman Shkarin 9 years ago
parent
commit
1356031845
3 changed files with 112 additions and 73 deletions
  1. 90 73
      benchmark.c
  2. 11 0
      dim_entry.h
  3. 11 0
      time_entry.h

+ 90 - 73
benchmark.c

@@ -54,6 +54,7 @@ exit(EXIT_FAILURE); \
 #endif
 
 #include "timer.h"
+#include "time_entry.h"
 
 #define N_DIMS 3
 #define N_RUNS 4
@@ -63,7 +64,7 @@ const int N_ARRAYS = 6;
 
 const int DIMS[N_DIMS] = {1, 2, 3};
 const int N_DIM_ARRAYS[N_DIMS] = {5, 4, 4};
-const int N_POWERS_INTERVALS[N_DIMS][2] = {{5, 11}, {8, 12}, {7, 8}};
+const int N_POWERS_INTERVALS[N_DIMS][2] = {{5, 11}, {8, 11}, {7, 7}};
 
 #define UPDATE_SIZE(size) size *= 8;
 #define PRINT_DIM_SIZE(side_size,dim) { \
@@ -264,29 +265,37 @@ loop_data_cuda (const char *vendor,
 
 #ifdef HAVE_FFTW
 static void
-loop_data_fftw (double *times, double *errors, FILE *fp)
+loop_data_fftw (TimeEntry *time_entry)
 {
     Timer *timer;
 
     timer = timer_new ();
-    fprintf (fp, "FFTW_bw FFTW_err ");
+
+    time_entry->lib_name = "FFTW";
+    time_entry->dim_entries = (DimEntry *)malloc(N_DIMS * sizeof(DimEntry));
 
     for (int k = 0; k < N_DIMS; k++) {
         int dim = DIMS[k];
         int power_min = N_POWERS_INTERVALS[k][0];
         int power_max = N_POWERS_INTERVALS[k][1];
+        int num_entries = power_max - power_min + 1;
+
+        time_entry->dim_entries[k].n_dims = dim;
+        time_entry->dim_entries[k].sizes  = (unsigned int **)malloc(sizeof(unsigned int *) * num_entries);
+        time_entry->dim_entries[k].times  = (double *)malloc(sizeof(double) * num_entries);
+        time_entry->dim_entries[k].errors = (double *)malloc(sizeof(double) * num_entries);
 
         printf ("%dD:", dim);
         fflush (stdout);
 
-        for (int m = power_min; m <= power_max; m++) {
+        for (int m = power_min, i = 0; m <= power_max; m++, i++) {
             fftw_complex *host_orig_mem;
             fftw_complex *host_result_mem;
             fftw_complex *host_immediate_mem;
             fftw_plan plan;
             fftw_plan inverse_plan;
-            //double time;
-            //double mflops;
+            double time;
+            double mflops;
             double sum = 0.0;
 
             size_t side_size = pow(2,m);
@@ -332,66 +341,16 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
                 host_orig_mem[j][1] = rand() / ((double) RAND_MAX);
             }
 
-            if (dim == 1) printf (" %zu.", side_size);
-            else if (dim == 2) printf (" %zux%zu.", side_size, side_size);
-            else printf (" %zux%zux%zu.", side_size, side_size, side_size);
-            fflush (stdout);
-
-            timer_start (timer);
-
-            for (int j = 0; j < N_RUNS; j++) {
-                fftw_execute (plan);
+            if (dim == 1) {
+                printf (" %zu.", side_size);
             }
-
-            timer_stop (timer);
-
-            /* Check precision */
-            fftw_execute (inverse_plan);
-
-            for (int j = 0; j < size; j++) {
-                sum += fabs (host_result_mem[j][0] / size - host_orig_mem[j][0]);
-                sum += fabs (host_result_mem[j][1] / size - host_orig_mem[j][1]);
+            else if (dim == 2) {
+                printf (" %zux%zu.", side_size, side_size);
             }
-
-            //time = timer_get_seconds (timer) / N_RUNS / 1000.0;
-            //mflops = 5 * size * log (size) / log (2) / time;
-            //times[i] = mflops;
-            //errors[i] = sum / size;
-
-            fftw_destroy_plan (inverse_plan);
-            fftw_destroy_plan (plan);
-            fftw_free (host_orig_mem);
-            fftw_free (host_immediate_mem);
-            fftw_free (host_result_mem);
-        }
-        printf ("\n");
-        fflush (stdout);
-    }
-    /*
-    for (int i = 0; i < N_ARRAYS; i++) {
-            fftw_complex *host_orig_mem;
-            fftw_complex *host_result_mem;
-            fftw_complex *host_immediate_mem;
-            fftw_plan plan;
-            fftw_plan inverse_plan;
-            double time;
-            double mflops;
-            double sum = 0.0;
-
-            UPDATE_SIZE (size);
-
-            host_orig_mem = fftw_malloc (sizeof (fftw_complex) * size);
-            host_immediate_mem = fftw_malloc (sizeof (fftw_complex) * size);
-            host_result_mem = fftw_malloc (sizeof (fftw_complex) * size);
-
-            plan = fftw_plan_dft_1d (size, host_orig_mem, host_immediate_mem, FFTW_FORWARD, FFTW_ESTIMATE);
-
-            for (int j = 0; j < size; j++) {
-                host_orig_mem[j][0] = rand() / ((double) RAND_MAX);
-                host_orig_mem[j][1] = rand() / ((double) RAND_MAX);
+            else {
+                printf (" %zux%zux%zu.", side_size, side_size, side_size);
             }
 
-            printf (" %zu.", size);
             fflush (stdout);
 
             timer_start (timer);
@@ -402,7 +361,7 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
 
             timer_stop (timer);
 
-            inverse_plan = fftw_plan_dft_1d (size, host_immediate_mem, host_result_mem, FFTW_BACKWARD, FFTW_ESTIMATE);
+            /* Check precision */
             fftw_execute (inverse_plan);
 
             for (int j = 0; j < size; j++) {
@@ -412,16 +371,25 @@ loop_data_fftw (double *times, double *errors, FILE *fp)
 
             time = timer_get_seconds (timer) / N_RUNS / 1000.0;
             mflops = 5 * size * log (size) / log (2) / time;
-            times[i] = mflops;
-            errors[i] = sum / size;
 
+            time_entry->dim_entries[k].sizes[i] = (unsigned int *)malloc(sizeof(unsigned int) * dim);
+            for (int j = 0; j < dim; j++) {
+                time_entry->dim_entries[k].sizes[i][j] = side_size;
+            }
+
+            time_entry->dim_entries[k].times[i] = mflops;
+            time_entry->dim_entries[k].errors[i] = sum / size;
+            
             fftw_destroy_plan (inverse_plan);
             fftw_destroy_plan (plan);
             fftw_free (host_orig_mem);
             fftw_free (host_immediate_mem);
             fftw_free (host_result_mem);
+        }
+        printf ("\n");
+        fflush (stdout);
     }
-    */
+    
     printf ("\n");
     timer_destroy (timer);
 }
@@ -623,6 +591,8 @@ compute_cuda_fft (cufftComplex *dev_mem,
 int
 main (int argc, char **argv)
 {
+    TimeEntry *time_entries = (TimeEntry *)malloc(sizeof(TimeEntry) * 1);
+
 #ifdef HAVE_OPENCL
     cl_platform_id platform;
     cl_uint n_devices;
@@ -652,8 +622,8 @@ main (int argc, char **argv)
 
 #ifdef HAVE_FFTW
     static int with_fftw = 1;
-    double *fftw_times;
-    double *fftw_errors;
+    //double *fftw_times;
+    //double *fftw_errors;
 #endif
 
     static int show_help = 0;
@@ -678,6 +648,7 @@ main (int argc, char **argv)
 
     size_t size = INITIAL_SIZE;
     FILE *fp;
+    FILE *fp_new;
 
     /* Parse options */
     while (getopt_long (argc, argv, "", long_options, NULL) != -1)
@@ -694,8 +665,12 @@ main (int argc, char **argv)
 
     /* Write header */
     fp = fopen ("result.txt", "w");
+    fp_new = fopen ("result_new.txt", "w");
+
     fprintf (fp, "# size ");
 
+    fprintf (fp_new, "# ");
+
 #ifdef HAVE_OPENCL
     OCL_CHECK_ERROR (clGetPlatformIDs (1, &platform, NULL));
 
@@ -745,15 +720,55 @@ main (int argc, char **argv)
 #endif
 
 #ifdef HAVE_FFTW
-    fftw_times = malloc (N_ARRAYS * sizeof (double));
-    fftw_errors = malloc (N_ARRAYS * sizeof (double));
+    //fftw_times = malloc (N_ARRAYS * sizeof (double));
+    //fftw_errors = malloc (N_ARRAYS * sizeof (double));
 
     if (with_fftw) {
         printf ("Testing FFTW3 ...\n");
-        loop_data_fftw (fftw_times, fftw_errors, fp);
+        loop_data_fftw (&(time_entries[0]));
     }
 #endif
 
+    for (int i = 0; i < N_DIMS; i++) {
+        int min_power = N_POWERS_INTERVALS[i][0];
+        int max_power = N_POWERS_INTERVALS[i][1];
+
+        for (int j = min_power; j <= max_power; j++) {
+            int side_size = pow(2,j);
+            switch (DIMS[i]) {
+                case 1:
+                 fprintf (fp_new, "%d ", side_size);
+                 fprintf (fp_new, "%d(Error) ", side_size);
+                 break;
+                case 2:
+                 fprintf (fp_new, "%dx%d ", side_size, side_size);
+                 fprintf (fp_new, "%dx%d(Error) ", side_size, side_size);
+                 break;
+                case 3:
+                 fprintf (fp_new, "%dx%dx%d ", side_size, side_size, side_size);
+                 fprintf (fp_new, "%dx%dx%d(Error) ", side_size, side_size, side_size);
+                 break;
+            }   
+        }
+        
+    }
+
+    fprintf (fp_new, "\n");
+
+    for (int i = 0; i < 1; i++) { //loop over time entries
+        fprintf (fp_new, "%s ", time_entries[i].lib_name);
+
+        DimEntry *dim_entries = time_entries[i].dim_entries;
+
+        for (int dim = 0; dim < N_DIMS; dim++) {
+            DimEntry dim_entry = dim_entries[dim];
+
+            for (int j = 0; j < (N_POWERS_INTERVALS[dim][1] - N_POWERS_INTERVALS[dim][0] + 1); j++) {
+                fprintf (fp_new, "%f %f ", dim_entry.times[j], dim_entry.errors[j]);
+            }
+        }   
+    }
+
     for (int i = 0; i < N_ARRAYS; i++) {
         UPDATE_SIZE (size);
 
@@ -774,13 +789,13 @@ main (int argc, char **argv)
             }
 #endif
         }
-
+/*
 #ifdef HAVE_FFTW
         if (with_fftw) {
             fprintf (fp, "%f %f ", fftw_times[i], fftw_errors[i]);
         }
 #endif
-
+*/
 #ifdef HAVE_CUDA_FFT
         if (with_cuda_fft) {
             fprintf (fp, "%f %f ", cuda_times[i], cuda_times[i]);
@@ -801,5 +816,7 @@ main (int argc, char **argv)
 #endif
 
     fclose (fp);
+
+    fclose (fp_new);
     return 0;
 }

+ 11 - 0
dim_entry.h

@@ -0,0 +1,11 @@
+#ifndef DIMENTRY_H
+#define DIMENTRY_H
+
+typedef struct _DimEntry { /* Benchmark results collected for one FFT dimensionality. */
+	unsigned int n_dims; /* Dimensionality of the transforms in this entry (benchmark.c stores DIMS[k]). */
+    unsigned int **sizes; /* sizes[i] is an array of n_dims side lengths for the i-th measured problem size. */
+    double *times; /* times[i]: benchmark.c stores the computed MFLOPS value here, not a raw duration. */
+    double *errors; /* errors[i]: round-trip error for the i-th size (benchmark.c stores sum / size). */
+} DimEntry;
+
+#endif

+ 11 - 0
time_entry.h

@@ -0,0 +1,11 @@
+#ifndef TIMEENTRY_H
+#define TIMEENTRY_H
+
+#include "dim_entry.h"
+
+typedef struct _TimeEntry { /* All benchmark results gathered for a single FFT library. */
+	char *lib_name; /* Library label written to the results file; benchmark.c assigns the literal "FFTW". */
+    DimEntry *dim_entries; /* Array of per-dimensionality results; benchmark.c allocates N_DIMS elements. */
+} TimeEntry;
+
+#endif