perf.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. #include <glib.h>
  2. #include <CL/cl.h>
  3. #include <errno.h>
  4. #include "ocl.h"
  5. typedef struct {
  6. /* input */
  7. gchar *src_range;
  8. gchar *dst_range;
  9. /* derived */
  10. gsize src_from;
  11. gsize src_to;
  12. gsize src_step;
  13. gsize dst_from;
  14. gsize dst_to;
  15. gsize dst_step;
  16. OclPlatform *ocl;
  17. cl_command_queue queue;
  18. } Options;
  19. static const gsize DEFAULT_SIZE = 4096;
  20. static const guint NUM_ITERATIONS = 5;
  21. static gboolean
  22. parse_uint (const gchar *input, gsize *value)
  23. {
  24. *value = (gsize) g_ascii_strtoull (input, NULL, 10);
  25. if (errno == ERANGE || errno == EINVAL) {
  26. g_print ("Could not parse `%s'", input);
  27. return FALSE;
  28. }
  29. return TRUE;
  30. }
  31. static gboolean
  32. split_triple (const gchar *input, gsize *from, gsize *to, gsize *step)
  33. {
  34. gchar **tokens;
  35. *from = DEFAULT_SIZE;
  36. *to = DEFAULT_SIZE;
  37. *step = 1;
  38. if (input == NULL)
  39. return TRUE;
  40. tokens = g_strsplit (input, ":", 0);
  41. if (tokens[0] == NULL)
  42. return TRUE;
  43. if (parse_uint (tokens[0], from)) {
  44. *to = *from;
  45. }
  46. else {
  47. return FALSE;
  48. }
  49. if (tokens[1] != NULL) {
  50. if (!parse_uint (tokens[1], to))
  51. return FALSE;
  52. if (tokens[2] != NULL) {
  53. if (!parse_uint (tokens[2], step))
  54. return FALSE;
  55. }
  56. }
  57. if (*to < *from) {
  58. g_print ("%zu is smaller than %zu\n", *to, *from);
  59. return FALSE;
  60. }
  61. g_strfreev (tokens);
  62. return TRUE;
  63. }
  64. static gboolean
  65. validate_input (Options *opts)
  66. {
  67. if (!split_triple (opts->src_range, &opts->src_from, &opts->src_to, &opts->src_step))
  68. return FALSE;
  69. if (!split_triple (opts->dst_range, &opts->dst_from, &opts->dst_to, &opts->dst_step))
  70. return FALSE;
  71. return TRUE;
  72. }
  73. static void
  74. measure_copy (Options *opts, cl_mem src_mem, cl_mem dst_mem, gsize size, gsize dst_offset, gdouble *wall_duration, gdouble *event_duration)
  75. {
  76. GTimer *timer;
  77. cl_int error;
  78. cl_event event;
  79. cl_ulong timestamps[4];
  80. timer = g_timer_new ();
  81. OCL_CHECK_ERROR (clEnqueueCopyBuffer (opts->queue, src_mem, dst_mem, 0, dst_offset, size, 0, NULL, &event));
  82. OCL_CHECK_ERROR (clWaitForEvents (1, &event));
  83. *wall_duration += g_timer_elapsed (timer, NULL);
  84. ocl_get_event_times (event, &timestamps[0], &timestamps[1], &timestamps[2], &timestamps[3]);
  85. *event_duration += (timestamps[1] - timestamps[0]) / 1000. / 1000. / 1000.;
  86. OCL_CHECK_ERROR (clReleaseEvent (event));
  87. g_timer_destroy (timer);
  88. }
  89. static void
  90. benchmark_intra_gpu_copy (Options *opts)
  91. {
  92. cl_context context;
  93. cl_int error;
  94. context = ocl_get_context (opts->ocl);
  95. for (gsize src_size = opts->src_from; src_size <= opts->src_to; src_size += opts->src_step) {
  96. cl_mem src_mem;
  97. src_mem = clCreateBuffer (context, CL_MEM_READ_ONLY, src_size, NULL, &error);
  98. OCL_CHECK_ERROR (error);
  99. for (gsize dst_size = opts->dst_from; dst_size <= opts->dst_to; dst_size += opts->dst_step) {
  100. cl_mem dst_mem;
  101. gsize num_blocks;
  102. gsize remaining_size;
  103. gdouble wall_duration;
  104. gdouble event_duration;
  105. gdouble throughput;
  106. num_blocks = dst_size / src_size;
  107. remaining_size = dst_size % src_size;
  108. dst_mem = clCreateBuffer (context, CL_MEM_WRITE_ONLY, dst_size, NULL, &error);
  109. OCL_CHECK_ERROR (error);
  110. wall_duration = 0;
  111. event_duration = 0;
  112. for (guint i = 0; i < NUM_ITERATIONS; i++) {
  113. /* Copy blocks that fit */
  114. for (gsize block = 0; block < num_blocks; block++) {
  115. measure_copy (opts, src_mem, dst_mem, src_size, block * src_size, &wall_duration, &event_duration);
  116. }
  117. /* Copy last block if necessary */
  118. if (remaining_size > 0) {
  119. measure_copy (opts, src_mem, dst_mem, remaining_size, num_blocks * src_size, &wall_duration, &event_duration);
  120. }
  121. }
  122. wall_duration /= NUM_ITERATIONS;
  123. event_duration /= NUM_ITERATIONS;
  124. throughput = dst_size / wall_duration / 1024. / 1024.;
  125. g_print ("%zu %zu %f %f %f [num_blocks=%zu, remaining=%zu]\n",
  126. src_size, dst_size, wall_duration, event_duration, throughput, num_blocks, remaining_size);
  127. OCL_CHECK_ERROR (clReleaseMemObject (dst_mem));
  128. }
  129. OCL_CHECK_ERROR (clReleaseMemObject (src_mem));
  130. }
  131. }
  132. int
  133. main (int argc, char *argv[])
  134. {
  135. OclPlatform *ocl;
  136. GOptionContext *context;
  137. GError *error = NULL;
  138. Options opts = {
  139. .src_range = NULL,
  140. .dst_range = NULL
  141. };
  142. GOptionEntry entries[] = {
  143. { "source-size", 0, 0, G_OPTION_ARG_STRING, &opts.src_range, "start:stop:step for source size", "M:N:S" },
  144. { "dest-size", 0, 0, G_OPTION_ARG_STRING, &opts.dst_range, "start:stop:step for dstination size", "M:N:S" },
  145. { NULL }
  146. };
  147. context = g_option_context_new (NULL);
  148. g_option_context_add_main_entries (context, entries, NULL);
  149. if (!g_option_context_parse (context, &argc, &argv, &error)) {
  150. g_print ("Parsing options failed: %s\n", error->message);
  151. return 1;
  152. }
  153. if (!validate_input (&opts))
  154. return 1;
  155. opts.ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_GPU, CL_QUEUE_PROFILING_ENABLE);
  156. opts.queue = ocl_get_cmd_queues (opts.ocl)[0];
  157. benchmark_intra_gpu_copy (&opts);
  158. ocl_free (opts.ocl);
  159. return 0;
  160. }