#include #include #include #include "ocl.h" typedef struct { /* input */ gchar *src_range; gchar *dst_range; /* derived */ gsize src_from; gsize src_to; gsize src_step; gsize dst_from; gsize dst_to; gsize dst_step; OclPlatform *ocl; cl_command_queue queue; } Options; static const gsize DEFAULT_SIZE = 4096; static const guint NUM_ITERATIONS = 5; static gboolean parse_uint (const gchar *input, gsize *value) { *value = (gsize) g_ascii_strtoull (input, NULL, 10); if (errno == ERANGE || errno == EINVAL) { g_print ("Could not parse `%s'", input); return FALSE; } return TRUE; } static gboolean split_triple (const gchar *input, gsize *from, gsize *to, gsize *step) { gchar **tokens; *from = DEFAULT_SIZE; *to = DEFAULT_SIZE; *step = 1; if (input == NULL) return TRUE; tokens = g_strsplit (input, ":", 0); if (tokens[0] == NULL) return TRUE; if (parse_uint (tokens[0], from)) { *to = *from; } else { return FALSE; } if (tokens[1] != NULL) { if (!parse_uint (tokens[1], to)) return FALSE; if (tokens[2] != NULL) { if (!parse_uint (tokens[2], step)) return FALSE; } } if (*to < *from) { g_print ("%zu is smaller than %zu\n", *to, *from); return FALSE; } g_strfreev (tokens); return TRUE; } static gboolean validate_input (Options *opts) { if (!split_triple (opts->src_range, &opts->src_from, &opts->src_to, &opts->src_step)) return FALSE; if (!split_triple (opts->dst_range, &opts->dst_from, &opts->dst_to, &opts->dst_step)) return FALSE; return TRUE; } static void measure_copy (Options *opts, cl_mem src_mem, cl_mem dst_mem, gsize size, gsize dst_offset, gdouble *wall_duration, gdouble *event_duration) { GTimer *timer; cl_int error; cl_event event; cl_ulong timestamps[4]; timer = g_timer_new (); OCL_CHECK_ERROR (clEnqueueCopyBuffer (opts->queue, src_mem, dst_mem, 0, dst_offset, size, 0, NULL, &event)); OCL_CHECK_ERROR (clWaitForEvents (1, &event)); *wall_duration += g_timer_elapsed (timer, NULL); ocl_get_event_times (event, ×tamps[0], ×tamps[1], ×tamps[2], ×tamps[3]); *event_duration += (timestamps[1] - timestamps[0]) / 1000. / 1000. / 1000.; OCL_CHECK_ERROR (clReleaseEvent (event)); g_timer_destroy (timer); } static void benchmark_intra_gpu_copy (Options *opts) { cl_context context; cl_int error; context = ocl_get_context (opts->ocl); for (gsize src_size = opts->src_from; src_size <= opts->src_to; src_size += opts->src_step) { cl_mem src_mem; src_mem = clCreateBuffer (context, CL_MEM_READ_ONLY, src_size, NULL, &error); OCL_CHECK_ERROR (error); for (gsize dst_size = opts->dst_from; dst_size <= opts->dst_to; dst_size += opts->dst_step) { cl_mem dst_mem; gsize num_blocks; gsize remaining_size; gdouble wall_duration; gdouble event_duration; gdouble throughput; num_blocks = dst_size / src_size; remaining_size = dst_size % src_size; dst_mem = clCreateBuffer (context, CL_MEM_WRITE_ONLY, dst_size, NULL, &error); OCL_CHECK_ERROR (error); wall_duration = 0; event_duration = 0; for (guint i = 0; i < NUM_ITERATIONS; i++) { /* Copy blocks that fit */ for (gsize block = 0; block < num_blocks; block++) { measure_copy (opts, src_mem, dst_mem, src_size, block * src_size, &wall_duration, &event_duration); } /* Copy last block if necessary */ if (remaining_size > 0) { measure_copy (opts, src_mem, dst_mem, remaining_size, num_blocks * src_size, &wall_duration, &event_duration); } } wall_duration /= NUM_ITERATIONS; event_duration /= NUM_ITERATIONS; throughput = dst_size / wall_duration / 1024. / 1024.; g_print ("%zu %zu %f %f %f [num_blocks=%zu, remaining=%zu]\n", src_size, dst_size, wall_duration, event_duration, throughput, num_blocks, remaining_size); OCL_CHECK_ERROR (clReleaseMemObject (dst_mem)); } OCL_CHECK_ERROR (clReleaseMemObject (src_mem)); } } int main (int argc, char *argv[]) { OclPlatform *ocl; GOptionContext *context; GError *error = NULL; Options opts = { .src_range = NULL, .dst_range = NULL }; GOptionEntry entries[] = { { "source-size", 0, 0, G_OPTION_ARG_STRING, &opts.src_range, "start:stop:step for source size", "M:N:S" }, { "dest-size", 0, 0, G_OPTION_ARG_STRING, &opts.dst_range, "start:stop:step for dstination size", "M:N:S" }, { NULL } }; context = g_option_context_new (NULL); g_option_context_add_main_entries (context, entries, NULL); if (!g_option_context_parse (context, &argc, &argv, &error)) { g_print ("Parsing options failed: %s\n", error->message); return 1; } if (!validate_input (&opts)) return 1; opts.ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_GPU, CL_QUEUE_PROFILING_ENABLE); opts.queue = ocl_get_cmd_queues (opts.ocl)[0]; benchmark_intra_gpu_copy (&opts); ocl_free (opts.ocl); return 0; }