123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212 |
- #include <glib.h>
- #include <CL/cl.h>
- #include <errno.h>
- #include "ocl.h"
- typedef struct {
- /* input */
- gchar *src_range;
- gchar *dst_range;
- /* derived */
- gsize src_from;
- gsize src_to;
- gsize src_step;
- gsize dst_from;
- gsize dst_to;
- gsize dst_step;
- OclPlatform *ocl;
- cl_command_queue queue;
- } Options;
- static const gsize DEFAULT_SIZE = 4096;
- static const guint NUM_ITERATIONS = 5;
- static gboolean
- parse_uint (const gchar *input, gsize *value)
- {
- *value = (gsize) g_ascii_strtoull (input, NULL, 10);
- if (errno == ERANGE || errno == EINVAL) {
- g_print ("Could not parse `%s'", input);
- return FALSE;
- }
- return TRUE;
- }
- static gboolean
- split_triple (const gchar *input, gsize *from, gsize *to, gsize *step)
- {
- gchar **tokens;
- *from = DEFAULT_SIZE;
- *to = DEFAULT_SIZE;
- *step = 1;
- if (input == NULL)
- return TRUE;
- tokens = g_strsplit (input, ":", 0);
- if (tokens[0] == NULL)
- return TRUE;
- if (parse_uint (tokens[0], from)) {
- *to = *from;
- }
- else {
- return FALSE;
- }
- if (tokens[1] != NULL) {
- if (!parse_uint (tokens[1], to))
- return FALSE;
- if (tokens[2] != NULL) {
- if (!parse_uint (tokens[2], step))
- return FALSE;
- }
- }
- if (*to < *from) {
- g_print ("%zu is smaller than %zu\n", *to, *from);
- return FALSE;
- }
- g_strfreev (tokens);
- return TRUE;
- }
- static gboolean
- validate_input (Options *opts)
- {
- if (!split_triple (opts->src_range, &opts->src_from, &opts->src_to, &opts->src_step))
- return FALSE;
- if (!split_triple (opts->dst_range, &opts->dst_from, &opts->dst_to, &opts->dst_step))
- return FALSE;
- return TRUE;
- }
- static void
- measure_copy (Options *opts, cl_mem src_mem, cl_mem dst_mem, gsize size, gsize dst_offset, gdouble *wall_duration, gdouble *event_duration)
- {
- GTimer *timer;
- cl_int error;
- cl_event event;
- cl_ulong timestamps[4];
- timer = g_timer_new ();
- OCL_CHECK_ERROR (clEnqueueCopyBuffer (opts->queue, src_mem, dst_mem, 0, dst_offset, size, 0, NULL, &event));
- OCL_CHECK_ERROR (clWaitForEvents (1, &event));
- *wall_duration += g_timer_elapsed (timer, NULL);
- ocl_get_event_times (event, ×tamps[0], ×tamps[1], ×tamps[2], ×tamps[3]);
- *event_duration += (timestamps[1] - timestamps[0]) / 1000. / 1000. / 1000.;
- OCL_CHECK_ERROR (clReleaseEvent (event));
- g_timer_destroy (timer);
- }
- static void
- benchmark_intra_gpu_copy (Options *opts)
- {
- cl_context context;
- cl_int error;
- context = ocl_get_context (opts->ocl);
- for (gsize src_size = opts->src_from; src_size <= opts->src_to; src_size += opts->src_step) {
- cl_mem src_mem;
- src_mem = clCreateBuffer (context, CL_MEM_READ_ONLY, src_size, NULL, &error);
- OCL_CHECK_ERROR (error);
- for (gsize dst_size = opts->dst_from; dst_size <= opts->dst_to; dst_size += opts->dst_step) {
- cl_mem dst_mem;
- gsize num_blocks;
- gsize remaining_size;
- gdouble wall_duration;
- gdouble event_duration;
- gdouble throughput;
- num_blocks = dst_size / src_size;
- remaining_size = dst_size % src_size;
- dst_mem = clCreateBuffer (context, CL_MEM_WRITE_ONLY, dst_size, NULL, &error);
- OCL_CHECK_ERROR (error);
- wall_duration = 0;
- event_duration = 0;
- for (guint i = 0; i < NUM_ITERATIONS; i++) {
- /* Copy blocks that fit */
- for (gsize block = 0; block < num_blocks; block++) {
- measure_copy (opts, src_mem, dst_mem, src_size, block * src_size, &wall_duration, &event_duration);
- }
- /* Copy last block if necessary */
- if (remaining_size > 0) {
- measure_copy (opts, src_mem, dst_mem, remaining_size, num_blocks * src_size, &wall_duration, &event_duration);
- }
- }
- wall_duration /= NUM_ITERATIONS;
- event_duration /= NUM_ITERATIONS;
- throughput = dst_size / wall_duration / 1024. / 1024.;
- g_print ("%zu %zu %f %f %f [num_blocks=%zu, remaining=%zu]\n",
- src_size, dst_size, wall_duration, event_duration, throughput, num_blocks, remaining_size);
- OCL_CHECK_ERROR (clReleaseMemObject (dst_mem));
- }
- OCL_CHECK_ERROR (clReleaseMemObject (src_mem));
- }
- }
- int
- main (int argc, char *argv[])
- {
- OclPlatform *ocl;
- GOptionContext *context;
- GError *error = NULL;
- Options opts = {
- .src_range = NULL,
- .dst_range = NULL
- };
- GOptionEntry entries[] = {
- { "source-size", 0, 0, G_OPTION_ARG_STRING, &opts.src_range, "start:stop:step for source size", "M:N:S" },
- { "dest-size", 0, 0, G_OPTION_ARG_STRING, &opts.dst_range, "start:stop:step for dstination size", "M:N:S" },
- { NULL }
- };
- context = g_option_context_new (NULL);
- g_option_context_add_main_entries (context, entries, NULL);
- if (!g_option_context_parse (context, &argc, &argv, &error)) {
- g_print ("Parsing options failed: %s\n", error->message);
- return 1;
- }
- if (!validate_input (&opts))
- return 1;
- opts.ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_GPU, CL_QUEUE_PROFILING_ENABLE);
- opts.queue = ocl_get_cmd_queues (opts.ocl)[0];
- benchmark_intra_gpu_copy (&opts);
- ocl_free (opts.ocl);
- return 0;
- }
|