Browse Source

Initial commit

Matthias Vogelgesang 8 years ago
commit
39103aeaa8
5 changed files with 773 additions and 0 deletions
  1. 7 0
      Makefile
  2. 66 0
      c.mk
  3. 417 0
      ocl.c
  4. 71 0
      ocl.h
  5. 212 0
      perf.c

+ 7 - 0
Makefile

@@ -0,0 +1,7 @@
+SRC = perf.c ocl.c
+BIN = perf
+CFLAGS = -std=c99
+LDFLAGS = -lOpenCL
+PKG_DEPS = glib-2.0
+
+include c.mk

+ 66 - 0
c.mk

@@ -0,0 +1,66 @@
+#
+# c.mk - Generic Makefile for Linux toy applications
+#
+# Required variables:
+#
+#   - $(SRC): C source files
+#   - $(BIN): filename of linked binary
+#
+# Optional variables:
+#
+# 	- $(PKG_DEPS): List of pkg-config compatible packages
+# 	- $(CFLAGS), $(LDFLAGS), GNU compliant directories
+#
+# Example Makefile:
+#
+#   PKG_DEPS = glib-2.0
+#   SRC = foo.c
+#   BIN = bar
+#
+#   include c.mk
+#
+
+ifeq ($V, 1)
+	Q =
+else
+	Q = @
+endif
+
+OBJS = $(patsubst %.c,%.o,$(SRC))
+
+#  Determine C flags and ld flags
+ifdef PKG_DEPS
+	PKG_CFLAGS = $(shell pkg-config --cflags $(PKG_DEPS))
+	PKG_LDFLAGS = $(shell pkg-config --libs $(PKG_DEPS))
+else
+	PKG_CFLAGS =
+	PKG_LDFLAGS =
+endif
+
+CFLAGS ?= -Wall -Werror -O2
+CFLAGS += $(PKG_CFLAGS) -std=c99
+LDFLAGS += $(PKG_LDFLAGS)
+
+# GNU-compliant install directories
+prefix ?= /usr/local
+exec_prefix ?= $(prefix)
+bindir ?= $(exec_prefix)/bin
+
+# Targets
+.PHONY: clean
+
+all: $(BIN)
+
+%.o: %.c
+	@echo " CC $@"
+	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
+
+$(BIN): $(OBJS)
+	@echo " LD $@"
+	$(Q)$(CC) $(OBJS) -o $@ $(LDFLAGS)
+
+clean:
+	$(Q)rm -f $(BIN) $(OBJS)
+
+install: $(BIN)
+	$(Q)install -D -m 755 $(BIN) $(DESTDIR)$(bindir)/$(BIN)

+ 417 - 0
ocl.c

@@ -0,0 +1,417 @@
+/*
+ *  This file is part of oclkit.
+ *
+ *  oclkit is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  oclkit is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include "ocl.h"
+
+struct OclPlatform {
+    cl_platform_id       platform;
+    cl_context           context;
+    cl_uint              num_devices;
+    cl_device_id        *devices;
+    cl_command_queue    *cmd_queues;
+    int                  own_queues;
+};
+
+static const char* opencl_error_msgs[] = {
+    "CL_SUCCESS",
+    "CL_DEVICE_NOT_FOUND",
+    "CL_DEVICE_NOT_AVAILABLE",
+    "CL_COMPILER_NOT_AVAILABLE",
+    "CL_MEM_OBJECT_ALLOCATION_FAILURE",
+    "CL_OUT_OF_RESOURCES",
+    "CL_OUT_OF_HOST_MEMORY",
+    "CL_PROFILING_INFO_NOT_AVAILABLE",
+    "CL_MEM_COPY_OVERLAP",
+    "CL_IMAGE_FORMAT_MISMATCH",
+    "CL_IMAGE_FORMAT_NOT_SUPPORTED",
+    "CL_BUILD_PROGRAM_FAILURE",
+    "CL_MAP_FAILURE",
+    "CL_MISALIGNED_SUB_BUFFER_OFFSET",
+    "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
+
+    /* next IDs start at 30! */
+    "CL_INVALID_VALUE",
+    "CL_INVALID_DEVICE_TYPE",
+    "CL_INVALID_PLATFORM",
+    "CL_INVALID_DEVICE",
+    "CL_INVALID_CONTEXT",
+    "CL_INVALID_QUEUE_PROPERTIES",
+    "CL_INVALID_COMMAND_QUEUE",
+    "CL_INVALID_HOST_PTR",
+    "CL_INVALID_MEM_OBJECT",
+    "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
+    "CL_INVALID_IMAGE_SIZE",
+    "CL_INVALID_SAMPLER",
+    "CL_INVALID_BINARY",
+    "CL_INVALID_BUILD_OPTIONS",
+    "CL_INVALID_PROGRAM",
+    "CL_INVALID_PROGRAM_EXECUTABLE",
+    "CL_INVALID_KERNEL_NAME",
+    "CL_INVALID_KERNEL_DEFINITION",
+    "CL_INVALID_KERNEL",
+    "CL_INVALID_ARG_INDEX",
+    "CL_INVALID_ARG_VALUE",
+    "CL_INVALID_ARG_SIZE",
+    "CL_INVALID_KERNEL_ARGS",
+    "CL_INVALID_WORK_DIMENSION",
+    "CL_INVALID_WORK_GROUP_SIZE",
+    "CL_INVALID_WORK_ITEM_SIZE",
+    "CL_INVALID_GLOBAL_OFFSET",
+    "CL_INVALID_EVENT_WAIT_LIST",
+    "CL_INVALID_EVENT",
+    "CL_INVALID_OPERATION",
+    "CL_INVALID_GL_OBJECT",
+    "CL_INVALID_BUFFER_SIZE",
+    "CL_INVALID_MIP_LEVEL",
+    "CL_INVALID_GLOBAL_WORK_SIZE"
+};
+
+const char*
+ocl_strerr (int error)
+{
+    int index = 0;
+
+    if (error >= -14)
+        index = -error;
+    else if (error <= -30 && error >= -64)
+        index = -error-15;
+
+    return opencl_error_msgs[index];
+}
+
+static void
+transfer_error (cl_int src, cl_int *dst)
+{
+    if (dst != NULL)
+        *dst = src;
+}
+
+char *
+ocl_read_program (const char *filename)
+{
+    FILE *fp;
+    char *buffer;
+    size_t length;
+    size_t buffer_length;
+
+    if ((fp = fopen(filename, "r")) == NULL)
+        return NULL;
+
+    fseek (fp, 0, SEEK_END);
+    length = ftell (fp);
+    rewind (fp);
+
+    buffer = malloc (length + 1);
+    buffer[length] = '\0';
+
+    if (buffer == NULL) {
+        fclose(fp);
+        return NULL;
+    }
+
+    buffer_length = fread (buffer, 1, length, fp);
+    fclose(fp);
+
+    if (buffer_length != length) {
+        free (buffer);
+        buffer = NULL;
+    }
+
+    return buffer;
+}
+
+OclPlatform *
+ocl_new (unsigned platform,
+         cl_device_type type)
+{
+    OclPlatform *ocl;
+    cl_int errcode;
+    cl_uint num_platforms;
+    cl_platform_id *platforms;
+
+    ocl = malloc (sizeof(OclPlatform));
+
+    OCL_CHECK_ERROR (clGetPlatformIDs (0, NULL, &num_platforms));
+    platforms = malloc (sizeof (cl_platform_id) * num_platforms);
+
+    if (platform >= num_platforms) {
+        fprintf (stderr, "invalid platform %i out of %i platforms\n", platform, num_platforms);
+        goto ocl_new_cleanup;
+    }
+
+    OCL_CHECK_ERROR (clGetPlatformIDs (num_platforms, platforms, NULL));
+    ocl->platform = platforms[platform];
+
+    OCL_CHECK_ERROR (clGetDeviceIDs (ocl->platform, type, 0, NULL, &ocl->num_devices));
+
+    ocl->devices = malloc (ocl->num_devices * sizeof(cl_device_id));
+    OCL_CHECK_ERROR (clGetDeviceIDs (ocl->platform, type, ocl->num_devices, ocl->devices, NULL));
+
+    ocl->context = clCreateContext (NULL, ocl->num_devices, ocl->devices, NULL, NULL, &errcode);
+    OCL_CHECK_ERROR (errcode);
+
+    ocl->own_queues = 0;
+
+    free (platforms);
+
+    return ocl;
+
+ocl_new_cleanup:
+    free (ocl);
+    free (platforms);
+    return NULL;
+}
+
+OclPlatform *
+ocl_new_with_queues (unsigned platform,
+                     cl_device_type type,
+                     cl_command_queue_properties queue_properties)
+{
+    OclPlatform *ocl;
+    cl_int errcode;
+
+    ocl = ocl_new (platform, type);
+
+    if (ocl == NULL)
+        return NULL;
+
+    ocl->own_queues = 1;
+    ocl->cmd_queues = malloc (ocl->num_devices * sizeof(cl_command_queue));
+
+    for (cl_uint i = 0; i < ocl->num_devices; i++) {
+        ocl->cmd_queues[i] = clCreateCommandQueue (ocl->context, ocl->devices[i],
+                                                   queue_properties, &errcode);
+        OCL_CHECK_ERROR (errcode);
+    }
+
+    return ocl;
+}
+
+void
+ocl_print_usage (void)
+{
+    printf ("oclkit options\n"
+            "      --ocl-platform\tIndex of platform, starting with 0, to use\n"
+            "      --ocl-type\tDevice type: gpu, cpu or accelerator\n");
+}
+
+int
+ocl_read_args (int argc,
+               const char **argv,
+               unsigned int *platform,
+               cl_device_type *type)
+{
+    int c;
+
+    static struct option options[] = {
+        { "ocl-platform",   required_argument, NULL, 'p' },
+        { "ocl-type",       required_argument, NULL, 't' },
+        { "help",           no_argument,       NULL, 'h' },
+        { NULL, 0, NULL, 0 }
+    };
+
+    while (1) {
+        int index = 0;
+
+        c = getopt_long (argc, (char **) argv, "p:d:t:h", options, &index);
+
+        if (c == -1)
+            break;
+
+        switch (c) {
+            case 'h':
+                ocl_print_usage ();
+                return -1;
+            case 'p':
+                *platform = atoi (optarg);
+                break;
+            case 't':
+                {
+                    int n = strlen (optarg);
+                    n = n > 10 ? 10 : n;    /* for accelerator */
+
+                    if (!strncmp (optarg, "gpu", n))
+                        *type = CL_DEVICE_TYPE_GPU;
+                    else if (!strncmp (optarg, "cpu", n))
+                        *type = CL_DEVICE_TYPE_CPU;
+                    else if (!strncmp (optarg, "accelerator", n))
+                        *type = CL_DEVICE_TYPE_ACCELERATOR;
+                }
+                break;
+            default:
+                abort ();
+        }
+    }
+
+    return 0;
+}
+
+OclPlatform *
+ocl_new_from_args (int argc,
+                   const char **argv,
+                   cl_command_queue_properties queue_properties)
+{
+    unsigned platform = 0;
+    cl_device_type type = CL_DEVICE_TYPE_GPU;
+
+    if (ocl_read_args (argc, argv, &platform, &type))
+        return NULL;
+
+    return ocl_new_with_queues (platform, type, queue_properties);
+}
+
+void
+ocl_free (OclPlatform *ocl)
+{
+    if (ocl == NULL)
+        return;
+
+    if (ocl->own_queues) {
+        for (cl_uint i = 0; i < ocl->num_devices; i++)
+            OCL_CHECK_ERROR (clReleaseCommandQueue (ocl->cmd_queues[i]));
+
+        free (ocl->cmd_queues);
+    }
+
+    OCL_CHECK_ERROR (clReleaseContext (ocl->context));
+
+    free (ocl->devices);
+    free (ocl);
+}
+
+char *
+ocl_get_platform_info (OclPlatform *ocl,
+                       cl_platform_info param)
+{
+    size_t size;
+    char *result;
+
+    OCL_CHECK_ERROR (clGetPlatformInfo (ocl->platform, param, 0, NULL, &size));
+    result = malloc (size);
+    OCL_CHECK_ERROR (clGetPlatformInfo (ocl->platform, param, size, result, NULL));
+    return result;
+}
+
+cl_program
+ocl_create_program_from_source (OclPlatform *ocl,
+                                const char *source,
+                                const char *options,
+                                cl_int *errcode)
+{
+    cl_int tmp_err;
+    cl_program program;
+
+    program = clCreateProgramWithSource (ocl->context, 1, (const char **) &source, NULL, &tmp_err);
+
+    if (tmp_err != CL_SUCCESS) {
+        transfer_error (tmp_err, errcode);
+        return NULL;
+    }
+
+    tmp_err = clBuildProgram (program, ocl->num_devices, ocl->devices, options, NULL, NULL);
+
+    if (tmp_err != CL_SUCCESS) {
+        size_t log_size;
+        char* log;
+
+        transfer_error (tmp_err, errcode);
+
+        OCL_CHECK_ERROR (clGetProgramBuildInfo (program, ocl->devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size));
+        log = malloc (log_size * sizeof(char));
+
+        OCL_CHECK_ERROR (clGetProgramBuildInfo (program, ocl->devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL));
+        fprintf (stderr, "\n** Error building program. Build log:\n%s\n", log);
+        free (log);
+        return NULL;
+    }
+
+    *errcode = CL_SUCCESS;
+
+    return program;
+}
+
+cl_program
+ocl_create_program_from_file (OclPlatform *ocl,
+                              const char *filename,
+                              const char *options,
+                              cl_int *errcode)
+{
+    char *source;
+    cl_program program;
+
+    source = ocl_read_program (filename);
+
+    if (source == NULL)
+        return NULL;
+
+    program = ocl_create_program_from_source (ocl, source, options, errcode);
+    free(source);
+    return program;
+}
+
+cl_context
+ocl_get_context (OclPlatform *ocl)
+{
+    assert (ocl != NULL);
+    return ocl->context;
+}
+
+int
+ocl_get_num_devices (OclPlatform *ocl)
+{
+    assert (ocl != NULL);
+    return ocl->num_devices;
+}
+
+cl_device_id *
+ocl_get_devices (OclPlatform *ocl)
+{
+    assert (ocl != NULL);
+    return ocl->devices;
+}
+
+cl_command_queue *
+ocl_get_cmd_queues (OclPlatform *ocl)
+{
+    assert (ocl != NULL);
+    return ocl->cmd_queues;
+}
+
+void
+ocl_get_event_times (cl_event event,
+                     cl_ulong *start,
+                     cl_ulong *end,
+                     cl_ulong *queued,
+                     cl_ulong *submitted)
+{
+    if (queued != NULL)
+        OCL_CHECK_ERROR (clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_QUEUED, sizeof (cl_ulong), queued, NULL));
+
+    if (submitted != NULL)
+        OCL_CHECK_ERROR (clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_SUBMIT, sizeof (cl_ulong), submitted, NULL));
+
+    if (start != NULL)
+        OCL_CHECK_ERROR (clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), start, NULL));
+
+    if (end != NULL)
+        OCL_CHECK_ERROR (clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), end, NULL));
+}

+ 71 - 0
ocl.h

@@ -0,0 +1,71 @@
+/*
+ *  This file is part of oclkit.
+ *
+ *  oclkit is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  oclkit is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef OCL_H
+#define OCL_H
+
+#include <CL/cl.h>
+#include <stdio.h>
+
+typedef struct OclPlatform OclPlatform;
+
+#define OCL_CHECK_ERROR(error) { \
+    if ((error) != CL_SUCCESS) fprintf (stderr, "OpenCL error <%s:%i>: %s\n", __FILE__, __LINE__, ocl_strerr((error))); }
+
+int                 ocl_read_args       (int                 argc,
+                                         const char        **argv,
+                                         unsigned int       *platform,
+                                         cl_device_type     *type);
+OclPlatform *       ocl_new             (unsigned            platform,
+                                         cl_device_type      type);
+OclPlatform *       ocl_new_with_queues (unsigned            platform,
+                                         cl_device_type      type,
+                                         cl_command_queue_properties
+                                                             queue_properties);
+OclPlatform *       ocl_new_from_args   (int                 argc,
+                                         const char **       argv,
+                                         cl_command_queue_properties
+                                                             queue_properties);
+void                ocl_print_usage     (void);
+void                ocl_free            (OclPlatform        *ocl);
+char *              ocl_get_platform_info
+                                        (OclPlatform        *ocl,
+                                         cl_platform_info    param);
+cl_context          ocl_get_context     (OclPlatform        *ocl);
+cl_program          ocl_create_program_from_file
+                                        (OclPlatform        *ocl,
+                                         const char         *filename,
+                                         const char         *options,
+                                         cl_int             *errcode);
+cl_program          ocl_create_program_from_source
+                                        (OclPlatform        *ocl,
+                                         const char         *source,
+                                         const char         *options,
+                                         cl_int             *errcode);
+int                 ocl_get_num_devices (OclPlatform        *ocl);
+cl_device_id *      ocl_get_devices     (OclPlatform        *ocl);
+cl_command_queue *  ocl_get_cmd_queues  (OclPlatform        *ocl);
+const char*         ocl_strerr          (int                 error);
+char*               ocl_read_program    (const char         *filename);
+void                ocl_get_event_times (cl_event            event,
+                                         cl_ulong           *start,
+                                         cl_ulong           *end,
+                                         cl_ulong           *queued,
+                                         cl_ulong           *submitted);
+
+
+#endif

+ 212 - 0
perf.c

@@ -0,0 +1,212 @@
+#include <glib.h>
+#include <CL/cl.h>
+#include <errno.h>
+#include "ocl.h"
+
+typedef struct {
+    /* input */
+    gchar *src_range;
+    gchar *dst_range;
+
+    /* derived */
+    gsize src_from;
+    gsize src_to;
+    gsize src_step;
+    gsize dst_from;
+    gsize dst_to;
+    gsize dst_step;
+
+    OclPlatform *ocl;
+    cl_command_queue queue;
+} Options;
+
+static const gsize DEFAULT_SIZE = 4096;
+static const guint NUM_ITERATIONS = 5;
+
+static gboolean
+parse_uint (const gchar *input, gsize *value)
+{
+    *value = (gsize) g_ascii_strtoull (input, NULL, 10);
+
+    if (errno == ERANGE || errno == EINVAL) {
+        g_print ("Could not parse `%s'", input);
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+static gboolean
+split_triple (const gchar *input, gsize *from, gsize *to, gsize *step)
+{
+    gchar **tokens;
+
+    *from = DEFAULT_SIZE;
+    *to = DEFAULT_SIZE;
+    *step = 1;
+
+    if (input == NULL)
+        return TRUE;
+
+    tokens = g_strsplit (input, ":", 0);
+
+    if (tokens[0] == NULL)
+        return TRUE;
+
+    if (parse_uint (tokens[0], from)) {
+        *to = *from;
+    }
+    else {
+        return FALSE;
+    }
+
+    if (tokens[1] != NULL) {
+        if (!parse_uint (tokens[1], to))
+            return FALSE;
+
+        if (tokens[2] != NULL) {
+            if (!parse_uint (tokens[2], step))
+                return FALSE;
+        }
+    }
+
+    if (*to < *from) {
+        g_print ("%zu is smaller than %zu\n", *to, *from);
+        return FALSE;
+    }
+
+    g_strfreev (tokens);
+    return TRUE; 
+}
+
+static gboolean
+validate_input (Options *opts)
+{
+    if (!split_triple (opts->src_range, &opts->src_from, &opts->src_to, &opts->src_step))
+        return FALSE;
+
+    if (!split_triple (opts->dst_range, &opts->dst_from, &opts->dst_to, &opts->dst_step))
+        return FALSE;
+
+    return TRUE;
+}
+
+static void
+measure_copy (Options *opts, cl_mem src_mem, cl_mem dst_mem, gsize size, gsize dst_offset, gdouble *wall_duration, gdouble *event_duration)
+{
+    GTimer *timer;
+    cl_int error;
+    cl_event event;
+    cl_ulong timestamps[4];
+
+    timer = g_timer_new ();
+
+    OCL_CHECK_ERROR (clEnqueueCopyBuffer (opts->queue, src_mem, dst_mem, 0, dst_offset, size, 0, NULL, &event));
+    OCL_CHECK_ERROR (clWaitForEvents (1, &event));
+
+    *wall_duration += g_timer_elapsed (timer, NULL);
+
+    ocl_get_event_times (event, &timestamps[0], &timestamps[1], &timestamps[2], &timestamps[3]);
+    *event_duration += (timestamps[1] - timestamps[0]) / 1000. / 1000. / 1000.;
+
+    OCL_CHECK_ERROR (clReleaseEvent (event));
+
+    g_timer_destroy (timer);
+}
+
+static void
+benchmark_intra_gpu_copy (Options *opts)
+{
+    cl_context context;
+    cl_int error;
+
+    context = ocl_get_context (opts->ocl);
+
+    for (gsize src_size = opts->src_from; src_size <= opts->src_to; src_size += opts->src_step) {
+        cl_mem src_mem;
+
+        src_mem = clCreateBuffer (context, CL_MEM_READ_ONLY, src_size, NULL, &error);
+        OCL_CHECK_ERROR (error);
+
+        for (gsize dst_size = opts->dst_from; dst_size <= opts->dst_to; dst_size += opts->dst_step) {
+            cl_mem dst_mem;
+            gsize num_blocks;
+            gsize remaining_size;
+            gdouble wall_duration;
+            gdouble event_duration;
+            gdouble throughput;
+
+            num_blocks = dst_size / src_size;
+            remaining_size = dst_size % src_size;
+
+            dst_mem = clCreateBuffer (context, CL_MEM_WRITE_ONLY, dst_size, NULL, &error);
+            OCL_CHECK_ERROR (error);
+
+            wall_duration = 0;
+            event_duration = 0;
+
+            for (guint i = 0; i < NUM_ITERATIONS; i++) {
+                /* Copy blocks that fit */
+                for (gsize block = 0; block < num_blocks; block++) {
+                    measure_copy (opts, src_mem, dst_mem, src_size, block * src_size, &wall_duration, &event_duration);
+                }
+
+                /* Copy last block if necessary */
+                if (remaining_size > 0) {
+                    measure_copy (opts, src_mem, dst_mem, remaining_size, num_blocks * src_size, &wall_duration, &event_duration);
+                }
+            }
+
+            wall_duration /= NUM_ITERATIONS;
+            event_duration /= NUM_ITERATIONS;
+            throughput = dst_size / wall_duration / 1024. / 1024.;
+
+            g_print ("%zu %zu %f %f %f [num_blocks=%zu, remaining=%zu]\n",
+                     src_size, dst_size, wall_duration, event_duration, throughput, num_blocks, remaining_size);
+
+            OCL_CHECK_ERROR (clReleaseMemObject (dst_mem));
+        }
+
+        OCL_CHECK_ERROR (clReleaseMemObject (src_mem));
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+    OclPlatform *ocl;
+    GOptionContext *context;
+    GError *error = NULL;
+
+    Options opts = {
+        .src_range = NULL,
+        .dst_range = NULL
+    };
+
+    GOptionEntry entries[] = {
+        { "source-size", 0, 0, G_OPTION_ARG_STRING, &opts.src_range, "start:stop:step for source size", "M:N:S" },
+        { "dest-size", 0, 0, G_OPTION_ARG_STRING, &opts.dst_range, "start:stop:step for dstination size", "M:N:S" },
+        { NULL }
+    };
+
+    context = g_option_context_new (NULL);
+    g_option_context_add_main_entries (context, entries, NULL);
+
+    if (!g_option_context_parse (context, &argc, &argv, &error)) {
+        g_print ("Parsing options failed: %s\n", error->message);
+        return 1;
+    }
+
+    if (!validate_input (&opts))
+        return 1;
+
+    opts.ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_GPU, CL_QUEUE_PROFILING_ENABLE);
+    opts.queue = ocl_get_cmd_queues (opts.ocl)[0];
+
+    benchmark_intra_gpu_copy (&opts);
+
+    ocl_free (opts.ocl);
+
+    return 0;
+}