|
@@ -518,8 +518,8 @@ measure_fpga_to_gpu_latency_with_kernel (App *app)
|
|
|
NULL, &work_size, NULL,
|
|
|
0, NULL, &event));
|
|
|
|
|
|
- clock_gettime (CLOCK_MONOTONIC, &start);
|
|
|
WR32 (REG_DMA, 1);
|
|
|
+ clock_gettime (CLOCK_MONOTONIC, &start);
|
|
|
|
|
|
clWaitForEvents (1, &event);
|
|
|
|
|
@@ -535,9 +535,10 @@ measure_fpga_to_gpu_latency_with_kernel (App *app)
|
|
|
OCL_CHECK_ERROR (clEnqueueReadBuffer (app->queue, app->check_buffer, CL_TRUE,
|
|
|
0, CHECK_BUFFER_SIZE, check, 0, NULL, NULL));
|
|
|
|
|
|
- debug_assert_cmp ("Kernel wrote", check[0], 1);
|
|
|
+ debug_assert_cmp ("Data check", check[0], 1);
|
|
|
|
|
|
- printf ("\n%-16s %f us\n", "Wall time", host_latency * 1000.0 * 1000.0);
|
|
|
+ printf ("\n%-16s %i\n", "Kernel count", check[1]);
|
|
|
+ printf ("%-16s %f us\n", "Wall time", host_latency * 1000.0 * 1000.0);
|
|
|
printf ("%-16s %f us\n", "FPGA [counter]", ((counter << 8) * 4) / 1000.0);
|
|
|
printf ("%-16s %f us\n", "FPGA [debug]", debug_latency);
|
|
|
printf ("%-16s %.2f MB/s\n", "Throughput", GPU_BUFFER_SIZE / host_latency / 1024. / 1024.);
|