#define _PCILIB_DMA_IPE_C #define _BSD_SOURCE #define _DEFAULT_SOURCE #define _POSIX_C_SOURCE 200112L #include #include #include #include #include #include #include #include "pci.h" #include "pcilib.h" #include "error.h" #include "tools.h" #include "debug.h" #include "ipe.h" #include "ipe_private.h" typedef struct { size_t size; size_t pos; pcilib_dma_flags_t flags; } dma_ipe_skim_callback_context_t; static int dma_ipe_skim_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) { dma_ipe_skim_callback_context_t *ctx = (dma_ipe_skim_callback_context_t*)arg; ctx->pos += bufsize; if (flags & PCILIB_DMA_FLAG_EOP) { if ((ctx->pos < ctx->size)&&(ctx->flags&PCILIB_DMA_FLAG_MULTIPACKET)) { if (ctx->flags&PCILIB_DMA_FLAG_WAIT) return PCILIB_STREAMING_WAIT; else return PCILIB_STREAMING_CONTINUE; } return PCILIB_STREAMING_STOP; } return PCILIB_STREAMING_REQ_FRAGMENT; } int dma_ipe_skim_dma_custom(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes) { int err; dma_ipe_skim_callback_context_t opts = { size, 0, flags }; err = pcilib_stream_dma(ctx, dma, addr, size, flags, timeout, dma_ipe_skim_callback, &opts); if (read_bytes) *read_bytes = opts.pos; return err; } double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) { int err = 0; ipe_dma_t *ctx = (ipe_dma_t*)vctx; int iter; size_t us = 0; struct timeval start, cur; void *buf; size_t bytes, rbytes; int (*read_dma)(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes); if ((direction == PCILIB_DMA_TO_DEVICE)||(direction == PCILIB_DMA_BIDIRECTIONAL)) return -1.; if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return -1.; err = dma_ipe_start(vctx, 0, PCILIB_DMA_FLAGS_DEFAULT); if (err) return err; if (size%ctx->page_size) size = (1 + size / ctx->page_size) * ctx->page_size; if (getenv("PCILIB_BENCHMARK_HARDWARE")) read_dma = dma_ipe_skim_dma_custom; else read_dma = pcilib_read_dma_custom; // There is no significant difference and we can remove this when testing phase is over. // DS: With large number of buffers this is quite slow due to skimming of initially written buffers if (getenv("PCILIB_BENCHMARK_STREAMING")) { size_t dma_buffer_space; pcilib_dma_engine_status_t dma_status; if (read_dma == pcilib_read_dma_custom) pcilib_info_once("Benchmarking the DMA streaming (with memcpy)"); else pcilib_info_once("Benchmarking the DMA streaming (without memcpy)"); // Starting DMA WR(IPEDMA_REG_CONTROL, 0x1); gettimeofday(&start, NULL); pcilib_calc_deadline(&start, ctx->dma_timeout * IPEDMA_DMA_PAGES); #ifdef IPEDMA_BUG_LAST_READ dma_buffer_space = (IPEDMA_DMA_PAGES - 2) * ctx->page_size; #else /* IPEDMA_BUG_LAST_READ */ dma_buffer_space = (IPEDMA_DMA_PAGES - 1) * ctx->page_size; #endif /* IPEDMA_BUG_LAST_READ */ // Allocate memory and prepare data err = posix_memalign(&buf, 4096, size + dma_buffer_space); if ((err)||(!buf)) return -1; // Wait all DMA buffers are filled memset(&dma_status, 0, sizeof(dma_status)); do { usleep(10 * IPEDMA_NODATA_SLEEP); err = dma_ipe_get_status(vctx, dma, &dma_status, 0, NULL); } while ((!err)&&(dma_status.written_bytes < dma_buffer_space)&&(pcilib_calc_time_to_deadline(&start) > 0)); if (err) { pcilib_error("Error (%i) getting dma status", err); return -1; } else if (dma_status.written_bytes < dma_buffer_space) { pcilib_error("Timeout while waiting DMA engine to feel the buffer space completely, only %zu bytes of %zu written", dma_status.written_bytes, dma_buffer_space); return -1; } gettimeofday(&start, NULL); for (iter = 0; iter < iterations; iter++) { for (bytes = 0; bytes < (size + dma_buffer_space); bytes += rbytes) { err = read_dma(ctx->dmactx.pcilib, 0, addr, size + dma_buffer_space - bytes, PCILIB_DMA_FLAG_MULTIPACKET, ctx->dma_timeout, buf + bytes, &rbytes); if (err) { pcilib_error("Can't read data from DMA, error %i", err); return -1; } } dma_buffer_space = 0; } gettimeofday(&cur, NULL); us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)); // Stopping DMA WR(IPEDMA_REG_CONTROL, 0x0); usleep(IPEDMA_RESET_DELAY); pcilib_skip_dma(ctx->dmactx.pcilib, 0); } else { if (read_dma == dma_ipe_skim_dma_custom) pcilib_info_once("Benchmarking the DMA hardware (without memcpy)"); WR(IPEDMA_REG_CONTROL, 0x0); usleep(IPEDMA_RESET_DELAY); err = pcilib_skip_dma(ctx->dmactx.pcilib, 0); if (err) { pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine"); return -1; } // Allocate memory and prepare data err = posix_memalign(&buf, 4096, size); if ((err)||(!buf)) return -1; for (iter = 0; iter <= iterations; iter++) { gettimeofday(&start, NULL); // Starting DMA WR(IPEDMA_REG_CONTROL, 0x1); for (bytes = 0; bytes < size; bytes += rbytes) { err = read_dma(ctx->dmactx.pcilib, 0, addr, size - bytes, PCILIB_DMA_FLAG_MULTIPACKET, ctx->dma_timeout, buf + bytes, &rbytes); if (err) { pcilib_error("Can't read data from DMA (iteration: %zu, offset: %zu), error %i", iter, bytes, err); return -1; } } gettimeofday(&cur, NULL); // Stopping DMA WR(IPEDMA_REG_CONTROL, 0x0); usleep(IPEDMA_RESET_DELAY); if (err) break; // Heating up during the first iteration if (iter) us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)); pcilib_info("Iteration %-4i latency: %lu", iter, ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec))); err = pcilib_skip_dma(ctx->dmactx.pcilib, 0); if (err) { pcilib_error("Can't start iteration, devices continuously writes unexpected data using DMA engine"); break; } usleep(ctx->dma_timeout); } } free(buf); return err?-1:((1. * size * iterations * 1000000) / (1024. * 1024. * us)); }