
Northwest Logic DMA implementation

root committed 13 years ago
parent
commit
2e7a7a3534
22 files changed with 1367 additions and 227 deletions
  1. Makefile              (+1 / -1)
  2. cli.c                 (+37 / -5)
  3. common.mk             (+1 / -1)
  4. dma.c                 (+197 / -0)
  5. dma.h                 (+18 / -0)
  6. dma/nwl.c             (+560 / -102)
  7. dma/nwl.h             (+6 / -2)
  8. dma/nwl_defines.h     (+145 / -0)
  9. driver/Makefile       (+3 / -1)
  10. driver/base.c        (+12 / -0)
  11. driver/common.h      (+4 / -0)
  12. driver/config.h      (+1 / -1)
  13. driver/ioctl.c       (+6 / -1)
  14. driver/kmem.c        (+43 / -5)
  15. driver/pciDriver.h   (+9 / -5)
  16. kmem.c               (+166 / -0)
  17. kmem.h               (+47 / -0)
  18. pci.c                (+21 / -90)
  19. pci.h                (+39 / -4)
  20. pcilib.h             (+24 / -9)
  21. pcilib_types.h       (+25 / -0)
  22. tools.h              (+2 / -0)

+ 1 - 1
Makefile

@@ -14,7 +14,7 @@ include common.mk
 ###############################################################
 # Target definitions
 
-OBJECTS = pci.o  default.o tools.o dma/nwl.o ipecamera/model.o ipecamera/image.o 
+OBJECTS = pci.o kmem.o dma.o  default.o tools.o dma/nwl.o ipecamera/model.o ipecamera/image.o 
 
 libpcilib.so: $(OBJECTS)
 	echo -e "LD \t$@"

+ 37 - 5
cli.c

@@ -90,7 +90,7 @@ static struct option long_options[] = {
     {"info",			no_argument, 0, OPT_INFO },
     {"list",			no_argument, 0, OPT_LIST },
     {"reset",			no_argument, 0, OPT_RESET },
-    {"benchmark",		no_argument, 0, OPT_BENCHMARK },
+    {"benchmark",		optional_argument, 0, OPT_BENCHMARK },
     {"read",			optional_argument, 0, OPT_READ },
     {"write",			optional_argument, 0, OPT_WRITE },
     {"grab",			optional_argument, 0, OPT_GRAB },
@@ -120,7 +120,7 @@ void Usage(int argc, char *argv[], const char *format, ...) {
 "  Modes:\n"
 "	-i			- Device Info\n"
 "	-l			- List Data Banks & Registers\n"
-"	-p			- Performance Evaluation\n"
+"	-p <barX|dmaX>		- Performance Evaluation\n"
 "	-r <addr|reg|dmaX>	- Read Data/Register\n"
 "	-w <addr|reg|dmaX>	- Write Data/Register\n"
 "	-g [event]		- Grab Event\n"
@@ -302,15 +302,41 @@ void Info(pcilib_t *handle, pcilib_model_t model) {
 }
 
 
-int Benchmark(pcilib_t *handle, pcilib_bar_t bar) {
+int Benchmark(pcilib_t *handle, ACCESS_MODE mode, pcilib_dma_addr_t dma, pcilib_bar_t bar) {
     int err;
     int i, errors;
     void *data, *buf, *check;
     struct timeval start, end;
     unsigned long time;
     unsigned int size, max_size;
+    double mbs_in, mbs_out, mbs;
     
     const pcilib_board_info_t *board_info = pcilib_get_board_info(handle);
+
+    if (mode == ACCESS_DMA) {
+        for (size = 1024 ; size < 16 * 1024 * 1024; size *= 4) {
+	    mbs_in = pcilib_benchmark_dma(handle, dma, 0, size, BENCHMARK_ITERATIONS, PCILIB_DMA_FROM_DEVICE);
+	    mbs_out = pcilib_benchmark_dma(handle, dma, 0, size, BENCHMARK_ITERATIONS, PCILIB_DMA_TO_DEVICE);
+	    mbs = pcilib_benchmark_dma(handle, dma, 0, size, BENCHMARK_ITERATIONS, PCILIB_DMA_BIDIRECTIONAL);
+	    printf("%8i KB - ", size / 1024);
+	    
+	    printf("RW: ");
+	    if (mbs < 0) printf("failed ...   ");
+	    else printf("%8.2lf MB/s", mbs);
+
+	    printf(", R: ");
+	    if (mbs_in < 0) printf("failed ...   ");
+	    else printf("%8.2lf MB/s", mbs_in);
+
+	    printf(", W: ");
+	    if (mbs_out < 0) printf("failed ...   ");
+	    else printf("%8.2lf MB/s", mbs_out);
+
+	    printf("\n");
+	}
+	
+	return 0;
+    }
 		
     if (bar < 0) {
 	unsigned long maxlength = 0;
@@ -425,7 +451,7 @@ int ReadData(pcilib_t *handle, ACCESS_MODE mode, pcilib_dma_addr_t dma, pcilib_b
     if (mode == ACCESS_DMA) {
 	pcilib_dma_t dmaid = pcilib_find_dma_by_addr(handle, PCILIB_DMA_FROM_DEVICE, dma);
 	if (dmaid == PCILIB_DMA_INVALID) Error("Invalid DMA engine (%lu) is specified", dma);
-	pcilib_read_dma(handle, dmaid, size, buf);
+	pcilib_read_dma(handle, dmaid, addr, size, buf);
 
 	addr = 0;
     } else {
@@ -748,6 +774,9 @@ int main(int argc, char **argv) {
 		if (mode != MODE_INVALID) Usage(argc, argv, "Multiple operations are not supported");
 
 		mode = MODE_BENCHMARK;
+
+		if (optarg) addr = optarg;
+		else if ((optind < argc)&&(argv[optind][0] != '-')) addr = argv[optind++];
 	    break;
 	    case OPT_READ:
 		if (mode != MODE_INVALID) Usage(argc, argv, "Multiple operations are not supported");
@@ -870,6 +899,9 @@ int main(int argc, char **argv) {
 	if (!strncmp(addr, "dma", 3)) {
 	    dma = atoi(addr + 3);
 	    amode = ACCESS_DMA;
+	} else if (!strncmp(addr, "bar", 3)) {
+	    bar = atoi(addr + 3);
+	    amode = ACCESS_DMA;
 	} else if ((isxnumber(addr))&&(sscanf(addr, "%lx", &start) == 1)) {
 		// check if the address in the register range
 	    pcilib_register_range_t *ranges =  pcilib_model[model].ranges;
@@ -914,7 +946,7 @@ int main(int argc, char **argv) {
         List(handle, model, bank);
      break;
      case MODE_BENCHMARK:
-        Benchmark(handle, bar);
+        Benchmark(handle, amode, dma, bar);
      break;
      case MODE_READ:
         if (addr) {

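Editor's note: the new benchmark mode is essentially a loop over pcilib_benchmark_dma() for growing transfer sizes, as the Benchmark() hunk above shows. A minimal stand-alone sketch of the same call sequence in C, assuming an already opened pcilib_t handle and that the new prototypes are exported through pcilib.h (also touched by this commit); the literal 10 stands in for cli.c's BENCHMARK_ITERATIONS:

    /* Sketch, not part of the commit: measure read throughput of one DMA engine address. */
    #include <stdio.h>
    #include "pcilib.h"

    static void benchmark_read_engine(pcilib_t *handle, pcilib_dma_addr_t dma) {
        size_t size;
        for (size = 1024; size < 16 * 1024 * 1024; size *= 4) {
                /* 0 = device-side address, 10 iterations per size */
            double mbs = pcilib_benchmark_dma(handle, dma, 0, size, 10, PCILIB_DMA_FROM_DEVICE);
            if (mbs < 0) printf("%8zu KB - failed\n", size / 1024);
            else printf("%8zu KB - %8.2lf MB/s\n", size / 1024, mbs);
        }
    }
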
+ 1 - 1
common.mk

@@ -1,6 +1,6 @@
 # Compiler and default flags
 CC ?= gcc
-CFLAGS ?= -O2
+CFLAGS ?= -O0
 
 
 # Defaults for directories

+ 197 - 0
dma.c

@@ -0,0 +1,197 @@
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "error.h"
+#include "pcilib.h"
+#include "pci.h"
+#include "dma.h"
+
+const pcilib_dma_info_t *pcilib_get_dma_info(pcilib_t *ctx) {
+    if (!ctx->dma_ctx) {
+	pcilib_model_t model = pcilib_get_model(ctx);
+	pcilib_dma_api_description_t *api = pcilib_model[model].dma_api;
+	
+	if ((api)&&(api->init)) {
+	    pcilib_map_register_space(ctx);
+	    ctx->dma_ctx = api->init(ctx);
+	}
+	
+	if (!ctx->dma_ctx) return NULL;
+    }
+    
+    return &ctx->dma_info;
+}
+
+pcilib_dma_t pcilib_find_dma_by_addr(pcilib_t *ctx, pcilib_dma_direction_t direction, pcilib_dma_addr_t dma) {
+    pcilib_dma_t i;
+
+    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
+    if (!info) {
+	pcilib_error("DMA Engine is not configured in the current model");
+	return PCILIB_ERROR_NOTSUPPORTED;
+    }
+    
+    for (i = 0; info->engines[i]; i++) {
+	if ((info->engines[i]->addr == dma)&&((info->engines[i]->direction&direction)==direction)) break;
+    }
+    
+    if (info->engines[i]) return i;
+    return PCILIB_DMA_INVALID;
+}
+
+int pcilib_set_dma_engine_description(pcilib_t *ctx, pcilib_dma_t engine, pcilib_dma_engine_description_t *desc) {
+    ctx->dma_info.engines[engine] = desc;
+}
+
+typedef struct {
+    size_t size;
+    void *data;
+    size_t pos;
+} pcilib_dma_read_callback_context_t;
+
+static int pcilib_dma_read_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
+    pcilib_dma_read_callback_context_t *ctx = (pcilib_dma_read_callback_context_t*)arg;
+    
+    if (ctx->pos + bufsize > ctx->size) {
+	pcilib_error("Buffer size (%li) is not large enough for DMA packet, at least %li bytes is required", ctx->size, ctx->pos + bufsize); 
+	return PCILIB_ERROR_INVALID_DATA;
+    }
+    
+    memcpy(ctx->data + ctx->pos, buf, bufsize);
+    ctx->pos += bufsize;
+
+    if (flags & PCILIB_DMA_FLAG_EOP) return 0;
+    return 1;
+}
+
+static int pcilib_dma_skip_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
+    return 1;
+}
+
+size_t pcilib_stream_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr) {
+    int err; 
+
+    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
+    if (!info) {
+	pcilib_error("DMA is not supported by the device");
+	return 0;
+    }
+
+    if (!ctx->model_info->dma_api) {
+	pcilib_error("DMA Engine is not configured in the current model");
+	return 0;
+    }
+    
+    if (!ctx->model_info->dma_api->stream) {
+	pcilib_error("The DMA read is not supported by configured DMA engine");
+	return 0;
+    }
+    
+    if (!info->engines[dma]) {
+	pcilib_error("The DMA engine (%i) is not supported by device", dma);
+	return 0;
+    }
+
+    if (info->engines[dma]->direction&PCILIB_DMA_FROM_DEVICE == 0) {
+	pcilib_error("The selected engine (%i) is S2C-only and does not support reading", dma);
+	return 0;
+    }
+
+    return ctx->model_info->dma_api->stream(ctx->dma_ctx, dma, addr, size, flags, timeout, cb, cbattr);
+}
+
+size_t pcilib_read_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, void *buf) {
+    int err; 
+
+    pcilib_dma_read_callback_context_t opts = {
+	size, buf, 0
+    };
+
+    return pcilib_stream_dma(ctx, dma, addr, size, PCILIB_DMA_FLAGS_DEFAULT, PCILIB_DMA_TIMEOUT, pcilib_dma_read_callback, &opts);
+}
+
+int pcilib_skip_dma(pcilib_t *ctx, pcilib_dma_t dma) {
+    size_t skipped;
+    do {
+	    // IMMEDIATE timeout is not working properly, so default is set
+	skipped = pcilib_stream_dma(ctx, dma, 0, 0, PCILIB_DMA_FLAGS_DEFAULT, PCILIB_DMA_TIMEOUT, pcilib_dma_skip_callback, NULL);
+    } while (skipped > 0);
+    
+    return 0;
+}
+
+
+size_t pcilib_push_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *buf) {
+    int err; 
+
+    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
+    if (!info) {
+	pcilib_error("DMA is not supported by the device");
+	return 0;
+    }
+
+    if (!ctx->model_info->dma_api) {
+	pcilib_error("DMA Engine is not configured in the current model");
+	return 0;
+    }
+    
+    if (!ctx->model_info->dma_api->push) {
+	pcilib_error("The DMA write is not supported by configured DMA engine");
+	return 0;
+    }
+    
+    if (!info->engines[dma]) {
+	pcilib_error("The DMA engine (%i) is not supported by device", dma);
+	return 0;
+    }
+
+    if (info->engines[dma]->direction&PCILIB_DMA_TO_DEVICE == 0) {
+	pcilib_error("The selected engine (%i) is C2S-only and does not support writes", dma);
+	return 0;
+    }
+    
+    return ctx->model_info->dma_api->push(ctx->dma_ctx, dma, addr, size, flags, timeout, buf);
+}
+
+
+size_t pcilib_write_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, void *buf) {
+    return pcilib_push_dma(ctx, dma, addr, size, PCILIB_DMA_FLAG_EOP, PCILIB_DMA_TIMEOUT, buf);
+}
+
+double pcilib_benchmark_dma(pcilib_t *ctx, pcilib_dma_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
+    int err; 
+
+    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
+    if (!info) {
+	pcilib_error("DMA is not supported by the device");
+	return 0;
+    }
+
+    if (!ctx->model_info->dma_api) {
+	pcilib_error("DMA Engine is not configured in the current model");
+	return -1;
+    }
+    
+    if (!ctx->model_info->dma_api->benchmark) {
+	pcilib_error("The DMA benchmark is not supported by configured DMA engine");
+	return -1;
+   }
+    
+    if (!info->engines[dma]) {
+	pcilib_error("The DMA engine (%i) is not supported by device", dma);
+	return -1;
+    }
+
+    return ctx->model_info->dma_api->benchmark(ctx->dma_ctx, dma, addr, size, iterations, direction);
+}

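Editor's note: pcilib_stream_dma() hands each completed buffer to a caller-supplied callback; judging from pcilib_dma_read_callback() above, a callback returns non-zero to keep streaming and 0 to stop. A minimal sketch of a custom callback that only counts received bytes (count_ctx_t and count_callback are illustrative names, not part of the commit):

    #include <stddef.h>
    #include "pcilib.h"

    typedef struct { size_t total; } count_ctx_t;       /* illustrative */

    static int count_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
        count_ctx_t *cnt = (count_ctx_t*)arg;
        cnt->total += bufsize;                           /* account for the data without copying it */
        return (flags & PCILIB_DMA_FLAG_EOP)?0:1;        /* stop at end of packet */
    }

    /* usage:
     *   count_ctx_t cnt = {0};
     *   pcilib_stream_dma(ctx, dmaid, 0, 0, PCILIB_DMA_FLAGS_DEFAULT,
     *                     PCILIB_DMA_TIMEOUT, count_callback, &cnt);
     */
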
+ 18 - 0
dma.h

@@ -0,0 +1,18 @@
+#ifndef _PCILIB_DMA_H
+#define _PCILIB_DMA_H
+
+#define PCILIB_DMA_BUFFER_INVALID ((size_t)-1)
+
+struct pcilib_dma_api_description_s {
+    pcilib_dma_context_t *(*init)(pcilib_t *ctx);
+    void (*free)(pcilib_dma_context_t *ctx);
+
+    size_t (*push)(pcilib_dma_context_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *buf);
+    size_t (*stream)(pcilib_dma_context_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+
+    double (*benchmark)(pcilib_dma_context_t *ctx, pcilib_dma_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
+};
+
+int pcilib_set_dma_engine_description(pcilib_t *ctx, pcilib_dma_t engine, pcilib_dma_engine_description_t *desc);
+
+#endif /* _PCILIB_DMA_H */

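Editor's note: pcilib_dma_api_description_s is the plug-in interface that dma.c dispatches through (init/free, push, stream, benchmark). A skeleton of what a back-end has to supply — the real instance is nwl_dma_api in dma/nwl.h below; all mydma_* names here are hypothetical:

    #include "pcilib.h"
    #include "dma.h"

    extern pcilib_dma_context_t *mydma_init(pcilib_t *ctx);
    extern void mydma_free(pcilib_dma_context_t *ctx);
    extern size_t mydma_push(pcilib_dma_context_t *ctx, pcilib_dma_t dma, uintptr_t addr,
            size_t size, pcilib_dma_flags_t flags, size_t timeout, void *buf);
    extern size_t mydma_stream(pcilib_dma_context_t *ctx, pcilib_dma_t dma, uintptr_t addr,
            size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
    extern double mydma_benchmark(pcilib_dma_context_t *ctx, pcilib_dma_addr_t dma, uintptr_t addr,
            size_t size, size_t iterations, pcilib_dma_direction_t direction);

    pcilib_dma_api_description_t mydma_api = {
        mydma_init,          /* init      */
        mydma_free,          /* free      */
        mydma_push,          /* push      */
        mydma_stream,        /* stream    */
        mydma_benchmark      /* benchmark */
    };
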
+ 560 - 102
dma/nwl.c

@@ -1,112 +1,23 @@
 #define _PCILIB_DMA_NWL_C
+#define _BSD_SOURCE
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 #include <sys/time.h>
 
 #include "pci.h"
+#include "dma.h"
 #include "pcilib.h"
 #include "error.h"
 #include "tools.h"
 #include "nwl.h"
 
-/* Common DMA registers */
-#define REG_DMA_CTRL_STATUS     0x4000      /**< DMA Common Ctrl & Status */
-
-/* These engine registers are applicable to both S2C and C2S channels. 
- * Register field mask and shift definitions are later in this file.
- */
-
-#define REG_DMA_ENG_CAP         0x00000000  /**< DMA Engine Capabilities */
-#define REG_DMA_ENG_CTRL_STATUS 0x00000004  /**< DMA Engine Control */
-#define REG_DMA_ENG_NEXT_BD     0x00000008  /**< HW Next desc pointer */
-#define REG_SW_NEXT_BD          0x0000000C  /**< SW Next desc pointer */
-#define REG_DMA_ENG_LAST_BD     0x00000010  /**< HW Last completed pointer */
-#define REG_DMA_ENG_ACTIVE_TIME 0x00000014  /**< DMA Engine Active Time */
-#define REG_DMA_ENG_WAIT_TIME   0x00000018  /**< DMA Engine Wait Time */
-#define REG_DMA_ENG_COMP_BYTES  0x0000001C  /**< DMA Engine Completed Bytes */
-
-/* Register masks. The following constants define bit locations of various
- * control bits in the registers. For further information on the meaning of 
- * the various bit masks, refer to the hardware spec.
- *
- * Masks have been written assuming HW bits 0-31 correspond to SW bits 0-31 
- */
-
-/** @name Bitmasks of REG_DMA_CTRL_STATUS register.
- * @{
- */
-#define DMA_INT_ENABLE              0x00000001  /**< Enable global interrupts */
-#define DMA_INT_DISABLE             0x00000000  /**< Disable interrupts */
-#define DMA_INT_ACTIVE_MASK         0x00000002  /**< Interrupt active? */
-#define DMA_INT_PENDING_MASK        0x00000004  /**< Engine interrupt pending */
-#define DMA_INT_MSI_MODE            0x00000008  /**< MSI or Legacy mode? */
-#define DMA_USER_INT_ENABLE         0x00000010  /**< Enable user interrupts */
-#define DMA_USER_INT_ACTIVE_MASK    0x00000020  /**< Int - user interrupt */
-#define DMA_USER_INT_ACK            0x00000020  /**< Acknowledge */
-#define DMA_MPS_USED                0x00000700  /**< MPS Used */
-#define DMA_MRRS_USED               0x00007000  /**< MRRS Used */
-#define DMA_S2C_ENG_INT_VAL         0x00FF0000  /**< IRQ value of 1st 8 S2Cs */
-#define DMA_C2S_ENG_INT_VAL         0xFF000000  /**< IRQ value of 1st 8 C2Ss */
-
-/** @name Bitmasks of REG_DMA_ENG_CAP register.
- * @{
- */
-/* DMA engine characteristics */
-#define DMA_ENG_PRESENT_MASK    0x00000001  /**< DMA engine present? */
-#define DMA_ENG_DIRECTION_MASK  0x00000002  /**< DMA engine direction */
-#define DMA_ENG_C2S             0x00000002  /**< DMA engine - C2S */
-#define DMA_ENG_S2C             0x00000000  /**< DMA engine - S2C */
-#define DMA_ENG_TYPE_MASK       0x00000030  /**< DMA engine type */
-#define DMA_ENG_BLOCK           0x00000000  /**< DMA engine - Block type */
-#define DMA_ENG_PACKET          0x00000010  /**< DMA engine - Packet type */
-#define DMA_ENG_NUMBER          0x0000FF00  /**< DMA engine number */
-#define DMA_ENG_BD_MAX_BC       0x3F000000  /**< DMA engine max buffer size */
-
-
-/* Shift constants for selected masks */
-#define DMA_ENG_NUMBER_SHIFT        8
-#define DMA_ENG_BD_MAX_BC_SHIFT     24
-
-/** @name Bitmasks of REG_DMA_ENG_CTRL_STATUS register.
- * @{
- */
-/* Interrupt activity and acknowledgement bits */
-#define DMA_ENG_INT_ENABLE          0x00000001  /**< Enable interrupts */
-#define DMA_ENG_INT_DISABLE         0x00000000  /**< Disable interrupts */
-#define DMA_ENG_INT_ACTIVE_MASK     0x00000002  /**< Interrupt active? */
-#define DMA_ENG_INT_ACK             0x00000002  /**< Interrupt ack */
-#define DMA_ENG_INT_BDCOMP          0x00000004  /**< Int - BD completion */
-#define DMA_ENG_INT_BDCOMP_ACK      0x00000004  /**< Acknowledge */
-#define DMA_ENG_INT_ALERR           0x00000008  /**< Int - BD align error */
-#define DMA_ENG_INT_ALERR_ACK       0x00000008  /**< Acknowledge */
-#define DMA_ENG_INT_FETERR          0x00000010  /**< Int - BD fetch error */
-#define DMA_ENG_INT_FETERR_ACK      0x00000010  /**< Acknowledge */
-#define DMA_ENG_INT_ABORTERR        0x00000020  /**< Int - DMA abort error */
-#define DMA_ENG_INT_ABORTERR_ACK    0x00000020  /**< Acknowledge */
-#define DMA_ENG_INT_CHAINEND        0x00000080  /**< Int - BD chain ended */
-#define DMA_ENG_INT_CHAINEND_ACK    0x00000080  /**< Acknowledge */
-
-/* DMA engine control */
-#define DMA_ENG_ENABLE_MASK         0x00000100  /**< DMA enabled? */
-#define DMA_ENG_ENABLE              0x00000100  /**< Enable DMA */
-#define DMA_ENG_DISABLE             0x00000000  /**< Disable DMA */
-#define DMA_ENG_STATE_MASK          0x00000C00  /**< Current DMA state? */
-#define DMA_ENG_RUNNING             0x00000400  /**< DMA running */
-#define DMA_ENG_IDLE                0x00000000  /**< DMA idle */
-#define DMA_ENG_WAITING             0x00000800  /**< DMA waiting */
-#define DMA_ENG_STATE_WAITED        0x00001000  /**< DMA waited earlier */
-#define DMA_ENG_WAITED_ACK          0x00001000  /**< Acknowledge */
-#define DMA_ENG_USER_RESET          0x00004000  /**< Reset only user logic */
-#define DMA_ENG_RESET               0x00008000  /**< Reset DMA engine + user */
-
-#define DMA_ENG_ALLINT_MASK         0x000000BE  /**< To get only int events */
-
-#define DMA_ENGINE_PER_SIZE     0x100   /**< Separation between engine regs */
-#define DMA_OFFSET              0       /**< Starting register offset */
-                                        /**< Size of DMA engine reg space */
-#define DMA_SIZE                (MAX_DMA_ENGINES * DMA_ENGINE_PER_SIZE)
+#include "nwl_defines.h"
+
+
+#define NWL_FIX_EOP_FOR_BIG_PACKETS		// requires precise sizes in read requests
 
 /*
 pcilib_register_bank_description_t ipecamera_register_banks[] = {
@@ -123,6 +34,15 @@ pcilib_register_description_t dma_nwl_registers[] = {
 typedef struct {
     pcilib_dma_engine_description_t desc;
     char *base_addr;
+    
+    size_t ring_size, page_size;
+    size_t head, tail;
+    pcilib_kmem_handle_t *ring;
+    pcilib_kmem_handle_t *pages;
+    
+    int started;			// indicates if DMA buffers are initialized and reading is allowed
+    int writting;			// indicates if we are in middle of writting packet
+
 } pcilib_nwl_engine_description_t;
 
 
@@ -180,13 +100,26 @@ static int nwl_stop_engine(nwl_dma_t *ctx, pcilib_dma_t dma) {
     
     pcilib_nwl_engine_description_t *info = ctx->engines + dma;
     char *base = ctx->engines[dma].base_addr;
-    
+
+    if (info->desc.addr == 1) {
+	    // Stop Generators
+	nwl_read_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+	val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+	nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+
+	nwl_read_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+	val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+	nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+
+	    // Skip everything in read queue (could be we need to start and skip as well)
+	if (info->started) pcilib_skip_dma(ctx->pcilib, dma);
+    }
+
 	// Disable IRQ
     nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
     val &= ~(DMA_ENG_INT_ENABLE);
     nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
 
-     
 	// Reseting 
     val = DMA_ENG_DISABLE|DMA_ENG_USER_RESET; nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
     gettimeofday(&start, NULL);
@@ -195,11 +128,10 @@ static int nwl_stop_engine(nwl_dma_t *ctx, pcilib_dma_t dma) {
         gettimeofday(&cur, NULL);
     } while ((val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET))&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
     
-    if (val & DMA_ENG_RESET) {
+    if (val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET)) {
 	pcilib_error("Timeout during reset of DMA engine %i", info->desc.addr);
 	return PCILIB_ERROR_TIMEOUT;
     }
-    
 
     val = DMA_ENG_RESET; nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
     gettimeofday(&start, NULL);
@@ -219,12 +151,26 @@ static int nwl_stop_engine(nwl_dma_t *ctx, pcilib_dma_t dma) {
 	nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
     }
     
+	// Clean buffers
+    if (info->ring) {
+	pcilib_free_kernel_memory(ctx->pcilib, info->ring);
+	info->ring = NULL;
+    }
+
+    if (info->pages) {
+	pcilib_free_kernel_memory(ctx->pcilib, info->pages);
+	info->pages = NULL;
+    }
+    
+    info->started = 0;
+
     return 0;
 }
 
 pcilib_dma_context_t *dma_nwl_init(pcilib_t *pcilib) {
     int i;
     int err;
+    uint32_t val;
     pcilib_dma_t n_engines;
 
     pcilib_model_description_t *model_info = pcilib_get_model_description(pcilib);
@@ -243,8 +189,15 @@ pcilib_dma_context_t *dma_nwl_init(pcilib_t *pcilib) {
 	ctx->dma_bank = model_info->banks + dma_bank;
 	ctx->base_addr = pcilib_resolve_register_address(pcilib, ctx->dma_bank->bar, ctx->dma_bank->read_addr);
 
+        val = 0;
+	nwl_read_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+	nwl_read_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+
 	for (i = 0, n_engines = 0; i < 2 * PCILIB_MAX_DMA_ENGINES; i++) {
 	    char *addr = ctx->base_addr + DMA_OFFSET + i * DMA_ENGINE_PER_SIZE;
+
+	    memset(ctx->engines + n_engines, 0, sizeof(pcilib_nwl_engine_description_t));
+
 	    err = nwl_read_engine_config(ctx, ctx->engines + n_engines, addr);
 	    if (!err) err = nwl_stop_engine(ctx, n_engines);
 	    if (!err) {
@@ -270,7 +223,512 @@ void  dma_nwl_free(pcilib_dma_context_t *vctx) {
     }
 }
 
-int dma_nwl_read(pcilib_dma_context_t *vctx, pcilib_dma_t dma, size_t size, void *buf) {
+#define PCILIB_NWL_ALIGNMENT 			64  // in bytes
+#define PCILIB_NWL_DMA_DESCRIPTOR_SIZE		64  // in bytes
+#define PCILIB_NWL_DMA_PAGES			512 // 1024
+
+#define NWL_RING_GET(data, offset)  *(uint32_t*)(((char*)(data)) + (offset))
+#define NWL_RING_SET(data, offset, val)  *(uint32_t*)(((char*)(data)) + (offset)) = (val)
+#define NWL_RING_UPDATE(data, offset, mask, val) *(uint32_t*)(((char*)(data)) + (offset)) = ((*(uint32_t*)(((char*)(data)) + (offset)))&(mask))|(val)
+
+
+int dma_nwl_sync_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, pcilib_kmem_handle_t *kmem) {
+    switch (info->desc.direction) {
+     case PCILIB_DMA_FROM_DEVICE:
+        return pcilib_sync_kernel_memory(ctx->pcilib, kmem, PCILIB_KMEM_SYNC_FROMDEVICE);
+     case PCILIB_DMA_TO_DEVICE:
+        return pcilib_sync_kernel_memory(ctx->pcilib, kmem, PCILIB_KMEM_SYNC_TODEVICE);
+    }
+    
+    return 0;
+}
+
+int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    int err = 0;
+
+    int i;
+    uint32_t val;
+    uint32_t buf_sz;
+    uint64_t buf_pa;
+
+    char *base = info->base_addr;
+    
+    if (info->pages) return 0;
+    
+    pcilib_kmem_handle_t *ring = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
+    pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_PAGE, PCILIB_NWL_DMA_PAGES, 0, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, info->desc.addr), 0);
+
+    if ((ring)&&(pages)) err = dma_nwl_sync_buffers(ctx, info, pages);
+    else err = PCILIB_ERROR_FAILED;
+
+
+    if (err) {
+	if (pages) pcilib_free_kernel_memory(ctx->pcilib, pages);
+	if (ring) pcilib_free_kernel_memory(ctx->pcilib, ring);    
+	return err;
+    }
+    
+    unsigned char *data = (unsigned char*)pcilib_kmem_get_ua(ctx->pcilib, ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, ring);
+    
+    memset(data, 0, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+
+    for (i = 0; i < PCILIB_NWL_DMA_PAGES; i++, data += PCILIB_NWL_DMA_DESCRIPTOR_SIZE) {
+	buf_pa = pcilib_kmem_get_block_pa(ctx->pcilib, pages, i);
+	buf_sz = pcilib_kmem_get_block_size(ctx->pcilib, pages, i);
+
+	NWL_RING_SET(data, DMA_BD_NDESC_OFFSET, ring_pa + ((i + 1) % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+	NWL_RING_SET(data, DMA_BD_BUFAL_OFFSET, buf_pa&0xFFFFFFFF);
+	NWL_RING_SET(data, DMA_BD_BUFAH_OFFSET, buf_pa>>32);
+        NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz);
+/*
+	if (info->desc.direction == PCILIB_DMA_TO_DEVICE) {
+	    NWL_RING_SET(data, DMA_BD_BUFL_STATUS_OFFSET, buf_sz);
+	}
+*/
+    }
+
+    val = ring_pa;
+    nwl_write_register(val, ctx, base, REG_DMA_ENG_NEXT_BD);
+    nwl_write_register(val, ctx, base, REG_SW_NEXT_BD);
+    
+    info->ring = ring;
+    info->pages = pages;
+    info->page_size = buf_sz;
+    info->ring_size = PCILIB_NWL_DMA_PAGES;
+    
+    info->head = 0;
+    info->tail = 0;
+    
+    return 0;
+}
+
+static int dma_nwl_start(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    int err;
+    uint32_t ring_pa;
+    uint32_t val;
+
+    if (info->started) return 0;
+    
+    err = dma_nwl_allocate_engine_buffers(ctx, info);
+    if (err) return err;
+    
+    ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+    nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);
+    nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+
+    __sync_synchronize();
+
+    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+    val |= (DMA_ENG_ENABLE);
+    nwl_write_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+
+    __sync_synchronize();
+
+    if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) {
+	ring_pa += (info->ring_size - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    	nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+//	nwl_read_register(val, ctx, info->base_addr, 0x18);
+
+	info->tail = 0;
+	info->head = (info->ring_size - 1);
+    } else {
+	info->tail = 0;
+	info->head = 0;
+    }
+    
+    info->started = 1;
+    
+    return 0;
+}
+
+static size_t dma_nwl_clean_buffers(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info) {
+    size_t res = 0;
+    uint32_t status, control;
+
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+
+next_buffer:
+    status = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET)&DMA_BD_STATUS_MASK;
+//  control = NWL_RING_GET(ring, DMA_BD_BUFL_CTRL_OFFSET)&DMA_BD_CTRL_MASK;
+    
+    if (status & DMA_BD_ERROR_MASK) {
+        pcilib_error("NWL DMA Engine reported error in ring descriptor");
+        return (size_t)-1;
+    }
+	
+    if (status & DMA_BD_SHORT_MASK) {
+        pcilib_error("NWL DMA Engine reported short error");
+        return (size_t)-1;
+    }
+	
+    if (status & DMA_BD_COMP_MASK) {
+	info->tail++;
+	if (info->tail == info->ring_size) {
+	    ring -= (info->tail - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+	    info->tail = 0;
+	} else {
+	    ring += PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+	}
+	
+	res++;
+
+	if (info->tail != info->head) goto next_buffer;
+    }
+    
+//    printf("====> Cleaned: %i\n", res);
+    return res;
+}
+
+
+static size_t dma_nwl_get_next_buffer(nwl_dma_t * ctx, pcilib_nwl_engine_description_t *info, size_t n_buffers, size_t timeout) {
+    struct timeval start, cur;
+
+    size_t res, n = 0;
+    size_t head;
+
+    for (head = info->head; (((head + 1)%info->ring_size) != info->tail)&&(n < n_buffers); head++, n++);
+    if (n == n_buffers) return info->head;
+
+    gettimeofday(&start, NULL);
+
+    res = dma_nwl_clean_buffers(ctx, info);
+    if (res == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
+    else n += res;
+
+    
+    while (n < n_buffers) {
+	if (timeout != PCILIB_TIMEOUT_INFINITE) {
+	    gettimeofday(&cur, NULL);
+	    if  (((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) > timeout) break;
+	}
+	
+	usleep (10);	
+
+        res = dma_nwl_clean_buffers(ctx, info);
+        if (res == (size_t)-1) return PCILIB_DMA_BUFFER_INVALID;
+	else if (res > 0) {
+	    gettimeofday(&start, NULL);
+	    n += res;
+	}
+    }
+    
+    if (n < n_buffers) return PCILIB_DMA_BUFFER_INVALID;
+    
+    return info->head;
+}
+
+static int dma_nwl_push_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t size, int eop, size_t timeout) {
+    int flags;
+    
+    uint32_t val;
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+
+    ring += info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+
+    
+    if (!info->writting) {
+	flags |= DMA_BD_SOP_MASK;
+	info->writting = 1;
+    }
+    if (eop) {
+	flags |= DMA_BD_EOP_MASK;
+	info->writting = 0;
+    }
+
+    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, size|flags);
+    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, size);
+
+    info->head++;
+    if (info->head == info->ring_size) info->head = 0;
+    
+    val = ring_pa + info->head * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
+//    nwl_read_register(val, ctx, info->base_addr, 0x18);
+
+//    usleep(10000);
+
+//    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_LAST_BD);
+//    printf("Last BD(Write): %lx %lx\n", ring, val);
+    
+    
+    return 0;
+}
+
+
+static size_t dma_nwl_wait_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, size_t *size, int *eop, size_t timeout) {
+    uint32_t val;
+    struct timeval start, cur;
+    uint32_t status_size, status, control;
+
+//    usleep(10000);
+    
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    
+//    status_size = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET);
+//    printf("Status0: %lx\n", status_size);
+
+    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+
+    gettimeofday(&start, NULL);
+    
+//    printf("Waiting %li\n", info->tail);
+//    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_LAST_BD);
+//    printf("Last BD(Read): %lx %lx\n", ring, val);
+
+    do {
+	status_size = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET);
+	status = status_size & DMA_BD_STATUS_MASK;
+	
+//	printf("%i: %lx\n", info->tail, status_size);
+    
+	if (status & DMA_BD_ERROR_MASK) {
+    	    pcilib_error("NWL DMA Engine reported error in ring descriptor");
+    	    return (size_t)-1;
+	}	
+	
+	if (status & DMA_BD_COMP_MASK) {
+	    if (status & DMA_BD_EOP_MASK) *eop = 1;
+	    else *eop = 0;
+        
+	    *size = status_size & DMA_BD_BUFL_MASK;
+	
+//	    printf("Status: %lx\n", status_size);
+	    return info->tail;
+	}
+	
+	usleep(10);
+        gettimeofday(&cur, NULL);
+    } while ((timeout == PCILIB_TIMEOUT_INFINITE)||(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < timeout));
+
+//    printf("Final status: %lx\n", status_size);
+    
+    return (size_t)-1;
+}
+
+static int dma_nwl_return_buffer(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+    uint32_t val;
+
+    unsigned char *ring = pcilib_kmem_get_ua(ctx->pcilib, info->ring);
+    uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+    size_t bufsz = pcilib_kmem_get_block_size(ctx->pcilib, info->pages, info->tail);
+
+    ring += info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+//    printf("Returning: %i\n", info->tail);
+
+    NWL_RING_SET(ring, DMA_BD_BUFL_CTRL_OFFSET, bufsz);
+    NWL_RING_SET(ring, DMA_BD_BUFL_STATUS_OFFSET, 0);
+
+    val = ring_pa + info->tail * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    nwl_write_register(val, ctx, info->base_addr, REG_SW_NEXT_BD);
+//    nwl_read_register(val, ctx, info->base_addr, 0x18);
+    
+    info->tail++;
+    if (info->tail == info->ring_size) info->tail = 0;
+}
+    
+
+size_t dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data) {
+    int err;
+    size_t pos;
+    size_t bufnum;
+    nwl_dma_t *ctx = (nwl_dma_t*)vctx;
+
+    pcilib_nwl_engine_description_t *info = ctx->engines + dma;
+
+    err = dma_nwl_start(ctx, info);
+    if (err) return 0;
+
+    for (pos = 0; pos < size; pos += info->page_size) {
+	int block_size = min2(size - pos, info->page_size);
+    
+        bufnum = dma_nwl_get_next_buffer(ctx, info, 1, timeout);
+	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return pos;
+	
+	    //sync
+        void *buf = pcilib_kmem_get_block_ua(ctx->pcilib, info->pages, bufnum);
+	memcpy(buf, data, block_size);
+
+	err = dma_nwl_push_buffer(ctx, info, block_size, (flags&PCILIB_DMA_FLAG_EOP)&&((pos + block_size) == size), timeout);
+	if (err) return pos;
+    }    
+    
+    return size;
+}
+
+size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr) {
+    int err, ret;
+    size_t res = 0;
+    size_t bufnum;
+    size_t bufsize;
+    nwl_dma_t *ctx = (nwl_dma_t*)vctx;
+
+    size_t buf_size;
+    int eop;
+
+    pcilib_nwl_engine_description_t *info = ctx->engines + dma;
+
+    err = dma_nwl_start(ctx, info);
+    if (err) return 0;
+
+    do {
+        bufnum = dma_nwl_wait_buffer(ctx, info, &bufsize, &eop, timeout);
+	if (bufnum == PCILIB_DMA_BUFFER_INVALID) return 0;
+
+#ifdef NWL_FIX_EOP_FOR_BIG_PACKETS
+	if (size > 65536) {
+//	    printf("%i %i\n", res + bufsize, size);
+	    if ((res+bufsize) < size) eop = 0;
+	    else if ((res+bufsize) == size) eop = 1;
+	}
+#endif /*  NWL_FIX_EOP_FOR_BIG_PACKETS */
+	
+	//sync
+        void *buf = pcilib_kmem_get_block_ua(ctx->pcilib, info->pages, bufnum);
+	ret = cb(cbattr, eop?PCILIB_DMA_FLAG_EOP:0, bufsize, buf);
+	dma_nwl_return_buffer(ctx, info);
+	
+	res += bufsize;
+	
+//	printf("%i %i %i (%li)\n", ret, res, eop, size);
+    } while (ret);
+    
+    return res;
+}
+
+double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
+    int i;
+    int res;
+    int err;
+    size_t bytes;
+    uint32_t val;
+    uint32_t *buf, *cmp;
+    const char *error = NULL;
+
+    size_t us = 0;
+    struct timeval start, cur;
+
     nwl_dma_t *ctx = (nwl_dma_t*)vctx;
-    printf("Reading dma: %i\n", dma);
+
+    pcilib_dma_t readid = pcilib_find_dma_by_addr(ctx->pcilib, PCILIB_DMA_FROM_DEVICE, dma);
+    pcilib_dma_t writeid = pcilib_find_dma_by_addr(ctx->pcilib, PCILIB_DMA_TO_DEVICE, dma);
+
+    if (size%sizeof(uint32_t)) size = 1 + size / sizeof(uint32_t);
+    else size /= sizeof(uint32_t);
+
+
+	// Stop Generators and drain old data
+    nwl_read_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+    val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+    nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+
+    nwl_read_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+    val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+    nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+
+/*
+    nwl_stop_engine(ctx, readid);
+    nwl_stop_engine(ctx, writeid);
+
+    err = dma_nwl_start(ctx, ctx->engines + readid);
+    if (err) return -1;
+    err = dma_nwl_start(ctx, ctx->engines + writeid);
+    if (err) return -1;
+*/
+
+    __sync_synchronize();
+
+    pcilib_skip_dma(ctx->pcilib, readid);
+
+
+	// Set size and required mode
+    val = size * sizeof(uint32_t);
+    nwl_write_register(val, ctx, ctx->base_addr, PKT_SIZE_ADDRESS);
+
+    switch (direction) {
+      case PCILIB_DMA_BIDIRECTIONAL:
+	val = LOOPBACK;
+	break;
+      case PCILIB_DMA_TO_DEVICE:
+	return -1;
+      case PCILIB_DMA_FROM_DEVICE:
+        val = PKTGENR;
+	break;
+    }
+
+    nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+    nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+
+
+	// Allocate memory and prepare data
+    buf = malloc(size * sizeof(uint32_t));
+    cmp = malloc(size * sizeof(uint32_t));
+    if ((!buf)||(!cmp)) {
+	if (buf) free(buf);
+	if (cmp) free(cmp);
+	return -1;
+    }
+
+    memset(cmp, 0x13, size * sizeof(uint32_t));
+
+	// Benchmark
+    for (i = 0; i < iterations; i++) {
+//	printf("Iteration: %i\n", i);
+
+        gettimeofday(&start, NULL);
+	if (direction&PCILIB_DMA_TO_DEVICE) {
+	    memcpy(buf, cmp, size * sizeof(uint32_t));
+
+	    bytes = pcilib_write_dma(ctx->pcilib, writeid, addr, size * sizeof(uint32_t), buf);
+	    if (bytes != size * sizeof(uint32_t)) {
+		error = "Write failed";
+	        break;
+	    }
+	}
+
+	memset(buf, 0, size * sizeof(uint32_t));
+        
+	bytes = pcilib_read_dma(ctx->pcilib, readid, addr, size * sizeof(uint32_t), buf);
+        gettimeofday(&cur, NULL);
+	us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
+
+
+	if (bytes != size * sizeof(uint32_t)) {
+	     printf("RF: %li %li\n", bytes, size * 4);
+	     error = "Read failed";
+	     break;
+	}
+	
+	if (direction == PCILIB_DMA_BIDIRECTIONAL) {
+	    res = memcmp(buf, cmp, size * sizeof(uint32_t));
+	    if (res) {
+		error = "Written and read values does not match";
+		break;
+	    }
+	}
+	     
+    }
+
+
+	// Stop Generators and drain data if necessary
+    nwl_read_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+    val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+    nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
+
+    nwl_read_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+    val = ~(LOOPBACK|PKTCHKR|PKTGENR);
+    nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
+
+    __sync_synchronize();
+    
+    if (direction == PCILIB_DMA_FROM_DEVICE) {
+	pcilib_skip_dma(ctx->pcilib, readid);
+    }
+    
+    free(cmp);
+    free(buf);
+
+    return error?-1:(1. * size * sizeof(uint32_t) * iterations * 1000000) / (1024. * 1024. * us);
 }

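Editor's note: the engine now keeps a ring of 64-byte buffer descriptors (PCILIB_NWL_DMA_PAGES of them), each pointing at one kernel page and at the next descriptor, with head and tail indices wrapping around; dma_nwl_benchmark() then reports size * iterations bytes divided by the accumulated read time, scaled to MB/s. A small sketch of the index arithmetic used throughout (helper names are illustrative; the constants mirror the values defined in the file above):

    #include <stdint.h>
    #include <stddef.h>

    #define PCILIB_NWL_DMA_DESCRIPTOR_SIZE  64
    #define PCILIB_NWL_DMA_PAGES            512

    /* bus address of descriptor i, given the physical address of the ring start */
    static uintptr_t nwl_bd_pa(uintptr_t ring_pa, size_t i) {
        return ring_pa + (i % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
    }

    /* advance head or tail with wrap-around, as dma_nwl_push_buffer() and
     * dma_nwl_return_buffer() do */
    static size_t nwl_ring_next(size_t idx) {
        return (idx + 1) % PCILIB_NWL_DMA_PAGES;
    }
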
+ 6 - 2
dma/nwl.h

@@ -17,13 +17,17 @@ typedef struct {
 pcilib_dma_context_t *dma_nwl_init(pcilib_t *ctx);
 void  dma_nwl_free(pcilib_dma_context_t *vctx);
 
-int dma_nwl_read(pcilib_dma_context_t *ctx, pcilib_dma_t dma, size_t size, void *buf);
+size_t dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *data);
+size_t dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
 
 #ifdef _PCILIB_DMA_NWL_C
 pcilib_dma_api_description_t nwl_dma_api = {
     dma_nwl_init,
     dma_nwl_free,
-    dma_nwl_read
+    dma_nwl_write_fragment,
+    dma_nwl_stream_read,
+    dma_nwl_benchmark
 };
 #else
 extern pcilib_dma_api_description_t nwl_dma_api;

+ 145 - 0
dma/nwl_defines.h

@@ -0,0 +1,145 @@
+/** @name Buffer Descriptor offsets
+ *  USR fields are defined by higher level IP. For example, checksum offload
+ *  setup for EMAC type devices. The 1st 8 words are utilized by hardware. Any
+ *  words after the 8th are for software use only.
+ *  @{
+ */
+#define DMA_BD_BUFL_STATUS_OFFSET   0x00 /**< Buffer length + status */
+#define DMA_BD_USRL_OFFSET          0x04 /**< User logic specific - LSBytes */
+#define DMA_BD_USRH_OFFSET          0x08 /**< User logic specific - MSBytes */
+#define DMA_BD_CARDA_OFFSET         0x0C /**< Card address */
+#define DMA_BD_BUFL_CTRL_OFFSET     0x10 /**< Buffer length + control */
+#define DMA_BD_BUFAL_OFFSET         0x14 /**< Buffer address LSBytes */
+#define DMA_BD_BUFAH_OFFSET         0x18 /**< Buffer address MSBytes */
+#define DMA_BD_NDESC_OFFSET         0x1C /**< Next descriptor pointer */
+
+/* Bit masks for some BD fields */
+#define DMA_BD_BUFL_MASK            0x000FFFFF /**< Byte count */
+#define DMA_BD_STATUS_MASK          0xFF000000 /**< Status Flags */
+#define DMA_BD_CTRL_MASK            0xFF000000 /**< Control Flags */
+
+/* Bit masks for BD control field */
+#define DMA_BD_INT_ERROR_MASK       0x02000000 /**< Intr on error */
+#define DMA_BD_INT_COMP_MASK        0x01000000 /**< Intr on BD completion */
+
+/* Bit masks for BD status field */
+#define DMA_BD_SOP_MASK             0x80000000 /**< Start of packet */
+#define DMA_BD_EOP_MASK             0x40000000 /**< End of packet */
+#define DMA_BD_ERROR_MASK           0x10000000 /**< BD had error */
+#define DMA_BD_USER_HIGH_ZERO_MASK  0x08000000 /**< User High Status zero */
+#define DMA_BD_USER_LOW_ZERO_MASK   0x04000000 /**< User Low Status zero */
+#define DMA_BD_SHORT_MASK           0x02000000 /**< BD not fully used */
+#define DMA_BD_COMP_MASK            0x01000000 /**< BD completed */
+
+
+
+#define DMA_BD_MINIMUM_ALIGNMENT    0x40  /**< Minimum byte alignment
+
+/* Common DMA registers */
+#define REG_DMA_CTRL_STATUS     0x4000      /**< DMA Common Ctrl & Status */
+
+/* These engine registers are applicable to both S2C and C2S channels. 
+ * Register field mask and shift definitions are later in this file.
+ */
+
+#define REG_DMA_ENG_CAP         0x00000000  /**< DMA Engine Capabilities */
+#define REG_DMA_ENG_CTRL_STATUS 0x00000004  /**< DMA Engine Control */
+#define REG_DMA_ENG_NEXT_BD     0x00000008  /**< HW Next desc pointer */
+#define REG_SW_NEXT_BD          0x0000000C  /**< SW Next desc pointer */
+#define REG_DMA_ENG_LAST_BD     0x00000010  /**< HW Last completed pointer */
+#define REG_DMA_ENG_ACTIVE_TIME 0x00000014  /**< DMA Engine Active Time */
+#define REG_DMA_ENG_WAIT_TIME   0x00000018  /**< DMA Engine Wait Time */
+#define REG_DMA_ENG_COMP_BYTES  0x0000001C  /**< DMA Engine Completed Bytes */
+
+/* Register masks. The following constants define bit locations of various
+ * control bits in the registers. For further information on the meaning of 
+ * the various bit masks, refer to the hardware spec.
+ *
+ * Masks have been written assuming HW bits 0-31 correspond to SW bits 0-31 
+ */
+
+/** @name Bitmasks of REG_DMA_CTRL_STATUS register.
+ * @{
+ */
+#define DMA_INT_ENABLE              0x00000001  /**< Enable global interrupts */
+#define DMA_INT_DISABLE             0x00000000  /**< Disable interrupts */
+#define DMA_INT_ACTIVE_MASK         0x00000002  /**< Interrupt active? */
+#define DMA_INT_PENDING_MASK        0x00000004  /**< Engine interrupt pending */
+#define DMA_INT_MSI_MODE            0x00000008  /**< MSI or Legacy mode? */
+#define DMA_USER_INT_ENABLE         0x00000010  /**< Enable user interrupts */
+#define DMA_USER_INT_ACTIVE_MASK    0x00000020  /**< Int - user interrupt */
+#define DMA_USER_INT_ACK            0x00000020  /**< Acknowledge */
+#define DMA_MPS_USED                0x00000700  /**< MPS Used */
+#define DMA_MRRS_USED               0x00007000  /**< MRRS Used */
+#define DMA_S2C_ENG_INT_VAL         0x00FF0000  /**< IRQ value of 1st 8 S2Cs */
+#define DMA_C2S_ENG_INT_VAL         0xFF000000  /**< IRQ value of 1st 8 C2Ss */
+
+/** @name Bitmasks of REG_DMA_ENG_CAP register.
+ * @{
+ */
+/* DMA engine characteristics */
+#define DMA_ENG_PRESENT_MASK    0x00000001  /**< DMA engine present? */
+#define DMA_ENG_DIRECTION_MASK  0x00000002  /**< DMA engine direction */
+#define DMA_ENG_C2S             0x00000002  /**< DMA engine - C2S */
+#define DMA_ENG_S2C             0x00000000  /**< DMA engine - S2C */
+#define DMA_ENG_TYPE_MASK       0x00000030  /**< DMA engine type */
+#define DMA_ENG_BLOCK           0x00000000  /**< DMA engine - Block type */
+#define DMA_ENG_PACKET          0x00000010  /**< DMA engine - Packet type */
+#define DMA_ENG_NUMBER          0x0000FF00  /**< DMA engine number */
+#define DMA_ENG_BD_MAX_BC       0x3F000000  /**< DMA engine max buffer size */
+
+
+/* Shift constants for selected masks */
+#define DMA_ENG_NUMBER_SHIFT        8
+#define DMA_ENG_BD_MAX_BC_SHIFT     24
+
+/** @name Bitmasks of REG_DMA_ENG_CTRL_STATUS register.
+ * @{
+ */
+/* Interrupt activity and acknowledgement bits */
+#define DMA_ENG_INT_ENABLE          0x00000001  /**< Enable interrupts */
+#define DMA_ENG_INT_DISABLE         0x00000000  /**< Disable interrupts */
+#define DMA_ENG_INT_ACTIVE_MASK     0x00000002  /**< Interrupt active? */
+#define DMA_ENG_INT_ACK             0x00000002  /**< Interrupt ack */
+#define DMA_ENG_INT_BDCOMP          0x00000004  /**< Int - BD completion */
+#define DMA_ENG_INT_BDCOMP_ACK      0x00000004  /**< Acknowledge */
+#define DMA_ENG_INT_ALERR           0x00000008  /**< Int - BD align error */
+#define DMA_ENG_INT_ALERR_ACK       0x00000008  /**< Acknowledge */
+#define DMA_ENG_INT_FETERR          0x00000010  /**< Int - BD fetch error */
+#define DMA_ENG_INT_FETERR_ACK      0x00000010  /**< Acknowledge */
+#define DMA_ENG_INT_ABORTERR        0x00000020  /**< Int - DMA abort error */
+#define DMA_ENG_INT_ABORTERR_ACK    0x00000020  /**< Acknowledge */
+#define DMA_ENG_INT_CHAINEND        0x00000080  /**< Int - BD chain ended */
+#define DMA_ENG_INT_CHAINEND_ACK    0x00000080  /**< Acknowledge */
+
+/* DMA engine control */
+#define DMA_ENG_ENABLE_MASK         0x00000100  /**< DMA enabled? */
+#define DMA_ENG_ENABLE              0x00000100  /**< Enable DMA */
+#define DMA_ENG_DISABLE             0x00000000  /**< Disable DMA */
+#define DMA_ENG_STATE_MASK          0x00000C00  /**< Current DMA state? */
+#define DMA_ENG_RUNNING             0x00000400  /**< DMA running */
+#define DMA_ENG_IDLE                0x00000000  /**< DMA idle */
+#define DMA_ENG_WAITING             0x00000800  /**< DMA waiting */
+#define DMA_ENG_STATE_WAITED        0x00001000  /**< DMA waited earlier */
+#define DMA_ENG_WAITED_ACK          0x00001000  /**< Acknowledge */
+#define DMA_ENG_USER_RESET          0x00004000  /**< Reset only user logic */
+#define DMA_ENG_RESET               0x00008000  /**< Reset DMA engine + user */
+
+#define DMA_ENG_ALLINT_MASK         0x000000BE  /**< To get only int events */
+
+#define DMA_ENGINE_PER_SIZE     0x100   /**< Separation between engine regs */
+#define DMA_OFFSET              0       /**< Starting register offset */
+                                        /**< Size of DMA engine reg space */
+#define DMA_SIZE                (MAX_DMA_ENGINES * DMA_ENGINE_PER_SIZE)
+
+
+#define TX_CONFIG_ADDRESS   0x9108  /* Reg for controlling TX data */
+#define RX_CONFIG_ADDRESS   0x9100  /* Reg for controlling RX pkt generator */
+#define PKT_SIZE_ADDRESS    0x9104  /* Reg for programming packet size */
+#define STATUS_ADDRESS      0x910C  /* Reg for checking TX pkt checker status */
+
+/* Test start / stop conditions */
+#define PKTCHKR             0x00000001  /* Enable TX packet checker */
+#define PKTGENR             0x00000001  /* Enable RX packet generator */
+#define CHKR_MISMATCH       0x00000001  /* TX checker reported data mismatch */
+#define LOOPBACK            0x00000002  /* Enable TX data loopback onto RX */

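Editor's note: these definitions are the Northwest Logic register map previously embedded in dma/nwl.c, plus the TX/RX packet-generator registers. As an illustration of how the capability masks and shifts combine, a sketch that decodes a raw REG_DMA_ENG_CAP value (the helper name and the printing are illustrative; the interpretation of the max-BC field is left to the hardware spec):

    #include <stdio.h>
    #include <stdint.h>

    static void nwl_print_engine_cap(uint32_t cap) {
        if (!(cap & DMA_ENG_PRESENT_MASK)) return;       /* no engine behind this slot */
        printf("engine %u: %s, %s, max BC code %u\n",
            (unsigned)((cap & DMA_ENG_NUMBER) >> DMA_ENG_NUMBER_SHIFT),
            ((cap & DMA_ENG_DIRECTION_MASK) == DMA_ENG_C2S)?"C2S":"S2C",
            ((cap & DMA_ENG_TYPE_MASK) == DMA_ENG_PACKET)?"packet":"block",
            (unsigned)((cap & DMA_ENG_BD_MAX_BC) >> DMA_ENG_BD_MAX_BC_SHIFT));
    }
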
+ 3 - 1
driver/Makefile

@@ -6,8 +6,10 @@ KERNELDIR ?= /lib/modules/$(shell uname -r)/build
 INSTALLDIR ?= /lib/modules/$(shell uname -r)/extra
 PWD := $(shell pwd)
 
+EXTRA_CFLAGS += -I$(M)/..
+
 default:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
+	$(MAKE) $(CFLAGS) -C $(KERNELDIR) M=$(PWD) modules
 
 install:
 	@mkdir -p $(INSTALLDIR)

+ 12 - 0
driver/base.c

@@ -319,6 +319,17 @@ static int __devinit pcidriver_probe(struct pci_dev *pdev, const struct pci_devi
 		mod_info("Couldn't enable device\n");
 		goto probe_pcien_fail;
 	}
+	
+	/* Bus master & dma */
+	if ((id->vendor == PCIE_XILINX_VENDOR_ID)&&(id->device == PCIE_IPECAMERA_DEVICE_ID)) {
+	    pci_set_master(pdev);
+	    
+	    err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	    if (err < 0) {
+		printk(KERN_ERR "pci_set_dma_mask failed\n");
+		goto probe_dma_fail;
+	    }
+	}
 
 	/* Set Memory-Write-Invalidate support */
 	if ((err = pci_set_mwi(pdev)) != 0)
@@ -404,6 +415,7 @@ probe_irq_probe_fail:
 probe_nomem:
 	atomic_dec(&pcidriver_deviceCount);
 probe_maxdevices_fail:
+probe_dma_fail:
 	pci_disable_device(pdev);
 probe_pcien_fail:
  	return err;

+ 4 - 0
driver/common.h

@@ -1,6 +1,8 @@
 #ifndef _PCIDRIVER_COMMON_H
 #define _PCIDRIVER_COMMON_H
 
+#include "pcilib_types.h"
+
 /*************************************************************************/
 /* Private data types and structures */
 
@@ -12,6 +14,8 @@ typedef struct {
 	dma_addr_t dma_handle;
 	unsigned long cpua;
 	unsigned long size;
+	unsigned long type;
+	unsigned long use;
 	struct class_device_attribute sysfs_attr;	/* initialized when adding the entry */
 } pcidriver_kmem_entry_t;
 

+ 1 - 1
driver/config.h

@@ -3,7 +3,7 @@
 /*******************************/
 
 /* Debug messages */
-//#define DEBUG
+#define DEBUG
 
 /* Enable/disable IRQ handling */
 #define ENABLE_IRQ

+ 6 - 1
driver/ioctl.c

@@ -218,7 +218,12 @@ static int ioctl_kmem_sync(pcidriver_privdata_t *privdata, unsigned long arg)
 	int ret;
 	READ_FROM_USER(kmem_sync_t, ksync);
 
-	return pcidriver_kmem_sync(privdata, &ksync);
+	if ((ret =  pcidriver_kmem_sync(privdata, &ksync)) != 0)
+	    return ret;
+	
+	WRITE_TO_USER(kmem_sync_t, ksync);
+	
+	return 0;
 }
 
 /*

+ 43 - 5
driver/kmem.c

@@ -41,7 +41,8 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han
 
 	/* Initialize the kmem_entry */
 	kmem_entry->id = atomic_inc_return(&privdata->kmem_count) - 1;
-	kmem_entry->size = kmem_handle->size;
+	kmem_entry->use = kmem_handle->use;
+	kmem_entry->type = kmem_handle->type;
 	kmem_handle->handle_id = kmem_entry->id;
 
 	/* Initialize sysfs if possible */
@@ -55,9 +56,25 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han
 	 * CPU address is used for the mmap (internal to the driver), and
 	 * PCI address is the address passed to the DMA Controller in the device.
 	 */
-	retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) );
+	switch (kmem_entry->type) {
+	 case PCILIB_KMEM_TYPE_CONSISTENT:
+	    retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) );
+	    break;
+	 case PCILIB_KMEM_TYPE_PAGE:
+	    retptr = (void*)__get_free_pages(GFP_KERNEL, get_order(PAGE_SIZE));
+	    kmem_entry->dma_handle = 0;
+	    kmem_handle->size = PAGE_SIZE;
+
+//    	    kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, PAGE_SIZE,  PCI_DMA_FROMDEVICE);
+//	    printk("%llx %lx\n", kmem_entry->dma_handle, retptr);
+	    break;
+	 default:
+	    goto kmem_alloc_mem_fail;
+	}
 	if (retptr == NULL)
 		goto kmem_alloc_mem_fail;
+	
+	kmem_entry->size = kmem_handle->size;
 	kmem_entry->cpua = (unsigned long)retptr;
 	kmem_handle->pa = (unsigned long)(kmem_entry->dma_handle);
 
@@ -124,6 +141,17 @@ int pcidriver_kmem_sync( pcidriver_privdata_t *privdata, kmem_sync_t *kmem_sync
 	if ((kmem_entry = pcidriver_kmem_find_entry(privdata, &(kmem_sync->handle))) == NULL)
 		return -EINVAL;					/* kmem_handle is not valid */
 
+
+	if (!kmem_entry->dma_handle) {
+	    mod_info_dbg("Instead of synchronization, we are mapping kmem_entry with id: %d\n", kmem_entry->id);
+	    if (kmem_sync->dir == PCIDRIVER_DMA_TODEVICE) 
+		kmem_entry->dma_handle = pci_map_single(privdata->pdev, (void*)kmem_entry->cpua, kmem_entry->size,  PCI_DMA_TODEVICE);
+	    else
+		kmem_entry->dma_handle = pci_map_single(privdata->pdev, (void*)kmem_entry->cpua, kmem_entry->size,  PCI_DMA_FROMDEVICE);
+
+	    kmem_sync->handle.pa = kmem_entry->dma_handle;
+	}
+	
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
 	switch (kmem_sync->dir) {
 		case PCIDRIVER_DMA_TODEVICE:
@@ -197,7 +225,16 @@ int pcidriver_kmem_free_entry(pcidriver_privdata_t *privdata, pcidriver_kmem_ent
 #endif
 
 	/* Release DMA memory */
-	pci_free_consistent( privdata->pdev, kmem_entry->size, (void *)(kmem_entry->cpua), kmem_entry->dma_handle );
+	switch (kmem_entry->type) {
+	 case PCILIB_KMEM_TYPE_CONSISTENT:
+	    pci_free_consistent( privdata->pdev, kmem_entry->size, (void *)(kmem_entry->cpua), kmem_entry->dma_handle );
+	    break;
+	 case PCILIB_KMEM_TYPE_PAGE:
+	    if (kmem_entry->dma_handle) pci_unmap_single(privdata->pdev, kmem_entry->dma_handle, kmem_entry->size, PCI_DMA_TODEVICE);
+	    free_page((unsigned long)kmem_entry->cpua);
+	    break;
+	}
+
 
 	/* Remove the kmem list entry */
 	spin_lock( &(privdata->kmemlist_lock) );
@@ -226,7 +263,7 @@ pcidriver_kmem_entry_t *pcidriver_kmem_find_entry(pcidriver_privdata_t *privdata
 	list_for_each(ptr, &(privdata->kmem_list)) {
 		entry = list_entry(ptr, pcidriver_kmem_entry_t, list);
 
-		if (entry->dma_handle == kmem_handle->pa) {
+		if (entry->id == kmem_handle->handle_id) {
 			result = entry;
 			break;
 		}
@@ -288,6 +325,7 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v
 
 	/* Check sizes */
 	vma_size = (vma->vm_end - vma->vm_start);
+	
 	if ((vma_size != kmem_entry->size) &&
 		((kmem_entry->size < PAGE_SIZE) && (vma_size != PAGE_SIZE))) {
 		mod_info("kem_entry size(%lu) and vma size do not match(%lu)\n", kmem_entry->size, vma_size);
@@ -316,6 +354,6 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v
 		mod_info("kmem remap failed: %d (%lx)\n", ret,kmem_entry->cpua);
 		return -EAGAIN;
 	}
-
+	
 	return ret;
 }

+ 9 - 5
driver/pciDriver.h

@@ -57,6 +57,7 @@
  */
 
 #include <linux/ioctl.h>
+#include "pcilib_types.h"
 
 /* Identifies the PCI-E Xilinx ML605 */
 #define PCIE_XILINX_VENDOR_ID 0x10ee
@@ -82,9 +83,9 @@
 #define PCIDRIVER_MMAP_KMEM 1
 
 /* Direction of a DMA operation */
-#define PCIDRIVER_DMA_BIDIRECTIONAL 0
-#define	PCIDRIVER_DMA_TODEVICE		1
-#define PCIDRIVER_DMA_FROMDEVICE	2
+#define PCIDRIVER_DMA_BIDIRECTIONAL 	0
+#define	PCIDRIVER_DMA_TODEVICE		PCILIB_KMEM_SYNC_TODEVICE
+#define PCIDRIVER_DMA_FROMDEVICE	PCILIB_KMEM_SYNC_FROMDEVICE
 
 /* Possible sizes in a PCI command */
 #define PCIDRIVER_PCI_CFG_SZ_BYTE  1
@@ -100,8 +101,11 @@
 
 /* Types */
 typedef struct {
+	unsigned long type;
 	unsigned long pa;
 	unsigned long size;
+	unsigned long align;
+	unsigned long use;
 	int handle_id;
 } kmem_handle_t;
 
@@ -171,8 +175,8 @@ typedef struct {
 #define PCIDRIVER_IOC_MMAP_MODE  _IO(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 0 )
 #define PCIDRIVER_IOC_MMAP_AREA  _IO(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 1 )
 #define PCIDRIVER_IOC_KMEM_ALLOC _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 2, kmem_handle_t * )
-#define PCIDRIVER_IOC_KMEM_FREE  _IOW(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 3, kmem_handle_t * )
-#define PCIDRIVER_IOC_KMEM_SYNC  _IOW(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 4, kmem_sync_t * )
+#define PCIDRIVER_IOC_KMEM_FREE  _IOW ( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 3, kmem_handle_t * )
+#define PCIDRIVER_IOC_KMEM_SYNC  _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 4, kmem_sync_t * )
 #define PCIDRIVER_IOC_UMEM_SGMAP _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 5, umem_handle_t * )
 #define PCIDRIVER_IOC_UMEM_SGUNMAP _IOW(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 6, umem_handle_t * )
 #define PCIDRIVER_IOC_UMEM_SGGET _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 7, umem_sglist_t * )

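Editor's note: the extended kmem_handle_t plus the now-bidirectional KMEM_SYNC ioctl give user space the following allocation protocol, condensed from kmem.c further below (fd is an open descriptor of the driver's device node; alloc_dma_page() is an illustrative helper and error handling is trimmed):

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include "pciDriver.h"

    static void *alloc_dma_page(int fd, kmem_handle_t *kh) {
        void *ua;
        kmem_sync_t ks;

        if (ioctl(fd, PCIDRIVER_IOC_MMAP_MODE, PCIDRIVER_MMAP_KMEM)) return NULL;

        kh->type = PCILIB_KMEM_TYPE_PAGE;                /* from pcilib_types.h */
        if (ioctl(fd, PCIDRIVER_IOC_KMEM_ALLOC, kh)) return NULL;

        ua = mmap(0, kh->size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (ua == MAP_FAILED) return NULL;

            /* page-type buffers get their bus address on the first sync, which the
             * driver now writes back to user space */
        ks.dir = PCIDRIVER_DMA_FROMDEVICE;
        ks.handle = *kh;
        if (!ioctl(fd, PCIDRIVER_IOC_KMEM_SYNC, &ks)) kh->pa = ks.handle.pa;

        return ua;
    }
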
+ 166 - 0
kmem.c

@@ -0,0 +1,166 @@
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "pcilib.h"
+#include "pci.h"
+#include "kmem.h"
+#include "error.h"
+
+pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type_t type, size_t nmemb, size_t size, size_t alignment, pcilib_kmem_use_t use, pcilib_kmem_flags_t flags) {
+    int ret;
+    int i;
+    void *addr;
+
+    kmem_handle_t kh = {0};
+    
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)malloc(sizeof(pcilib_kmem_list_t) + nmemb * sizeof(pcilib_kmem_addr_t));
+    if (!kbuf) {
+	pcilib_error("Memory allocation has failed");
+	return NULL;
+    }
+    
+    memset(kbuf, 0, sizeof(pcilib_kmem_list_t) + nmemb * sizeof(pcilib_kmem_addr_t));
+    
+
+    ret = ioctl( ctx->handle, PCIDRIVER_IOC_MMAP_MODE, PCIDRIVER_MMAP_KMEM );
+    if (ret) {
+	pcilib_error("PCIDRIVER_IOC_MMAP_MODE ioctl have failed");
+	return NULL;
+    }
+    
+    kh.type = type;
+    kh.size = size;
+    kh.align = alignment;
+    kh.use = use;
+
+    for ( i = 0; i < nmemb; i++) {
+        ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_ALLOC, &kh);
+	if (ret) {
+	    kbuf->buf.n_blocks = i;
+	    pcilib_free_kernel_memory(ctx, kbuf);
+	    pcilib_error("PCIDRIVER_IOC_KMEM_ALLOC ioctl have failed");
+	    return NULL;
+	}
+    
+	kbuf->buf.blocks[i].handle_id = kh.handle_id;
+	kbuf->buf.blocks[i].pa = kh.pa;
+	kbuf->buf.blocks[i].size = kh.size;
+	
+	if (!i) {
+	    if (kh.pa % alignment) printf("Alignment problem\n");
+	    else if (kh.pa & ctx->page_mask) printf("Mmap alignment problem\n");
+	}
+
+    	addr = mmap( 0, kh.size, PROT_WRITE | PROT_READ, MAP_SHARED, ctx->handle, 0 );
+	if ((!addr)||(addr == MAP_FAILED)) {
+	    kbuf->buf.n_blocks = i + 1;
+	    pcilib_free_kernel_memory(ctx, kbuf);
+	    pcilib_error("Failed to mmap allocated kernel memory");
+	    return NULL;
+	}
+
+	kbuf->buf.blocks[i].ua = addr;
+    }
+    
+    if (nmemb == 1) {
+	memcpy(&kbuf->buf.addr, &kbuf->buf.blocks[0], sizeof(pcilib_kmem_addr_t));
+    }
+    
+    kbuf->buf.n_blocks = nmemb;
+    
+    kbuf->prev = NULL;
+    kbuf->next = ctx->kmem_list;
+    if (ctx->kmem_list) ctx->kmem_list->prev = kbuf;
+    ctx->kmem_list = kbuf;
+
+    return (pcilib_kmem_handle_t*)kbuf;
+}
+
+void pcilib_free_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k) {
+    int ret, err = 0; 
+    int i;
+    kmem_handle_t kh = {0};
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+
+	// unlink the buffer from the allocation list if it is linked in
+    if (kbuf->next) kbuf->next->prev = kbuf->prev;
+    if (kbuf->prev) kbuf->prev->next = kbuf->next;
+    else if (ctx->kmem_list == kbuf) ctx->kmem_list = kbuf->next;
+
+    for (i = 0; i < kbuf->buf.n_blocks; i++) {
+        if (kbuf->buf.blocks[i].ua) munmap(kbuf->buf.blocks[i].ua, kbuf->buf.blocks[i].size);
+
+        kh.handle_id = kbuf->buf.blocks[i].handle_id;
+        kh.pa = kbuf->buf.blocks[i].pa;
+	ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_FREE, &kh);
+	if ((ret)&&(!err)) err = ret;
+    }
+    
+    free(kbuf);
+    
+    if (err) {
+	pcilib_error("PCIDRIVER_IOC_KMEM_FREE ioctl has failed");
+    }
+}
+
+int pcilib_sync_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k, pcilib_kmem_sync_direction_t dir) {
+    int i;
+    int ret;
+    kmem_sync_t ks = {0};
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    
+    ks.dir = dir;
+    
+    for (i = 0; i < kbuf->buf.n_blocks; i++) {
+        ks.handle.handle_id = kbuf->buf.blocks[i].handle_id;
+	ks.handle.pa = kbuf->buf.blocks[i].pa;
+	ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_SYNC, &ks);
+	if (ret) {
+	    pcilib_error("PCIDRIVER_IOC_KMEM_SYNC ioctl has failed");
+	    return PCILIB_ERROR_FAILED;
+	}
+	
+	if (!kbuf->buf.blocks[i].pa) {
+	    kbuf->buf.blocks[i].pa = ks.handle.pa;
+	}
+    }
+    
+    return 0;    
+}
+
+
+void *pcilib_kmem_get_ua(pcilib_t *ctx, pcilib_kmem_handle_t *k) {
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    return kbuf->buf.addr.ua;
+}
+
+uintptr_t pcilib_kmem_get_pa(pcilib_t *ctx, pcilib_kmem_handle_t *k) {
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    return kbuf->buf.addr.pa;
+}
+
+void *pcilib_kmem_get_block_ua(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block) {
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    return kbuf->buf.blocks[block].ua;
+}
+
+uintptr_t pcilib_kmem_get_block_pa(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block) {
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    return kbuf->buf.blocks[block].pa;
+}
+
+size_t pcilib_kmem_get_block_size(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block) {
+    pcilib_kmem_list_t *kbuf = (pcilib_kmem_list_t*)k;
+    return kbuf->buf.blocks[block].size;
+}
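
Taken together, these entry points form a simple allocate / sync / free lifecycle. A minimal usage sketch (not part of this change set), assuming a valid pcilib_t context, an illustrative 4 KB size and alignment, use-tag subtype 0, and the headers pcilib.h, pcilib_types.h, kmem.h, error.h plus <string.h>:

static int kmem_lifecycle_sketch(pcilib_t *ctx) {
    char host_copy[4096];

    /* allocate a single DMA-consistent buffer, 4 KB, page aligned */
    pcilib_kmem_handle_t *kbuf = pcilib_alloc_kernel_memory(ctx,
	    PCILIB_KMEM_TYPE_CONSISTENT, 1, 4096, 4096,
	    PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, 0), 0);
    if (!kbuf) return PCILIB_ERROR_FAILED;

    uintptr_t pa = pcilib_kmem_get_pa(ctx, kbuf);	/* bus address for the device */
    void *ua = pcilib_kmem_get_ua(ctx, kbuf);		/* user-space mapping */
    (void)pa;	/* in real code, pa is programmed into the device here */

    /* pull the data back to the CPU side before reading it */
    pcilib_sync_kernel_memory(ctx, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE);
    memcpy(host_copy, ua, sizeof(host_copy));

    pcilib_free_kernel_memory(ctx, kbuf);
    return 0;
}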

+ 47 - 0
kmem.h

@@ -0,0 +1,47 @@
+#ifndef _PCILIB_KMEM_H
+#define _PCILIB_KMEM_H
+
+#include "pcilib.h"
+
+typedef int pcilib_kmem_flags_t;
+
+typedef struct {
+    int handle_id;
+    uintptr_t pa;
+//    uintptr_t va;
+    void *ua;
+    size_t size;
+} pcilib_kmem_addr_t;
+
+/**
+ * single allocation (nmemb == 1) - both addr and blocks[0] are set, n_blocks = 1
+ * multiple allocation - addr is not set, blocks are set, n_blocks > 0
+ * sgmap allocation - addr contains ua, but pa's are set in blocks, n_blocks > 0
+ */
+typedef struct {
+    pcilib_kmem_addr_t addr;
+
+    size_t n_blocks;
+    pcilib_kmem_addr_t blocks[];
+} pcilib_kmem_buffer_t;
+
+typedef void pcilib_kmem_handle_t;
+
+
+typedef struct pcilib_kmem_list_s pcilib_kmem_list_t;
+struct pcilib_kmem_list_s {
+    pcilib_kmem_list_t *next, *prev;
+
+    pcilib_kmem_buffer_t buf;	// variable size, should be last item in struct
+};
+
+pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type_t type, size_t nmemb, size_t size, size_t alignment, pcilib_kmem_use_t use, pcilib_kmem_flags_t flags);
+void pcilib_free_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k);
+int pcilib_sync_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k, pcilib_kmem_sync_direction_t dir);
+void *pcilib_kmem_get_ua(pcilib_t *ctx, pcilib_kmem_handle_t *k);
+uintptr_t pcilib_kmem_get_pa(pcilib_t *ctx, pcilib_kmem_handle_t *k);
+void *pcilib_kmem_get_block_ua(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block);
+uintptr_t pcilib_kmem_get_block_pa(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block);
+size_t pcilib_kmem_get_block_size(pcilib_t *ctx, pcilib_kmem_handle_t *k, size_t block);
+
+#endif /* _PCILIB_KMEM_H */
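
The layout comment above is the reason for the per-block accessors: a multi-block allocation carries no single address, so callers walk blocks[] instead. A sketch under the same caveats as before, where write_descriptor() is a hypothetical stand-in for whatever consumes the bus addresses:

/* sketch only: allocate 8 page-sized blocks and feed their bus addresses
 * to a hypothetical descriptor writer */
static int fill_descriptors_sketch(pcilib_t *ctx) {
    size_t i;
    pcilib_kmem_handle_t *k = pcilib_alloc_kernel_memory(ctx,
	    PCILIB_KMEM_TYPE_PAGE, 8, 4096, 4096,
	    PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, 1), 0);
    if (!k) return PCILIB_ERROR_FAILED;

    for (i = 0; i < 8; i++)
	write_descriptor(i, pcilib_kmem_get_block_pa(ctx, k, i),
		pcilib_kmem_get_block_size(ctx, k, i));

    pcilib_free_kernel_memory(ctx, k);
    return 0;
}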

+ 21 - 90
pci.c

@@ -1,4 +1,5 @@
 #define _PCILIB_PCI_C
+//#define PCILIB_FILE_IO
 #define _POSIX_C_SOURCE 199309L
 
 #include <stdio.h>
@@ -18,6 +19,7 @@
 #include "kernel.h"
 #include "tools.h"
 
+#include "dma.h"
 #include "pci.h"
 #include "ipecamera/model.h"
 #include "error.h"
@@ -25,37 +27,7 @@
 #define BIT_MASK(bits) ((1l << (bits)) - 1)
 
 
-//#define PCILIB_FILE_IO
-
-struct pcilib_s {
-    int handle;
-    
-    uintptr_t page_mask;
-    pcilib_board_info_t board_info;
-    pcilib_dma_info_t dma_info;
-    pcilib_model_t model;
-    
-    char *bar_space[PCILIB_MAX_BANKS];
-
-    int reg_bar_mapped;
-    pcilib_bar_t reg_bar;
-//    char *reg_space;
-
-    int data_bar_mapped;
-    pcilib_bar_t data_bar;
-//    char *data_space;
-//    size_t data_size;
-    
 
-    pcilib_model_description_t *model_info;
-    
-    pcilib_dma_context_t *dma_ctx;
-    pcilib_context_t *event_ctx;
-    
-#ifdef PCILIB_FILE_IO
-    int file_io_handle;
-#endif /* PCILIB_FILE_IO */
-};
 
 static void pcilib_print_error(const char *msg, ...) {
     va_list va;
@@ -124,6 +96,19 @@ const pcilib_board_info_t *pcilib_get_board_info(pcilib_t *ctx) {
     return &ctx->board_info;
 }
 
+
+int pcilib_wait_irq(pcilib_t *ctx, pcilib_irq_source_t source, unsigned long timeout) {
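+    /* note: the timeout argument is not used yet; only the interrupt source is passed to the driver */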
+    int err;
+
+    err = ioctl(ctx->handle, PCIDRIVER_IOC_WAITI, source);
+    if (err) {
+	pcilib_error("PCIDRIVER_IOC_WAITI ioctl has failed");
+	return PCILIB_ERROR_FAILED;
+    }
+    
+    return 0;
+}
+
 pcilib_context_t *pcilib_get_implementation_context(pcilib_t *ctx) {
     return ctx->event_ctx;
 }
@@ -270,44 +255,6 @@ int pcilib_write(pcilib_t *ctx, pcilib_bar_t bar, uintptr_t addr, size_t size, v
     pcilib_unmap_bar(ctx, bar, data);    
 }
 
-pcilib_dma_t pcilib_find_dma_by_addr(pcilib_t *ctx, pcilib_dma_direction_t direction, pcilib_dma_addr_t dma) {
-    pcilib_dma_t i;
-
-    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
-    if (!info) {
-	pcilib_error("DMA Engine is not configured in the current model");
-	return PCILIB_ERROR_NOTSUPPORTED;
-    }
-    
-    for (i = 0; info->engines[i]; i++) {
-	if ((info->engines[i]->addr == dma)&&((info->engines[i]->direction&direction)==direction)) break;
-    }
-    
-    if (info->engines[i]) return i;
-    return PCILIB_DMA_INVALID;
-}
-
-int pcilib_read_dma(pcilib_t *ctx, pcilib_dma_t dma, size_t size, void *buf) {
-    const pcilib_dma_info_t *info =  pcilib_get_dma_info(ctx);
-
-    if (!ctx->model_info->dma_api) {
-	pcilib_error("DMA Engine is not configured in the current model");
-	return PCILIB_ERROR_NOTSUPPORTED;
-    }
-    
-    if (!ctx->model_info->dma_api->read) {
-	pcilib_error("The DMA read is not supported by configured DMA engine");
-	return PCILIB_ERROR_NOTSUPPORTED;
-    }
-    
-    if (!info->engines[dma]) {
-	pcilib_error("The DMA engine (%i) is not supported by device", dma);
-	return PCILIB_ERROR_OUTOFRANGE;
-    }
-    
-    return ctx->model_info->dma_api->read(ctx->dma_ctx, dma, size, buf);
-}
-
 
 pcilib_register_bank_t pcilib_find_bank_by_addr(pcilib_t *ctx, pcilib_register_bank_addr_t bank) {
     pcilib_register_bank_t i;
@@ -393,7 +340,7 @@ pcilib_event_t pcilib_find_event(pcilib_t *ctx, const char *event) {
 }
 
 
-static int pcilib_map_register_space(pcilib_t *ctx) {
+int pcilib_map_register_space(pcilib_t *ctx) {
     int err;
     pcilib_register_bank_t i;
     
@@ -441,7 +388,7 @@ static int pcilib_map_register_space(pcilib_t *ctx) {
     return 0;
 }
 
-static int pcilib_map_data_space(pcilib_t *ctx, uintptr_t addr) {
+int pcilib_map_data_space(pcilib_t *ctx, uintptr_t addr) {
     int err;
     pcilib_bar_t i;
     
@@ -541,26 +488,6 @@ char  *pcilib_resolve_register_address(pcilib_t *ctx, pcilib_bar_t bar, uintptr_
     return NULL;
 }
 
-const pcilib_dma_info_t *pcilib_get_dma_info(pcilib_t *ctx) {
-    if (!ctx->dma_ctx) {
-	pcilib_model_t model = pcilib_get_model(ctx);
-	pcilib_dma_api_description_t *api = pcilib_model[model].dma_api;
-	
-	if ((api)&&(api->init)) {
-	    pcilib_map_register_space(ctx);
-	    ctx->dma_ctx = api->init(ctx);
-	}
-	
-	if (!ctx->dma_ctx) return NULL;
-    }
-    
-    return &ctx->dma_info;
-}
-
-int pcilib_set_dma_engine_description(pcilib_t *ctx, pcilib_dma_t engine, pcilib_dma_engine_description_t *desc) {
-    ctx->dma_info.engines[engine] = desc;
-}
-
 char *pcilib_resolve_data_space(pcilib_t *ctx, uintptr_t addr, size_t *size) {
     int err;
     
@@ -586,6 +513,10 @@ void pcilib_close(pcilib_t *ctx) {
     
         if ((eapi)&&(eapi->free)) eapi->free(ctx->event_ctx);
         if ((dapi)&&(dapi->free)) dapi->free(ctx->dma_ctx);
+	
+	while (ctx->kmem_list) {
+	    pcilib_free_kernel_memory(ctx, ctx->kmem_list);
+	}
 
 	for (i = 0; i < PCILIB_MAX_BANKS; i++) {
 	    if (ctx->bar_space[i]) {

+ 39 - 4
pci.h

@@ -1,17 +1,45 @@
 #ifndef _PCITOOL_PCI_H
 #define _PCITOOL_PCI_H
 
+#define PCILIB_DMA_TIMEOUT 10000		/**< us */
 #define PCILIB_REGISTER_TIMEOUT 10000		/**< us */
 
 #include "driver/pciDriver.h"
-
+#include "pcilib_types.h"
 
 #include "pcilib.h"
+#include "kmem.h"
 
-int pcilib_set_dma_engine_description(pcilib_t *ctx, pcilib_dma_t engine, pcilib_dma_engine_description_t *desc);
+struct pcilib_s {
+    int handle;
+    
+    uintptr_t page_mask;
+    pcilib_board_info_t board_info;
+    pcilib_dma_info_t dma_info;
+    pcilib_model_t model;
+    
+    char *bar_space[PCILIB_MAX_BANKS];
 
-const pcilib_board_info_t *pcilib_get_board_info(pcilib_t *ctx);
-const pcilib_dma_info_t *pcilib_get_dma_info(pcilib_t *ctx);
+    int reg_bar_mapped;
+    pcilib_bar_t reg_bar;
+//    char *reg_space;
+
+    int data_bar_mapped;
+    pcilib_bar_t data_bar;
+//    char *data_space;
+//    size_t data_size;
+    
+    pcilib_kmem_list_t *kmem_list;
+
+    pcilib_model_description_t *model_info;
+    
+    pcilib_dma_context_t *dma_ctx;
+    pcilib_context_t *event_ctx;
+    
+#ifdef PCILIB_FILE_IO
+    int file_io_handle;
+#endif /* PCILIB_FILE_IO */
+};
 
 #ifdef _PCILIB_PCI_C
 # include "ipecamera/model.h"
@@ -36,4 +64,11 @@ extern void (*pcilib_warning)(const char *msg, ...);
 extern pcilib_protocol_description_t pcilib_protocol[];
 #endif /* _PCILIB_PCI_C */
 
+const pcilib_board_info_t *pcilib_get_board_info(pcilib_t *ctx);
+const pcilib_dma_info_t *pcilib_get_dma_info(pcilib_t *ctx);
+
+int pcilib_map_register_space(pcilib_t *ctx);
+int pcilib_map_data_space(pcilib_t *ctx, uintptr_t addr);
+
+
 #endif /* _PCITOOL_PCI_H */

+ 24 - 9
pcilib.h

@@ -21,6 +21,11 @@ typedef struct pcilib_s pcilib_t;
 typedef void pcilib_context_t;
 typedef void pcilib_dma_context_t;
 
+typedef struct pcilib_dma_api_description_s pcilib_dma_api_description_t;
+
+
+typedef unsigned long pcilib_irq_source_t;
+
 typedef uint8_t pcilib_bar_t;			/**< Type holding the PCI Bar number */
 typedef uint8_t pcilib_register_t;		/**< Type holding the register ID within the Bank */
 typedef uint8_t pcilib_register_addr_t;		/**< Type holding the register ID within the Bank */
@@ -32,7 +37,6 @@ typedef uint64_t pcilib_event_id_t;
 typedef uint8_t pcilib_dma_addr_t;
 typedef uint8_t pcilib_dma_t;
 
-
 typedef uint32_t pcilib_event_t;
 
 typedef enum {
@@ -62,6 +66,12 @@ typedef enum {
     PCILIB_EVENT_DATA
 } pcilib_event_data_type_t;
 
+typedef enum {
+    PCILIB_DMA_FLAGS_DEFAULT = 0,
+    PCILIB_DMA_FLAG_EOP = 1
+} pcilib_dma_flags_t;
+
+
 #define PCILIB_BAR_DETECT 		((pcilib_bar_t)-1)
 #define PCILIB_BAR_INVALID		((pcilib_bar_t)-1)
 #define PCILIB_BAR0			0
@@ -82,6 +92,8 @@ typedef enum {
 #define PCILIB_EVENTS_ALL		((pcilib_event_t)-1)
 #define PCILIB_EVENT_INVALID		((pcilib_event_t)-1)
 #define PCILIB_EVENT_ID_INVALID		0
+#define PCILIB_TIMEOUT_INFINITE		((size_t)-1)
+#define PCILIB_TIMEOUT_IMMEDIATE	0
 
 typedef struct {
     pcilib_register_bank_addr_t addr;
@@ -160,14 +172,6 @@ typedef struct {
 
 typedef int (*pcilib_callback_t)(pcilib_event_t event, pcilib_event_id_t event_id, void *user);
 
-typedef struct {
-    pcilib_dma_context_t *(*init)(pcilib_t *ctx);
-    void (*free)(pcilib_dma_context_t *ctx);
-
-    int (*read)(pcilib_dma_context_t *ctx, pcilib_dma_t dma, size_t size, void *buf);
-//    int (*write)(pcilib_dma_context_t *ctx);
-} pcilib_dma_api_description_t;
-
 typedef struct {
     pcilib_context_t *(*init)(pcilib_t *ctx);
     void (*free)(pcilib_context_t *ctx);
@@ -219,10 +223,21 @@ pcilib_register_bank_t pcilib_find_bank_by_name(pcilib_t *ctx, const char *bankn
 pcilib_register_bank_t pcilib_find_bank(pcilib_t *ctx, const char *bank);
 pcilib_register_t pcilib_find_register(pcilib_t *ctx, const char *bank, const char *reg);
 pcilib_event_t pcilib_find_event(pcilib_t *ctx, const char *event);
+pcilib_dma_t pcilib_find_dma_by_addr(pcilib_t *ctx, pcilib_dma_direction_t direction, pcilib_dma_addr_t dma);
+
 
 int pcilib_read(pcilib_t *ctx, pcilib_bar_t bar, uintptr_t addr, size_t size, void *buf);
 int pcilib_write(pcilib_t *ctx, pcilib_bar_t bar, uintptr_t addr, size_t size, void *buf);
 
+typedef int (*pcilib_dma_callback_t)(void *ctx, pcilib_dma_flags_t flags, size_t bufsize, void *buf);
+
+int pcilib_skip_dma(pcilib_t *ctx, pcilib_dma_t dma);
+size_t pcilib_stream_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+size_t pcilib_push_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, size_t timeout, void *buf);
+size_t pcilib_read_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, void *buf);
+size_t pcilib_write_dma(pcilib_t *ctx, pcilib_dma_t dma, uintptr_t addr, size_t size, void *buf);
+double pcilib_benchmark_dma(pcilib_t *ctx, pcilib_dma_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
+
 int pcilib_read_register_space(pcilib_t *ctx, const char *bank, pcilib_register_addr_t addr, size_t n, pcilib_register_value_t *buf);
 int pcilib_write_register_space(pcilib_t *ctx, const char *bank, pcilib_register_addr_t addr, size_t n, pcilib_register_value_t *buf);
 int pcilib_read_register_by_id(pcilib_t *ctx, pcilib_register_t reg, pcilib_register_value_t *value);

+ 25 - 0
pcilib_types.h

@@ -0,0 +1,25 @@
+#ifndef _PCILIB_TYPES_H
+#define _PCILIB_TYPES_H
+
+typedef enum {
+    PCILIB_KMEM_TYPE_CONSISTENT = 0,
+    PCILIB_KMEM_TYPE_PAGE,
+} pcilib_kmem_type_t;
+
+typedef enum {
+    PCILIB_KMEM_USE_DMA = 1,
+} pcilib_kmem_use_t;
+
+typedef enum {
+    PCILIB_KMEM_SYNC_TODEVICE = 1,
+    PCILIB_KMEM_SYNC_FROMDEVICE = 2
+} pcilib_kmem_sync_direction_t;
+
+
+#define PCILIB_KMEM_USE(type, subtype) (((type) << 16)|(subtype))
+
+
+//pcilib_alloc_kmem_buffer(pcilib_t *ctx, size_t size, size_t alignment)
+
+
+#endif /* _PCILIB_TYPES_H */
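
The PCILIB_KMEM_USE() macro appears intended to pack a use class and a subtype into one id so that kernel buffers can be tagged with their consumer. For example (subtype 2 chosen arbitrarily):

/* illustration only: tag buffers used by DMA engine 2 */
pcilib_kmem_use_t use = PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA, 2);	/* (1 << 16) | 2 == 0x10002 */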

+ 2 - 0
tools.h

@@ -6,6 +6,8 @@
 
 #include "pci.h"
 
+#define min2(a, b) (((a)<(b))?(a):(b))
+
 int pcilib_isnumber(const char *str);
 int pcilib_isxnumber(const char *str);