Browse Source

Initial implementation of IPEDMA, dummy driver for KAPTURE, start of API changes

Suren A. Chilingaryan 9 years ago
parent
commit
a640c40c6b

+ 2 - 0
.bzrignore

@@ -23,3 +23,5 @@ apps/pio_test
 apps/compare_to_value
 apps/heb_strip_bad_values
 *.out
+apps/check_counter
+apps/lorenzo_ipedma_test

+ 4 - 3
CMakeLists.txt

@@ -1,7 +1,7 @@
 project(pcitool)
 
-set(PCILIB_VERSION "0.0.1")
-set(PCILIB_ABI_VERSION "0")
+set(PCILIB_VERSION "0.1.0")
+set(PCILIB_ABI_VERSION "1")
 
 cmake_minimum_required(VERSION 2.6)
 
@@ -24,6 +24,7 @@ add_definitions("-fPIC --std=c99 -Wall -O2")
 
 add_subdirectory(dma)
 add_subdirectory(ipecamera)
+add_subdirectory(kapture)
 add_subdirectory(pcitool)
 add_subdirectory(apps)
 
@@ -37,7 +38,7 @@ link_directories(
 )
 
 add_library(pcilib SHARED pci.c register.c kmem.c irq.c dma.c event.c default.c tools.c error.c) 
-target_link_libraries(pcilib dma ipecamera ${CMAKE_THREAD_LIBS_INIT} ${UFODECODE_LIBRARIES} )
+target_link_libraries(pcilib dma ipecamera kapture ${CMAKE_THREAD_LIBS_INIT} ${UFODECODE_LIBRARIES} )
 add_dependencies(pcilib dma ipecamera)
 
 set_target_properties(pcilib PROPERTIES

+ 4 - 0
apps/CMakeLists.txt

@@ -7,9 +7,13 @@ link_directories(${UFODECODE_LIBRARY_DIRS})
 add_executable(xilinx xilinx.c)
 target_link_libraries(xilinx pcilib rt)
 
+add_executable(lorenzo_ipedma_test lorenzo_ipedma_test.c)
+target_link_libraries(lorenzo_ipedma_test pcilib rt)
+
 add_executable(pio_test pio_test.c)
 target_link_libraries(pio_test pcilib rt)
 
 add_executable(compare_to_value compare_to_value.c)
 
 add_executable(heb_strip_bad_values heb_strip_bad_values.c)
+add_executable(check_counter check_counter.c)

+ 45 - 0
apps/check_counter.c

@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/*
+ * Verifies that a binary file contains a run of consecutive 32-bit counter
+ * values. The first dword of the file seeds the expected value; every later
+ * dword must be exactly one larger than its predecessor.
+ *
+ * Usage: check_counter <file-to-check>
+ * Exit status: 0 if the whole file is consistent (or no file was given),
+ *              1 if the file cannot be opened or a mismatch is found.
+ */
+int main(int argc, char *argv[]) {
+    int block = 0;		/* number of 4 KB chunks processed so far */
+    uint32_t value = 0;		/* last counter value seen */
+    uint32_t buf[1024];
+
+    if (argc < 2) {
+	printf("Usage:\n\t\t%s <file-to-check>\n", argv[0]);
+	exit(0);
+    }
+
+    /* The input is raw binary data, so request binary mode explicitly */
+    FILE *f = fopen(argv[1], "rb");
+    if (!f) {
+	printf("Failed to open file %s\n", argv[1]);
+	exit(1);
+    }
+
+    for (;;) {
+	int i = 0;
+	int n = (int)fread(buf, 4, 1024, f);
+
+	/* Nothing read: end of file or read error. Testing only feof() here
+	   would loop forever on a persistent read error. */
+	if (n <= 0) break;
+
+	/* The very first dword only seeds the sequence */
+	if (!block) {
+	    value = buf[0];
+	    i = 1;
+	}
+
+	for (; i < n; i++) {
+	    if (buf[i] != ++value) {
+		/* %lx needs an unsigned long argument, so compute the byte offset in that type */
+		printf("Pos %lx (Block %i, dword %i) expected %x, but got %x\n", (unsigned long)block * 4096 + (unsigned long)i * 4, block, i, value, buf[i]);
+		exit(1);
+	    }
+	}
+
+	block++;
+    }
+
+    fclose(f);
+
+    printf("Checked %i blocks. All is fine\n", block);
+    return 0;
+}

+ 749 - 0
apps/lorenzo_ipedma_test.c

@@ -0,0 +1,749 @@
+#define _POSIX_C_SOURCE 199309L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <time.h>
+#include <sched.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <arpa/inet.h>
+#include <sched.h>
+#include <errno.h>
+
+#include "pcilib.h"
+#include "irq.h"
+#include "kmem.h"
+
+//#include <sys/ipc.h>
+//#include <sys/shm.h>
+
+
+// Compile-time configuration of the test: device node, BAR, kernel-memory
+// use tags, buffer geometry and optional features.
+#define DEVICE "/dev/fpga0"
+
+#define BAR PCILIB_BAR0
+#define USE_RING PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
+#define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 2)
+//#define STATIC_REGION 0x80000000 //  to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters
+
+#define BUFFERS         128
+#define ITERATIONS      1000
+// Parenthesized so that the macro expands as a single value inside larger expressions
+#define DESC_THRESHOLD  (BUFFERS/8) // Lorenzo: after how many desc the FPGA must update the "written descriptor counter" in PC mem
+                                    // if set to 0, the update only happens when INT is received
+
+#define HUGE_PAGE       1           // number of pages per huge page
+#define TLP_SIZE        32          // TLP SIZE = 64 for 256B payload, 32 for 128B payload
+#define PAGE_SIZE       4096        // other values are not supported in the kernel
+
+//#define USE_64                    // Lorenzo: use 64bit addressing
+
+//#define DUAL_CORE                 // Lorenzo: DUAL Core
+
+//#define SHARED_MEMORY               // Lorenzo: Test for fast GUI
+
+#define CHECK_READY                 // Lorenzo: Check if PCI-Express is ready by reading 0x0
+#define CHECK_RESULTS               // Lorenzo: Check if data received is ok (only for counter!)
+//#define PRINT_RESULTS               // Lorenzo: Save the received data in "data.out"
+//#define EXIT_ON_EMPTY               // Lorenzo: Exit if an "empty_detected" signal is received
+
+//#define HEB                       // Lorenzo: Testing HEB
+//#define SWITCH_GENERATOR          // Lorenzo: Testing HEB -> Turn data gen on/off
+
+//#define TEST_DDR                    // Lorenzo: Testing DDR
+
+#define TIMEOUT         1000000
+
+
+
+/* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help;
+   overall we have only 3 checks on average. Check ready seems to be not needed and adds quite
+   a lot of extra time */
+
+//#define USE_IRQ
+//#define REALTIME
+//#define ADD_DELAYS
+
+
+#define FPGA_CLOCK 250 // Lorenzo: in MHz !
+
+
+
+// Register access helpers. They expect `bar` (mapped BAR base) and `offset`
+// to be in scope at the point of use. Arguments are parenthesized and the
+// access is volatile-qualified so the compiler cannot merge or drop register
+// reads and writes. The brace form is kept because existing call sites use
+// the macros both with and without a trailing semicolon.
+//#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
+//#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
+#define WR(addr, value) { *(volatile uint32_t*)(bar + (addr) + offset) = (value); }
+#define RD(addr, value) { (value) = *(volatile uint32_t*)(bar + (addr) + offset); }
+
+// **************************************************************************************
+// Progress bar
+// Prints "<percent>% [====    ]" for step x out of n, using a bar that is
+// w characters wide and redrawn roughly r times over the whole run. The
+// cursor is moved back afterwards so the next call overwrites the same line.
+   static inline void loadBar(int x, int n, int r, int w)
+   {
+    float ratio;
+    int   filled;
+    int   col;
+
+    // Skip the steps that fall between two redraws.
+    if ( x % (n/r +1) != 0 )
+        return;
+
+    // Fraction of the work already done and the matching number of '=' cells.
+    ratio  = x/(float)n;
+    filled = ratio * w;
+
+    // Percentage first, then the opening bracket of the bar.
+    printf("%3d%% [", (int)(ratio*100) );
+
+    // Completed part of the bar.
+    col = 0;
+    while (col < filled) {
+        printf("=");
+        col++;
+    }
+
+    // Remaining part of the bar.
+    col = filled;
+    while (col < w) {
+        printf(" ");
+        col++;
+    }
+
+    // ANSI control codes: go back to the previous line and clear it.
+    printf("]\n\033[F\033[J");
+   }
+// **************************************************************************************
+
+
+   /* Prints a printf-style message followed by a newline to stdout and
+      terminates the process with exit(-1). Never returns. */
+   static void fail(const char *msg, ...) {
+    va_list args;
+
+    va_start(args, msg);
+    vprintf(msg, args);
+    va_end(args);
+
+    printf("\n");
+    exit(-1);
+}
+
+/*
+ * Busy-waits for at least `ns` nanoseconds by polling CLOCK_REALTIME.
+ * The function does not yield the CPU; it spins until the deadline computed
+ * at entry has passed.
+ */
+void hpsleep(size_t ns) {
+    struct timespec deadline, now;
+
+    clock_gettime(CLOCK_REALTIME, &deadline);
+
+    /* Split the delay so that tv_nsec never accumulates more than one carry */
+    deadline.tv_sec += ns / 1000000000;
+    deadline.tv_nsec += ns % 1000000000;
+    if (deadline.tv_nsec > 999999999) {
+        /* Carry one second; tv_nsec must stay within [0, 999999999] */
+        deadline.tv_sec += 1;
+        deadline.tv_nsec -= 1000000000;
+    }
+
+    do {
+        clock_gettime(CLOCK_REALTIME, &now);
+    } while ((deadline.tv_sec > now.tv_sec)||((deadline.tv_sec == now.tv_sec)&&(deadline.tv_nsec > now.tv_nsec)));
+}
+
+
+// **************************************************************************************
+// Stand-alone DMA throughput test.
+//
+// Opens DEVICE, maps BAR, allocates one descriptor block and BUFFERS data
+// pages of kernel memory, programs the DMA registers through WR()/RD(),
+// then polls the descriptor block until ITERATIONS * BUFFERS pages have
+// been received. Afterwards it prints the measured throughput and, with
+// CHECK_RESULTS defined, verifies that the received dwords form an
+// increasing counter (mismatches are written to "error_log.txt").
+//
+// Returns 0 at the end of a run. Early failures terminate the process via
+// exit() or fail().
+int main() {
+
+
+
+    int err;
+    long i, j, k;
+    int mem_diff;
+    pcilib_t *pci;
+    pcilib_kmem_handle_t *kdesc;
+    pcilib_kmem_handle_t *kbuf;
+    struct timeval start, end;
+    size_t run_time, size;
+    long long int size_mb;
+    void* volatile bar;
+    uintptr_t bus_addr[BUFFERS];
+    uintptr_t kdesc_bus;
+    volatile uint32_t *desc;
+    typedef volatile uint32_t *Tbuf;
+    Tbuf ptr[BUFFERS];
+    int switch_generator = 0;
+
+    float performance, perf_counter;
+    pcilib_bar_t bar_tmp = BAR;
+    uintptr_t offset = 0;
+
+    unsigned int temp;
+    int iterations_completed, buffers_filled;
+
+
+//    int shmid;
+
+
+    printf("\n\n**** **** **** KIT-DMA TEST **** **** ****\n\n");
+
+    size = ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE;
+    size_mb = ITERATIONS * BUFFERS * HUGE_PAGE * 4 / 1024;
+    printf("Total size of memory buffer: \t %.3lf GBytes\n", (float)size_mb/1024 );
+    printf("Using %d Buffers with %d iterations\n\n", BUFFERS, ITERATIONS );
+
+#ifdef ADD_DELAYS
+    long rpt = 0, rpt2 = 0;
+    size_t best_time;
+    best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024);
+#endif /* ADD_DELAYS */
+
+
+    pcilib_kmem_flags_t flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE/*|PCILIB_KMEM_FLAG_REUSE*/; // Lorenzo: if REUSE = 1, the re-allocation fails!
+    pcilib_kmem_flags_t free_flags = PCILIB_KMEM_FLAG_HARDWARE/*|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_REUSE*/;
+    pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE;
+
+    pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
+    if (!pci) fail("pcilib_open");
+
+    bar = pcilib_map_bar(pci, BAR);
+    if (!bar) {
+        pcilib_close(pci);
+        fail("map bar");
+    }
+
+    pcilib_detect_address(pci, &bar_tmp, &offset, 1);
+
+    pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0);
+    pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT);
+
+    pcilib_clean_kernel_memory(pci, USE, clean_flags);
+    pcilib_clean_kernel_memory(pci, USE_RING, clean_flags);
+
+    kdesc = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, USE_RING, flags);
+    // The descriptor block is dereferenced right below, so a failed allocation must stop the test here
+    if (!kdesc) {
+        pcilib_close(pci);
+        fail("KMem allocation of the descriptor block failed");
+    }
+    kdesc_bus = pcilib_kmem_get_block_ba(pci, kdesc, 0);
+    desc = (uint32_t*)pcilib_kmem_get_block_ua(pci, kdesc, 0);
+    memset((void*)desc, 0, 5*sizeof(uint32_t));
+
+#ifdef REALTIME
+    pid_t pid;
+    struct sched_param sched = {0};
+
+    pid = getpid();
+    sched.sched_priority = sched_get_priority_min(SCHED_FIFO);
+    if (sched_setscheduler(pid, SCHED_FIFO, &sched))
+        printf("Warning: not able to get real-time priority\n");
+#endif /* REALTIME */
+
+    // ******************************************************************
+    // ****      MEM: check 4k boundary                             *****
+    // ******************************************************************
+
+    do  {
+        printf("* Allocating KMem, ");
+#ifdef STATIC_REGION
+        kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, flags);
+#else
+        kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, flags);
+#endif
+
+        if (!kbuf) {
+            printf("KMem allocation failed\n");
+            exit(0);
+        }
+
+        // Pointers for Virtualized Mem
+        for (j = 0; j < BUFFERS; j++) {
+            ptr[j] = (volatile uint32_t*)pcilib_kmem_get_block_ua(pci, kbuf, j);
+            memset((ptr[j]), 0, HUGE_PAGE * PAGE_SIZE);
+        }
+
+        err = 0;
+
+        // Check if HW addresses satisfy 4k boundary condition, if not -> free (!!) and reallocate memory
+        printf("4k boundary test: ");
+        for (j = 0; j < BUFFERS; j++) {
+            temp = (((unsigned int)pcilib_kmem_get_block_ba(pci, kbuf, j)) % 4096);
+            //printf("%u", temp);
+            if (temp  != 0) {
+                err = 1;
+            }
+        }
+        if (err == 1) {
+            // NOTE(review): USE_RING is cleaned here while kdesc/desc stay in use
+            // afterwards, and kbuf is freed after its use tag was cleaned - confirm
+            // against the pcilib kmem semantics that this sequence is safe.
+            pcilib_clean_kernel_memory(pci, USE, clean_flags);
+            pcilib_clean_kernel_memory(pci, USE_RING, clean_flags);
+            pcilib_free_kernel_memory(pci, kbuf,  free_flags);
+            printf("failed \xE2\x9C\x98\n");
+        }
+        else printf("passed \xE2\x9C\x93\n");
+
+    } while (err == 1);
+
+
+    // ******************************************************************
+    // ****      Allocate RAM buffer Memory                         *****
+    // ******************************************************************
+
+    FILE * Output;
+    FILE * error_log;
+
+#ifdef CHECK_RESULTS
+
+    uint32_t *temp_data[ITERATIONS][BUFFERS];
+
+    for (j=0; j < ITERATIONS; j++) {
+        for (i=0; i < BUFFERS; i++) {
+            temp_data[j][i] = (uint32_t *)malloc(HUGE_PAGE*PAGE_SIZE);
+            if (temp_data[j][i] == 0) {
+                printf("******* Error: could not allocate memory! ********\n");
+                exit(0);
+            }
+            memset((void*)(temp_data[j][i]), 0, HUGE_PAGE * PAGE_SIZE);
+        }
+    }
+#endif
+
+#ifdef SHARED_MEMORY
+    // give your shared memory an id, anything will do
+    key_t key = 123456;
+    char *shared_memory;
+
+    // Setup shared memory, 11 is the size
+/*    if ((shmid = shmget(key, HUGE_PAGE*PAGE_SIZE, IPC_CREAT | 0666)) < 0)
+    {
+      printf("Error getting shared memory id");
+      exit(1);
+    }
+
+    // Attached shared memory
+    if ((shared_memory = shmat(shmid, NULL, 0)) == (char *) -1)
+    {
+      printf("Error attaching shared memory id");
+      exit(1);
+    }
+    printf("* Shared memory created... Id:\t %d\n", key);
+    //////////////// SHARED MEMORY TEST */
+#endif
+
+    // Truncate the output files of a previous run. fopen() may fail (e.g. in a
+    // read-only directory) and fclose(NULL) is undefined, so check each handle.
+    Output = fopen ("data.out", "w");
+    if (Output) fclose(Output);
+
+    error_log = fopen ("error_log.txt", "w");
+    if (error_log) fclose(error_log);
+
+   // *************************************
+    Output = fopen("data.txt", "w");
+    if (Output) fclose(Output);
+
+    // ******************************************************************
+    // ****      PCIe TEST                                          *****
+    // ******************************************************************
+
+    // Reset DMA
+    printf("* DMA: Reset...\n");
+    WR(0x00, 0x1);
+    usleep(100000);
+    WR(0x00, 0x0);
+    usleep(100000);
+
+#ifdef CHECK_READY
+    printf("* PCIe: Testing...");
+    RD(0x0, err);
+    if (err != 335746816) {
+        printf("\xE2\x9C\x98\n PCIe not ready!\n");
+        exit(0);
+    } else {
+        printf("\xE2\x9C\x93 \n");
+    }
+#endif
+
+
+    // ******************************************************************
+    // ****      DMA CONFIGURATION                                  *****
+    // ******************************************************************
+
+
+    printf("* DMA: Start Data Generator...\n");
+    WR(0x04, 0x10) // Start data generator
+
+    // NOTE(review): the message announces 55aa55aa but the value written is 0xbeef - confirm which is intended
+    printf("* DMA: Send Data Fill Pattern 55aa55aa\n");
+    WR(0x14, 0xbeef);
+
+    printf("* DMA: Send Data Amount\n");
+#ifdef DUAL_CORE
+    WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))/2);
+#else
+    WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE))));
+#endif
+
+    printf("* DMA: Running mode: ");
+#ifdef USE_64
+    if (TLP_SIZE == 64)
+    {
+        WR(0x0C, 0x80040);
+        printf ("64bit - 256B Payload\n");
+    }
+    else if (TLP_SIZE == 32)
+    {
+        WR(0x0C, 0x80020);
+        printf ("64bit - 128B Payload\n");
+    }
+#else
+    if (TLP_SIZE == 64)
+    {
+        WR(0x0C, 0x0040);
+        printf ("32bit - 256B Payload\n");
+    }
+    else if (TLP_SIZE == 32)
+    {
+        WR(0x0C, 0x0020);
+        printf ("32bit - 128B Payload\n");
+    }
+#endif
+
+    printf("* DMA: Reset Desc Memory...\n");
+    WR(0x5C, 0x00); // RST Desc Memory
+
+    //printf("Writing SW Read Descriptor\n");
+    WR(0x58, BUFFERS-1);
+    //WR(0x58, 0x01);
+
+    //printf("Writing the Descriptor Threshold\n");
+    WR(0x60, DESC_THRESHOLD);
+
+    //printf("Writing HW write Descriptor Address: %lx\n", kdesc_bus);
+    WR(0x54, kdesc_bus);
+    usleep(100000);
+
+    printf("* DMA: Writing Descriptors\n");
+    for (j = 0; j < BUFFERS; j++ ) {
+        bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j);
+        // LEAVE THIS DELAY???!?!?!?!
+        usleep(1000);
+        printf("Writing descriptor num. %ld: \t %08lx \r", j, bus_addr[j]);
+        WR(0x50, bus_addr[j]);
+    }
+
+    // ******************************************************************
+    // ****      HEB CONFIGURATION                                  *****
+    // ******************************************************************
+#ifdef HEB
+
+
+    printf("* DDR REGISTERS: AXI_BUF_SIZE \n");
+    WR(0x9130, 0x1000);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x00000001);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x00000004);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x00000000);
+
+    usleep(100000);
+
+    printf("* HEB: Writing Total Orbit Num\n");
+    WR(0x9020, 0x2000);
+
+    printf("* HEB: Orbit Skip Num h9028\n");
+    WR(0x9028, 0x4);
+
+    //printf("* HEB: LVDS_DELAY h9080\n");
+    //WR(0x9080, 0x10101010);
+
+    //printf("* HEB: Delay ADCs \n");
+    //WR(0x9088, 0x001);
+    //WR(0x9090, 0x001);
+    //WR(0x9094, 0x001);
+    //WR(0x9098, 0x001);
+
+    //printf("* HEB: Delay TH \n");
+    //WR(0x90a0, 0x005);
+
+    //printf("* HEB: Delay_FPGA_reg \n");
+    //WR(0x90a8, 0x006);
+
+    //printf("* HEB: Control \n");
+    //WR(0x9040, 0x40000000);
+
+    //usleep(1000000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x40000bf0);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x400003f0);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x480007F0);
+
+    usleep(100000);
+
+    printf("* HEB: Control \n");
+    WR(0x9040, 0x48000FF0);
+
+
+#endif
+
+    // ******************************************************************
+    // ****      TEST DDR conf                                      *****
+    // ******************************************************************
+#ifdef TEST_DDR
+
+
+    printf("* DDR: AXI_BUF_SIZE_ADDR: 4k\n");
+    WR(0x9010, 0x04000);
+
+    printf("* DDR: Control \n");
+    WR(0x9000, 0x000000F);
+
+    usleep(100000);
+    WR(0x9000, 0x00000008);
+    usleep(100000);
+    WR(0x9000, 0x08000008);
+
+    usleep(50000);
+
+    printf("* DDR: Control \n");
+    WR(0x9000, 0x08000208);
+
+
+#endif
+
+    // ******************************************************************
+    // ****     START DMA                                           *****
+    // ******************************************************************
+
+    //printf ("\n ---- Press ENTER to start DMA ---- \n");
+    //getchar();
+
+    printf("* DMA: Start \n");
+    WR(0x04, 0x1f);
+    gettimeofday(&start, NULL);
+
+    // ******************************************************************
+    // ****     Handshaking DMA                                     *****
+    // ******************************************************************
+
+    uint32_t curptr = 0, hwptr;
+    uint32_t curbuf = 0;
+    int empty = 0;
+    i = 0;
+
+
+    while (i < ITERATIONS) {
+        j = 0;
+        // printf("\ndesc0: %lx", htonl(desc[0]));
+        // printf("\ndesc1: %lx", htonl(desc[1]));
+        // printf("\ndesc2: %lx", htonl(desc[2]));
+        // printf("\ndesc3: %lx", htonl(desc[3]));
+        // printf("\ndesc4: %lx", htonl(desc[4]));
+        // printf("\ndesc5: %lx", htonl(desc[5]));
+        //printf("Iteration: %li of %li \r", i+1, ITERATIONS);
+        //loadBar(i+1, ITERATIONS, ITERATIONS, 30);
+        // printf("\nhwptr: %zu", hwptr);
+        // printf("\ncurptr: %zu", curptr);
+
+        // Spin until the value read from the descriptor block differs from the one handled last
+        do {
+#ifdef USE_64
+                hwptr = htonl(desc[3]);
+#else // 32-bit
+                hwptr = htonl(desc[4]);
+#endif
+        j++;
+        //printf("\rcurptr: %lx \t \t hwptr: %lx", curptr, hwptr);
+        } while (hwptr == curptr);
+
+        // Consume buffers until the one whose bus address equals hwptr is reached
+        do {
+            pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, curbuf);
+#ifdef CHECK_RESULTS
+            memcpy(temp_data[i][curbuf], ptr[curbuf], 4096);
+#endif
+#ifdef SHARED_MEMORY
+            memcpy(shared_memory, ptr[curbuf], 4096);
+#endif
+            //printf("\ncurbuf: %08x", curbuf);
+            //printf("\nbus_addr[curbuf]\n: %08x",bus_addr[curbuf]);
+            // for (k = 0; k < 63; k++){
+            // if (k%16 == 0) printf("\n# %d # :", k);
+            // printf(" %08x", ptr[curbuf][k]);
+            // }
+            //pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_TODEVICE, curbuf);
+            curbuf++;
+            if (curbuf == BUFFERS) {
+                i++;
+                curbuf = 0;
+#ifdef SWITCH_GENERATOR
+                if (switch_generator == 1) {
+                    switch_generator = 0;
+                    WR(0x9040, 0x100007F0);
+                } else {
+                    WR(0x9040, 0x180007F0);
+                    switch_generator = 1;
+                }
+#endif
+                if (i >= ITERATIONS) break;
+                //if (i >= (ITERATIONS - 4) ) WR(0x04, 0x0f);
+            }
+        } while (bus_addr[curbuf] != hwptr);
+
+#ifdef EXIT_ON_EMPTY
+#ifdef USE_64
+        if (desc[1] != 0)
+#else // 32bit
+        if (desc[2] != 0)
+#endif
+        {
+            if (bus_addr[curbuf] == hwptr) {
+                empty = 1;
+                break;
+            }
+        }
+#endif
+
+        WR(0x58, curbuf + 1);
+        //printf("WR %d\n", curbuf + 1);
+        //printf("%u (%lu)\n", curbuf, j);
+        curptr = hwptr;
+
+    }
+
+
+
+    // ******************************************************************
+    // **** Read performance and stop DMA                         *******
+    // ******************************************************************
+
+    gettimeofday(&end, NULL);
+    WR(0x04, 0x00);
+    // NOTE(review): 0x01 is not a 32-bit aligned register offset - confirm the intended address
+    WR(0x01, 0x00);
+    RD(0x28, perf_counter);
+
+
+
+    iterations_completed   = i;
+    buffers_filled      = curbuf;
+    // ITERATIONS is an int constant, so it is cast to match the %li conversion
+    if (empty) printf("* DMA: Empty FIFO! Last iteration: %li of %li\n", i+1, (long)ITERATIONS);
+    printf ("* DMA: Stop\n\n");
+
+#ifdef CHECK_RESULTS
+    printf ("First value:\t %08x\n", temp_data[0][0][0]);
+    printf ("Last value:\t %08x\n\n", temp_data[ITERATIONS-1][BUFFERS-1][(PAGE_SIZE/4)-4]);
+#endif
+
+    // ******************************************************************
+    // **** Performance                                           *******
+    // ******************************************************************
+    printf("Iterations done: %d\n", iterations_completed);
+    printf("Buffers filled on last iteration: %d\n", buffers_filled);
+
+
+    run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
+    size = (long long int) (( BUFFERS * (iterations_completed)  + buffers_filled) * HUGE_PAGE * PAGE_SIZE);
+    size_mb = (long long int) (( BUFFERS * (iterations_completed)  + buffers_filled) * HUGE_PAGE * 4 / 1024);
+    // size_mb is a long long, so it needs %lld rather than %zu
+    printf("Performance: transfered %lld Mbytes in %zu us using %d buffers\n", (size_mb), run_time, BUFFERS);
+    //printf("Buffers: \t %d \n", BUFFERS);
+    //printf("Buf_Size: \t %d \n", PAGE_SIZE);
+    //printf("Perf_counter: \t %f \n", perf_counter);
+    performance = ((size_mb * FPGA_CLOCK * 1000000)/(perf_counter*256));
+    printf("DMA perf counter:\t%d\n", (int)perf_counter);
+    printf("DMA side:\t\t%.3lf MB/s\n", performance);
+    printf("PC side:\t\t%.3lf MB/s\n\n", 1000000. * size_mb / run_time );
+
+    // ******************************************************************
+    // **** Read Data                                             *******
+    // ******************************************************************
+
+
+    #ifdef PRINT_RESULTS
+    printf("Writing Data to HDD... \n");
+    for (i=0; i < iterations_completed; i++) {
+        for (j=0; j < BUFFERS; j++)
+        {
+            Output = fopen("data.out", "a");
+            fwrite(temp_data[i][j], 4096, 1, Output);
+            fclose(Output);
+        }
+        loadBar(i+1, ITERATIONS, ITERATIONS, 30);
+    }
+    // Save last partially filled iteration
+    for (j=0; j < buffers_filled; j++)
+    {
+        Output = fopen("data.out", "a");
+        fwrite(temp_data[iterations_completed][j], 4096, 1, Output);
+        fclose(Output);
+    }
+    printf("Data saved in data.out. \n");
+    #endif
+
+   #ifdef CHECK_RESULTS
+    err = 0;
+    error_log = fopen ("error_log.txt", "a");
+    // Fall back to stderr so that the mismatch reports are not written through a NULL stream
+    if (!error_log) error_log = stderr;
+    printf("\nChecking data ...\n");
+    for (i=0; i < iterations_completed; i++) {
+        for (j = 0; j < BUFFERS; j++) {
+            for (k = 0; k < 1024 ; k++)
+            {
+                // The last dword has no successor inside this page; reading [k+1]
+                // there would run past the 4096-byte buffer.
+                if (k == 1023) continue;
+
+                mem_diff = ((uint32_t)temp_data[i][j][k] - (uint32_t)temp_data[i][j][k+1]);
+                //if ((mem_diff == 1) || (mem_diff == (-7)) || (k == 1023) )
+                if (mem_diff != -1) {
+                    fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j][k], temp_data[i][j][k+1], mem_diff);
+                    err++;
+                }
+            }
+            if (j != BUFFERS-1) {
+            // Check first and Last
+                mem_diff = (uint32_t)(temp_data[i][j+1][0] - temp_data[i][j][1023]);
+                if (mem_diff != (1)) {
+                    fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j+1][0], temp_data[i][j][1023], mem_diff);
+                    err++;
+                }
+            }
+
+        }
+        loadBar(i+1, ITERATIONS, ITERATIONS, 30);
+    }
+    // Same checks for the partially filled last iteration (empty when the run completed)
+    for (j = 0; j < buffers_filled; j++) {
+        for (k = 0; k < 1024 ; k++)
+        {
+            // Skip the last dword for the same out-of-bounds reason as above
+            if (k == 1023) continue;
+
+            mem_diff = ((uint32_t)temp_data[iterations_completed][j][k] - (uint32_t)temp_data[iterations_completed][j][k+1]);
+            if (mem_diff != -1) {
+                fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", (long)iterations_completed, j, k, temp_data[iterations_completed][j][k], temp_data[iterations_completed][j][k+1], mem_diff);
+                err++;
+            }
+        }
+        if (j != buffers_filled-1) {
+        // Check first and Last
+            mem_diff = (uint32_t)(temp_data[iterations_completed][j+1][0] - temp_data[iterations_completed][j][1023]);
+            if (mem_diff != (1)) {
+                fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", (long)iterations_completed, j, k, temp_data[iterations_completed][j+1][0], temp_data[iterations_completed][j][1023], mem_diff);
+                err++;
+            }
+        }
+    }
+    if (err != 0) printf("\rChecking data: \xE2\x9C\x98 %d errors found  \n See \"error_log.txt\" for details \n\n", err);
+    else printf("\rChecking data: \xE2\x9C\x93 no errors found  \n\n");
+    if (error_log != stderr) fclose(error_log);
+    #endif
+
+
+    // *********** Free Memory
+#ifdef CHECK_RESULTS
+    for (i=0; i < ITERATIONS; i++) {
+        for (j=0; j < BUFFERS; j++)
+        {
+            free(temp_data[i][j]);
+        }
+    }
+#endif /* CHECK_RESULTS */
+
+    pcilib_free_kernel_memory(pci, kbuf,  free_flags);
+    pcilib_free_kernel_memory(pci, kdesc,  free_flags);
+    pcilib_disable_irq(pci, 0);
+    pcilib_unmap_bar(pci, BAR, bar);
+    pcilib_close(pci);
+
+//    shmdt(shmid);
+//    shmctl(shmid, IPC_RMID, NULL);
+
+    return 0;
+}

+ 1 - 0
cli.c

@@ -2641,6 +2641,7 @@ int main(int argc, char **argv) {
 	    case OPT_MODEL:
 		if (!strcasecmp(optarg, "pci")) model = PCILIB_MODEL_PCI;
 		else if (!strcasecmp(optarg, "ipecamera")) model = PCILIB_MODEL_IPECAMERA;
+		else if (!strcasecmp(optarg, "kapture")) model = PCILIB_MODEL_KAPTURE;
 		else Usage(argc, argv, "Invalid memory model (%s) is specified", optarg);
 	    break;
 	    case OPT_BAR:

+ 2 - 0
dma.h

@@ -21,6 +21,8 @@ typedef struct {
 } pcilib_dma_buffer_status_t;
 
 struct pcilib_dma_api_description_s {
+    const char *title;
+
     pcilib_dma_context_t *(*init)(pcilib_t *ctx, pcilib_dma_modification_t type, void *arg);
     void (*free)(pcilib_dma_context_t *ctx);
     

+ 2 - 2
dma/CMakeLists.txt

@@ -3,7 +3,7 @@ include_directories(
 )
 
 
-set(HEADERS ${HEADERS} nwl.h nwl_dma.h nwl_engine.h nwl_irq.h nwl_loopback.h nwl_register.h)
+set(HEADERS ${HEADERS} nwl.h nwl_private.h nwl_engine.h nwl_irq.h nwl_loopback.h nwl_register.h ipe.h ipe_private.h ipe_registers.h)
 
-add_library(dma STATIC nwl.c nwl_engine.c nwl_irq.c nwl_loopback.c nwl_register.c)
+add_library(dma STATIC nwl.c nwl_engine.c nwl_irq.c nwl_loopback.c nwl_register.c ipe.c)
 

+ 412 - 0
dma/ipe.c

@@ -0,0 +1,412 @@
+#define _PCILIB_DMA_IPE_C
+#define _BSD_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <arpa/inet.h>
+
+#include "pci.h"
+#include "pcilib.h"
+#include "error.h"
+#include "tools.h"
+
+#include "ipe.h"
+#include "ipe_private.h"
+#include "ipe_registers.h"
+
+
+#define WR(addr, value) { *(uint32_t*)(ctx->base_addr + addr) = value; }
+#define RD(addr, value) { value = *(uint32_t*)(ctx->base_addr + addr); }
+
+
+/**
+ * Creates the IPE DMA context.
+ *
+ * Registers a single device-to-host packet engine (engine 0), locates the DMA
+ * register bank, resolves its base address and adds the IPE DMA registers to
+ * the register model.
+ *
+ * @param pcilib	library handle the context is bound to
+ * @param type		unused by this implementation
+ * @param arg		unused by this implementation
+ * @return the new context, or NULL if the allocation failed, the DMA register
+ *	   bank was not found, or the registers could not be added
+ */
+pcilib_dma_context_t *dma_ipe_init(pcilib_t *pcilib, pcilib_dma_modification_t type, void *arg) {
+    int res;
+    ipe_dma_t *dma;
+    pcilib_register_bank_t bank_id;
+    pcilib_model_description_t *model = pcilib_get_model_description(pcilib);
+
+    dma = malloc(sizeof(ipe_dma_t));
+    if (!dma) return NULL;
+
+	// Zeroing the whole context clears the engine descriptions as well
+    memset(dma, 0, sizeof(ipe_dma_t));
+    dma->pcilib = pcilib;
+
+	// Engine 0 is the only engine, the description list ends at index 1
+    dma->engine[0].addr = 0;
+    dma->engine[0].type = PCILIB_DMA_TYPE_PACKET;
+    dma->engine[0].direction = PCILIB_DMA_FROM_DEVICE;
+    dma->engine[0].addr_bits = 32;
+    pcilib_set_dma_engine_description(pcilib, 0, &dma->engine[0]);
+    pcilib_set_dma_engine_description(pcilib, 1, NULL);
+
+    bank_id = pcilib_find_bank_by_addr(pcilib, PCILIB_REGISTER_BANK_DMA);
+    if (bank_id == PCILIB_REGISTER_BANK_INVALID) {
+	free(dma);
+	pcilib_error("DMA Register Bank could not be found");
+	return NULL;
+    }
+
+    dma->dma_bank = model->banks + bank_id;
+    dma->base_addr = pcilib_resolve_register_address(pcilib, dma->dma_bank->bar, dma->dma_bank->read_addr);
+
+    res = pcilib_add_registers(dma->pcilib, 0, ipe_dma_registers);
+    if (res) {
+	free(dma);
+	pcilib_error("Error adding DMA registers");
+	return NULL;
+    }
+
+    return (pcilib_dma_context_t*)dma;
+}
+
+/**
+ * Stops the DMA engine and releases the context.
+ * A NULL context is accepted and ignored.
+ */
+void  dma_ipe_free(pcilib_dma_context_t *vctx) {
+    if (!vctx) return;
+
+	// The stop routine releases the kernel buffers before the context goes away
+    dma_ipe_stop(vctx, PCILIB_DMA_ENGINE_ALL, PCILIB_DMA_FLAGS_DEFAULT);
+    free((ipe_dma_t*)vctx);
+}
+
+
+/**
+ * Starts the DMA engine: allocates (or re-attaches to) the kernel buffers and,
+ * unless a consistent persistent configuration is found, resets and programs
+ * the hardware.
+ *
+ * Changes compared to the previous revision:
+ *  - a failure to open or read the saved read position no longer passes a NULL
+ *    FILE* to fread()/fclose(); the engine is reinitialized instead;
+ *  - a restored read position outside of the ring forces reinitialization;
+ *  - a failure to save the read position no longer passes NULL to fwrite();
+ *  - the "Bad address" diagnostic uses conversions matching its arguments.
+ *
+ * @param vctx	DMA context created by dma_ipe_init
+ * @param dma	engine to start; PCILIB_DMA_ENGINE_INVALID is a no-op
+ * @param flags	PCILIB_DMA_FLAG_PERSISTENT requests that buffers survive clean-up
+ * @return 0 on success (also if already started), PCILIB_ERROR_INVALID_BANK
+ *	   for an unknown engine, PCILIB_ERROR_MEMORY if allocation failed
+ */
+int dma_ipe_start(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags) {
+    size_t i;
+
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+
+    int preserve = 0;
+    pcilib_kmem_flags_t kflags;
+    pcilib_kmem_reuse_state_t reuse_desc, reuse_pages;
+
+    volatile void *desc_va;
+    volatile uint32_t *last_written_addr_ptr;
+
+    pcilib_register_value_t value = 0;
+
+    if (dma == PCILIB_DMA_ENGINE_INVALID) return 0;
+    else if (dma > 1) return PCILIB_ERROR_INVALID_BANK;
+
+    if (!ctx->started) ctx->started = 1;
+
+    if (flags&PCILIB_DMA_FLAG_PERSISTENT) ctx->preserve = 1;
+
+	// Buffers are already configured by an earlier call
+    if (ctx->pages) return 0;
+    
+    kflags = PCILIB_KMEM_FLAG_REUSE|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_HARDWARE|(ctx->preserve?PCILIB_KMEM_FLAG_PERSISTENT:0);
+    pcilib_kmem_handle_t *desc = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, IPEDMA_DESCRIPTOR_SIZE, IPEDMA_DESCRIPTOR_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_RING, 0x00), kflags);
+    pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, IPEDMA_DMA_PAGES, 0, 0, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_PAGES, 0x00), kflags);
+
+    if (!desc||!pages) {
+	if (pages) pcilib_free_kernel_memory(ctx->pcilib, pages, 0);
+	if (desc) pcilib_free_kernel_memory(ctx->pcilib, desc, 0);
+	return PCILIB_ERROR_MEMORY;
+    }
+    reuse_desc = pcilib_kmem_is_reused(ctx->pcilib, desc);
+    reuse_pages = pcilib_kmem_is_reused(ctx->pcilib, pages);
+
+	// Buffers are only preserved if both allocations were found in the same, fully usable state
+    if (reuse_desc == reuse_pages) {
+	if (reuse_desc & PCILIB_KMEM_REUSE_PARTIAL) pcilib_warning("Inconsistent DMA buffers are found (only part of required buffers is available), reinitializing...");
+	else if (reuse_desc & PCILIB_KMEM_REUSE_REUSED) {
+	    if ((reuse_desc & PCILIB_KMEM_REUSE_PERSISTENT) == 0) pcilib_warning("Lost DMA buffers are found (non-persistent mode), reinitializing...");
+	    else if ((reuse_desc & PCILIB_KMEM_REUSE_HARDWARE) == 0) pcilib_warning("Lost DMA buffers are found (missing HW reference), reinitializing...");
+	    else {
+#ifndef IPEDMA_BUG_DMARD
+		RD(IPEDMA_REG_PAGE_COUNT, value);
+
+		if ((value + 1) != IPEDMA_DMA_PAGES) pcilib_warning("Inconsistent DMA buffers are found (Number of allocated buffers does not match current request), reinitializing...");
+		else
+#endif /* IPEDMA_BUG_DMARD */
+		    preserve = 1;
+	    }
+	}
+    } else pcilib_warning("Inconsistent DMA buffers (modes of ring and page buffers does not match), reinitializing....");
+
+	// The engine reports the last written page in the descriptor; the word offset depends on the addressing mode
+    desc_va = pcilib_kmem_get_ua(ctx->pcilib, desc);
+    if (ctx->mode64) last_written_addr_ptr = (volatile uint32_t*)((volatile char*)desc_va + 3 * sizeof(uint32_t));
+    else last_written_addr_ptr = (volatile uint32_t*)((volatile char*)desc_va + 4 * sizeof(uint32_t));
+
+    if (preserve) {
+	    // Detect the current state of DMA engine
+#ifdef IPEDMA_BUG_DMARD
+	FILE *f = fopen("/tmp/pcitool_lastread", "r");
+	if ((!f)||(fread(&value, 1, sizeof(pcilib_register_value_t), f) != sizeof(pcilib_register_value_t))) {
+		// Without the read position the ring can't be resumed safely
+	    pcilib_warning("Can't read current status, reinitializing...");
+	    preserve = 0;
+	}
+	if (f) fclose(f);
+#else /* IPEDMA_BUG_DMARD */
+	RD(IPEDMA_REG_LAST_READ, value);
+	if (value == IPEDMA_DMA_PAGES) value = 0;
+#endif /* IPEDMA_BUG_DMARD */
+
+	    // The value is used as a page index below, so it must lie inside the ring
+	if ((preserve)&&(value >= IPEDMA_DMA_PAGES)) {
+	    pcilib_warning("Inconsistent DMA state (last read page is outside of the ring), reinitializing...");
+	    preserve = 0;
+	}
+    }
+
+    if (preserve) {
+	ctx->reused = 1;
+	ctx->preserve = 1;
+
+	ctx->last_read = value;
+    } else {
+	ctx->reused = 0;
+
+	    // Disable DMA
+	WR(IPEDMA_REG_CONTROL, 0x0);
+	
+	    // Reset DMA engine
+	WR(IPEDMA_REG_RESET, 0x1);
+	usleep(100000);
+	WR(IPEDMA_REG_RESET, 0x0);
+
+#ifndef IPEDMA_BUG_DMARD
+	    // Verify PCIe link status
+	RD(IPEDMA_REG_RESET, value);
+	if (value != 0x14031700) pcilib_warning("PCIe is not ready");
+#endif /* IPEDMA_BUG_DMARD */
+ 
+	    // Configuring TLP and PACKET sizes (40 bit mode can be used with big pre-allocated buffers later)
+	WR(IPEDMA_REG_TLP_SIZE, IPEDMA_TLP_SIZE);
+	WR(IPEDMA_REG_TLP_COUNT, IPEDMA_PAGE_SIZE / (4 * IPEDMA_TLP_SIZE * IPEDMA_CORES));
+
+	    // Setting progress register threshold
+	WR(IPEDMA_REG_UPDATE_THRESHOLD, IPEDMA_DMA_PROGRESS_THRESHOLD);
+        
+	    // Resetting configured DMA pages
+	WR(IPEDMA_REG_PAGE_COUNT, 0);
+        
+	    // Setting current read position and configuring progress register
+	WR(IPEDMA_REG_LAST_READ, IPEDMA_DMA_PAGES - 2 + 1);
+	WR(IPEDMA_REG_UPDATE_ADDR, pcilib_kmem_get_block_ba(ctx->pcilib, desc, 0));
+
+	    // Instructing DMA engine that writing should start from the first DMA page
+	*last_written_addr_ptr = 0;
+
+	    // Pushing bus addresses of all pages to the engine, one write per page
+	for (i = 0; i < IPEDMA_DMA_PAGES; i++) {
+	    uintptr_t bus_addr = pcilib_kmem_get_block_ba(ctx->pcilib, pages, i);
+	    WR(IPEDMA_REG_PAGE_ADDR, bus_addr);
+	    if (bus_addr%4096) printf("Bad address %zu: %lx\n", i, (unsigned long)bus_addr);
+	    usleep(1000);
+	}
+	
+	    // Enable DMA
+	WR(IPEDMA_REG_CONTROL, 0x1);
+	
+	ctx->last_read = IPEDMA_DMA_PAGES - 1;
+
+#ifdef IPEDMA_BUG_DMARD
+	    // Registers can't be read back during transfers, so the read position is kept in a file
+	FILE *f = fopen("/tmp/pcitool_lastread", "w");
+	if (f) {
+	    value = ctx->last_read;
+	    fwrite(&value, 1, sizeof(pcilib_register_value_t), f);
+	    fclose(f);
+	} else pcilib_error("Can't write current status");
+#endif /* IPEDMA_BUG_DMARD */
+    }
+
+	// Kept in the byte order used for the comparison with the descriptor word
+    ctx->last_read_addr = htonl(pcilib_kmem_get_block_ba(ctx->pcilib, pages, ctx->last_read));
+
+    ctx->desc = desc;
+    ctx->pages = pages;
+    ctx->page_size = pcilib_kmem_get_block_size(ctx->pcilib, pages, 0);
+    ctx->ring_size = IPEDMA_DMA_PAGES;
+
+    return 0;
+}
+
+/**
+ * Stops the DMA engine and releases the kernel buffers.
+ *
+ * If the engine is in preserve mode the hardware is left running and the
+ * buffers are released with the REUSE flag only; otherwise the engine is
+ * disabled and reset, and the buffers are released with the HARDWARE and
+ * PERSISTENT flags. Passing PCILIB_DMA_FLAG_PERSISTENT cancels a previously
+ * requested preserve mode.
+ *
+ * @return 0 on success (also if the engine was not started),
+ *	   PCILIB_ERROR_INVALID_BANK for an unknown engine
+ */
+int dma_ipe_stop(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags) {
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+    pcilib_kmem_flags_t kflags = PCILIB_KMEM_FLAG_REUSE;
+
+    if (!ctx->started) return 0;
+
+    if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return PCILIB_ERROR_INVALID_BANK;
+
+	// An explicit flag overrides the preserve mode selected at start
+    if (flags&PCILIB_DMA_FLAG_PERSISTENT) ctx->preserve = 0;
+
+    if (!ctx->preserve) {
+	kflags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT;
+
+	ctx->started = 0;
+
+	    // Switch the engine off
+	WR(IPEDMA_REG_CONTROL, 0);
+
+	    // Pulse the reset line
+	WR(IPEDMA_REG_RESET, 0x1);
+	usleep(100000);
+	WR(IPEDMA_REG_RESET, 0x0);
+
+	    // Forget the configured DMA pages
+	WR(IPEDMA_REG_PAGE_COUNT, 0);
+    }
+
+	// Release the status descriptor and the page ring
+    if (ctx->desc) {
+	pcilib_free_kernel_memory(ctx->pcilib, ctx->desc, kflags);
+	ctx->desc = NULL;
+    }
+
+    if (ctx->pages) {
+	pcilib_free_kernel_memory(ctx->pcilib, ctx->pages, kflags);
+	ctx->pages = NULL;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Reports the state of the DMA ring.
+ *
+ * ring_tail is the index of the next page to be read (last_read + 1, wrapped),
+ * ring_head is the index of the page most recently written by the engine. The
+ * pages from ring_tail up to and including ring_head (with wrap-around) hold
+ * unread data; the ring is empty when ring_head equals the last read page,
+ * which is the same condition dma_ipe_stream_read uses to wait for data.
+ *
+ * Changes compared to the previous revision:
+ *  - ring_head is a page index (it used to receive the bus address);
+ *  - the buffers array is cleared using its own element size;
+ *  - buffers may be NULL and is never written beyond n_buffers entries;
+ *  - wrapped rings mark the pages between tail and head, not the free ones;
+ *  - an unknown or zero "last written" address yields an empty ring instead
+ *    of an uninitialized ring_head;
+ *  - a stopped engine (no descriptor) is reported without touching the
+ *    descriptor memory.
+ *
+ * @param vctx		DMA context
+ * @param dma		unused, there is a single engine
+ * @param status	receives the ring state, must not be NULL
+ * @param n_buffers	number of entries available in buffers
+ * @param buffers	optional per-page status array
+ * @return 0 on success, -1 if status is NULL
+ */
+int dma_ipe_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_engine_status_t *status, size_t n_buffers, pcilib_dma_buffer_status_t *buffers) {
+    size_t i, used;
+    size_t last, head, tail;
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+
+    volatile char *desc_va;
+    volatile uint32_t *last_written_addr_ptr;
+    uint32_t last_written_addr;
+
+    if (!status) return -1;
+
+    status->started = ctx->started;
+    status->ring_size = ctx->ring_size;
+    status->buffer_size = ctx->page_size;
+
+	// Only entries backed by a ring page are reported
+    if (n_buffers > ctx->ring_size) n_buffers = ctx->ring_size;
+    if ((buffers)&&(n_buffers)) memset(buffers, 0, n_buffers * sizeof(pcilib_dma_buffer_status_t));
+
+	// Without the descriptor and pages there is no hardware state to inspect
+    if ((!ctx->desc)||(!ctx->pages)||(!ctx->ring_size)) {
+	status->ring_head = 0;
+	status->ring_tail = 0;
+	return 0;
+    }
+
+    desc_va = (volatile char*)pcilib_kmem_get_ua(ctx->pcilib, ctx->desc);
+    if (ctx->mode64) last_written_addr_ptr = (volatile uint32_t*)(desc_va + 3 * sizeof(uint32_t));
+    else last_written_addr_ptr = (volatile uint32_t*)(desc_va + 4 * sizeof(uint32_t));
+
+    last_written_addr = ntohl(*last_written_addr_ptr);
+
+    last = ctx->last_read % ctx->ring_size;
+    tail = (last + 1) % ctx->ring_size;
+
+	// Find where the ring head actually is; if nothing matches the ring is treated as empty
+    head = last;
+    for (i = 0; i < ctx->ring_size; i++) {
+	uintptr_t bus_addr = pcilib_kmem_get_block_ba(ctx->pcilib, ctx->pages, i);
+
+	if (bus_addr == last_written_addr) {
+	    head = i;
+	    break;
+	}
+    }
+
+	// Zero means that nothing was written yet, any other unmatched address is unexpected
+    if ((i == ctx->ring_size)&&(last_written_addr)) {
+	pcilib_warning("DMA is in unknown state, the last written address does not match any of the DMA pages");
+    }
+
+    status->ring_head = head;
+    status->ring_tail = tail;
+
+    if (buffers) {
+	    // Number of pages written by the engine, but not yet read
+	used = (head + ctx->ring_size - last) % ctx->ring_size;
+
+	for (i = 0; i < used; i++) {
+	    size_t page = (tail + i) % ctx->ring_size;
+	    if (page >= n_buffers) continue;
+
+		// Every page is reported as a complete packet
+	    buffers[page].used = 1;
+	    buffers[page].size = ctx->page_size;
+	    buffers[page].first = 1;
+	    buffers[page].last = 1;
+	}
+    }
+
+    return 0;
+}
+
+/**
+ * Streams DMA pages to the callback.
+ *
+ * Starts the engine if needed and then, page by page, waits until the engine
+ * reports a written page different from the last one read, passes the next
+ * page of the ring to the callback and advances the read position. The loop
+ * ends when the callback returns 0; a negative return is passed back negated.
+ *
+ * Changes compared to the previous revision:
+ *  - the current time is initialized before the first timeout check (it used
+ *    to be read uninitialized);
+ *  - a failure to save the read position no longer passes NULL to fwrite();
+ *  - the debug messages use conversions matching their arguments.
+ *
+ * @param vctx		DMA context
+ * @param dma		engine to read from
+ * @param addr		unused by this implementation
+ * @param size		unused by this implementation
+ * @param flags		unused by this implementation
+ * @param timeout	waiting time applied when the callback requests PCILIB_STREAMING_WAIT
+ * @param cb		callback receiving each page
+ * @param cbattr	opaque pointer passed to the callback
+ * @return 0 on success, PCILIB_ERROR_TIMEOUT if the callback required data
+ *	   which did not arrive in time, or an error from start or the callback
+ */
+int dma_ipe_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, pcilib_dma_callback_t cb, void *cbattr) {
+    int err, ret = PCILIB_STREAMING_REQ_PACKET;
+
+    pcilib_timeout_t wait = 0;
+    struct timeval start, cur;
+
+    volatile void *desc_va;
+    volatile uint32_t *last_written_addr_ptr;
+    
+    size_t cur_read;
+    
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+
+    err = dma_ipe_start(vctx, dma, PCILIB_DMA_FLAGS_DEFAULT);
+    if (err) return err;
+
+    desc_va = (void*)pcilib_kmem_get_ua(ctx->pcilib, ctx->desc);
+
+    if (ctx->mode64) last_written_addr_ptr = (volatile uint32_t*)((volatile char*)desc_va + 3 * sizeof(uint32_t));
+    else last_written_addr_ptr = (volatile uint32_t*)((volatile char*)desc_va + 4 * sizeof(uint32_t));
+
+    do {
+	    // The timeout for the next page is selected by the previous callback result
+	switch (ret&PCILIB_STREAMING_TIMEOUT_MASK) {
+	    case PCILIB_STREAMING_CONTINUE: wait = PCILIB_DMA_TIMEOUT; break;
+	    case PCILIB_STREAMING_WAIT: wait = timeout; break;
+//	    case PCILIB_STREAMING_CHECK: wait = 0; break;
+	}
+
+#ifdef IPEDMA_DEBUG
+	printf("Waiting for data: %zu (last read) 0x%zx (last read addr) 0x%x (last_written)\n", ctx->last_read, ctx->last_read_addr, *last_written_addr_ptr);
+#endif /* IPEDMA_DEBUG */
+
+	gettimeofday(&start, NULL);
+	cur = start;	// the loop condition reads cur before the first update in the loop body
+	while (((*last_written_addr_ptr == 0)||(ctx->last_read_addr == (*last_written_addr_ptr)))&&((wait == PCILIB_TIMEOUT_INFINITE)||(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < wait))) {
+	    usleep(10);
+	    gettimeofday(&cur, NULL);
+	}
+	
+	    // Failing out if we exited on timeout
+	if ((ctx->last_read_addr == (*last_written_addr_ptr))||(*last_written_addr_ptr == 0))
+	    return (ret&PCILIB_STREAMING_FAIL)?PCILIB_ERROR_TIMEOUT:0;
+
+	    // Getting next page to read
+	cur_read = ctx->last_read + 1;
+	if (cur_read == ctx->ring_size) cur_read = 0;
+
+#ifdef IPEDMA_DEBUG
+	printf("Reading: %zu (last read) 0x%zx (last read addr) 0x%x (last_written)\n", cur_read, ctx->last_read_addr, *last_written_addr_ptr);
+#endif /* IPEDMA_DEBUG */
+	
+	pcilib_kmem_sync_block(ctx->pcilib, ctx->pages, PCILIB_KMEM_SYNC_FROMDEVICE, cur_read);
+	void *buf = pcilib_kmem_get_block_ua(ctx->pcilib, ctx->pages, cur_read);
+	ret = cb(cbattr, PCILIB_DMA_FLAG_EOP, ctx->page_size, buf);
+	if (ret < 0) return -ret;
+	
+//	DS: Fixme, it looks like we can avoid calling this for the sake of performance
+//	pcilib_kmem_sync_block(ctx->pcilib, ctx->pages, PCILIB_KMEM_SYNC_TODEVICE, cur_read);
+
+	WR(IPEDMA_REG_LAST_READ, ctx->last_read + 1);
+
+	ctx->last_read = cur_read;
+	ctx->last_read_addr = htonl(pcilib_kmem_get_block_ba(ctx->pcilib, ctx->pages, cur_read));
+
+#ifdef IPEDMA_BUG_DMARD
+	    // Registers can't be read back during transfers, so the read position is kept in a file
+	FILE *f = fopen("/tmp/pcitool_lastread", "w");
+	if (f) {
+	    pcilib_register_value_t value = cur_read;
+	    fwrite(&value, 1, sizeof(pcilib_register_value_t), f);
+	    fclose(f);
+	} else pcilib_error("Can't write current status");
+#endif /* IPEDMA_BUG_DMARD */
+
+    } while (ret);
+
+    return 0;
+}
+
+/**
+ * Benchmark entry of the DMA API. No measurement is implemented for this
+ * engine: all arguments are ignored and 0 is always returned.
+ */
+double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
+    return 0.0;
+}

+ 42 - 0
dma/ipe.h

@@ -0,0 +1,42 @@
+#ifndef _PCILIB_DMA_IPE_H
+#define _PCILIB_DMA_IPE_H
+
+#include <stdio.h>
+#include "../pcilib.h"
+
+//#define PCILIB_NWL_MODIFICATION_IPECAMERA 0x100
+
+pcilib_dma_context_t *dma_ipe_init(pcilib_t *ctx, pcilib_dma_modification_t type, void *arg);
+void  dma_ipe_free(pcilib_dma_context_t *vctx);
+
+int dma_ipe_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_engine_status_t *status, size_t n_buffers, pcilib_dma_buffer_status_t *buffers);
+
+
+int dma_ipe_start(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
+int dma_ipe_stop(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
+
+int dma_ipe_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
+
+
+#ifdef _PCILIB_DMA_IPE_C	/* the table is defined in ipe.c only, every other file sees the extern declaration */
+pcilib_dma_api_description_t ipe_dma_api = {	/* positional initializer: entry order must follow struct pcilib_dma_api_description_s */
+    "ipe_dma",			/* title */
+    dma_ipe_init,		/* init */
+    dma_ipe_free,		/* free */
+    dma_ipe_get_status,
+    NULL,			/* NOTE(review): slot filled by dma_nwl_enable_irq in the NWL table - no IRQ enabling here */
+    NULL,			/* NOTE(review): slot filled by dma_nwl_disable_irq in the NWL table */
+    NULL,			/* NOTE(review): slot filled by dma_nwl_acknowledge_irq in the NWL table */
+    dma_ipe_start,
+    dma_ipe_stop,
+    NULL,			/* NOTE(review): slot filled by dma_nwl_write_fragment in the NWL table - no writing here */
+    dma_ipe_stream_read,
+    dma_ipe_benchmark		/* stub, always returns 0 */
+};
+#else
+extern pcilib_dma_api_description_t ipe_dma_api;
+#endif
+
+
+#endif /* _PCILIB_DMA_IPE_H */

+ 56 - 0
dma/ipe_private.h

@@ -0,0 +1,56 @@
+#ifndef _PCILIB_DMA_IPE_PRIVATE_H
+#define _PCILIB_DMA_IPE_PRIVATE_H
+
+#define IPEDMA_CORES			1
+#define IPEDMA_TLP_SIZE			32
+#define IPEDMA_PAGE_SIZE		4096
+#define IPEDMA_DMA_PAGES		16		/**< number of DMA pages in the ring buffer to allocate */
+#define IPEDMA_DMA_PROGRESS_THRESHOLD	1		/**< how many pages the DMA engine should fill before reporting progress */
+#define IPEDMA_DESCRIPTOR_SIZE		128
+#define IPEDMA_DESCRIPTOR_ALIGNMENT	64
+
+//#define IPEDMA_DEBUG
+#define IPEDMA_BUG_DMARD				/**< No register read during DMA transfer */
+
+#define IPEDMA_REG_RESET		0x00
+#define IPEDMA_REG_CONTROL		0x04
+#define IPEDMA_REG_TLP_SIZE		0x0C
+#define IPEDMA_REG_TLP_COUNT		0x10
+#define IPEDMA_REG_PAGE_ADDR		0x50
+#define IPEDMA_REG_UPDATE_ADDR		0x54
+#define IPEDMA_REG_LAST_READ		0x58
+#define IPEDMA_REG_PAGE_COUNT		0x5C
+#define IPEDMA_REG_UPDATE_THRESHOLD	0x60
+
+
+
+typedef struct ipe_dma_s ipe_dma_t;
+
+struct ipe_dma_s {			/* private state of the IPE DMA engine, allocated by dma_ipe_init */
+    struct pcilib_dma_context_s dmactx;	/**< generic DMA context; kept first, the structure is cast to and from pcilib_dma_context_t* */
+    pcilib_dma_engine_description_t engine[2];	/**< engine descriptions, only engine[0] (packet engine, from device) is filled in */
+
+    pcilib_t *pcilib;			/**< library handle passed to dma_ipe_init */
+    
+    pcilib_register_bank_description_t *dma_bank;	/**< description of the DMA register bank */
+    char *base_addr;			/**< resolved address of the DMA register bank, base for the WR/RD register accesses */
+
+    pcilib_irq_type_t irq_enabled;	/**< indicates that IRQs are enabled */
+    pcilib_irq_type_t irq_preserve;	/**< indicates that IRQs should not be disabled during clean-up */
+    int irq_started;			/**< indicates that IRQ subsystem is initialized (detecting which types should be preserved) */    
+
+    int started;			/**< indicates that DMA buffers are initialized and reading is allowed */
+    int writting;			/**< indicates that we are in middle of writing packet */
+    int reused;				/**< indicates that DMA was found initialized, buffers were reused, and no additional initialization is needed */
+    int preserve;			/**< indicates that DMA should not be stopped during clean-up */
+    int mode64;				/**< indicates 64-bit operation mode */
+
+    pcilib_kmem_handle_t *desc;		/**< in-memory status descriptor written by DMA engine upon operation progress */
+    pcilib_kmem_handle_t *pages;	/**< collection of memory-locked pages for DMA operation */
+
+    size_t ring_size, page_size;	/**< number of pages in the ring and size of a single page, set by dma_ipe_start */
+    size_t last_read, last_read_addr, last_written;	/**< index of the last page read and htonl() of its bus address; NOTE(review): last_written is not used by ipe.c - confirm */
+
+};
+
+#endif /* _PCILIB_DMA_IPE_PRIVATE_H */

+ 44 - 0
dma/ipe_registers.h

@@ -0,0 +1,44 @@
+#ifndef _PCILIB_DMA_IPE_REGISTERS_H
+#define _PCILIB_DMA_IPE_REGISTERS_H 
+
+#ifdef _PCILIB_DMA_IPE_C 
+/*
+ * Registers of the IPE DMA engine, added to the register model by dma_ipe_init.
+ * The list is terminated by the entry with a NULL name.
+ *
+ * NOTE(review): the columns appear to be: address, bit offset, width in bits,
+ * default value, r/w mask, access mode, register type, bank, name and
+ * description - confirm against pcilib_register_description_t.
+ *
+ * Register names are lookup keys, so the misspelled "mwr_relxed_order" is
+ * intentionally left as is; only description text is corrected here.
+ */
+static pcilib_register_description_t ipe_dma_registers[] = {
+    {0x0000, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "dcr",  			"Device Control Status Register"},
+    {0x0000, 	0, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "reset_dma",  			""},
+    {0x0000, 	16, 	4, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "datapath_width",			""},
+    {0x0000, 	24, 	8, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "fpga_family",			""},
+    {0x0004, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "ddmacr",  			"Device DMA Control Status Register"},
+    {0x0004, 	0, 	1, 	0, 	0xFFFFFFFF,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_start",  			"Start writing memory"},
+    {0x0004, 	5, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_relxed_order",  		""},
+    {0x0004, 	6, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_nosnoop",  			""},
+    {0x0004, 	7, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_int_dis",  			""},
+    {0x0004, 	16, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mrd_start",  			""},
+    {0x0004, 	21, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mrd_relaxed_order",  		""},
+    {0x0004, 	22, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mrd_nosnoop",  			""},
+    {0x0004, 	23, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mrd_int_dis",  			""},
+    {0x000C, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "mwr_size",  			"DMA TLP size"},
+    {0x000C, 	0, 	16, 	0x20, 	0xFFFFFFFF,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_len",  			"Max TLP size"},
+    {0x000C, 	16, 	3, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_tlp_tc",  			"TC for TLP packets"},
+    {0x000C, 	19, 	1, 	0, 	0xFFFFFFFF,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_64b_en",  			"Enable 64 bit memory addressing"},
+    {0x000C, 	20, 	1, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_phant_func_dis",		"Disable MWR phantom function"},
+    {0x000C, 	24, 	8, 	0, 	0xFFFFFFFF,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "mwr_up_addr",  			"Upper address for 64 bit memory addressing"},
+    {0x0010, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "mwr_count",  		"Write DMA TLP Count"},
+    {0x0014, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "mwr_pattern",  		"DMA generator data pattern"},
+    {0x0028, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "mwr_perf",			"MWR Performance"},
+    {0x003C, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "cfg_lnk_width",		"Negotiated and max width of PCIe Link"},
+    {0x003C, 	0, 	6, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "cfg_cap_max_lnk_width", 		"Max link width"},
+    {0x003C, 	8, 	6, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "cfg_prg_max_lnk_width", 		"Negotiated link width"},
+    {0x0040, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "cfg_payload_size",  		""},
+    {0x0040, 	0, 	4, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "cfg_cap_max_payload_size",	"Max payload size"},
+    {0x0040, 	8, 	3, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "cfg_prg_max_payload_size",	"Prog max payload size"},
+    {0x0040, 	16, 	3, 	0, 	0x00000000,	PCILIB_REGISTER_R   , PCILIB_REGISTER_BITS, PCILIB_REGISTER_BANK_DMA, "cfg_max_rd_req_size",		"Max read request size"},
+    {0x0050, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "desc_mem_din",  		"Descriptor memory"},
+    {0x0054, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "update_addr",  		"Address of progress register"},
+    {0x0058, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "last_descriptor_read",	"Last descriptor read by the host"},
+    {0x005C, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "desc_mem_addr", 		"Number of descriptors configured"},
+    {0x0060, 	0, 	32, 	0, 	0x00000000,	PCILIB_REGISTER_RW  , PCILIB_REGISTER_STANDARD, PCILIB_REGISTER_BANK_DMA, "update_thresh",  		"Update threshold of progress register"},
+    {0,		0,	0,	0,	0x00000000,	0,                                           0,                        0, NULL, 			NULL}
+};
+#endif /* _PCILIB_DMA_IPE_C */
+
+#endif /* _PCILIB_DMA_IPE_REGISTERS_H */

+ 1 - 1
dma/nwl.c

@@ -11,7 +11,7 @@
 #include "pcilib.h"
 #include "error.h"
 #include "tools.h"
-#include "nwl.h"
+#include "nwl_private.h"
 
 #include "nwl_defines.h"
 

+ 34 - 55
dma/nwl.h

@@ -1,67 +1,46 @@
-#ifndef _PCILIB_NWL_H
-#define _PCILIB_NWL_H
+#ifndef _PCILIB_DMA_NWL_H
+#define _PCILIB_DMA_NWL_H
 
-typedef struct nwl_dma_s nwl_dma_t;
-typedef struct pcilib_nwl_engine_description_s pcilib_nwl_engine_description_t;
+#include <stdio.h>
+#include "../pcilib.h"
 
-#define NWL_DMA_IRQ_SOURCE 0
+#define PCILIB_NWL_MODIFICATION_IPECAMERA 0x100
 
-#define NWL_XAUI_ENGINE 0
-#define NWL_XRAWDATA_ENGINE 1
-#define NWL_MAX_PACKET_SIZE 4096 //16384
-//#define NWL_GENERATE_DMA_IRQ
+pcilib_dma_context_t *dma_nwl_init(pcilib_t *ctx, pcilib_dma_modification_t type, void *arg);
+void  dma_nwl_free(pcilib_dma_context_t *vctx);
 
-#define PCILIB_NWL_ALIGNMENT 			64  // in bytes
-#define PCILIB_NWL_DMA_DESCRIPTOR_SIZE		64  // in bytes
-#define PCILIB_NWL_DMA_PAGES			256 // 1024
+int dma_nwl_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_engine_status_t *status, size_t n_buffers, pcilib_dma_buffer_status_t *buffers);
 
-//#define DEBUG_HARDWARE
-//#define DEBUG_NWL
+int dma_nwl_enable_irq(pcilib_dma_context_t *vctx, pcilib_irq_type_t type, pcilib_dma_flags_t flags);
+int dma_nwl_disable_irq(pcilib_dma_context_t *vctx, pcilib_dma_flags_t flags);
+int dma_nwl_acknowledge_irq(pcilib_dma_context_t *ctx, pcilib_irq_type_t irq_type, pcilib_irq_source_t irq_source);
 
-#include "nwl_dma.h"
-#include "nwl_irq.h"
-#include "nwl_register.h"
-#include "nwl_engine.h"
-#include "nwl_loopback.h"
+int dma_nwl_start(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
+int dma_nwl_stop(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
 
-#define nwl_read_register(var, ctx, base, reg) pcilib_datacpy(&var, base + reg, 4, 1, ctx->dma_bank->raw_endianess)
-#define nwl_write_register(var, ctx, base, reg) pcilib_datacpy(base + reg, &var, 4, 1, ctx->dma_bank->raw_endianess)
+int dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *data, size_t *written);
+int dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, pcilib_dma_callback_t cb, void *cbattr);
+double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
 
-struct pcilib_nwl_engine_description_s {
-    pcilib_dma_engine_description_t desc;
-    char *base_addr;
-    
-    size_t ring_size, page_size;
-    size_t head, tail;
-    pcilib_kmem_handle_t *ring;
-    pcilib_kmem_handle_t *pages;
-    
-    int started;			/**< indicates that DMA buffers are initialized and reading is allowed */
-    int writting;			/**< indicates that we are in middle of writting packet */
-    int reused;				/**< indicates that DMA was found intialized, buffers were reused, and no additional initialization is needed */
-    int preserve;			/**< indicates that DMA should not be stopped during clean-up */
-};
-
-
-struct nwl_dma_s {
-    struct pcilib_dma_context_s dmactx;
-    
-    pcilib_t *pcilib;
-    
-    pcilib_dma_modification_t type;
-    
-    pcilib_register_bank_description_t *dma_bank;
-    char *base_addr;
-
-    pcilib_irq_type_t irq_enabled;	/**< indicates that IRQs are enabled */
-    pcilib_irq_type_t irq_preserve;	/**< indicates that IRQs should not be disabled during clean-up */
-    int started;			/**< indicates that DMA subsystem is initialized and DMA engine can start */
-    int irq_started;			/**< indicates that IRQ subsystem is initialized (detecting which types should be preserverd) */    
-    int loopback_started;		/**< indicates that benchmarking subsystem is initialized */
 
-    pcilib_dma_engine_t n_engines;
-    pcilib_nwl_engine_description_t engines[PCILIB_MAX_DMA_ENGINES + 1];
+#ifdef _PCILIB_DMA_NWL_C
+pcilib_dma_api_description_t nwl_dma_api = {
+    "nwl_dma",
+    dma_nwl_init,
+    dma_nwl_free,
+    dma_nwl_get_status,
+    dma_nwl_enable_irq,
+    dma_nwl_disable_irq,
+    dma_nwl_acknowledge_irq,
+    dma_nwl_start,
+    dma_nwl_stop,
+    dma_nwl_write_fragment,
+    dma_nwl_stream_read,
+    dma_nwl_benchmark
 };
+#else
+extern pcilib_dma_api_description_t nwl_dma_api;
+#endif
 
 
-#endif /* _PCILIB_NWL_H */
+#endif /* _PCILIB_DMA_NWL_H */

+ 0 - 45
dma/nwl_dma.h

@@ -1,45 +0,0 @@
-#ifndef _PCILIB_DMA_NWL_H
-#define _PCILIB_DMA_NWL_H
-
-#include <stdio.h>
-#include "../pcilib.h"
-
-#define PCILIB_NWL_MODIFICATION_IPECAMERA 0x100
-
-pcilib_dma_context_t *dma_nwl_init(pcilib_t *ctx, pcilib_dma_modification_t type, void *arg);
-void  dma_nwl_free(pcilib_dma_context_t *vctx);
-
-int dma_nwl_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma_engine_status_t *status, size_t n_buffers, pcilib_dma_buffer_status_t *buffers);
-
-int dma_nwl_enable_irq(pcilib_dma_context_t *vctx, pcilib_irq_type_t type, pcilib_dma_flags_t flags);
-int dma_nwl_disable_irq(pcilib_dma_context_t *vctx, pcilib_dma_flags_t flags);
-int dma_nwl_acknowledge_irq(pcilib_dma_context_t *ctx, pcilib_irq_type_t irq_type, pcilib_irq_source_t irq_source);
-
-int dma_nwl_start(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
-int dma_nwl_stop(pcilib_dma_context_t *ctx, pcilib_dma_engine_t dma, pcilib_dma_flags_t flags);
-
-int dma_nwl_write_fragment(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *data, size_t *written);
-int dma_nwl_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, pcilib_dma_callback_t cb, void *cbattr);
-double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction);
-
-
-#ifdef _PCILIB_DMA_NWL_C
-pcilib_dma_api_description_t nwl_dma_api = {
-    dma_nwl_init,
-    dma_nwl_free,
-    dma_nwl_get_status,
-    dma_nwl_enable_irq,
-    dma_nwl_disable_irq,
-    dma_nwl_acknowledge_irq,
-    dma_nwl_start,
-    dma_nwl_stop,
-    dma_nwl_write_fragment,
-    dma_nwl_stream_read,
-    dma_nwl_benchmark
-};
-#else
-extern pcilib_dma_api_description_t nwl_dma_api;
-#endif
-
-
-#endif /* _PCILIB_DMA_NWL_H */

+ 1 - 1
dma/nwl_engine.c

@@ -10,7 +10,7 @@
 #include "pcilib.h"
 #include "error.h"
 #include "tools.h"
-#include "nwl.h"
+#include "nwl_private.h"
 
 #include "nwl_defines.h"
 

+ 3 - 3
dma/nwl_engine_buffers.h

@@ -94,10 +94,10 @@ static int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_des
     pcilib_kmem_handle_t *ring = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_RING, sub_use), flags);
     pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, type, PCILIB_NWL_DMA_PAGES, 0, 0, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_PAGES, sub_use), flags);
 
-    if (err) {
+    if (!ring||!pages) {
 	if (pages) pcilib_free_kernel_memory(ctx->pcilib, pages, 0);
-	if (ring) pcilib_free_kernel_memory(ctx->pcilib, ring, 0);    
-	return err;
+	if (ring) pcilib_free_kernel_memory(ctx->pcilib, ring, 0);
+	return PCILIB_ERROR_MEMORY;
     }
 
     reuse_ring = pcilib_kmem_is_reused(ctx->pcilib, ring);

+ 1 - 1
dma/nwl_irq.c

@@ -10,7 +10,7 @@
 #include "error.h"
 #include "tools.h"
 
-#include "nwl.h"
+#include "nwl_private.h"
 #include "nwl_defines.h"
 
 int dma_nwl_init_irq(nwl_dma_t *ctx, uint32_t val) {

+ 1 - 1
dma/nwl_loopback.c

@@ -9,7 +9,7 @@
 #include "pcilib.h"
 #include "error.h"
 #include "tools.h"
-#include "nwl.h"
+#include "nwl_private.h"
 
 #include "nwl_defines.h"
 

+ 67 - 0
dma/nwl_private.h

@@ -0,0 +1,67 @@
+#ifndef _PCILIB_DMA_NWL_PRIVATE_H
+#define _PCILIB_DMA_NWL_PRIVATE_H
+
+typedef struct nwl_dma_s nwl_dma_t;
+typedef struct pcilib_nwl_engine_description_s pcilib_nwl_engine_description_t;
+
+#define NWL_DMA_IRQ_SOURCE 0
+
+#define NWL_XAUI_ENGINE 0
+#define NWL_XRAWDATA_ENGINE 1
+#define NWL_MAX_PACKET_SIZE 4096 //16384
+//#define NWL_GENERATE_DMA_IRQ
+
+#define PCILIB_NWL_ALIGNMENT 			64  // in bytes
+#define PCILIB_NWL_DMA_DESCRIPTOR_SIZE		64  // in bytes
+#define PCILIB_NWL_DMA_PAGES			256 // 1024
+
+//#define DEBUG_HARDWARE
+//#define DEBUG_NWL
+
+#include "nwl.h"
+#include "nwl_irq.h"
+#include "nwl_register.h"
+#include "nwl_engine.h"
+#include "nwl_loopback.h"
+/* Register accessors: pcilib_datacpy() between var and base + reg (presumably one 4-byte word) in the DMA bank's raw_endianess. */
+#define nwl_read_register(var, ctx, base, reg) pcilib_datacpy(&(var), (base) + (reg), 4, 1, (ctx)->dma_bank->raw_endianess)
+#define nwl_write_register(var, ctx, base, reg) pcilib_datacpy((base) + (reg), &(var), 4, 1, (ctx)->dma_bank->raw_endianess)
+
+struct pcilib_nwl_engine_description_s {	/* state kept for a single NWL DMA engine */
+    pcilib_dma_engine_description_t desc;
+    char *base_addr;
+    
+    size_t ring_size, page_size;
+    size_t head, tail;
+    pcilib_kmem_handle_t *ring;
+    pcilib_kmem_handle_t *pages;
+    
+    int started;			/**< indicates that DMA buffers are initialized and reading is allowed */
+    int writting;			/**< indicates that we are in the middle of writing a packet */
+    int reused;				/**< indicates that DMA was found initialized, buffers were reused, and no additional initialization is needed */
+    int preserve;			/**< indicates that DMA should not be stopped during clean-up */
+};
+
+
+struct nwl_dma_s {			/* context of the NWL DMA implementation, including the state of all its engines */
+    struct pcilib_dma_context_s dmactx;
+    
+    pcilib_t *pcilib;
+    
+    pcilib_dma_modification_t type;
+    
+    pcilib_register_bank_description_t *dma_bank;	/**< its raw_endianess is used by nwl_read_register()/nwl_write_register() */
+    char *base_addr;
+
+    pcilib_irq_type_t irq_enabled;	/**< indicates that IRQs are enabled */
+    pcilib_irq_type_t irq_preserve;	/**< indicates that IRQs should not be disabled during clean-up */
+    int started;			/**< indicates that DMA subsystem is initialized and DMA engine can start */
+    int irq_started;			/**< indicates that IRQ subsystem is initialized (detecting which types should be preserved) */    
+    int loopback_started;		/**< indicates that benchmarking subsystem is initialized */
+
+    pcilib_dma_engine_t n_engines;
+    pcilib_nwl_engine_description_t engines[PCILIB_MAX_DMA_ENGINES + 1];
+};
+
+
+#endif /* _PCILIB_DMA_NWL_PRIVATE_H */

+ 1 - 1
dma/nwl_register.c

@@ -12,7 +12,7 @@
 #include "error.h"
 #include "tools.h"
 
-#include "nwl.h"
+#include "nwl_private.h"
 #include "nwl_register.h"
 
 int nwl_add_registers(nwl_dma_t *ctx) {

+ 2 - 0
driver/base.c

@@ -307,6 +307,8 @@ static int __devinit pcidriver_probe(struct pci_dev *pdev, const struct pci_devi
 		mod_info("Found ML605 board at %s\n", dev_name(&pdev->dev));
 	    } else if (id->device == PCIE_IPECAMERA_DEVICE_ID) {
 		mod_info("Found IPE Camera at %s\n", dev_name(&pdev->dev));
+	    } else if (id->device == PCIE_KAPTURE_DEVICE_ID) {
+		mod_info("Found KAPTURE board at %s\n", dev_name(&pdev->dev));
 	    } else {
 		mod_info("Found unknown Xilinx device (%x) at %s\n", id->device, dev_name(&pdev->dev));
 	    }

+ 1 - 0
driver/base.h

@@ -47,6 +47,7 @@ static void pcidriver_exit(void);
 static const __devinitdata struct pci_device_id pcidriver_ids[] = {
 	{ PCI_DEVICE( PCIE_XILINX_VENDOR_ID, PCIE_ML605_DEVICE_ID ) },          // PCI-E Xilinx ML605
 	{ PCI_DEVICE( PCIE_XILINX_VENDOR_ID, PCIE_IPECAMERA_DEVICE_ID ) },      // PCI-E IPE Camera
+	{ PCI_DEVICE( PCIE_XILINX_VENDOR_ID, PCIE_KAPTURE_DEVICE_ID ) },        // PCI-E KAPTURE board for HEB
 	{0,0,0,0},
 };
 

+ 1 - 0
driver/pciDriver.h

@@ -65,6 +65,7 @@
 
 /* Identifies the PCI-E IPE Camera */
 #define PCIE_IPECAMERA_DEVICE_ID 0x6081
+#define PCIE_KAPTURE_DEVICE_ID 0x6028
 //#define PCIE_IPECAMERA_DEVICE_ID 0x6018
 
 

+ 4 - 2
event.h

@@ -17,9 +17,13 @@
  */
 
 struct pcilib_event_api_description_s {
+    const char *title;
+    
     pcilib_context_t *(*init)(pcilib_t *ctx);
     void (*free)(pcilib_context_t *ctx);
 
+    pcilib_dma_context_t *(*init_dma)(pcilib_context_t *ctx);
+
     int (*reset)(pcilib_context_t *ctx);
 
     int (*start)(pcilib_context_t *ctx, pcilib_event_t event_mask, pcilib_event_flags_t flags);
@@ -31,8 +35,6 @@ struct pcilib_event_api_description_s {
 
     int (*get_data)(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, size_t arg_size, void *arg, size_t *size, void **data);
     int (*return_data)(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, void *data);
-    
-    pcilib_dma_context_t *(*init_dma)(pcilib_context_t *ctx);
 };
 
 

+ 0 - 4
ipecamera/ipecamera.c

@@ -23,12 +23,8 @@
 #include "events.h"
 #include "data.h"
 
-#include "dma/nwl_dma.h"
 
-#ifdef IPECAMERA_DEBUG
 #include "dma/nwl.h"
-#endif /* IPECAMERA_DEBUG */
-
 
 #define FIND_REG(var, bank, name)  \
         ctx->var = pcilib_find_register(pcilib, bank, name); \

+ 5 - 2
ipecamera/model.h

@@ -152,9 +152,13 @@ extern pcilib_event_data_type_description_t ipecamera_data_types[];
 
 #ifdef _IPECAMERA_IMAGE_C
 pcilib_event_api_description_t ipecamera_image_api = {
+    "ipecamera",
+    
     ipecamera_init,
     ipecamera_free,
 
+    ipecamera_init_dma,
+
     ipecamera_reset,
     ipecamera_start,
     ipecamera_stop,
@@ -163,8 +167,7 @@ pcilib_event_api_description_t ipecamera_image_api = {
     ipecamera_stream,
     ipecamera_next_event,
     ipecamera_get,
-    ipecamera_return,
-    ipecamera_init_dma
+    ipecamera_return
 };
 #else
 extern pcilib_event_api_description_t ipecamera_image_api;

+ 9 - 0
kapture/CMakeLists.txt

@@ -0,0 +1,9 @@
+include_directories(
+    ${CMAKE_SOURCE_DIR}			# project root, so kapture.c can #include "pcilib.h"
+    ${UFODECODE_INCLUDE_DIRS}
+)
+# NOTE(review): set() without PARENT_SCOPE only changes HEADERS within this directory's scope.
+set(HEADERS ${HEADERS} model.h)
+# Static library with the KAPTURE event model; the top-level CMakeLists.txt links it into pcilib.
+add_library(kapture STATIC kapture.c)
+

+ 62 - 0
kapture/kapture.c

@@ -0,0 +1,62 @@
+#define _KAPTURE_C
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <assert.h>
+
+#include "../tools.h"
+#include "../error.h"
+#include "../event.h"
+
+#include "pcilib.h"
+#include "model.h"
+#include "kapture.h"
+#include "private.h"
+
+
+/**
+ * Allocates a zero-initialized KAPTURE event context (vctx is not used yet).
+ * Returns NULL if the allocation fails.
+ */
+pcilib_context_t *kapture_init(pcilib_t *vctx) {
+    kapture_t *ctx = calloc(1, sizeof(kapture_t));
+
+    return (pcilib_context_t*)ctx;
+}
+
+/* Stops the event source and releases the context; a NULL context is ignored. */
+void kapture_free(pcilib_context_t *vctx) {
+    kapture_t *kapture = (kapture_t*)vctx;
+    if (!kapture) return;
+    kapture_stop(vctx, PCILIB_EVENT_FLAGS_DEFAULT);
+    free(kapture);
+}
+
+/* Dummy driver: there is no state to reset yet, so report success instead of falling off the end. */
+int kapture_reset(pcilib_context_t *ctx) { return 0; }
+
+/* Dummy driver: event capture is not implemented; starting is a no-op that reports success. */
+int kapture_start(pcilib_context_t *ctx, pcilib_event_t event_mask, pcilib_event_flags_t flags) { return 0; }
+
+/* Dummy driver: nothing is ever running, so stopping always succeeds (called from kapture_free()). */
+int kapture_stop(pcilib_context_t *ctx, pcilib_event_flags_t flags) { return 0; }
+
+/* Dummy driver: software triggers are not implemented; return a defined error code. */
+int kapture_trigger(pcilib_context_t *ctx, pcilib_event_t event, size_t trigger_size, void *trigger_data) { return PCILIB_ERROR_NOTSUPPORTED; }
+
+/* Dummy driver: event streaming is not implemented; the callback is never invoked. */
+int kapture_stream(pcilib_context_t *ctx, pcilib_event_callback_t callback, void *user) { return PCILIB_ERROR_NOTSUPPORTED; }
+
+/* Dummy driver: no events are produced; fail explicitly so callers never read the unset *evid / *info. */
+int kapture_next_event(pcilib_context_t *ctx, pcilib_timeout_t timeout, pcilib_event_id_t *evid, size_t info_size, pcilib_event_info_t *info) { return PCILIB_ERROR_NOTSUPPORTED; }
+
+/* Dummy driver: no event data is available; fail explicitly so callers never read the unset *size / *data. */
+int kapture_get(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, size_t arg_size, void *arg, size_t *size, void **data) { return PCILIB_ERROR_NOTSUPPORTED; }
+
+/* Dummy driver: kapture_get() never hands out data, so there is nothing to give back. */
+int kapture_return(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, void *data) { return PCILIB_ERROR_NOTSUPPORTED; }
+
+

+ 6 - 0
kapture/kapture.h

@@ -0,0 +1,6 @@
+#ifndef _KAPTURE_H
+#define _KAPTURE_H
+
+typedef struct kapture_s kapture_t;
+
+#endif /* _KAPTURE_H */

+ 81 - 0
kapture/model.h

@@ -0,0 +1,81 @@
+#ifndef _KAPTURE_MODEL_H
+#define _KAPTURE_MODEL_H
+
+#include <stdio.h>
+
+#include "../pcilib.h"
+
+
+#define KAPTURE_REGISTER_SPACE 0x9000
+
+#ifdef _KAPTURE_C
+pcilib_register_bank_description_t kapture_register_banks[] = {	/* only the DMA bank is active; the "fpga" bank below is still commented out */
+//    { PCILIB_REGISTER_BANK0,    PCILIB_BAR0, 0x0200, PCILIB_DEFAULT_PROTOCOL    , KAPTURE_REGISTER_SPACE, KAPTURE_REGISTER_SPACE, PCILIB_LITTLE_ENDIAN, 32, PCILIB_LITTLE_ENDIAN, "0x%lx", "fpga", "KAPTURE Registers" },
+    { PCILIB_REGISTER_BANK_DMA, PCILIB_BAR0, 0x0200, PCILIB_DEFAULT_PROTOCOL    , 0,                        0,                    PCILIB_LITTLE_ENDIAN, 32, PCILIB_LITTLE_ENDIAN, "0x%lx", "dma", "DMA Registers"},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL }	/* all-zero entry closing the list */
+};
+
+pcilib_register_description_t kapture_registers[] = {	/* no model-specific registers yet; register.c walks such tables until an entry with bits == 0 */
+{0,	0,	0,	0,	0,                        0,                  0,                        0,                     NULL, NULL}
+};
+
+pcilib_register_range_t kapture_register_ranges[] = {	/* no register ranges yet */
+    {0, 0, 0, 0}	/* all-zero entry closing the list */
+};
+
+pcilib_event_description_t kapture_events[] = {
+    {PCILIB_EVENT0, "event", ""},	/* the only event declared for this model */
+    {0, NULL, NULL}	/* all-zero entry closing the list */
+};
+
+pcilib_event_data_type_description_t kapture_data_types[] = {
+    {PCILIB_EVENT_RAW_DATA, PCILIB_EVENT0, "raw", "raw data from kapture" },	/* the only data type, tied to PCILIB_EVENT0 */
+    {0, 0, NULL, NULL}	/* all-zero entry closing the list */
+};
+
+#else
+extern pcilib_register_description_t kapture_registers[];
+extern pcilib_register_bank_description_t kapture_register_banks[];
+extern pcilib_register_range_t kapture_register_ranges[];
+extern pcilib_event_description_t kapture_events[];
+extern pcilib_event_data_type_description_t kapture_data_types[];
+#endif 
+
+
+pcilib_context_t *kapture_init(pcilib_t *pcilib);
+void kapture_free(pcilib_context_t *ctx);
+
+int kapture_reset(pcilib_context_t *ctx);
+int kapture_start(pcilib_context_t *ctx, pcilib_event_t event_mask, pcilib_event_flags_t flags);
+int kapture_stop(pcilib_context_t *ctx, pcilib_event_flags_t flags);
+int kapture_trigger(pcilib_context_t *ctx, pcilib_event_t event, size_t trigger_size, void *trigger_data);
+int kapture_stream(pcilib_context_t *vctx, pcilib_event_callback_t callback, void *user);
+int kapture_next_event(pcilib_context_t *vctx, pcilib_timeout_t timeout, pcilib_event_id_t *evid, size_t info_size, pcilib_event_info_t *info);
+int kapture_get(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, size_t arg_size, void *arg, size_t *size, void **buf);
+int kapture_return(pcilib_context_t *ctx, pcilib_event_id_t event_id, pcilib_event_data_type_t data_type, void *data);
+
+#ifdef _KAPTURE_C
+pcilib_event_api_description_t kapture_api = {	/* positional initializer: entries follow the member order of pcilib_event_api_description_s (event.h) */
+    "kapture",			/* title */
+    
+    kapture_init,		/* init */
+    kapture_free,		/* free */
+
+    NULL,			/* init_dma: no hook here; NOTE(review): presumably the model's DMA API from pci.h is used instead - confirm */
+
+    kapture_reset,		/* reset */
+    kapture_start,		/* start */
+    kapture_stop,
+    kapture_trigger,
+    
+    kapture_stream,
+    kapture_next_event,
+    kapture_get,		/* get_data */
+    kapture_return		/* return_data */
+};
+#else
+extern pcilib_event_api_description_t kapture_api;
+#endif
+
+
+#endif /* _KAPTURE_MODEL_H */

+ 10 - 0
kapture/private.h

@@ -0,0 +1,10 @@
+#ifndef _KAPTURE_PRIVATE_H
+#define _KAPTURE_PRIVATE_H
+
+struct kapture_s {
+    pcilib_context_t event;	/* first member, so the pcilib_context_t pointer handed to kapture_free() can be cast back to kapture_t */
+
+};
+
+
+#endif /* _KAPTURE_PRIVATE_H */

+ 3 - 0
pci.c

@@ -22,6 +22,7 @@
 #include "error.h"
 
 #include "ipecamera/model.h"
+#include "kapture/model.h"
 
 
 pcilib_t *pcilib_open(const char *device, pcilib_model_t model) {
@@ -88,6 +89,8 @@ pcilib_model_t pcilib_get_model(pcilib_t *ctx) {
 
 	if ((board_info->vendor_id == PCIE_XILINX_VENDOR_ID)&&(board_info->device_id == PCIE_IPECAMERA_DEVICE_ID))
 	    ctx->model = PCILIB_MODEL_IPECAMERA;
+	else if ((board_info->vendor_id == PCIE_XILINX_VENDOR_ID)&&(board_info->device_id == PCIE_KAPTURE_DEVICE_ID))
+	    ctx->model = PCILIB_MODEL_KAPTURE;
 	else
 	    ctx->model = PCILIB_MODEL_PCI;
     }

+ 5 - 2
pci.h

@@ -52,13 +52,16 @@ struct pcilib_s {
 
 #ifdef _PCILIB_PCI_C
 # include "ipecamera/model.h"
+# include "kapture/model.h"
 # include "dma/nwl.h"
+# include "dma/ipe.h"
 # include "default.h"
 
-pcilib_model_description_t pcilib_model[3] = {
+pcilib_model_description_t pcilib_model[4] = {
     { 4, PCILIB_HOST_ENDIAN, 	NULL, NULL, NULL, NULL, NULL, NULL },
     { 4, PCILIB_HOST_ENDIAN, 	NULL, NULL, NULL, NULL, NULL, NULL },
-    { 4, PCILIB_LITTLE_ENDIAN,	ipecamera_registers, ipecamera_register_banks, ipecamera_register_ranges, ipecamera_events, ipecamera_data_types, &nwl_dma_api, &ipecamera_image_api }
+    { 4, PCILIB_LITTLE_ENDIAN,	ipecamera_registers, ipecamera_register_banks, ipecamera_register_ranges, ipecamera_events, ipecamera_data_types, &nwl_dma_api, &ipecamera_image_api },
+    { 4, PCILIB_LITTLE_ENDIAN,	kapture_registers, kapture_register_banks, kapture_register_ranges, kapture_events, kapture_data_types, &ipe_dma_api, &kapture_api },
 };
 
 pcilib_protocol_description_t pcilib_protocol[3] = {

+ 2 - 1
pcilib.h

@@ -43,7 +43,8 @@ typedef enum {
 typedef enum {
     PCILIB_MODEL_DETECT,
     PCILIB_MODEL_PCI,
-    PCILIB_MODEL_IPECAMERA
+    PCILIB_MODEL_IPECAMERA,
+    PCILIB_MODEL_KAPTURE
 } pcilib_model_t;
 
 typedef enum {

+ 0 - 1
register.c

@@ -25,7 +25,6 @@ int pcilib_add_registers(pcilib_t *ctx, size_t n, pcilib_register_description_t
 	for (n = 0; registers[n].bits; n++);
     }
 
-
     if (ctx->model_info.registers == pcilib_model[ctx->model].registers) {
         for (n_present = 0; ctx->model_info.registers[n_present].bits; n_present++);
 	for (size = 1024; size < 2 * (n + n_present + 1); size<<=1);

+ 31 - 0
tests/dma/ipe/bench.sh

@@ -0,0 +1,31 @@
+#!/bin/bash
+# IPEDMA read benchmark: 100 reads of $size bytes from dma0 (-o bench.out), then apps/check_counter on the result.
+size=65536
+
+# Runs the locally built pci tool (three directories above the current one) against the kapture model.
+function pci {
+    PCILIB_PATH="$(pwd)/../../.."
+    LD_LIBRARY_PATH="$PCILIB_PATH" "$PCILIB_PATH/pci" -m kapture "$@"
+}
+
+rm -f bench.out
+
+pci --stop-dma dma0r
+pci --start-dma dma0r
+
+# Clean DMA buffers
+#while [ $? -eq 0 ]; do
+#    pci -r dma0 -s 65536 &> /dev/null
+#done
+
+for i in $(seq 1 100); do
+    pci -r dma0 --multipacket -s "$size" -o bench.out || {
+	err=$?			# keep the read failure; the clean-up below would overwrite $?
+	pci --stop-dma dma0r
+	exit $err		# a bare exit would return the status of the stop command instead
+    }
+done
+
+pci --stop-dma dma0r
+
+../../../apps/check_counter bench.out