Prechádzať zdrojové kódy

Implement DMA access synchronization for NWL implementation

Suren A. Chilingaryan 13 rokov pred
rodič
commit
112030c40f
11 zmenil súbory, kde vykonal 311 pridanie a 137 odobranie
  1. 38 9
      NOTES
  2. 3 1
      ToDo
  3. 4 4
      cli.c
  4. 2 2
      dma/nwl.c
  5. 1 0
      dma/nwl.h
  6. 133 24
      dma/nwl_buffers.h
  7. 66 59
      dma/nwl_engine.c
  8. 2 2
      dma/nwl_irq.c
  9. 55 31
      kmem.c
  10. 6 3
      kmem.h
  11. 1 2
      tools.h

+ 38 - 9
NOTES

@@ -85,10 +85,13 @@ DMA Access Synchronization
      HW flag anyway)
      
     On allocation error at some of the buffer, call clean routine and
-     * Preserve HW reference if buffers held HW reference before
-     * Preserve PERSISTENT mode if buffers were in PERSISTENT mode before
-     * Remove REUSE flag, we want to clean if it is allowed by current buffer
-     status
+     * Preserve PERSISTENT mode and HW reference if buffers held them before
+     unsuccessful kmem initialization. Until the last failed block, the blocks
+     of kmem should be consistent. The HW/PERSISTENT flags should be removed
+     if all reused blocks were in HW/PERSISTENT mode. The last block needs
+     special treatment. The flags may be removed for the block if it was in
+     HW/PERSISTENT state (and the others were not).
+     * Remove REUSE flag, we want to clean if allowed by current buffer status
      * EXCLUSIVE flag is not important for kmem_free routine.
     
  - At DMA level
@@ -99,7 +102,7 @@ DMA Access Synchronization
     * Ring start/stop pointers
     
     To prevent multiple processes accessing DMA engine in parallel, the first
-    action is buffer initialization 
+    action is buffer initialization which will fail if buffers are already in use
 	* Always with REUSE, EXCLUSIVE, and HW flags 
 	* Optionally with PERSISTENT flag (if DMA_PERSISTENT flag is set)
     If another DMA app is running, the buffer allocation will fail (no dma_stop 
@@ -119,8 +122,8 @@ DMA Access Synchronization
     If PRESERVE flag is on, the engine will not be stopped at the end of
     execution (and buffers will stay because of HW flag).
     
-    If buffers are reused and in PERSISTENT mode, DMA engine was on before 
-    dma_start (we not basing on PRESERVE flag, because it can be enforced), 
+    If buffers are reused and are already in PERSISTENT mode, DMA engine was on 
+    before dma_start (PRESERVE flag is ignored, because it can be enforced), 
     ring pointers are calculated from LAST_BD and states of ring elements.
     If previous application crashed (i.e. buffers may be corrupted). Two
     cases are possible:
@@ -138,9 +141,35 @@ Register Access Synchronization
  We need to serialize access to the registers by the different running 
  applications and handle case when registers are accessed indirectly by
 writing PCI BARs (DMA implementations, for instance).
- * An option would be to serialize at least access to CMOSIS registers
- which are always accessed over register functions.
 
+ - Module-assisted locking:
+ * During initialization the locking context is created (which is basically
+ a kmem_handle of type LOCK_PAGE).
+ * This locking context is passed to the kernel module along with the lock type
+ (LOCK_BANK) and lock item (BANK ADDRESS). If the lock context already owns a
+ lock on the specified bank, just the reference number is increased; otherwise
+ we try to obtain a new lock.
+ * The kernel module just iterates over all registered lock pages and checks if
+ any holds the specified lock. If not, the lock is obtained and registered
+ in our lock page.
+ * This allows sharing access between multiple threads of a single application
+ (by using the same lock page) or protecting between threads (each thread
+ using its own lock page)
+ * Either on application cleanup or if the application crashed, the memory mapping
+ of the lock page is removed and, hence, the locks are freed.
+ 
+ - Multiple ways of accessing registers
+ Because of reference counting, we can successfully obtain locks multiple 
+ times if necessary. The following locks are protecting register access:
+  a) Global register_read/write locks the bank before executing the implementation
+  b) DMA bank is locked by global DMA functions. So we can access the 
+  registers using plain PCI bar read/write.
+  c) Sequence of register operations can be protected with pcilib_lock_bank
+  function
+ Reading raw register space or PCI bank is not locked.
+  * Ok. We can detect banks which will be affected by PCI read/write and 
+  lock them. But shall we do it?
+ 
 Register/DMA Configuration
 ==========================
  - XML description of registers

+ 3 - 1
ToDo

@@ -1,7 +1,8 @@
 High Priority (we would need it for IPE Camera)
 =============
  1. Serialize access to the registers across applications
- 2. CMake build system
+ 2. Protect kmem_entries in the driver using spinlock
+ 3. CMake build system
  
 Normal Priority (it would make just a few things a bit easier)
 ===============
@@ -12,3 +13,4 @@ Normal Priority (it would make just few things a bit easier)
 Low Priority (only as generalization for other projects)
 ============
  1. XML configurations describing registers (and DMA engines?)
+ 2. Access register/bank lookups using hash tables

+ 4 - 4
cli.c

@@ -904,10 +904,10 @@ int StartStopDMA(pcilib_t *handle,  pcilib_model_description_t *model_info, pcil
 	}
 	
 	if (start) {
-	    err = pcilib_start_dma(handle, dmaid, PCILIB_DMA_FLAG_PERMANENT);
+	    err = pcilib_start_dma(handle, dmaid, PCILIB_DMA_FLAG_PERSISTENT);
     	    if (err) Error("Error starting DMA engine (C2S %lu)", dma);
 	} else {
-	    err = pcilib_stop_dma(handle, dmaid, PCILIB_DMA_FLAG_PERMANENT);
+	    err = pcilib_stop_dma(handle, dmaid, PCILIB_DMA_FLAG_PERSISTENT);
     	    if (err) Error("Error stopping DMA engine (C2S %lu)", dma);
 	}
     }
@@ -922,10 +922,10 @@ int StartStopDMA(pcilib_t *handle,  pcilib_model_description_t *model_info, pcil
 	}
 	
 	if (start) {
-	    err = pcilib_start_dma(handle, dmaid, PCILIB_DMA_FLAG_PERMANENT);
+	    err = pcilib_start_dma(handle, dmaid, PCILIB_DMA_FLAG_PERSISTENT);
     	    if (err) Error("Error starting DMA engine (S2C %lu)", dma);
 	} else {
-	    err = pcilib_stop_dma(handle, dmaid, PCILIB_DMA_FLAG_PERMANENT);
+	    err = pcilib_stop_dma(handle, dmaid, PCILIB_DMA_FLAG_PERSISTENT);
     	    if (err) Error("Error stopping DMA engine (S2C %lu)", dma);
 	}
     }

+ 2 - 2
dma/nwl.c

@@ -41,7 +41,7 @@ int dma_nwl_stop(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma
 	// stop everything
     if (dma == PCILIB_DMA_ENGINE_INVALID) {
         for (dma = 0; dma < ctx->n_engines; dma++) {
-	    if (flags&PCILIB_DMA_FLAG_PERMANENT) {
+	    if (flags&PCILIB_DMA_FLAG_PERSISTENT) {
 		ctx->engines[dma].preserve = 0;
 	    }
 	
@@ -62,7 +62,7 @@ int dma_nwl_stop(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma
     if (dma > ctx->n_engines) return PCILIB_ERROR_INVALID_BANK;
     
 	    // ignorign previous setting if flag specified
-    if (flags&PCILIB_DMA_FLAG_PERMANENT) {
+    if (flags&PCILIB_DMA_FLAG_PERSISTENT) {
 	ctx->engines[dma].preserve = 0;
     }
     

+ 1 - 0
dma/nwl.h

@@ -38,6 +38,7 @@ struct pcilib_nwl_engine_description_s {
     
     int started;			/**< indicates that DMA buffers are initialized and reading is allowed */
     int writting;			/**< indicates that we are in middle of writting packet */
+    int reused;				/**< indicates that DMA was found intialized, buffers were reused, and no additional initialization is needed */
     int preserve;			/**< indicates that DMA should not be stopped during clean-up */
 };
 

+ 133 - 24
dma/nwl_buffers.h

@@ -13,15 +13,101 @@ int dma_nwl_sync_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info,
     return 0;
 }
 
-int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
+static int dma_nwl_compute_read_s2c_pointers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, unsigned char *ring, uint32_t ring_pa) {
+    size_t pos;
+    uint32_t val;
+
+    char *base = info->base_addr;
+    
+    nwl_read_register(val, ctx, base, REG_SW_NEXT_BD);
+    if ((val < ring_pa)||((val - ring_pa) % PCILIB_NWL_DMA_DESCRIPTOR_SIZE)) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_SW_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+
+    info->head = (val - ring_pa) / PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    if (info->head >= PCILIB_NWL_DMA_PAGES) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_SW_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+
+    nwl_read_register(val, ctx, base, REG_DMA_ENG_NEXT_BD);
+    if ((val < ring_pa)||((val - ring_pa) % PCILIB_NWL_DMA_DESCRIPTOR_SIZE)) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_DMA_ENG_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+
+    info->tail = (val - ring_pa) / PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    if (info->tail >= PCILIB_NWL_DMA_PAGES) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_DMA_ENG_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+    
+    return 0;
+}
+
+static int dma_nwl_compute_read_c2s_pointers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info, unsigned char *ring, uint32_t ring_pa) {
+    size_t pos;
+    uint32_t val;
+    size_t prev;
+
+    char *base = info->base_addr;
+
+    
+    nwl_read_register(val, ctx, base, REG_SW_NEXT_BD);
+    if ((val < ring_pa)||((val - ring_pa) % PCILIB_NWL_DMA_DESCRIPTOR_SIZE)) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_SW_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+
+    info->head = (val - ring_pa) / PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    if (info->head >= PCILIB_NWL_DMA_PAGES) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_SW_NEXT_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+    
+    info->tail = info->head;
+    
+    
+	// Last read BD
+    nwl_read_register(val, ctx, base, REG_DMA_ENG_LAST_BD);
+    if ((val < ring_pa)||((val - ring_pa) % PCILIB_NWL_DMA_DESCRIPTOR_SIZE)) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_DMA_ENG_LAST_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+
+    prev = (val - ring_pa) / PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    if (prev >= PCILIB_NWL_DMA_PAGES) {
+	pcilib_warning("Inconsistent DMA Ring buffer is found (REG_DMA_ENG_LAST_BD register out of range)");
+	return PCILIB_ERROR_INVALID_STATE;
+    }
+    
+prev_buffer:
+    val = NWL_RING_GET(ring + prev * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, DMA_BD_BUFL_STATUS_OFFSET)&DMA_BD_STATUS_MASK;
+
+    if (val & (DMA_BD_ERROR_MASK|DMA_BD_COMP_MASK)) {
+	info->tail = prev;
+
+        if (prev > 0) prev -= 1;
+	else prev = PCILIB_NWL_DMA_PAGES - 1;
+	
+	if (prev != info->head)	goto prev_buffer;
+    }
+
+    return 0;
+}
+
+
+static int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_description_t *info) {
     int err = 0;
 
     int i;
+    int preserve = 0;
     uint16_t sub_use;
     uint32_t val;
     uint32_t buf_sz;
     uint64_t buf_pa;
-    pcilib_kmem_reuse_t reuse_ring, reuse_pages;
+    pcilib_kmem_reuse_state_t reuse_ring, reuse_pages;
     pcilib_kmem_flags_t flags;
 
     char *base = info->base_addr;
@@ -30,7 +116,7 @@ int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_descriptio
 
 	// Or bidirectional specified by 0x0|addr, or read 0x0|addr and write 0x80|addr
     sub_use = info->desc.addr|(info->desc.direction == PCILIB_DMA_TO_DEVICE)?0x80:0x00;
-    flags = PCILIB_KMEM_FLAG_REUSE|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_HARDWARE|info->preserve?PCILIB_KMEM_FLAG_PRESERVE:0;
+    flags = PCILIB_KMEM_FLAG_REUSE|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_HARDWARE|info->preserve?PCILIB_KMEM_FLAG_PERSISTENT:0;
     pcilib_kmem_handle_t *ring = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_CONSISTENT, 1, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE, PCILIB_NWL_ALIGNMENT, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_RING, sub_use), flags);
     pcilib_kmem_handle_t *pages = pcilib_alloc_kernel_memory(ctx->pcilib, PCILIB_KMEM_TYPE_PAGE, PCILIB_NWL_DMA_PAGES, 0, 0, PCILIB_KMEM_USE(PCILIB_KMEM_USE_DMA_PAGES, sub_use), flags);
 
@@ -43,44 +129,67 @@ int dma_nwl_allocate_engine_buffers(nwl_dma_t *ctx, pcilib_nwl_engine_descriptio
 	return err;
     }
 
-
-/*    
     reuse_ring = pcilib_kmem_is_reused(ctx->pcilib, ring);
     reuse_pages = pcilib_kmem_is_reused(ctx->pcilib, pages);
-    if ((reuse_ring == PCILIB_KMEM_REUSE_REUSED)&&(reuse_pages == PCILIB_KMEM_REUSE_REUSED)) info->preserve = 1;
-    else if (reuse_ring||reuse_pages) pcilib_warning("Inconsistent buffers in the kernel module are detected");
-*/
+
+    if (!info->preserve) {
+	if (reuse_ring == reuse_pages) {
+	    if (reuse_ring & PCILIB_KMEM_REUSE_PARTIAL) pcilib_warning("Inconsistent DMA buffers are found (only part of required buffers is available), reinitializing...");
+	    else if (reuse_ring & PCILIB_KMEM_REUSE_REUSED) {
+		if (reuse_ring & PCILIB_KMEM_REUSE_PERSISTENT == 0) pcilib_warning("Lost DMA buffers are found (non-persistent mode), reinitializing...");
+		else if (reuse_ring & PCILIB_KMEM_REUSE_HARDWARE == 0) pcilib_warning("Lost DMA buffers are found (missing HW reference), reinitializing...");
+		else {
+		    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+		    if (val&DMA_ENG_RUNNING == 0) pcilib_warning("Lost DMA buffers are found (DMA engine is stopped), reinitializing...");
+		    else preserve = 1;
+		}
+	    } 	
+	} else pcilib_warning("Inconsistent DMA buffers (modes of ring and page buffers does not match), reinitializing....");
+    }
+
     
     unsigned char *data = (unsigned char*)pcilib_kmem_get_ua(ctx->pcilib, ring);
     uint32_t ring_pa = pcilib_kmem_get_pa(ctx->pcilib, ring);
-    
-    memset(data, 0, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
 
-    for (i = 0; i < PCILIB_NWL_DMA_PAGES; i++, data += PCILIB_NWL_DMA_DESCRIPTOR_SIZE) {
-	buf_pa = pcilib_kmem_get_block_pa(ctx->pcilib, pages, i);
-	buf_sz = pcilib_kmem_get_block_size(ctx->pcilib, pages, i);
+    if (preserve) {
+	if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) err = dma_nwl_compute_read_c2s_pointers(ctx, info, data, ring_pa);
+	else err = dma_nwl_compute_read_s2c_pointers(ctx, info, data, ring_pa);
 
-	NWL_RING_SET(data, DMA_BD_NDESC_OFFSET, ring_pa + ((i + 1) % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
-	NWL_RING_SET(data, DMA_BD_BUFAL_OFFSET, buf_pa&0xFFFFFFFF);
-	NWL_RING_SET(data, DMA_BD_BUFAH_OFFSET, buf_pa>>32);
+	if (err) preserve = 0;
+    }
+    
+    if (preserve)
+	info->reused = 1;
+    else {
+	memset(data, 0, PCILIB_NWL_DMA_PAGES * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+
+	for (i = 0; i < PCILIB_NWL_DMA_PAGES; i++, data += PCILIB_NWL_DMA_DESCRIPTOR_SIZE) {
+	    buf_pa = pcilib_kmem_get_block_pa(ctx->pcilib, pages, i);
+	    buf_sz = pcilib_kmem_get_block_size(ctx->pcilib, pages, i);
+
+	    NWL_RING_SET(data, DMA_BD_NDESC_OFFSET, ring_pa + ((i + 1) % PCILIB_NWL_DMA_PAGES) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE);
+	    NWL_RING_SET(data, DMA_BD_BUFAL_OFFSET, buf_pa&0xFFFFFFFF);
+	    NWL_RING_SET(data, DMA_BD_BUFAH_OFFSET, buf_pa>>32);
 #ifdef NWL_GENERATE_DMA_IRQ
-        NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
+    	    NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz | DMA_BD_INT_ERROR_MASK | DMA_BD_INT_COMP_MASK);
 #else /* NWL_GENERATE_DMA_IRQ */
-        NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz);
+    	    NWL_RING_SET(data, DMA_BD_BUFL_CTRL_OFFSET, buf_sz);
 #endif /* NWL_GENERATE_DMA_IRQ */
-    }
+	}
+
+	val = ring_pa;
+	nwl_write_register(val, ctx, base, REG_DMA_ENG_NEXT_BD);
+	nwl_write_register(val, ctx, base, REG_SW_NEXT_BD);
 
-    val = ring_pa;
-    nwl_write_register(val, ctx, base, REG_DMA_ENG_NEXT_BD);
-    nwl_write_register(val, ctx, base, REG_SW_NEXT_BD);
+        info->head = 0;
+	info->tail = 0;
+    }
     
     info->ring = ring;
     info->pages = pages;
     info->page_size = buf_sz;
     info->ring_size = PCILIB_NWL_DMA_PAGES;
     
-    info->head = 0;
-    info->tail = 0;
     
     return 0;
 }

+ 66 - 59
dma/nwl_engine.c

@@ -65,88 +65,89 @@ int dma_nwl_start_engine(nwl_dma_t *ctx, pcilib_dma_engine_t dma) {
     
     if (info->started) return 0;
 
+
 	// This will only successed if there are no parallel access to DMA engine
     err = dma_nwl_allocate_engine_buffers(ctx, info);
     if (err) return err;
     
-	// Check if DMA engine is enabled
-    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
-    if (val&DMA_ENG_RUNNING) {	
-//	info->preserve = 1;
-	
-	// We need to positionate buffers correctly (both read and write)
-	//DSS info->tail, info->head
-    
-//	pcilib_error("Not implemented");
-	
-//        info->started = 1;
-//	return 0;
-    }
+    if (info->reused) {
+    	info->preserve = 1;
+
+	    // Acknowledge asserted engine interrupts    
+	if (val & DMA_ENG_INT_ACTIVE_MASK) {
+	    val |= DMA_ENG_ALLINT_MASK;
+	    nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+	}
 
+#ifdef NWL_GENERATE_DMA_IRQ
+	dma_nwl_enable_engine_irq(ctx, dma);
+#endif /* NWL_GENERATE_DMA_IRQ */
+    } else {
 	// Disable IRQs
-    err = dma_nwl_disable_engine_irq(ctx, dma);
-    if (err) return err;
+	err = dma_nwl_disable_engine_irq(ctx, dma);
+	if (err) return err;
 
 	// Disable Engine & Reseting 
-    val = DMA_ENG_DISABLE|DMA_ENG_USER_RESET;
-    nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+	val = DMA_ENG_DISABLE|DMA_ENG_USER_RESET;
+	nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
 
-    gettimeofday(&start, NULL);
-    do {
-	nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
-        gettimeofday(&cur, NULL);
-    } while ((val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET))&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
+	gettimeofday(&start, NULL);
+	do {
+	    nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+    	    gettimeofday(&cur, NULL);
+	} while ((val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET))&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
     
-    if (val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET)) {
-	pcilib_error("Timeout during reset of DMA engine %i", info->desc.addr);
-	return PCILIB_ERROR_TIMEOUT;
-    }
+	if (val & (DMA_ENG_STATE_MASK|DMA_ENG_USER_RESET)) {
+	    pcilib_error("Timeout during reset of DMA engine %i", info->desc.addr);
+	    return PCILIB_ERROR_TIMEOUT;
+	}
 
-    val = DMA_ENG_RESET; 
-    nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+	val = DMA_ENG_RESET; 
+	nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
     
-    gettimeofday(&start, NULL);
-    do {
-	nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
-        gettimeofday(&cur, NULL);
-    } while ((val & DMA_ENG_RESET)&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
+	gettimeofday(&start, NULL);
+	do {
+	    nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+    	    gettimeofday(&cur, NULL);
+	} while ((val & DMA_ENG_RESET)&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
     
-    if (val & DMA_ENG_RESET) {
-	pcilib_error("Timeout during reset of DMA engine %i", info->desc.addr);
-	return PCILIB_ERROR_TIMEOUT;
-    }
+	if (val & DMA_ENG_RESET) {
+	    pcilib_error("Timeout during reset of DMA engine %i", info->desc.addr);
+	    return PCILIB_ERROR_TIMEOUT;
+	}
     
-	// Acknowledge asserted engine interrupts    
-    if (val & DMA_ENG_INT_ACTIVE_MASK) {
-	val |= DMA_ENG_ALLINT_MASK;
-	nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
-    }
+	    // Acknowledge asserted engine interrupts    
+	if (val & DMA_ENG_INT_ACTIVE_MASK) {
+	    val |= DMA_ENG_ALLINT_MASK;
+	    nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+	}
 
-    ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
-    nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);
-    nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+	ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
+	nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);
+	nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
 
-    __sync_synchronize();
+	__sync_synchronize();
 
-    nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
-    val |= (DMA_ENG_ENABLE);
-    nwl_write_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+	nwl_read_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
+	val |= (DMA_ENG_ENABLE);
+	nwl_write_register(val, ctx, info->base_addr, REG_DMA_ENG_CTRL_STATUS);
 
-    __sync_synchronize();
+	__sync_synchronize();
 
 #ifdef NWL_GENERATE_DMA_IRQ
-    dma_nwl_enable_engine_irq(ctx, dma);
+	dma_nwl_enable_engine_irq(ctx, dma);
 #endif /* NWL_GENERATE_DMA_IRQ */
 
-    if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) {
-	ring_pa += (info->ring_size - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
-    	nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
+	if (info->desc.direction == PCILIB_DMA_FROM_DEVICE) {
+	    ring_pa += (info->ring_size - 1) * PCILIB_NWL_DMA_DESCRIPTOR_SIZE;
+    	    nwl_write_register(ring_pa, ctx, info->base_addr, REG_SW_NEXT_BD);
 
-	info->tail = 0;
-	info->head = (info->ring_size - 1);
-    } else {
-	info->tail = 0;
-	info->head = 0;
+	    info->tail = 0;
+	    info->head = (info->ring_size - 1);
+	} else {
+	    info->tail = 0;
+	    info->head = 0;
+	}
     }
     
     info->started = 1;
@@ -174,6 +175,12 @@ int dma_nwl_stop_engine(nwl_dma_t *ctx, pcilib_dma_engine_t dma) {
 	val = DMA_ENG_DISABLE|DMA_ENG_USER_RESET|DMA_ENG_RESET;
 	nwl_write_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
 
+	gettimeofday(&start, NULL);
+	do {
+	    nwl_read_register(val, ctx, base, REG_DMA_ENG_CTRL_STATUS);
+    	    gettimeofday(&cur, NULL);
+	} while ((val & (DMA_ENG_RUNNING))&&(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < PCILIB_REGISTER_TIMEOUT));
+
 	if (info->ring) {
 	    ring_pa = pcilib_kmem_get_pa(ctx->pcilib, info->ring);
 	    nwl_write_register(ring_pa, ctx, info->base_addr, REG_DMA_ENG_NEXT_BD);

+ 2 - 2
dma/nwl_irq.c

@@ -37,7 +37,7 @@ int dma_nwl_enable_irq(pcilib_dma_context_t *vctx, pcilib_irq_type_t type, pcili
     uint32_t val;
     nwl_dma_t *ctx = (nwl_dma_t*)vctx;
     
-    if (flags&PCILIB_DMA_FLAG_PERMANENT) ctx->irq_preserve |= type;
+    if (flags&PCILIB_DMA_FLAG_PERSISTENT) ctx->irq_preserve |= type;
 
     if ((ctx->irq_enabled&type) == type) return 0;
     
@@ -72,7 +72,7 @@ int dma_nwl_disable_irq(pcilib_dma_context_t *vctx, pcilib_dma_flags_t flags) {
     val &= ~(DMA_INT_ENABLE|DMA_USER_INT_ENABLE);
     nwl_write_register(val, ctx, ctx->base_addr, REG_DMA_CTRL_STATUS);
     
-    if (flags&PCILIB_DMA_FLAG_PERMANENT) ctx->irq_preserve = 0;
+    if (flags&PCILIB_DMA_FLAG_PERSISTENT) ctx->irq_preserve = 0;
 
     return 0;
 }

+ 55 - 31
kmem.c

@@ -17,6 +17,38 @@
 #include "kmem.h"
 #include "error.h"
 
+static int pcilib_free_kernel_buffer(pcilib_t *ctx, pcilib_kmem_list_t *kbuf, size_t i, pcilib_kmem_flags_t flags) {
+    kmem_handle_t kh = {0};
+
+    if (kbuf->buf.blocks[i].ua) munmap(kbuf->buf.blocks[i].ua, kbuf->buf.blocks[i].size + kbuf->buf.blocks[i].alignment_offset);
+    kh.handle_id = kbuf->buf.blocks[i].handle_id;
+    kh.pa = kbuf->buf.blocks[i].pa;
+    kh.flags = flags;
+
+    return ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_FREE, &kh);
+}
+
+static void pcilib_cancel_kernel_memory(pcilib_t *ctx, pcilib_kmem_list_t *kbuf, pcilib_kmem_flags_t flags, int last_flags) {
+    int ret;
+    
+    if (!kbuf->buf.n_blocks) return;
+
+	// consistency error during processing of last block, special treatment could be needed
+    if (last_flags) {
+	pcilib_kmem_flags_t failed_flags = flags;
+	
+	if (last_flags&KMEM_FLAG_REUSED_PERSISTENT) flags&=~PCILIB_KMEM_FLAG_PERSISTENT;
+	if (last_flags&KMEM_FLAG_REUSED_HW) flags&=~PCILIB_KMEM_FLAG_HARDWARE;
+	
+	if (failed_flags != flags) {
+	    ret = pcilib_free_kernel_buffer(ctx, kbuf, --kbuf->buf.n_blocks, failed_flags);
+	    if (ret) pcilib_error("PCIDRIVER_IOC_KMEM_FREE ioctl have failed");
+	}
+    }
+
+    pcilib_free_kernel_memory(ctx, kbuf, flags);
+}
+
 pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type_t type, size_t nmemb, size_t size, size_t alignment, pcilib_kmem_use_t use, pcilib_kmem_flags_t flags) {
     int err = 0;
     const char *error = NULL;
@@ -77,24 +109,31 @@ pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type
 	    else if (!reused) reused = PCILIB_TRISTATE_PARTIAL;
 	
 	    if (persistent) {
-		if (persistent < 0) persistent = (kh.flags&KMEM_FLAG_REUSED_PERSISTENT)?1:0;
-		else if (kh.flags&KMEM_FLAG_REUSED_PERSISTENT == 0) err = PCILIB_ERROR_INVALID_STATE;
+		if (persistent < 0) {
+		    if ((flags&PCILIB_KMEM_FLAG_PERSISTENT == 0)&&(kh.flags&KMEM_FLAG_REUSED_PERSISTENT)) err = PCILIB_ERROR_INVALID_STATE;
+		    else persistent = (kh.flags&KMEM_FLAG_REUSED_PERSISTENT)?1:0;
+		} else if (kh.flags&KMEM_FLAG_REUSED_PERSISTENT == 0) err = PCILIB_ERROR_INVALID_STATE;
 	    } else if (kh.flags&KMEM_FLAG_REUSED_PERSISTENT) err = PCILIB_ERROR_INVALID_STATE;
 	    
 	    if (hardware) {
-		if (hardware < 0) (kh.flags&KMEM_FLAG_REUSED_HW)?1:0;
-		else if (kh.flags&KMEM_FLAG_REUSED_HW == 0) err = PCILIB_ERROR_INVALID_STATE;
+		if (hardware < 0) {
+		    if ((flags&PCILIB_KMEM_FLAG_HARDWARE == 0)&&(kh.flags&KMEM_FLAG_REUSED_HW)) err = PCILIB_ERROR_INVALID_STATE;
+		    else hardware = (kh.flags&KMEM_FLAG_REUSED_HW)?1:0;
+		} else if (kh.flags&KMEM_FLAG_REUSED_HW == 0) err = PCILIB_ERROR_INVALID_STATE;
 	    } else if (kh.flags&KMEM_FLAG_REUSED_HW) err = PCILIB_ERROR_INVALID_STATE;
 	    
-	    if (err) {
-		kbuf->buf.n_blocks = i + 1;
-		break;
-	    }
 	} else {
 	    if (!i) reused = PCILIB_TRISTATE_NO;
 	    else if (reused) reused = PCILIB_TRISTATE_PARTIAL;
+	    
+	    if ((persistent > 0)&&(flags&PCILIB_KMEM_FLAG_PERSISTENT == 0)) err = PCILIB_ERROR_INVALID_STATE;
+	    if ((hardware > 0)&&(flags&PCILIB_KMEM_FLAG_HARDWARE == 0)) err = PCILIB_ERROR_INVALID_STATE;
 	}
 	
+	if (err) {
+	    kbuf->buf.n_blocks = i + 1;
+	    break;
+	}
     
         if ((alignment)&&(type != PCILIB_KMEM_TYPE_PAGE)) {
 	    if (kh.pa % alignment) kbuf->buf.blocks[i].alignment_offset = alignment - kh.pa % alignment;
@@ -113,30 +152,21 @@ pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type
 	kbuf->buf.blocks[i].mmap_offset = kh.pa & ctx->page_mask;
     }
 
-    if (persistent) {
-	if (persistent < 0) persistent = 0;
-	else if (flags&PCILIB_KMEM_FLAG_PERSISTENT == 0) err = PCILIB_ERROR_INVALID_STATE;
-    }
-    
-    if (hardware) {
-	if (hardware < 0) hardware = 0;
-	else if (flags&PCILIB_KMEM_FLAG_HARDWARE == 0) err = PCILIB_ERROR_INVALID_STATE;
-    }
+    if (persistent < 0) persistent = 0;
+    if (hardware < 0) hardware = 0;
 
     if (err||error) {
 	pcilib_kmem_flags_t free_flags = 0;
 	
-	if ((!persistent)&&(flags&PCILIB_KMEM_FLAG_PERSISTENT)) {
-		// if last one is persistent? Ignore?
+	if ((persistent <= 0)&&(flags&PCILIB_KMEM_FLAG_PERSISTENT)) {
 	    free_flags |= PCILIB_KMEM_FLAG_PERSISTENT;
 	}
-
-	if ((!hardware)&&(flags&PCILIB_KMEM_FLAG_HARDWARE)) {
-		// if last one is persistent? Ignore?
+	
+	if ((hardware <= 0)&&(flags&PCILIB_KMEM_FLAG_HARDWARE)) {
 	    free_flags |= PCILIB_KMEM_FLAG_HARDWARE;
 	}
 	
-	pcilib_free_kernel_memory(ctx, kbuf, free_flags);
+	pcilib_cancel_kernel_memory(ctx, kbuf, free_flags, err?kh.flags:0);
 
 	if (err) error = "Reused buffers are inconsistent";
 	pcilib_error(error);
@@ -172,13 +202,8 @@ void pcilib_free_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k, pcilib_km
     else if (ctx->kmem_list == kbuf) ctx->kmem_list = kbuf->next;
 
     for (i = 0; i < kbuf->buf.n_blocks; i++) {
-        if (kbuf->buf.blocks[i].ua) munmap(kbuf->buf.blocks[i].ua, kbuf->buf.blocks[i].size + kbuf->buf.blocks[i].alignment_offset);
-
-        kh.handle_id = kbuf->buf.blocks[i].handle_id;
-        kh.pa = kbuf->buf.blocks[i].pa;
-	kh.flags = flags;
-	ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_FREE, &kh);
-	if ((ret)&&(!err)) err = ret;
+        ret = pcilib_free_kernel_buffer(ctx, kbuf, --kbuf->buf.n_blocks, flags);
+    	if ((ret)&&(!err)) err = ret;
     }
     
     free(kbuf);
@@ -188,7 +213,6 @@ void pcilib_free_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k, pcilib_km
     }
 }
 
-
 int pcilib_sync_kernel_memory(pcilib_t *ctx, pcilib_kmem_handle_t *k, pcilib_kmem_sync_direction_t dir) {
     int i;
     int ret;

+ 6 - 3
kmem.h

@@ -2,18 +2,20 @@
 #define _PCILIB_KMEM_H
 
 #include "pcilib.h"
+#include "tools.h"
 
 typedef enum {
     PCILIB_KMEM_FLAG_REUSE = KMEM_FLAG_REUSE,
     PCILIB_KMEM_FLAG_EXCLUSIVE = KMEM_FLAG_EXCLUSIVE,
     PCILIB_KMEM_FLAG_PERSISTENT = KMEM_FLAG_PERSISTENT,
-    PCILIB_KMEM_FLAG_HARDWARE = KMEM_FLAG_HW
+    PCILIB_KMEM_FLAG_HARDWARE = KMEM_FLAG_HW,
 //    PCILIB_KMEM_FLAG_FORCE = 2		/**< Force buffer 
 } pcilib_kmem_flags_t;
 
+
 typedef enum {
-    PCILIB_KMEM_REUSE_REUSED = PCILIB_TRISTATE_YES,
     PCILIB_KMEM_REUSE_ALLOCATED = PCILIB_TRISTATE_NO,
+    PCILIB_KMEM_REUSE_REUSED = PCILIB_TRISTATE_YES,
     PCILIB_KMEM_REUSE_PARTIAL = PCILIB_TRISTATE_PARTIAL,
     PCILIB_KMEM_REUSE_PERSISTENT = 0x100,
     PCILIB_KMEM_REUSE_HARDWARE = 0x200
@@ -22,7 +24,8 @@ typedef enum {
 
 typedef struct {
     int handle_id;
-    int reused;
+    pcilib_kmem_reuse_state_t reused;
+    
     uintptr_t pa;
 //    uintptr_t va;
     void *ua;

+ 1 - 2
tools.h

@@ -4,8 +4,6 @@
 #include <stdio.h>
 #include <stdint.h>
 
-#include "pci.h"
-
 #define BIT_MASK(bits) ((1ll << (bits)) - 1)
 
 #define min2(a, b) (((a)<(b))?(a):(b))
@@ -16,6 +14,7 @@ typedef enum {
     PCILIB_TRISTATE_YES = 2
 } pcilib_tristate_t;
 
+#include "pci.h"
 
 int pcilib_isnumber(const char *str);
 int pcilib_isxnumber(const char *str);