Browse Source

Support for 64-bit registers

Suren A. Chilingaryan 8 years ago
parent
commit
52e32b2c9f
23 changed files with 674 additions and 295 deletions
  1. 1 1
      CMakeLists.txt
  2. 2 0
      dma/nwl_private.h
  3. 3 3
      pcilib/CMakeLists.txt
  4. 115 0
      pcilib/cpu.c
  5. 17 0
      pcilib/cpu.h
  6. 90 0
      pcilib/datacpy.c
  7. 21 0
      pcilib/datacpy.h
  8. 4 2
      pcilib/dma.c
  9. 66 0
      pcilib/memcpy.c
  10. 22 0
      pcilib/memcpy.h
  11. 153 0
      pcilib/pagecpy.c
  12. 29 0
      pcilib/pagecpy.h
  13. 3 0
      pcilib/pci.h
  14. 1 1
      pcilib/pcilib.h
  15. 1 1
      pcilib/py.c
  16. 1 0
      pcilib/register.c
  17. 89 0
      pcilib/timing.c
  18. 25 0
      pcilib/timing.h
  19. 0 249
      pcilib/tools.c
  20. 0 21
      pcilib/tools.h
  21. 2 2
      pcitool/cli.c
  22. 1 0
      protocols/default.c
  23. 28 15
      protocols/software.c

+ 1 - 1
CMakeLists.txt

@@ -1,6 +1,6 @@
 project(pcitool)
 
-set(PCILIB_VERSION "0.2.3")
+set(PCILIB_VERSION "0.2.4")
 set(PCILIB_ABI_VERSION "2")
 
 cmake_minimum_required(VERSION 2.6)

+ 2 - 0
dma/nwl_private.h

@@ -17,6 +17,8 @@ typedef struct pcilib_nwl_engine_context_s pcilib_nwl_engine_context_t;
 
 #define PCILIB_NWL_REGISTER_TIMEOUT 10000	/**< us */
 
+#include "datacpy.h"
+
 #include "nwl.h"
 #include "nwl_irq.h"
 #include "nwl_engine.h"

+ 3 - 3
pcilib/CMakeLists.txt

@@ -8,8 +8,8 @@ include_directories(
     ${UTHASH_INCLUDE_DIRS}
 )
 
-set(HEADERS pcilib.h pci.h export.h value.h bar.h fifo.h model.h bank.h register.h view.h property.h unit.h xml.h py.h kmem.h irq.h locking.h lock.h dma.h event.h plugin.h tools.h error.h debug.h env.h version.h config.h)
-add_library(pcilib SHARED pci.c export.c value.c bar.c fifo.c model.c bank.c register.c view.c unit.c property.c xml.c py.c kmem.c irq.c locking.c lock.c dma.c event.c plugin.c tools.c error.c debug.c env.c )
+set(HEADERS pcilib.h pci.h datacpy.h memcpy.h pagecpy.h cpu.h timing.h export.h value.h bar.h fifo.h model.h bank.h register.h view.h property.h unit.h xml.h py.h kmem.h irq.h locking.h lock.h dma.h event.h plugin.h tools.h error.h debug.h env.h version.h config.h)
+add_library(pcilib SHARED pci.c datacpy.c memcpy.c pagecpy.c cpu.c timing.c export.c value.c bar.c fifo.c model.c bank.c register.c view.c unit.c property.c xml.c py.c kmem.c irq.c locking.c lock.c dma.c event.c plugin.c tools.c error.c debug.c env.c )
 target_link_libraries(pcilib dma protocols views ${CMAKE_THREAD_LIBS_INIT} ${UFODECODE_LIBRARIES} ${CMAKE_DL_LIBS} ${EXTRA_SYSTEM_LIBS} ${LIBXML2_LIBRARIES} ${PYTHON_LIBRARIES})
 add_dependencies(pcilib dma protocols views)
 
@@ -21,6 +21,6 @@ install(FILES pcilib.h
     DESTINATION include
 )
 
-install(FILES bar.h kmem.h locking.h lock.h bank.h register.h xml.h dma.h event.h model.h error.h debug.h env.h tools.h export.h version.h view.h unit.h
+install(FILES bar.h kmem.h locking.h lock.h bank.h register.h xml.h dma.h event.h model.h error.h debug.h env.h tools.h timing.h cpu.h datacpy.h pagecpy.h memcpy.h export.h version.h view.h unit.h
     DESTINATION include/pcilib
 )

+ 115 - 0
pcilib/cpu.c

@@ -0,0 +1,115 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Executes the CPUID instruction and returns the four result registers.
+ * @param[in] eax - value loaded into EAX before CPUID (the requested leaf)
+ * @param[in] ecx - value loaded into ECX before CPUID (the requested sub-leaf)
+ * @param[out] abcd - receives EAX, EBX, ECX and EDX (in this order) as left by CPUID
+ */
+static void pcilib_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) {
+    uint32_t ebx = 0, edx;
+# if defined( __i386__ ) && defined ( __PIC__ )
+     /* in case of PIC under 32-bit EBX cannot be clobbered */
+     /* EBX is parked in EDI across CPUID and swapped back, so the CPUID result ends up in EDI ("=D") */
+    __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
+# else
+    __asm__ ( "cpuid" : "+b" (ebx),
+# endif
+              "+a" (eax), "+c" (ecx), "=d" (edx) );
+    abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
+}
+
+/**
+ * Reads XCR0 using XGETBV (with ECX=0) and checks that bits 1 and 2 (mask 6) are both set.
+ * The only visible caller verifies the OSXSAVE CPUID bit before calling this function.
+ * @return 1 if both the XMM and YMM state bits are enabled, 0 otherwise
+ */
+static int pcilib_check_xcr0_ymm() {
+    uint32_t xcr0;
+    /* XGETBV returns its result in EDX:EAX; only EAX is used, EDX is declared as clobbered */
+    __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
+    return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
+}
+
+/**
+ * Checks whether the CPU and the OS provide the complete feature set tested here:
+ * FMA, MOVBE, OSXSAVE, enabled XMM/YMM state, AVX2, BMI1, BMI2 and LZCNT.
+ * @return 1 if every feature is present, 0 as soon as one of them is missing
+ */
+static int pcilib_check_4th_gen_intel_core_features() {
+    /* CPUID.(EAX=01H, ECX=0H):ECX - FMA[bit 12], MOVBE[bit 22], OSXSAVE[bit 27] */
+    uint32_t leaf1_ecx_required = ((1 << 12) | (1 << 22) | (1 << 27));
+    /* CPUID.(EAX=07H, ECX=0H):EBX - BMI1[bit 3], AVX2[bit 5], BMI2[bit 8] */
+    uint32_t leaf7_ebx_required = (1 << 5) | (1 << 3) | (1 << 8);
+    /* CPUID.(EAX=80000001H):ECX - LZCNT[bit 5] */
+    uint32_t ext1_ecx_required = (1 << 5);
+    uint32_t regs[4];
+
+    pcilib_run_cpuid(1, 0, regs);
+    if ((regs[2] & leaf1_ecx_required) != leaf1_ecx_required) return 0;
+
+    /* XCR0 is only inspected after OSXSAVE has been confirmed above */
+    if (!pcilib_check_xcr0_ymm()) return 0;
+
+    pcilib_run_cpuid(7, 0, regs);
+    if ((regs[1] & leaf7_ebx_required) != leaf7_ebx_required) return 0;
+
+    pcilib_run_cpuid(0x80000001, 0, regs);
+    if (!(regs[2] & ext1_ecx_required)) return 0;
+
+    return 1;
+}
+
+/**
+ * Maps the detected feature set to a generation number.
+ * @return 4 if pcilib_check_4th_gen_intel_core_features() succeeds, 0 otherwise
+ */
+static int pcilib_detect_cpu_gen() {
+    return pcilib_check_4th_gen_intel_core_features() ? 4 : 0;
+}
+
+/**
+ * Returns the detected CPU generation: 4 if the feature set probed by
+ * pcilib_detect_cpu_gen() is available, 0 otherwise.
+ * The result is cached in a function-local static, so the CPUID/XGETBV probing
+ * runs only on the first call instead of on every call.
+ * NOTE(review): the cache is not synchronized; threads racing on the first call
+ * would each run the detection and store the same value.
+ * @return the CPU generation (currently 4 or 0)
+ */
+int pcilib_get_cpu_gen() {
+    static int gen = -1;
+
+    if (gen < 0)
+        gen = pcilib_detect_cpu_gen();
+
+    return gen;
+}
+
+/**
+ * Computes the mask of the offset bits within a memory page, i.e. one set bit
+ * for every time the page size can be halved before reaching 1. For a
+ * power-of-two page size this is the page size minus one.
+ * @return the page mask; 0 if sysconf(_SC_PAGESIZE) fails or reports a size below 2
+ */
+int pcilib_get_page_mask() {
+    long pagesize = sysconf(_SC_PAGESIZE);
+    int pagemask = 0;
+
+    /* `> 1` instead of `!= 1`: the loop must terminate when sysconf returns -1 or 0 */
+    for (; pagesize > 1; pagesize >>= 1)
+        pagemask = (pagemask << 1) + 1;
+
+    return pagemask;
+}
+
+/**
+ * Counts the CPUs present in the affinity mask of the calling process.
+ * @return the number of CPUs in the mask; 1 if sched_getaffinity() fails;
+ * PCILIB_DEFAULT_CPU_COUNT if the mask turns out to be empty
+ */
+int pcilib_get_cpu_count() {
+    int err;
+
+    int cpu_count;
+    cpu_set_t mask;
+
+    err = sched_getaffinity(getpid(), sizeof(mask), &mask);
+    if (err) return 1;
+
+#ifdef CPU_COUNT
+    cpu_count = CPU_COUNT(&mask);
+#else
+    {
+	int cpu;
+
+	/* Count every set bit: the mask may be sparse (e.g. CPUs 1 and 3 only),
+	   so stopping at the first unset CPU would under-count */
+	for (cpu = 0, cpu_count = 0; cpu < CPU_SETSIZE; cpu++) {
+	    if (CPU_ISSET(cpu, &mask)) ++cpu_count;
+	}
+    }
+#endif
+
+    if (!cpu_count) cpu_count = PCILIB_DEFAULT_CPU_COUNT;
+    return cpu_count;
+}
+

+ 17 - 0
pcilib/cpu.h

@@ -0,0 +1,17 @@
+#ifndef _PCILIB_CPU_H
+#define _PCILIB_CPU_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Returns the mask of the offset bits within a memory page (derived from sysconf(_SC_PAGESIZE)) */
+int pcilib_get_page_mask();
+/** Returns the number of CPUs in the affinity mask of the calling process (1 if the mask cannot be read) */
+int pcilib_get_cpu_count();
+/** Returns the detected CPU generation: 4 if the probed AVX2/FMA/BMI feature set is available, 0 otherwise */
+int pcilib_get_cpu_gen();
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PCILIB_CPU_H */

+ 90 - 0
pcilib/datacpy.c

@@ -0,0 +1,90 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Copies `n` 32-bit elements from `src` to `dst`, passing every element through
+ * ntohl() when the requested endianess differs from the one detected with ntohs(1).
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] n - number of 32-bit elements (not bytes)
+ * @param[in] endianess - byte order of the data; a zero value disables any conversion
+ * @return `dst`
+ */
+void *pcilib_datacpy32(void * dst, void const * src, size_t n, pcilib_endianess_t endianess) {
+    uint32_t *out = (uint32_t *) dst;
+    uint32_t const *in = (uint32_t const *) src;
+    size_t i;
+    int swap;
+
+    if (!endianess) swap = 0;
+    else if (endianess == PCILIB_BIG_ENDIAN) swap = (ntohs(1)!=1);
+    else swap = (ntohs(1)==1);
+
+    if (swap) {
+        for (i = 0; i < n; i++) out[i] = ntohl(in[i]);
+    } else {
+        for (i = 0; i < n; i++) out[i] = in[i];
+    }
+
+    return dst;
+}
+
+/**
+ * Copies `n` 64-bit elements from `src` to `dst`, converting each element with
+ * be64toh() when the requested endianess differs from the detected host byte order.
+ * The conversion is done on the full 64-bit value (a 32-bit ntohl() would
+ * truncate the upper half of every element).
+ * NOTE(review): like the 32-bit routine, the conversion relies on a
+ * big-endian-to-host helper, which presumably is a no-op on big-endian hosts - confirm
+ * if such hosts have to be supported.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] n - number of 64-bit elements (not bytes)
+ * @param[in] endianess - byte order of the data; a zero value disables any conversion
+ * @return `dst`
+ */
+void *pcilib_datacpy64(void * dst, void const * src, size_t n, pcilib_endianess_t endianess) {
+    uint64_t * plDst = (uint64_t *) dst;
+    uint64_t const * plSrc = (uint64_t const *) src;
+
+    int swap = 0;
+
+    if (endianess) 
+        swap = (endianess == PCILIB_BIG_ENDIAN)?(be64toh(1)!=1):(be64toh(1)==1);
+
+    if (swap) {
+        while (n > 0) {
+            *plDst = be64toh(*plSrc);
+            ++plSrc;
+            ++plDst;
+            --n;
+        }
+    } else {
+        while (n > 0) {
+            *plDst = *plSrc;
+            ++plSrc;
+            ++plDst;
+            --n;
+        }
+    }
+
+    return dst;
+}
+
+typedef void* (*pcilib_datacpy_routine_t)(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+
+/* Copy routines indexed by log2 of the element size in bytes; NULL marks unsupported sizes */
+static pcilib_datacpy_routine_t pcilib_datacpy_routines[4] = {
+    NULL, NULL, pcilib_datacpy32, pcilib_datacpy64
+};
+
+/**
+ * Copies `n` elements of `size` bytes from `src` to `dst` using the routine
+ * selected by log2(size); only sizes mapping to the 32-bit and 64-bit routines
+ * are supported.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] size - element size in bytes
+ * @param[in] n - number of elements
+ * @param[in] endianess - byte order of the data, passed on to the selected routine
+ * @return `dst`, or NULL if no routine exists for the requested element size
+ */
+void *pcilib_datacpy(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess) {
+    size_t pos = 0;
+    pcilib_datacpy_routine_t routine;
+
+    /* sizes below 4 select a NULL entry and sizes of 16 and above index past the table */
+    assert((size >= 4)&&(size < 16));
+
+    while (size >>= 1) ++pos;
+
+    /* Keep the lookup safe when assertions are compiled out (NDEBUG) */
+    if (pos >= sizeof(pcilib_datacpy_routines)/sizeof(pcilib_datacpy_routines[0])) return NULL;
+
+    routine = pcilib_datacpy_routines[pos];
+    if (!routine) return NULL;
+
+    return routine(dst, src, n, endianess);
+}

+ 21 - 0
pcilib/datacpy.h

@@ -0,0 +1,21 @@
+#ifndef _PCILIB_DATACPY_H
+#define _PCILIB_DATACPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <pcilib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Copies `n` 32-bit elements from `src` to `dst`; `endianess` gives the byte order of the data (0 - no conversion). Returns `dst`. */
+void *pcilib_datacpy32(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+/** Copies `n` 64-bit elements from `src` to `dst`; `endianess` gives the byte order of the data (0 - no conversion). Returns `dst`. */
+void *pcilib_datacpy64(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+/** Copies `n` elements of `size` bytes each, dispatching on `size` to one of the fixed-width routines above. */
+void *pcilib_datacpy(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_DATACPY_H */

+ 4 - 2
pcilib/dma.c

@@ -17,6 +17,8 @@
 #include "pcilib.h"
 #include "pci.h"
 #include "dma.h"
+#include "tools.h"
+#include "pagecpy.h"
 
 const pcilib_dma_description_t *pcilib_get_dma_description(pcilib_t *ctx) {
     int err;
@@ -194,8 +196,8 @@ static int pcilib_dma_read_callback(void *arg, pcilib_dma_flags_t flags, size_t
 	    pcilib_error("Buffer size (%li) is not large enough for DMA packet, at least %li bytes is required", ctx->size, ctx->pos + bufsize); 
 	return -PCILIB_ERROR_TOOBIG;
     }
-    
-    memcpy(ctx->data + ctx->pos, buf, bufsize);
+
+    pcilib_pagecpy(ctx->data + ctx->pos, buf, bufsize);
     ctx->pos += bufsize;
 
     if (flags & PCILIB_DMA_FLAG_EOP) {

+ 66 - 0
pcilib/memcpy.c

@@ -0,0 +1,66 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Copies `len` bytes from `src` to `dst` one byte at a time.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return `dst`
+ */
+void *pcilib_memcpy8(void * dst, void const * src, size_t len) {
+    char *out = (char *) dst;
+    char const *in = (char const *) src;
+    size_t i;	/* size_t: an int index overflows for len > INT_MAX and mixes signedness with len */
+
+    for (i = 0; i < len; i++) out[i] = in[i];
+    return dst;
+}
+
+/**
+ * Copies `len` bytes from `src` to `dst`: as many 32-bit words as fit first,
+ * then the remaining (at most three) bytes one by one.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return `dst`
+ */
+void *pcilib_memcpy32(void * dst, void const * src, size_t len) {
+    uint32_t *word_dst = (uint32_t *) dst;
+    uint32_t const *word_src = (uint32_t const *) src;
+    size_t words = len / 4;
+    size_t tail = len % 4;
+    char *byte_dst;
+    char const *byte_src;
+
+    for (; words > 0; --words)
+        *word_dst++ = *word_src++;
+
+    /* continue bytewise right behind the last copied word */
+    byte_dst = (char *) word_dst;
+    byte_src = (char const *) word_src;
+
+    for (; tail > 0; --tail)
+        *byte_dst++ = *byte_src++;
+
+    return dst;
+}
+
+
+/**
+ * Copies `len` bytes from `src` to `dst`: as many 64-bit words as fit first,
+ * then the remaining (at most seven) bytes one by one.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return `dst`
+ */
+void *pcilib_memcpy64(void * dst, void const * src, size_t len) {
+    uint64_t *word_dst = (uint64_t *) dst;
+    uint64_t const *word_src = (uint64_t const *) src;
+    size_t words = len / 8;
+    size_t tail = len % 8;
+    char *byte_dst;
+    char const *byte_src;
+
+    for (; words > 0; --words)
+        *word_dst++ = *word_src++;
+
+    /* continue bytewise right behind the last copied word */
+    byte_dst = (char *) word_dst;
+    byte_src = (char const *) word_src;
+
+    for (; tail > 0; --tail)
+        *byte_dst++ = *byte_src++;
+
+    return dst;
+}
+

+ 22 - 0
pcilib/memcpy.h

@@ -0,0 +1,22 @@
+#ifndef _PCILIB_MEMCPY_H
+#define _PCILIB_MEMCPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+#define pcilib_memcpy pcilib_memcpy32
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Copies `len` bytes from `src` to `dst` using byte-wide accesses; returns `dst` */
+void *pcilib_memcpy8(void * dst, void const * src, size_t len);
+/** Copies `len` bytes from `src` to `dst` using 32-bit accesses (trailing bytes are copied bytewise); returns `dst` */
+void *pcilib_memcpy32(void * dst, void const * src, size_t len);
+/** Copies `len` bytes from `src` to `dst` using 64-bit accesses (trailing bytes are copied bytewise); returns `dst` */
+void *pcilib_memcpy64(void * dst, void const * src, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_MEMCPY_H */

+ 153 - 0
pcilib/pagecpy.c

@@ -0,0 +1,153 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "cpu.h"
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+
+/*
+void *memcpy128(void * dst, void const * src, size_t len) {
+
+    long pos = - (len>>2);
+    char * plDst = (char *) dst - 4 * pos;
+    char const * plSrc = (char const *) src - 4 * pos;
+
+    if (pos) {
+        __asm__ __volatile__ (
+            "1:						\n\t"
+            "mov	(%0,%2,4), %%edi		\n\t"
+            "mov	%%edi, (%1,%2,4)		\n\t"
+            "inc	%2				\n\t"
+            "jnz 	1b				\n\t"
+	: 
+	: "r" (plSrc), "r" (plDst), "r" (pos)
+	: "%edi"
+        );
+    }
+
+
+
+    long pos = - ((len>>4)<<4);
+    char * plDst = (char *) dst - pos;
+    char const * plSrc = (char const *) src - pos;
+
+    if (pos) {
+        __asm__ __volatile__ (
+            "1:						\n\t"
+//            "movdqa	(%0,%2), %%xmm0			\n\t"
+            "mov	(%0,%2), %%esi			\n\t"
+            "movd	%%esi, %%xmm0			\n\t"
+            "mov	4(%0,%2), %%esi			\n\t"
+            "movd	%%esi, %%xmm1			\n\t"
+            "mov	8(%0,%2), %%esi			\n\t"
+            "movd	%%esi, %%xmm2			\n\t"
+            "mov	12(%0,%2), %%esi		\n\t"
+            "movd	%%esi, %%xmm3			\n\t"
+	    "pslldq	$4, %%xmm1			\n\t"
+	    "por	%%xmm1, %%xmm0			\n\t"
+	    "pslldq	$8, %%xmm2			\n\t"
+	    "por	%%xmm2, %%xmm0			\n\t"
+	    "pslldq	$12, %%xmm3			\n\t"
+	    "por	%%xmm3, %%xmm0			\n\t"
+	    
+            "movntdq	%%xmm0, (%1,%2)			\n\t"
+            "add	$16, %2				\n\t"
+            "jnz 	1b				\n\t"
+	: 
+	: "r" (plSrc), "r" (plDst), "r" (pos)
+	: "%rsi"
+        );
+    }
+
+
+
+    len &= 0x3;
+
+    char * pcDst = (char *) plDst;
+    char const * pcSrc = (char const *) plSrc;
+
+    while (len--) {
+        *pcDst++ = *pcSrc++;
+    }
+
+    return (dst);
+} 
+*/
+
+/**
+ * Copies memory from `src` to `dst` in 512-byte blocks: each block is loaded
+ * into ymm0-ymm15 with aligned loads (vmovdqa) and written out with
+ * non-temporal stores (vmovntps); a final sfence orders the stores.
+ * Only size/512 whole blocks are copied; a remainder is ignored and sizes below
+ * 512 bytes copy nothing. Both pointers must be 32-byte aligned, as required by
+ * the aligned load/store instructions. x86-64 with AVX only.
+ * @param[out] dst - destination memory region (32-byte aligned)
+ * @param[in] src - source memory region (32-byte aligned)
+ * @param[in] size - size of the memory region in bytes
+ */
+void pcilib_memcpy4k_avx(void *dst, void *src, size_t size) {
+    size_t blocks = (size / 512);
+    size_t offset = 0;
+
+    /* The loop decrements before testing, so a zero count would wrap around */
+    if (!blocks) return;
+
+    __asm__ __volatile__ (
+            "1:						\n\t"
+
+            "vmovdqa 	   (%[src],%[off]), %%ymm0	\n\t"
+            "vmovdqa 	 32(%[src],%[off]), %%ymm1	\n\t"
+            "vmovdqa 	 64(%[src],%[off]), %%ymm2	\n\t"
+            "vmovdqa 	 96(%[src],%[off]), %%ymm3	\n\t"
+            "vmovdqa 	128(%[src],%[off]), %%ymm4	\n\t"
+            "vmovdqa 	160(%[src],%[off]), %%ymm5	\n\t"
+            "vmovdqa 	192(%[src],%[off]), %%ymm6	\n\t"
+            "vmovdqa 	224(%[src],%[off]), %%ymm7	\n\t"
+
+            "vmovdqa 	256(%[src],%[off]), %%ymm8	\n\t"
+            "vmovdqa 	288(%[src],%[off]), %%ymm9	\n\t"
+            "vmovdqa 	320(%[src],%[off]), %%ymm10	\n\t"
+            "vmovdqa 	352(%[src],%[off]), %%ymm11	\n\t"
+            "vmovdqa 	384(%[src],%[off]), %%ymm12	\n\t"
+            "vmovdqa 	416(%[src],%[off]), %%ymm13	\n\t"
+            "vmovdqa 	448(%[src],%[off]), %%ymm14	\n\t"
+            "vmovdqa 	480(%[src],%[off]), %%ymm15	\n\t"
+
+            "vmovntps	%%ymm0,    (%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm1,  32(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm2,  64(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm3,  96(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm4, 128(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm5, 160(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm6, 192(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm7, 224(%[dst],%[off])	\n\t"
+
+            "vmovntps	%%ymm8,  256(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm9,  288(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm10, 320(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm11, 352(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm12, 384(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm13, 416(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm14, 448(%[dst],%[off])	\n\t"
+            "vmovntps	%%ymm15, 480(%[dst],%[off])	\n\t"
+
+            "add	$512, %[off]			\n\t"
+            "dec	%[cnt]				\n\t"
+            "jnz 	1b				\n\t"
+
+            "sfence"
+    /* offset and counter are modified by the loop, so they are read-write operands
+       (no push/pop inside the asm, which the compiler cannot account for) */
+    : [off] "+r" (offset), [cnt] "+r" (blocks)
+    /* loads read from src, stores write to dst */
+    : [src] "r" (src), [dst] "r" (dst)
+    /* memory is written behind the compiler's back, flags and all 16 vector registers are used */
+    : "memory", "cc",
+      "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+      "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
+        );
+}
+
+/**
+ * Copies `size` bytes from `src` to `dst`. The AVX block copy is used when the
+ * detected CPU generation is above 3, `size` is a non-zero multiple of 4096 and
+ * both pointers are 32-byte aligned; in every other case the standard memcpy
+ * is used.
+ * @param[out] dst - destination memory region
+ * @param[in] src - source memory region
+ * @param[in] size - size of the memory region in bytes
+ */
+void pcilib_pagecpy(void *dst, void *src, size_t size) {
+    int gen = pcilib_get_cpu_gen();
+    /* size > 0: a zero size also satisfies size%4096==0, but the AVX loop needs at least one block */
+    if ((gen > 3)&&(size > 0)&&(size%4096==0)&&((uintptr_t)dst%32==0)&&((uintptr_t)src%32==0)) {
+	pcilib_memcpy4k_avx(dst, src, size);
+    } else
+	memcpy(dst, src, size);
+}

+ 29 - 0
pcilib/pagecpy.h

@@ -0,0 +1,29 @@
+#ifndef _PCILIB_PAGECPY_H
+#define _PCILIB_PAGECPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * This function should be used to move large blocks of non-cached memory between
+ * aligned memory locations. The function will determine the CPU model and alignment
+ * and call the appropriate implementation. If nothing suitable is found, the standard
+ * memcpy will be used. It is OK to call it on small or unaligned data; the standard
+ * memcpy will be executed in this case. The memory regions should not intersect.
+ * Only an AVX implementation exists so far.
+ * The function does not return a value.
+ * @param[out] dst - destination memory region
+ * @param[in] src - source memory region
+ * @param[in] size - size of memory region in bytes.
+ */
+void pcilib_pagecpy(void *dst, void *src, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_PAGECPY_H */

+ 3 - 0
pcilib/pci.h

@@ -20,6 +20,8 @@
 #include "linux-3.10.h"
 #include "driver/pciDriver.h"
 
+#include "timing.h"
+#include "cpu.h"
 #include "pcilib.h"
 #include "register.h"
 #include "kmem.h"
@@ -32,6 +34,7 @@
 #include "xml.h"
 #include "py.h"
 #include "view.h"
+#include "memcpy.h"
 
 typedef struct {
     uint8_t max_link_speed, link_speed;

+ 1 - 1
pcilib/pcilib.h

@@ -17,7 +17,7 @@ typedef uint16_t pcilib_view_t;			/**< Type holding the register view position w
 typedef uint16_t pcilib_unit_t;			/**< Type holding the value unit position within unit listing in the model */
 typedef uint32_t pcilib_register_addr_t;	/**< Type holding the register address within address-space of BARs */
 typedef uint8_t pcilib_register_size_t;		/**< Type holding the size in bits of the register */
-typedef uint32_t pcilib_register_value_t;	/**< Type holding the register value */
+typedef uint64_t pcilib_register_value_t;	/**< Type holding the register value */
 typedef uint8_t pcilib_dma_engine_addr_t;
 typedef uint8_t pcilib_dma_engine_t;
 typedef uint64_t pcilib_event_id_t;

+ 1 - 1
pcilib/py.c

@@ -142,7 +142,7 @@ static char *pcilib_py_parse_string(pcilib_t *ctx, const char *codestr, pcilib_v
             } else {
                 err = pcilib_read_register(ctx, NULL, reg, &regval);
                 if (err) break;
-                sprintf(dst + offset, "0x%x", regval);
+                sprintf(dst + offset, "0x%lx", regval);
             }
         }
 

+ 1 - 0
pcilib/register.c

@@ -16,6 +16,7 @@
 #include "pci.h"
 #include "bank.h"
 
+#include "datacpy.h"
 #include "tools.h"
 #include "error.h"
 #include "property.h"

+ 89 - 0
pcilib/timing.c

@@ -0,0 +1,89 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Advances the time stored in `tv` by `timeout` microseconds, carrying
+ * overflowing microseconds into the seconds field.
+ * @param[in,out] tv - time to advance
+ * @param[in] timeout - interval in microseconds
+ * @return always 0
+ */
+int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout) {
+    tv->tv_usec += timeout%1000000;
+    tv->tv_sec += timeout/1000000;
+
+    /* a single carry is enough for the remainder added above */
+    if (tv->tv_usec > 999999) {
+	tv->tv_usec -= 1000000;
+	tv->tv_sec += 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Stores in `tv` the current time advanced by `timeout` microseconds.
+ * @param[out] tv - receives the deadline
+ * @param[in] timeout - interval in microseconds
+ * @return always 0
+ */
+int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout) {
+    /* start from "now" and push it forward by the timeout */
+    gettimeofday(tv, NULL);
+    pcilib_add_timeout(tv, timeout);
+    return 0;
+}
+
+/**
+ * Checks whether less than `timeout` microseconds remain until the deadline `tve`.
+ * A deadline with a zero seconds field is never reported as reached.
+ * @param[in] tve - the deadline
+ * @param[in] timeout - required remaining time in microseconds
+ * @return 1 if the deadline has passed or is closer than `timeout`, 0 otherwise
+ */
+int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout) {
+    struct timeval now;
+    int64_t remaining;
+
+    if (!tve->tv_sec) return 0;
+
+    gettimeofday(&now, NULL);
+    remaining = ((tve->tv_sec - now.tv_sec)*1000000 + (tve->tv_usec - now.tv_usec));
+
+    /* the sign is tested separately, before the value is compared with the timeout */
+    if (remaining < 0) return 1;
+    if (remaining < timeout) return 1;
+
+    return 0;
+}
+
+/**
+ * Computes the time left until the deadline `tve`.
+ * @param[in] tve - the deadline
+ * @return remaining time in microseconds, 0 if the deadline has already passed
+ */
+pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve) {
+    struct timeval now;
+    int64_t remaining;
+
+    gettimeofday(&now, NULL);
+    remaining = ((tve->tv_sec - now.tv_sec)*1000000 + (tve->tv_usec - now.tv_usec));
+
+    if (remaining >= 0) return remaining;
+    return 0;
+}
+
+/**
+ * Sleeps with a single nanosleep() call for the time left until the deadline `tv`;
+ * returns immediately if no time is left.
+ * @param[in] tv - the deadline
+ * @return always 0
+ */
+int pcilib_sleep_until_deadline(struct timeval *tv) {
+    pcilib_timeout_t left = pcilib_calc_time_to_deadline(tv);
+
+    if (left > 0) {
+	struct timespec ts;
+
+	/* split the microseconds into whole seconds and nanoseconds */
+	ts.tv_sec = left / 1000000;
+	ts.tv_nsec = 1000 * (left % 1000000);
+	nanosleep(&ts, NULL);
+    }
+
+    return 0;
+}
+
+/**
+ * Computes the interval between two points in time.
+ * @param[in] tvs - start of the interval (first argument)
+ * @param[in] tve - end of the interval (second argument)
+ * @return `tve` minus `tvs` in microseconds
+ */
+pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve) {
+    return (tve->tv_sec - tvs->tv_sec)*1000000
+         + (tve->tv_usec - tvs->tv_usec);
+}
+
+/**
+ * Compares two points in time, seconds first and microseconds second.
+ * @param[in] tv1 - first time
+ * @param[in] tv2 - second time
+ * @return 1 if `tv1` is later than `tv2`, -1 if it is earlier, 0 if both are equal
+ */
+int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2) {
+    if (tv1->tv_sec != tv2->tv_sec)
+	return (tv1->tv_sec > tv2->tv_sec)?1:-1;
+
+    if (tv1->tv_usec != tv2->tv_usec)
+	return (tv1->tv_usec > tv2->tv_usec)?1:-1;
+
+    return 0;
+}

+ 25 - 0
pcilib/timing.h

@@ -0,0 +1,25 @@
+#ifndef _PCILIB_TIMING_H
+#define _PCILIB_TIMING_H
+
+#include <sys/time.h>
+#include <pcilib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Advances `tv` by `timeout` microseconds; always returns 0 */
+int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout);
+/** Stores in `tv` the current time plus `timeout` microseconds; always returns 0 */
+int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout);
+/** Returns 1 if the deadline `tve` has passed or is closer than `timeout` microseconds, 0 otherwise */
+int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout);
+/** Returns the microseconds left until the deadline `tve` (0 if it has passed) */
+pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve);
+/** Sleeps for the time left until the deadline `tv`; always returns 0 */
+int pcilib_sleep_until_deadline(struct timeval *tv);
+/** Returns 1, -1 or 0 if `tv1` is later than, earlier than or equal to `tv2` */
+int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2);
+/** Returns `tve` minus `tvs` in microseconds; the start time is the FIRST argument, as in the definition */
+pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PCILIB_TIMING_H */

+ 0 - 249
pcilib/tools.c

@@ -101,255 +101,6 @@ void pcilib_swap(void *dst, void *src, size_t size, size_t n) {
     }
 }
 
-void *pcilib_memcpy8(void * dst, void const * src, size_t len) {
-    int i;
-    for (i = 0; i < len; i++) ((char*)dst)[i] = ((char*)src)[i];
-    return dst;
-}
-
-void *pcilib_memcpy32(void * dst, void const * src, size_t len) {
-    uint32_t * plDst = (uint32_t *) dst;
-    uint32_t const * plSrc = (uint32_t const *) src;
-
-    while (len >= 4) {
-//        *plDst = ntohl(*plSrc);
-	*plDst = *plSrc;
-        plSrc++;
-        plDst++;
-        len -= 4;
-    }
-
-    char * pcDst = (char *) plDst;
-    char const * pcSrc = (char const *) plSrc;
-
-    while (len--) {
-        *pcDst++ = *pcSrc++;
-    }
-
-    return (dst);
-} 
-
-
-void *pcilib_memcpy64(void * dst, void const * src, size_t len) {
-    uint64_t * plDst = (uint64_t *) dst;
-    uint64_t const * plSrc = (uint64_t const *) src;
-
-    while (len >= 8) {
-        *plDst++ = *plSrc++;
-        len -= 8;
-    }
-
-    char * pcDst = (char *) plDst;
-    char const * pcSrc = (char const *) plSrc;
-
-    while (len--) {
-        *pcDst++ = *pcSrc++;
-    }
-
-    return (dst);
-} 
-
-/*
-void *memcpy128(void * dst, void const * src, size_t len) {
-
-    long pos = - (len>>2);
-    char * plDst = (char *) dst - 4 * pos;
-    char const * plSrc = (char const *) src - 4 * pos;
-
-    if (pos) {
-        __asm__ __volatile__ (
-            "1:						\n\t"
-            "mov	(%0,%2,4), %%edi		\n\t"
-            "mov	%%edi, (%1,%2,4)		\n\t"
-            "inc	%2				\n\t"
-            "jnz 	1b				\n\t"
-	: 
-	: "r" (plSrc), "r" (plDst), "r" (pos)
-	: "%edi"
-        );
-    }
-
-
-
-    long pos = - ((len>>4)<<4);
-    char * plDst = (char *) dst - pos;
-    char const * plSrc = (char const *) src - pos;
-
-    if (pos) {
-        __asm__ __volatile__ (
-            "1:						\n\t"
-//            "movdqa	(%0,%2), %%xmm0			\n\t"
-            "mov	(%0,%2), %%esi			\n\t"
-            "movd	%%esi, %%xmm0			\n\t"
-            "mov	4(%0,%2), %%esi			\n\t"
-            "movd	%%esi, %%xmm1			\n\t"
-            "mov	8(%0,%2), %%esi			\n\t"
-            "movd	%%esi, %%xmm2			\n\t"
-            "mov	12(%0,%2), %%esi		\n\t"
-            "movd	%%esi, %%xmm3			\n\t"
-	    "pslldq	$4, %%xmm1			\n\t"
-	    "por	%%xmm1, %%xmm0			\n\t"
-	    "pslldq	$8, %%xmm2			\n\t"
-	    "por	%%xmm2, %%xmm0			\n\t"
-	    "pslldq	$12, %%xmm3			\n\t"
-	    "por	%%xmm3, %%xmm0			\n\t"
-	    
-            "movntdq	%%xmm0, (%1,%2)			\n\t"
-            "add	$16, %2				\n\t"
-            "jnz 	1b				\n\t"
-	: 
-	: "r" (plSrc), "r" (plDst), "r" (pos)
-	: "%rsi"
-        );
-    }
-
-
-
-    len &= 0x3;
-
-    char * pcDst = (char *) plDst;
-    char const * pcSrc = (char const *) plSrc;
-
-    while (len--) {
-        *pcDst++ = *pcSrc++;
-    }
-
-    return (dst);
-} 
-*/
-
-void *pcilib_datacpy32(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess) {
-    uint32_t * plDst = (uint32_t *) dst;
-    uint32_t const * plSrc = (uint32_t const *) src;
-
-    int swap = 0;
-
-    if (endianess) 
-        swap = (endianess == PCILIB_BIG_ENDIAN)?(ntohs(1)!=1):(ntohs(1)==1);
-
-    assert(size == 4);	// only 32 bit at the moment
-
-    if (swap) {
-        while (n > 0) {
-            *plDst = ntohl(*plSrc);
-            ++plSrc;
-            ++plDst;
-            --n;
-        }
-    } else {
-        while (n > 0) {
-            *plDst = *plSrc;
-            ++plSrc;
-            ++plDst;
-            --n;
-        }
-    }
-
-    return dst;
-} 
-
-int pcilib_get_page_mask() {
-    int pagesize,pagemask,temp;
-
-    pagesize = sysconf(_SC_PAGESIZE);
-
-    for( pagemask=0, temp = pagesize; temp != 1; ) {
-	temp = (temp >> 1);
-	pagemask = (pagemask << 1)+1;
-    }
-    return pagemask;
-}
-
-int pcilib_get_cpu_count() {
-    int err;
-
-    int cpu_count;
-    cpu_set_t mask;
-
-    err = sched_getaffinity(getpid(), sizeof(mask), &mask);
-    if (err) return 1;
-
-#ifdef CPU_COUNT
-    cpu_count = CPU_COUNT(&mask);
-#else
-    for (cpu_count = 0; cpu_count < CPU_SETSIZE; cpu_count++) {
-	if (!CPU_ISSET(cpu_count, &mask)) break;
-    }
-#endif
-
-    if (!cpu_count) cpu_count = PCILIB_DEFAULT_CPU_COUNT;
-    return cpu_count;    
-}
-
-
-int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout) {
-    tv->tv_usec += timeout%1000000;
-    if (tv->tv_usec > 999999) {
-	tv->tv_usec -= 1000000;
-	tv->tv_sec += 1 + timeout/1000000;
-    } else {
-	tv->tv_sec += timeout/1000000;
-    }
-
-    return 0;
-}
 
-int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout) {
-    gettimeofday(tv, NULL);
-    pcilib_add_timeout(tv, timeout);
 
-    return 0;
-}
-
-int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout) {
-    int64_t res;
-    struct timeval tvs;
-
-    if (!tve->tv_sec) return 0;
-
-    gettimeofday(&tvs, NULL);
-    res = ((tve->tv_sec - tvs.tv_sec)*1000000 + (tve->tv_usec - tvs.tv_usec));
-	// Hm... Some problems comparing signed and unsigned. So, sign check first
-    if ((res < 0)||(res < timeout)) {
-	return 1;
-    }
 
-    return 0;
-}
-
-pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve) {
-    int64_t res;
-    struct timeval tvs;
-    
-    gettimeofday(&tvs, NULL);
-    res = ((tve->tv_sec - tvs.tv_sec)*1000000 + (tve->tv_usec - tvs.tv_usec));
-    
-    if (res < 0) return 0;
-    return res;
-}
-
-int pcilib_sleep_until_deadline(struct timeval *tv) {
-    struct timespec wait;
-    pcilib_timeout_t duration;
-
-    duration = pcilib_calc_time_to_deadline(tv);
-    if (duration > 0) {
-	wait.tv_sec = duration / 1000000;
-	wait.tv_nsec = 1000 * (duration % 1000000);
-	nanosleep(&wait, NULL);
-    }
-
-    return 0;
-}
-
-pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve) {
-    return ((tve->tv_sec - tvs->tv_sec)*1000000 + (tve->tv_usec - tvs->tv_usec));
-}
-
-int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2) {
-    if (tv1->tv_sec > tv2->tv_sec) return 1;
-    else if (tv1->tv_sec < tv2->tv_sec) return -1;
-    else if (tv1->tv_usec > tv2->tv_usec) return 1;
-    else if (tv1->tv_usec < tv2->tv_usec) return -1;
-    return 0;
-}

+ 0 - 21
pcilib/tools.h

@@ -6,14 +6,10 @@
 
 #include <pcilib.h>
 
-#define pcilib_memcpy pcilib_memcpy32
-#define pcilib_datacpy pcilib_datacpy32
-
 #define BIT_MASK(bits) ((1ll << (bits)) - 1)
 
 #define min2(a, b) (((a)<(b))?(a):(b))
 
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -28,23 +24,6 @@ uint32_t pcilib_swap32(uint32_t x);
 uint64_t pcilib_swap64(uint64_t x);
 void pcilib_swap(void *dst, void *src, size_t size, size_t n);
 
-void * pcilib_memcpy8(void * dst, void const * src, size_t len);
-void * pcilib_memcpy32(void * dst, void const * src, size_t len);
-void * pcilib_memcpy64(void * dst, void const * src, size_t len);
-void * pcilib_datacpy32(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess);
-
-int pcilib_get_page_mask();
-int pcilib_get_cpu_count();
-
-
-int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout);
-int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout);
-int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout);
-pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve);
-int pcilib_sleep_until_deadline(struct timeval *tv);
-int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2);
-pcilib_timeout_t pcilib_timediff(struct timeval *tve, struct timeval *tvs);
-
 #ifdef __cplusplus
 }
 #endif

+ 2 - 2
pcitool/cli.c

@@ -753,9 +753,9 @@ void ViewInfo(pcilib_t *handle, pcilib_register_t reg, size_t id) {
         printf("    Value aliases  :");
 	for (i = 0; vnames[i].name; i++) {
 	    if (i) printf(",");
-	    printf(" %s = %u", vnames[i].name, vnames[i].value);
+	    printf(" %s = %lu", vnames[i].name, vnames[i].value);
 	    if (vnames[i].min != vnames[i].max) 
-	        printf(" (%u - %u)", vnames[i].min, vnames[i].max);
+	        printf(" (%lu - %lu)", vnames[i].min, vnames[i].max);
 	}
 	printf("\n");
     } else if (v->api == &pcilib_transform_view_api) {

+ 1 - 0
protocols/default.c

@@ -6,6 +6,7 @@
 #include "model.h"
 #include "error.h"
 #include "bar.h"
+#include "datacpy.h"
 
 #define default_datacpy(dst, src, access, bank)   pcilib_datacpy(dst, src, access, 1, bank->raw_endianess)
 

+ 28 - 15
protocols/software.c

@@ -4,11 +4,14 @@
 #include <string.h>
 #include <sys/file.h>
 
+
+#include "tools.h"
 #include "model.h"
 #include "error.h"
 #include "kmem.h"
 #include "pcilib.h"
 #include "pci.h"
+#include "datacpy.h"
 
 typedef struct pcilib_software_register_bank_context_s pcilib_software_register_bank_context_t;
 
@@ -103,23 +106,33 @@ pcilib_register_bank_context_t* pcilib_software_registers_open(pcilib_t *ctx, pc
 }
 
 int pcilib_software_registers_read(pcilib_t *ctx, pcilib_register_bank_context_t *bank_ctx, pcilib_register_addr_t addr, pcilib_register_value_t *value){
-	if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
-	    pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
-	    return PCILIB_ERROR_INVALID_ADDRESS;
-	}
+    /* Reads one register of the software bank: `access` bytes (bank access width / 8)
+       are copied from the bank memory at offset `addr` into a zero-initialized value.
+       Returns 0 on success, PCILIB_ERROR_INVALID_ADDRESS if the access leaves the bank. */
+    const pcilib_register_bank_description_t *b = bank_ctx->bank;
+    int access = b->access / 8;
+
+    pcilib_register_value_t val = 0;
+
+    /* Only `access` bytes are touched below, so the bound is checked against the access
+       width; checking against sizeof(pcilib_register_value_t) (8 bytes now) would reject
+       the last registers of a bank with a narrower access width */
+    if ((addr + (size_t)access) > bank_ctx->bank->size) {
+	pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, (unsigned long)addr);
+	return PCILIB_ERROR_INVALID_ADDRESS;
+    }
+
+    pcilib_datacpy(&val, ((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr, access, 1, b->raw_endianess);
+    *value = val;
 
-	    // we consider this atomic operation and, therefore, do no locking
-	*value = *(pcilib_register_value_t*)(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr);
-	return 0;
+    return 0;
 }
 
 int pcilib_software_registers_write(pcilib_t *ctx, pcilib_register_bank_context_t *bank_ctx, pcilib_register_addr_t addr, pcilib_register_value_t value) {
-	if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
-	    pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
-	    return PCILIB_ERROR_INVALID_ADDRESS;
-	}
-	
-	    // we consider this atomic operation and, therefore, do no locking
-	*(pcilib_register_value_t*)(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr) = value;
-	return 0;
+    /* Writes one register of the software bank: `access` bytes (bank access width / 8)
+       of `value` are copied into the bank memory at offset `addr`.
+       Returns 0 on success, PCILIB_ERROR_INVALID_ADDRESS if the access leaves the bank. */
+    const pcilib_register_bank_description_t *b = bank_ctx->bank;
+    int access = b->access / 8;
+
+    /* Only `access` bytes are touched below, so the bound is checked against the access
+       width; checking against sizeof(pcilib_register_value_t) (8 bytes now) would reject
+       the last registers of a bank with a narrower access width */
+    if ((addr + (size_t)access) > bank_ctx->bank->size) {
+	pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, (unsigned long)addr);
+	return PCILIB_ERROR_INVALID_ADDRESS;
+    }
+
+    // we consider this atomic operation and, therefore, do no locking
+    pcilib_datacpy(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr, &value, access, 1, b->raw_endianess);
+
+    return 0;
 }