7 years ago · 53c68316ee
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,30 @@
 
				+project(ipecamera)
			
 
				+
			
 
				+set(IPECAMERA_VERSION "0.0.1")
			
 
				+set(IPECAMERA_ABI_VERSION "0")
			
 
				+
			
 
				+cmake_minimum_required(VERSION 2.6)
			
 
				+list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
			
 
				+
			
 
				+add_definitions("-fPIC --std=gnu99 -Wall -O2 -gdwarf-2 -g3  -fno-omit-frame-pointer")
			
 
				+
			
 
				+find_package(CUDA REQUIRED)
			
 
				+find_package(PkgConfig REQUIRED)
			
 
				+
			
 
				+pkg_check_modules(PCILIB pcitool>=0.2 REQUIRED)
			
 
				+
			
 
				+
			
 
				+include_directories(
			
 
				+    ${CMAKE_SOURCE_DIR}
			
 
				+    ${PCILIB_INCLUDE_DIRS}
			
 
				+    ${CUDA_INCLUDE_DIRS}
			
 
				+)
			
 
				+
			
 
				+link_directories(
			
 
				+    ${PCILIB_LIBRARY_DIRS}
			
 
				+    ${CUDA_LIBRARY_DIRS}
			
 
				+)
			
 
				+
			
 
				+
			
 
				+cuda_add_executable(pio_test pio_test.cu)
			
 
				+target_link_libraries(pio_test pcilib rt cuda)
			
--- a/pio_test.cu
+++ b/pio_test.cu
@@ -0,0 +1,205 @@
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <unistd.h>
			
 
				+#include <stdarg.h>
			
 
				+#include <time.h>
			
 
				+#include <sched.h>
			
 
				+#include <sys/time.h>
			
 
				+
			
 
				+#include <cuda.h>
			
 
				+
			
 
				+#include <pcilib.h>
			
 
				+#include <pcilib/bar.h>
			
 
				+
			
 
				+#define DEVICE "/dev/fpga0"
			
 
				+
			
 
				+#define BAR PCILIB_BAR0
			
 
				+
			
 
				+
			
 
				+#define initAssert(ans) { initError((ans), __FILE__, __LINE__); }
			
 
				+inline int initError(CUresult code, const char *file, int line)
			
 
				+{
			
 
				+    if (code != CUDA_SUCCESS) 
			
 
				+    {
			
 
				+        const char *error = NULL;
			
 
				+        cuGetErrorString (code, &error);
			
 
				+        fprintf(stderr,"GPUassert: %s (Code: %i) %s %d\n",
			
 
				+                error, code, file, line);
			
 
				+        return code;
			
 
				+    } else {
			
 
				+        return 0;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
			
 
				+inline int gpuAssert(cudaError_t code, const char *file, int line)
			
 
				+{
			
 
				+    if (code != cudaSuccess) 
			
 
				+    {
			
 
				+        fprintf(stderr,"GPUassert: %s (Code: %i) %s %d\n",
			
 
				+                cudaGetErrorString(code), code, file, line);
			
 
				+        return code;
			
 
				+    } else {
			
 
				+        return 0;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+int main(int argc, char *argv[]) {
			
 
				+
			
 
				+    printf("\n");
			
 
				+
			
 
				+
			
 
				+    int manual_init = 1;
			
 
				+    if (manual_init) {
			
 
				+        //CUDA initialization
			
 
				+        initAssert (cuInit(0));
			
 
				+
			
 
				+        int num_gpus;
			
 
				+        initAssert (cuDeviceGetCount (&num_gpus));
			
 
				+        printf ("Found %i GPUs on the system\n", num_gpus);
			
 
				+
			
 
				+
			
 
				+        CUdevice gpu; //will be used to find the correct GPU
			
 
				+        for (num_gpus--; num_gpus >= 0; num_gpus--) {
			
 
				+
			
 
				+            CUdevice current_gpu;
			
 
				+            initAssert (cuDeviceGet (&current_gpu, num_gpus));
			
 
				+
			
 
				+            char gpu_name[30] = {0};
			
 
				+            initAssert (cuDeviceGetName (gpu_name, 30, current_gpu));
			
 
				+
			
 
				+            printf("GPU %i: %s\n", num_gpus, gpu_name);
			
 
				+
			
 
				+
			
 
				+            if (strncmp (gpu_name, "Tesla K40", 9) == 0) {
			
 
				+                printf ("Found a Tesla GPU! I'll use that one.\n");
			
 
				+                gpu = current_gpu;
			
 
				+                break;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        //The CU_CTX_MAP_HOST is what we are interested in! 
			
 
				+        CUcontext context;
			
 
				+        initAssert (cuCtxCreate (&context, CU_CTX_MAP_HOST | CU_CTX_SCHED_AUTO, gpu));
			
 
				+        initAssert (cuCtxSetCurrent (context));
			
 
				+
			
 
				+        //NOTE: API Version 3010 is problematic
			
 
				+        //(see https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__DRIVER.html)
			
 
				+        unsigned int api_version;
			
 
				+        initAssert (cuCtxGetApiVersion (context, &api_version));
			
 
				+        printf ("CUDA API Version: %u\n", api_version);
			
 
				+    }
			
 
				+    else {
			
 
				+
			
 
				+        int gpu_found = 0;
			
 
				+
			
 
				+        int num_gpus;
			
 
				+        gpuErrchk (cudaGetDeviceCount(&num_gpus));
			
 
				+
			
 
				+        for (num_gpus--; num_gpus >= 0; num_gpus--) {
			
 
				+            struct cudaDeviceProp dev_props;
			
 
				+            gpuErrchk (cudaGetDeviceProperties (&dev_props, num_gpus));
			
 
				+
			
 
				+            if (dev_props.canMapHostMemory > 0) {
			
 
				+                printf ("%s is able to map host memory. I'll use that one.\n", dev_props.name);
			
 
				+                gpuErrchk (cudaSetDevice (num_gpus));
			
 
				+                gpuErrchk (cudaSetDeviceFlags(cudaDeviceMapHost));
			
 
				+                gpu_found = 1;
			
 
				+                break;
			
 
				+            }
			
 
				+            else {
			
 
				+                printf ("%s can not map host memory. Won't use it.\n", dev_props.name);
			
 
				+            }
			
 
				+        }
			
 
				+        if (!gpu_found) {
			
 
				+            printf ("None of the installed GPUs can map host memory. Aborting...\n");
			
 
				+            exit(1);
			
 
				+        }
			
 
				+    }
			
 
				+    printf ("CUDA init done\n\n");
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+    pcilib_t *pci;
			
 
				+    void* volatile bar;
			
 
				+    const pcilib_bar_info_t *bar_info;
			
 
				+
			
 
				+    pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
			
 
				+    if (!pci) {
			
 
				+        printf("pcilib_open\n");
			
 
				+        exit(1);
			
 
				+    }
			
 
				+
			
 
				+    bar = pcilib_resolve_bar_address(pci, BAR, 0);
			
 
				+    if (!bar) {
			
 
				+        pcilib_close(pci);
			
 
				+        printf("map bar\n");
			
 
				+        exit(1);
			
 
				+    }
			
 
				+    printf("BAR mapped to: %p\n", bar);
			
 
				+
			
 
				+    bar_info = pcilib_get_bar_info(pci, BAR);
			
 
				+    printf("%p (Phys: 0x%lx, Size: 0x%x)\n", bar_info[BAR].virt_addr, bar_info[BAR].phys_addr, bar_info[BAR].size);
			
 
				+
			
 
				+
			
 
				+    //write some test data to the BAR
			
 
				+    int *test = ((int *)(((unsigned long)bar)+0x9100));
			
 
				+    /* int *test = (int *)malloc(sizeof(int)); */
			
 
				+    /* int *test = (int *)0xfb000000; */
			
 
				+
			
 
				+    printf ("Pointing to: 0x%lx\n", test);
			
 
				+    *(test) = 0xdeadbeef;
			
 
				+    printf ("Set BAR content to: 0x%x\n", *(test));
			
 
				+
			
 
				+
			
 
				+    //try to map the BAR address as host memory
			
 
				+    if (manual_init) {
			
 
				+        /* initAssert (cuMemHostRegister (foobar, 0x1000, CU_MEMHOSTREGISTER_DEVICEMAP)); */
			
 
				+        /* initAssert (cuMemHostRegister (bar, 0x4, CU_MEMHOSTREGISTER_DEVICEMAP)); */
			
 
				+        initAssert (cuMemHostRegister (test, 0x4, CU_MEMHOSTREGISTER_DEVICEMAP));
			
 
				+        /* *(volatile int *)(test) = 0xdeadbeef; */
			
 
				+    }
			
 
				+    else {
			
 
				+        /* gpuErrchk(cudaHostRegister(bar, bar_info[BAR].size, cudaHostRegisterMapped | cudaHostRegisterPortable)) */
			
 
				+        gpuErrchk(cudaHostRegister(test, 0x1000, cudaHostRegisterMapped | cudaHostRegisterPortable))
			
 
				+        /* gpuErrchk(cudaHostRegister(bar, 0x1000, cudaHostRegisterDefault)) */
			
 
				+        /* gpuErrchk(cudaHostRegister((void *)bar_info[BAR].phys_addr, 4, cudaHostRegisterMapped)) */
			
 
				+        /* gpuErrchk(cudaHostRegister(foobar, 0x4, cudaHostRegisterDefault)) */
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+    /* get the GPU-Mapped pointer */
			
 
				+    void *bar_gpu_pointer;
			
 
				+    gpuErrchk (cudaHostGetDevicePointer (&bar_gpu_pointer, test, 0));
			
 
				+
			
 
				+
			
 
				+    /* create 'overwrite' data and load it to a normal CUDA memory */
			
 
				+    int *overwrite = (int *)malloc(sizeof(int));
			
 
				+    *overwrite = 0xdefaced;
			
 
				+    printf ("Set overwrite value to: 0x%x\n", *overwrite);
			
 
				+
			
 
				+    void *normal_gpu_mem;
			
 
				+    gpuErrchk (cudaMalloc(&normal_gpu_mem, sizeof(int)));
			
 
				+    cudaDeviceSynchronize();
			
 
				+
			
 
				+    gpuErrchk (cudaMemcpy (normal_gpu_mem, overwrite, sizeof(int), cudaMemcpyHostToDevice)); 
			
 
				+
			
 
				+
			
 
				+    /* read the 'overwrite' data back to mapped bar address purely in GPU address space */
			
 
				+    gpuErrchk (cudaMemcpy (bar_gpu_pointer, normal_gpu_mem, sizeof(int), cudaMemcpyDeviceToDevice)); 
			
 
				+    cudaDeviceSynchronize();
			
 
				+
			
 
				+
			
 
				+
			
 
				+    /* if everything works, this should show the overwrite data */
			
 
				+    printf ("Read BAR as: 0x%x\n", *(test));
			
 
				+
			
 
				+
			
 
				+    free (overwrite);
			
 
				+    pcilib_close(pci);
			
 
				+    printf("PCI closed\n");
			
 
				+}