1
0

4 Commity 3b1a6705a0 ... 500e8339e5

Autor SHA1 Správa Dátum
  mathiasb 500e8339e5 End of tests. Try to use FPGA 6 rokov pred
  mathiasb 95a2657605 A simple test to handle the different devices' memory 6 rokov pred
  mathiasb 5c18f1de70 Include of kmem, allocating a buffer and getting a user space pointer. Closing unsuccessful? 6 rokov pred
  mathiasb c7198accdc Fitting CMake and open/close PCI context 6 rokov pred
6 zmenil súbory, kde vykonal 170 pridanie a 23 odobranie
  1. 5 0
      .gitignore
  2. 2 2
      CMakeLists.txt
  3. 32 0
      include/ipedma.h
  4. 2 0
      include/kernels.h
  5. 25 0
      src/kernels.cu
  6. 104 21
      src/main.cu

+ 5 - 0
.gitignore

@@ -0,0 +1,5 @@
+CMakeCache.txt
+CMakeFiles/
+cmake_install.cmake
+gpufirstcomm
+Makefile

+ 2 - 2
CMakeLists.txt

@@ -10,7 +10,7 @@ include_directories(include)
 
 #link_directories(${CUDA_LIBRARY_DIRS})
 
-set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS})
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}-Wno-deprecated-gpu-targets)
 set(CMAKE_C_COMPILER "/usr/bin/clang")
 set(CMAKE_C_FLAGS "-msse -msse4.1 -mavx")
 
@@ -23,4 +23,4 @@ cuda_add_executable(gpufirstcomm
   src/memcpy_sse.c
   src/kernels.cu)
 
-target_link_libraries(gpufirstcomm cuda)
+target_link_libraries(gpufirstcomm cuda pcilib)

+ 32 - 0
include/ipedma.h

@@ -0,0 +1,32 @@
+#define REG_RESET_DMA                   0x00
+#define REG_DMA                         0x04
+#define REG_NUM_PACKETS_PER_DESCRIPTOR  0x10
+#define REG_PERF_COUNTER		0x20
+//#define REG_PERF_COUNTER                0x28
+#define REG_PACKET_LENGTH               0x0C
+#define REG_DESCRIPTOR_ADDRESS          0x50
+#define REG_UPDATE_ADDRESS              0x58
+#define REG_UPDATE_THRESHOLD            0x60
+#define REG_UPDATE_COUNTER		0x70
+#define REG_TIMER_THRESHOLD          0x64
+
+#define REG_INTERCONNECT		0x9048
+#define REG_COUNTER                     0x9000
+
+
+#define DESCRIPTOR_OFFSET		256
+
+#define REG_CONTROL                  0x9040
+#define CONTROL_ENABLE_READ          0x00000400
+#define CONTROL_ENABLE_MULTI_READ    0x00000800
+#define CONTROL_SOURCE_RX_FIFO       0x00010000
+
+
+#define WR32(addr, value) *(volatile uint32_t *) (((char*)(pciVa)) + (addr)) = (value);
+#define RD32(addr) (*(volatile uint32_t *) (((char*)(pciVa)) + (addr)))
+#define WR32_sleep(addr, value) *(volatile uint32_t *) (((char*)(pciVa)) + (addr)) = (value); usleep (100);
+
+#define WR64(addr, value) *(volatile uint64_t *) (((char*)(pciVa)) + (addr)) = (value);
+#define RD64(addr) (*(volatile uint64_t *) (((char*)(pciVa)) + (addr)))
+#define WR64_sleep(addr, value) *(uint64_t *) (((char*)(pciVa)) + (addr)) = (value); usleep (100);
+

+ 2 - 0
include/kernels.h

@@ -1,6 +1,8 @@
 #include "cuda.h"
 #include "cuda_runtime_api.h"
+#include "gdrapi.h"
 
 __device__ void add_two_device(CUdeviceptr number);
 __global__ void add_three_global(CUdeviceptr number);
 __global__ void add_one_global(CUdeviceptr number);
+void firstest(CUdeviceptr dptr, void* va);

+ 25 - 0
src/kernels.cu

@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "kernels.h"
+#include "common.h"
 
 __device__
 void add_two_device(CUdeviceptr number)
@@ -22,3 +23,27 @@ void add_one_global(CUdeviceptr number)
 {
     (* (int*) number)++;
 }
+
+void first_test(CUdeviceptr dptr, void* va)
+{
+    int set, get;
+    printf("Use the nvidia api\n");
+    set = 4242;
+    get = 0;
+    printf("set = %d\nget = %d\n",set,get);
+    assert_cu( cuMemcpyHtoD(dptr,&set,sizeof(set)) );
+    add_three_global<<< 1,1 >>>(dptr);
+    cudaDeviceSynchronize();
+    assert_cu( cuMemcpyDtoH(&get,dptr,sizeof(get)) );
+    printf("set = %d\nget = %d\n",set,get);
+
+    printf("Use the gdr api\n");
+    set = 4242;
+    get = 0;
+    printf("set = %d\nget = %d\nva = %d\n",set,get,*(int*) va);
+    assert_gdr( gdr_copy_to_bar(va,&set,sizeof(set)) );
+    add_one_global<<< 1,1 >>>(dptr);
+    cudaDeviceSynchronize();
+    assert_gdr( gdr_copy_from_bar(&get,va,sizeof(get)) );
+    printf("set = %d\nget = %d\nva = %d\n",set,get,*(int*) va);
+}

+ 104 - 21
src/main.cu

@@ -3,9 +3,14 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "common.h"
-/*#include "cuda.h"*/
 #include "kernels.h"
 #include "gdrapi.h"
+#include <pcilib.h>
+#include <pcilib/kmem.h>
+#include <pcilib/bar.h>
+#include "ipedma.h"
+#include <unistd.h>
+
 
 int main()
 {
@@ -15,6 +20,7 @@ int main()
 
     /* First check if a NVIDIA GPU is on the system and see which to use */
     /* For the time being, use number 0 */
+    printf("\nInitialisation of the GPU\n");
     int i;
     int countGPU;
     CUdevice GPU;
@@ -36,12 +42,16 @@ int main()
     /* Check context */
     assert_cu( cuCtxGetDevice(&GPU) );
     printf("Device for this context: %d\n",GPU);
-    CUcontext ctx;
-    assert_cu( cuCtxCreate(&ctx,0,GPU) );
+    CUcontext cuCtx;
+    assert_cu( cuCtxCreate(&cuCtx,0,GPU) );
     assert_cu( cuCtxGetDevice(&GPU) );
     printf("Device for this context: %d\n",GPU);
+    int pi;
+    assert_cu( cuDeviceGetAttribute(&pi,CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,GPU) );
+    printf("Support unified addressing? %d\n",pi);
     
     /* Allocate memory on the device, pin and map */
+    printf("\nMemory mapping with the GPU\n");
     CUdeviceptr dptr;
     assert_cu( cuMemAlloc(&dptr,(size_t) GPUProp.sharedMemPerBlock) );
     gdr_mh_t GPUMemHandle;
@@ -61,29 +71,102 @@ int main()
     
     /* At this point the GPU's mem is mapped to a CPU buffer to enable DMA */
 
-    int set, get;
-    printf("Use the nvidia api\n");
-    set = 4242;
-    get = 0;
-    printf("set = %d\nget = %d\n",set,get);
-    assert_cu( cuMemcpyHtoD(dptr,&set,sizeof(set)) );
+    /* PCI */
+    printf("\nSetting up the PCI\n");
+    pcilib_t* pciCtx;
+    char* pciVa;
+    pciCtx = pcilib_open("/dev/fpga0",NULL);
+    if( pciCtx == NULL )
+    {
+	printf("Cannot open a context for pci\n");
+	exit( EXIT_FAILURE );
+    }
+    pciVa = pcilib_resolve_bar_address(pciCtx,0, 0);
+    if( pciVa == NULL )
+    {
+	printf("Cannot resolve PCI physical adress to virtual\n");
+	exit( EXIT_FAILURE );
+    }
+
+    CUdeviceptr dBAR;
+    int getdBAR = 10;
+    assert_cu( cuMemHostRegister((void*)pciVa,128,CU_MEMHOSTREGISTER_IOMEMORY) );
+    assert_cu( cuMemHostGetDevicePointer(&dBAR,(void*)pciVa, 0) );
+    printf("pciVa = %p\ndBar = 0x%llx\n",pciVa,dBAR);
+    assert_cu( cuMemcpyDtoH(&getdBAR,dBAR,sizeof(int)) );
+    printf("getdBAR = %d\n",getdBAR);
+    
+    pcilib_kmem_handle_t* pciHandle;
+    pciHandle = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_CONSISTENT,1,64,4096,PCILIB_KMEM_USE_STANDARD,PCILIB_KMEM_FLAG_REUSE);
+    if( pciHandle == NULL )
+    {
+	printf("Cannot allocate  PCI kernel memory\n");
+	exit( EXIT_FAILURE );
+    }
+    volatile void* pciMemPtr;
+    uintptr_t pciBus;
+    pciMemPtr = pcilib_kmem_get_ua(pciCtx,pciHandle);
+    if( pciMemPtr == NULL )
+    {
+	printf("Cannot get PCI pointer to kernel memory\n");
+	exit( EXIT_FAILURE );
+    }
+    pciBus = pcilib_kmem_get_ba(pciCtx,pciHandle);
+    if( pciBus == 0 )
+    {
+	printf("Cannot get PCI Bus address on kernel memory\n");
+	exit( EXIT_FAILURE );
+    }
+
+    printf("pciMemPtr = %p\npciBus = %p\n",pciMemPtr,pciBus);
+
+    const pcilib_bar_info_t* bar_info;
+    bar_info = pcilib_get_bar_info(pciCtx,0);
+    if( bar_info == NULL )
+    {
+	printf("Cannot get BAR info\n");
+	exit( EXIT_FAILURE );
+    }
+    else
+	printf("bar = %d\nsize = %lu\nphys_addr = %p\nvirt_addr = %p\n",bar_info->bar,bar_info->size,bar_info->phys_addr,bar_info->virt_addr);
+
+    /* Try some stuff... */
+    printf("\nRunning some tests\n");
+    *(int*) pciMemPtr = 65;
+    printf("Try to read pciMem in another way : %d %d\n",*(int*)pciMemPtr,*(int*)dBAR);
+    assert_cu( cuMemcpyHtoD(dptr,(void*)pciMemPtr,sizeof(int)) );
+    assert_cu( cuMemcpyHtoD(dBAR,(void*)pciMemPtr,sizeof(int)) );
+    assert_cu( cuMemcpyDtoH(&getdBAR,dBAR,sizeof(int)) );
+    printf("getdBAR = %d\n",getdBAR);    
+    printf("Try to read pciMem in another way : %d %d\n",*(int*)pciMemPtr,*(int*)dBAR);
+    printf("Try to read pciMem in another way : %d\n",*(int*)va);
+    printf("A small computation\n");
     add_three_global<<< 1,1 >>>(dptr);
-    cudaDeviceSynchronize();
-    assert_cu( cuMemcpyDtoH(&get,dptr,sizeof(get)) );
-    printf("set = %d\nget = %d\n",set,get);
+    assert_cuda( cudaDeviceSynchronize() );
+    int foo;
+    assert_gdr( gdr_copy_from_bar(&foo,va,sizeof(foo)) );
+    printf("foo = %d\nva = %d\n",foo,*(int*) va);
+    printf("Now the other way around\n");
+    foo = 100;
+    assert_gdr( gdr_copy_to_bar(va,&foo,sizeof(foo)) );
+    add_three_global<<< 1,1 >>>(dptr);
+    assert_cuda( cudaDeviceSynchronize() );
+    assert_cu( cuMemcpyDtoH((void*)pciMemPtr,dptr,sizeof(int)) );
+    printf("pciMem = %d\n",*(int*)pciMemPtr);
 
-    printf("Use the gdr api\n");
-    set = 4242;
-    get = 0;
-    printf("set = %d\nget = %d\nva = %d\n",set,get,*(int*) va);
-    assert_gdr( gdr_copy_to_bar(va,&set,sizeof(set)) );
-    add_one_global<<< 1,1 >>>(dptr);
-    cudaDeviceSynchronize();
-    assert_gdr( gdr_copy_from_bar(&get,va,sizeof(get)) );
-    printf("set = %d\nget = %d\nva = %d\n",set,get,*(int*) va);
+    /* FPGA */
+    printf("\nWorking on the FPGA\n");
+    WR32 (REG_RESET_DMA, 1);
+    usleep (100000);
+    WR32 (REG_RESET_DMA, 0);
+    usleep (100000);
+    
 
     
     /* Close everything */
+    printf("\nClosing the connections\n");
+    assert_cu( cuMemHostUnregister((void*) pciVa) );
+    pcilib_close(pciCtx);
     assert_gdr( gdr_unmap(g,GPUMemHandle,va,(size_t) GPUProp.sharedMemPerBlock) );
     assert_gdr( gdr_unpin_buffer(g,GPUMemHandle) );
     assert_gdr( gdr_close(g) );