Selaa lähdekoodia

Taking the changes in gdrapi into account. Version of DMA from FPGA to GPU.

mathiasb 7 vuotta sitten
vanhempi
commit
d64f28c9f4
5 muutettua tiedostoa jossa 207 lisäystä ja 29 poistoa
  1. 1 2
      CMakeLists.txt
  2. 5 10
      include/gdrapi.h
  3. 124 0
      include/gdrapi.h.save
  4. 1 1
      launch.sh
  5. 76 16
      src/main.cu

+ 1 - 2
CMakeLists.txt

@@ -17,10 +17,9 @@ set(CMAKE_C_FLAGS "-msse -msse4.1 -mavx")
 cuda_add_executable(gpufirstcomm
   src/main.cu
   src/common.cu
-  src/gdrapi.c
   src/memcpy_avx.c
   src/memcpy_sse41.c
   src/memcpy_sse.c
   src/kernels.cu)
 
-target_link_libraries(gpufirstcomm cuda pcilib)
+target_link_libraries(gpufirstcomm cuda pcilib gdrapi)

+ 5 - 10
include/gdrapi.h

@@ -23,16 +23,11 @@
 #ifndef __GDRAPI_H__
 #define __GDRAPI_H__
 
-#include <stdint.h> // for standard [u]intX_t types
+#include <inttypes.h>
 #include <stddef.h>
 
-#define GDR_API_MAJOR_VERSION    1
-#define GDR_API_MINOR_VERSION    2
-#define GDR_API_VERSION          ((GDR_API_MAJOR_VERSION << 16) | GDR_API_MINOR_VERSION)
-
-
 #define GPU_PAGE_SHIFT   16
-#define GPU_PAGE_SIZE    (1UL << GPU_PAGE_SHIFT)
+#define GPU_PAGE_SIZE    ((unsigned long)1 << GPU_PAGE_SHIFT)
 #define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
 #define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
 
@@ -53,11 +48,10 @@
 extern "C" {
 #endif
 
-struct gdr;
-typedef struct gdr *gdr_t;
-
 // Initialize the library, e.g. by opening a connection to the kernel-mode
 // driver. Returns an handle to the library state object.
+struct gdr;
+typedef struct gdr *gdr_t;
 gdr_t gdr_open();
 
 // Destroy library state object, e.g. it closes the connection to kernel-mode
@@ -95,6 +89,7 @@ struct gdr_info {
     uint32_t page_size;
     uint64_t tm_cycles;
     uint32_t cycles_per_ms;
+    uint64_t bus_addr; 
 };
 typedef struct gdr_info gdr_info_t;
 int gdr_get_info(gdr_t g, gdr_mh_t handle, gdr_info_t *info);

+ 124 - 0
include/gdrapi.h.save

@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in 
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __GDRAPI_H__
+#define __GDRAPI_H__
+
+#include <stdint.h> // for standard [u]intX_t types
+#include <stddef.h>
+
+#define GDR_API_MAJOR_VERSION    1
+#define GDR_API_MINOR_VERSION    2
+#define GDR_API_VERSION          ((GDR_API_MAJOR_VERSION << 16) | GDR_API_MINOR_VERSION)
+
+
+#define GPU_PAGE_SHIFT   16
+#define GPU_PAGE_SIZE    (1UL << GPU_PAGE_SHIFT)
+#define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
+#define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
+
+/*
+ * GDRCopy, a low-latency GPU memory copy library (and a kernel-mode
+ * driver) based on NVIDIA GPUDirect RDMA technology.
+ *
+ * supported environment variables:
+ *
+ * - GDRCOPY_ENABLE_LOGGING, if defined logging is enabled, default is
+ *   disabled.
+ *
+ * - GDRCOPY_LOG_LEVEL, overrides log threshold, default is to print errors
+ *   only.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct gdr;
+typedef struct gdr *gdr_t;
+
+// Initialize the library, e.g. by opening a connection to the kernel-mode
+// driver. Returns a handle to the library state object.
+gdr_t gdr_open();
+
+// Destroy library state object, e.g. it closes the connection to kernel-mode
+// driver.
+//
+// Note that although BAR mappings of GPU memory are destroyed, user-space
+// mappings are not. Therefore user code is responsible for calling
+// gdr_unmap on all mappings before calling gdr_close.
+int gdr_close(gdr_t g);
+
+// Map device memory buffer on GPU BAR1, returning a handle.
+// Memory is still not accessible to user-space.
+typedef uint32_t gdr_mh_t;
+int gdr_pin_buffer(gdr_t g, unsigned long addr, size_t size, uint64_t p2p_token, uint32_t va_space, gdr_mh_t *handle);
+
+// Unmap the handle. 
+//
+// If there exists a corresponding user-space mapping, gdr_unmap should be
+// called before this one.
+int gdr_unpin_buffer(gdr_t g, gdr_mh_t handle);
+
+// flag is set when the kernel callback (relative to the
+// nvidia_p2p_get_pages) gets invoked, e.g. cuMemFree() before
+// gdr_unpin_buffer.
+int gdr_get_callback_flag(gdr_t g, gdr_mh_t handle, int *flag);
+
+// After pinning, info struct contains details of the mapped area.  
+//
+// Note that both info->va and info->mapped_size might be different from
+// the original address passed to gdr_pin_buffer due to aligning happening
+// in the kernel-mode driver
+struct gdr_info {
+    uint64_t va;
+    uint64_t mapped_size;
+    uint32_t page_size;
+    uint64_t tm_cycles;
+    uint32_t cycles_per_ms;
+};
+typedef struct gdr_info gdr_info_t;
+int gdr_get_info(gdr_t g, gdr_mh_t handle, gdr_info_t *info);
+
+// create a user-space mapping for the BAR1 info, length is bar1->size
+// above.
+//
+// WARNING: the BAR physical address will be aligned to the page size
+// before being mapped in user-space, so the pointer returned might be
+// affected by an offset. gdr_get_info can be used to calculate that
+// offset.
+int gdr_map(gdr_t g, gdr_mh_t handle, void **va, size_t size);
+
+// get rid of a user-space mapping.
+// First invoke gdr_unmap() then gdr_unpin_buffer().
+int gdr_unmap(gdr_t g, gdr_mh_t handle, void *va, size_t size);
+
+// gpubar_ptr is a user-space virtual address, i.e. one returned by gdr_map()
+int gdr_copy_to_bar(void  *gpubar_ptr, const void *cpumem_ptr, size_t size);
+int gdr_copy_from_bar(void *cpumem_ptr, const void *gpubar_ptr, size_t size);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __GDRAPI_H__

+ 1 - 1
launch.sh

@@ -7,4 +7,4 @@ echo "Resetting the FPGA"
 sleep 1
 echo "Putting the data on the FPGA. WARNING, hardcoded size"
 ddrio -v -i /home/mathiasb/sources/gpuFirstComm/to_send -s 0
-#ddrio -v -o /home/mathiasb/sources/gpuFirstComm/written -i /home/mathiasb/sources/gpuFirstComm/to_send -s 0
+

+ 76 - 16
src/main.cu

@@ -3,6 +3,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "common.h"
+#include "kernels.h"
+#include "gdrapi.h"
 #include <pcilib.h>
 #include <pcilib/kmem.h>
 #include <pcilib/bar.h>
@@ -18,6 +20,10 @@
 
 int main()
 {
+    /* Initialisation of the APIs */
+    assert_cu( cuInit(0) );
+    gdr_t g = gdr_open();
+
     /* Try some stuff... */
     printf("Using binary data to feed FPGA...\n");
     char* data=(char*)calloc(4096,sizeof(*data));
@@ -25,6 +31,46 @@ int main()
     init_to_send(data);
     system("/home/mathiasb/sources/gpuFirstComm/launch.sh");
 
+    /* Manage NVIDIA GPU */
+    printf("\nInitialisation of the GPU\n");
+    CUdevice GPU;
+    CUdevprop GPUProp;
+    assert_cuda( cudaSetDevice(0) );
+    assert_cu( cuDeviceGet(&GPU,0) );
+    assert_cu( cuDeviceGetProperties(&GPUProp,GPU) );
+
+    /* Check context */
+    assert_cu( cuCtxGetDevice(&GPU) );
+    CUcontext cuCtx;
+    assert_cu( cuCtxCreate(&cuCtx,CU_CTX_MAP_HOST,GPU) );
+    assert_cu( cuCtxGetDevice(&GPU) );
+
+    /* Allocate memory on the device, pin and map */
+    uint8_t flagValueToSet = 1;
+    printf("\nMemory mapping with the GPU for pages\n");
+    CUdeviceptr gpuPagePtr;
+    assert_cu( cuMemAlloc(&gpuPagePtr,3*PAGE_SIZE) );
+    assert_cu( cuPointerSetAttribute(&flagValueToSet,CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,gpuPagePtr) );
+    gdr_mh_t GPUMemHandlePage;
+    assert_gdr( gdr_pin_buffer(g,gpuPagePtr,3*PAGE_SIZE,0,0,&GPUMemHandlePage) );
+    void* gpuPageVa;
+    assert_gdr( gdr_map(g,GPUMemHandlePage,&gpuPageVa,3*PAGE_SIZE) );
+    gdr_info_t pageInfo;
+    assert_gdr( gdr_get_info(g,GPUMemHandlePage,&pageInfo) );
+    printf("Bus ptr = %lx\nVA = 0x%lx\nSize = %lu\n",pageInfo.bus_addr,pageInfo.va,pageInfo.mapped_size);
+    printf("Memory mapping with the GPU for descriptors\n");
+    CUdeviceptr gpuDescPtr;
+    assert_cu( cuMemAlloc(&gpuDescPtr,PAGE_SIZE) );
+    assert_cu( cuPointerSetAttribute(&flagValueToSet,CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,gpuDescPtr) );
+    gdr_mh_t GPUMemHandleDesc;
+    assert_gdr( gdr_pin_buffer(g,gpuDescPtr,PAGE_SIZE,0,0,&GPUMemHandleDesc) );
+    void* gpuDescVa;
+    assert_gdr( gdr_map(g,GPUMemHandleDesc,&gpuDescVa,PAGE_SIZE) );
+    
+    printf("All set\n");
+    gdr_info_t descInfo;
+    assert_gdr( gdr_get_info(g,GPUMemHandleDesc,&descInfo) );
+    printf("Bus ptr = %lx\nVA = 0x%lx\nSize = %lu\n",descInfo.bus_addr,descInfo.va,descInfo.mapped_size);
     
     /* PCI */
     printf("\nSetting up the PCI\n");
@@ -43,10 +89,9 @@ int main()
 	exit( EXIT_FAILURE );
     }
 
-    const pcilib_bar_info_t *bar_info;
-    bar_info = pcilib_get_bar_info(pciCtx, 0);
-    printf("Bar: %p (Phys: 0x%lx, Size: 0x%x)\n", bar_info[0].virt_addr, bar_info[0].phys_addr, bar_info[0].size);
-
+    CUdeviceptr dBAR;
+    assert_cu( cuMemHostRegister((void*)pciVa,128,CU_MEMHOSTREGISTER_IOMEMORY) );
+    assert_cu( cuMemHostGetDevicePointer(&dBAR,(void*)pciVa, 0) );
     
     /* Config PCI for Pages*/
     pcilib_kmem_handle_t* pciHandlePage;
@@ -71,8 +116,6 @@ int main()
 	exit( EXIT_FAILURE );
     }
 
-    printf("pciMemPtrPage = %p\npciBusPage = %p\n",pciMemPtrPage,pciBusPage);
-
     /* Config PCI for Desc */
     pcilib_kmem_handle_t* pciHandleDesc;
     pciHandleDesc = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, KMEM_USE_RING, KMEM_DEFAULT_FLAGS);
@@ -95,10 +138,6 @@ int main()
 	printf("Cannot get PCI Bus address on kernel memory\n");
 	exit( EXIT_FAILURE );
     }
-
-    printf("pciMemPtrDesc = %p\npciBusDesc = %p\n",pciMemPtrDesc,pciBusDesc);
-    
-    printf("pciMemPtrPage = %x\npciMemPtrDesc = %x\n",*(uint32_t*)pciMemPtrPage,*(uint32_t*)pciMemPtrDesc);
     
     /* FPGA */
     printf("\nWorking on the FPGA\n");
@@ -108,30 +147,51 @@ int main()
     usleep(100000);
     WR32_sleep(REG_NUM_PACKETS_PER_DESCRIPTOR,16);
     WR32_sleep(REG_PACKET_LENGTH,64);
-    WR32_sleep (REG_UPDATE_THRESHOLD, 0x1);
+    WR32_sleep(REG_UPDATE_THRESHOLD, 0x1);
+    /* WR64_sleep(REG_UPDATE_COUNTER,descInfo.bus_addr); */
     WR64_sleep(REG_UPDATE_ADDRESS,pciBusPage+DESCRIPTOR_OFFSET);
     WR32_sleep(REG_CONTROL,CONTROL_ENABLE_READ|CONTROL_SOURCE_RX_FIFO);
-    WR64_sleep(REG_DESCRIPTOR_ADDRESS,pciBusDesc);
+    WR64_sleep(REG_DESCRIPTOR_ADDRESS,descInfo.bus_addr);
     WR32_sleep(REG_DMA,1);
     WR32_sleep(REG_INTERCONNECT, 0x262);
+    /* WR32_sleep(REG_COUNTER,1); */
     usleep(100000);
 
-    WR64_sleep(REG_DESCRIPTOR_ADDRESS,pciBusDesc);
+    WR64_sleep(REG_DESCRIPTOR_ADDRESS,descInfo.bus_addr);
     printf("pciVa = %x\npciMemPtrPage = %lx\npciMemPtrDesc = %lx\n",pciVa,*(uint64_t*)pciMemPtrPage,*(uint64_t*)pciMemPtrDesc);
+
+    
     char* getBack=(char*)calloc(4096,sizeof(*getBack));
-    memcpy(getBack,(const void*)pciMemPtrDesc,4096);
+    memcpy(getBack,(const void*)gpuDescVa,4096);
     int i;
     for(i=0;i<4096;i++)
     {
-	printf("%hhx",getBack[i]);
+    	printf("%hhx",getBack[i]);
     }
     printf("\n");
     
+    
     /* Close everything */
     printf("\nClosing the connections\n");
+    WR32(REG_COUNTER, 0);
+    WR32(REG_DMA, 0);
+    WR32(REG_RESET_DMA, 1);
+    usleep (100000);
+    WR32(REG_RESET_DMA, 0);
+    usleep (100000);
+
     pcilib_free_kernel_memory(pciCtx,pciHandleDesc,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
     pcilib_free_kernel_memory(pciCtx,pciHandlePage,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
-    pcilib_close(pciCtx);    
+    assert_cu( cuMemHostUnregister((void*) pciVa) );
+    pcilib_close(pciCtx);
+    /* Tear down the GDR mappings: unmap each user-space mapping before */
+    /* unpinning its handle, passing gdr_unmap the size used for gdr_map. */
+    assert_gdr( gdr_unmap(g,GPUMemHandlePage,gpuPageVa,3*PAGE_SIZE) );
+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandlePage) );
+    /* The descriptor buffer was pinned and mapped with PAGE_SIZE, not 3*PAGE_SIZE. */
+    assert_gdr( gdr_unmap(g,GPUMemHandleDesc,gpuDescVa,PAGE_SIZE) );
+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandleDesc) );
+    assert_gdr( gdr_close(g) );
+    assert_cu( cuMemFree(gpuPagePtr) );
+    assert_cu( cuMemFree(gpuDescPtr) );
+    
     printf("All Cleared\n");
     
     exit(EXIT_SUCCESS);