SHA1
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ CMakeFiles/
 
				 cmake_install.cmake
			
 
				 gpufirstcomm
			
 
				 Makefile
			
 
				+to_send
			
 
				+written
			
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,9 @@ set(CMAKE_C_FLAGS "-msse -msse4.1 -mavx")
 
				 cuda_add_executable(gpufirstcomm
			
 
				   src/main.cu
			
 
				   src/common.cu
			
 
				-  src/gdrapi.c
			
 
				   src/memcpy_avx.c
			
 
				   src/memcpy_sse41.c
			
 
				   src/memcpy_sse.c
			
 
				   src/kernels.cu)
			
 
				 
			
 
				-target_link_libraries(gpufirstcomm cuda pcilib)
			
 
				+target_link_libraries(gpufirstcomm cuda pcilib gdrapi)
			
--- a/include/common.h
+++ b/include/common.h
@@ -7,9 +7,11 @@
 
				 #define ASSERT_FAIL 0
			
 
				 #define ASSERT_SUCCESS 1
			
 
				 #define GPU_NAME_LENGTH 30
			
 
				+#define PAGE_SIZE 4096
			
 
				 
			
 
				 void assert_cuda(cudaError_t err_id); /* for runtime api*/
			
 
				 void assert_cu(CUresult res_id); /* for driver api */
			
 
				 void assert_gdr(int gdr_id);
			
 
				+void init_to_send(const void* dataPtr);
			
 
				 
			
 
				 #endif
			
--- a/include/gdrapi.h
+++ b/include/gdrapi.h
@@ -23,16 +23,11 @@
 
				 #ifndef __GDRAPI_H__
			
 
				 #define __GDRAPI_H__
			
 
				 
			
 
				-#include <stdint.h> // for standard [u]intX_t types
			
 
				+#include <inttypes.h>
			
 
				 #include <stddef.h>
			
 
				 
			
 
				-#define GDR_API_MAJOR_VERSION    1
			
 
				-#define GDR_API_MINOR_VERSION    2
			
 
				-#define GDR_API_VERSION          ((GDR_API_MAJOR_VERSION << 16) | GDR_API_MINOR_VERSION)
			
 
				-
			
 
				-
			
 
				 #define GPU_PAGE_SHIFT   16
			
 
				-#define GPU_PAGE_SIZE    (1UL << GPU_PAGE_SHIFT)
			
 
				+#define GPU_PAGE_SIZE    ((unsigned long)1 << GPU_PAGE_SHIFT)
			
 
				 #define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
			
 
				 #define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
			
 
				 
			
@@ -53,11 +48,10 @@
 
				 extern "C" {
			
 
				 #endif
			
 
				 
			
 
				-struct gdr;
			
 
				-typedef struct gdr *gdr_t;
			
 
				-
			
 
				 // Initialize the library, e.g. by opening a connection to the kernel-mode
			
 
				 // driver. Returns an handle to the library state object.
			
 
				+struct gdr;
			
 
				+typedef struct gdr *gdr_t;
			
 
				 gdr_t gdr_open();
			
 
				 
			
 
				 // Destroy library state object, e.g. it closes the connection to kernel-mode
			
@@ -95,6 +89,7 @@ struct gdr_info {
 
				     uint32_t page_size;
			
 
				     uint64_t tm_cycles;
			
 
				     uint32_t cycles_per_ms;
			
 
				+    uint64_t bus_addr; 
			
 
				 };
			
 
				 typedef struct gdr_info gdr_info_t;
			
 
				 int gdr_get_info(gdr_t g, gdr_mh_t handle, gdr_info_t *info);
			
--- a/include/gdrapi.h.save
+++ b/include/gdrapi.h.save
@@ -0,0 +1,124 @@
 
				+/*
			
 
				+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				+ * copy of this software and associated documentation files (the "Software"),
			
 
				+ * to deal in the Software without restriction, including without limitation
			
 
				+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				+ * and/or sell copies of the Software, and to permit persons to whom the
			
 
				+ * Software is furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in 
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
			
 
				+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __GDRAPI_H__
			
 
				+#define __GDRAPI_H__
			
 
				+
			
 
				+#include <stdint.h> // for standard [u]intX_t types
			
 
				+#include <stddef.h>
			
 
				+
			
 
				+#define GDR_API_MAJOR_VERSION    1
			
 
				+#define GDR_API_MINOR_VERSION    2
			
 
				+#define GDR_API_VERSION          ((GDR_API_MAJOR_VERSION << 16) | GDR_API_MINOR_VERSION)
			
 
				+
			
 
				+
			
 
				+#define GPU_PAGE_SHIFT   16
			
 
				+#define GPU_PAGE_SIZE    (1UL << GPU_PAGE_SHIFT)
			
 
				+#define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
			
 
				+#define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
			
 
				+
			
 
				+/*
			
 
				+ * GDRCopy, a low-latency GPU memory copy library (and a kernel-mode
			
 
				+ * driver) based on NVIDIA GPUDirect RDMA technology.
			
 
				+ *
			
 
				+ * supported environment variables:
			
 
				+ *
			
 
				+ * - GDRCOPY_ENABLE_LOGGING, if defined logging is enabled, default is
			
 
				+ *   disabled.
			
 
				+ *
			
 
				+ * - GDRCOPY_LOG_LEVEL, overrides log threshold, default is to print errors
			
 
				+ *   only.
			
 
				+ */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+struct gdr;
			
 
				+typedef struct gdr *gdr_t;
			
 
				+
			
 
				+// Initialize the library, e.g. by opening a connection to the kernel-mode
			
 
				+// driver. Returns an handle to the library state object.
			
 
				+gdr_t gdr_open();
			
 
				+
			
 
				+// Destroy library state object, e.g. it closes the connection to kernel-mode
			
 
				+// driver.
			
 
				+//
			
 
				+// Note that altough BAR mappings of GPU memory are destroyed, user-space
			
 
				+// mappings are not. So therefore user code is responsible of calling
			
 
				+// gdr_unmap on all mappings before calling gdr_close.
			
 
				+int gdr_close(gdr_t g);
			
 
				+
			
 
				+// Map device memory buffer on GPU BAR1, returning an handle.
			
 
				+// Memory is still not accessible to user-space.
			
 
				+typedef uint32_t gdr_mh_t;
			
 
				+int gdr_pin_buffer(gdr_t g, unsigned long addr, size_t size, uint64_t p2p_token, uint32_t va_space, gdr_mh_t *handle);
			
 
				+
			
 
				+// Unmap the handle. 
			
 
				+//
			
 
				+// If there exists a corresponding user-space mapping, gdr_unmap should be
			
 
				+// called before this one.
			
 
				+int gdr_unpin_buffer(gdr_t g, gdr_mh_t handle);
			
 
				+
			
 
				+// flag is set when the kernel callback (relative to the
			
 
				+// nvidia_p2p_get_pages) gets invoked, e.g. cuMemFree() before
			
 
				+// gdr_unpin_buffer.
			
 
				+int gdr_get_callback_flag(gdr_t g, gdr_mh_t handle, int *flag);
			
 
				+
			
 
				+// After pinning, info struct contains details of the mapped area.  
			
 
				+//
			
 
				+// Note that both info->va and info->mapped_size might be different from
			
 
				+// the original address passed to gdr_pin_buffer due to aligning happening
			
 
				+// in the kernel-mode driver
			
 
				+struct gdr_info {
			
 
				+    uint64_t va;
			
 
				+    uint64_t mapped_size;
			
 
				+    uint32_t page_size;
			
 
				+    uint64_t tm_cycles;
			
 
				+    uint32_t cycles_per_ms;
			
 
				+};
			
 
				+typedef struct gdr_info gdr_info_t;
			
 
				+int gdr_get_info(gdr_t g, gdr_mh_t handle, gdr_info_t *info);
			
 
				+
			
 
				+// create a user-space mapping for the BAR1 info, length is bar1->size
			
 
				+// above.
			
 
				+//
			
 
				+// WARNING: the BAR physical address will be aligned to the page size
			
 
				+// before being mapped in user-space, so the pointer returned might be
			
 
				+// affected by an offset. gdr_get_info can be used to calculate that
			
 
				+// offset.
			
 
				+int gdr_map(gdr_t g, gdr_mh_t handle, void **va, size_t size);
			
 
				+
			
 
				+// get rid of a user-space mapping.
			
 
				+// First invoke gdr_unmap() then gdr_unpin_buffer().
			
 
				+int gdr_unmap(gdr_t g, gdr_mh_t handle, void *va, size_t size);
			
 
				+
			
 
				+// gpubar_ptr is a user-space virtual address, i.e. one returned by gdr_map()
			
 
				+int gdr_copy_to_bar(void  *gpubar_ptr, const void *cpumem_ptr, size_t size);
			
 
				+int gdr_copy_from_bar(void *cpumem_ptr, const void *gpubar_ptr, size_t size);
			
 
				+
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif // __GDRAPI_H__
			
--- a/launch.sh
+++ b/launch.sh
@@ -0,0 +1,10 @@
 
				+#!/bin/bash
			
 
				+echo "Resetting the FPGA"
			
 
				+/usr/local/bin/pci -w 0 1
			
 
				+/usr/local/bin/pci -w 0 0
			
 
				+/usr/local/bin/pci -w 9040 F
			
 
				+/usr/local/bin/pci -w 9040 0
			
 
				+sleep 1
			
 
				+echo "Putting the data on the FPGA. WARNING, hardcoded size"
			
 
				+ddrio -v -i /home/mathiasb/sources/gpuFirstComm/to_send -s 0
			
 
				+
			
--- a/src/common.cu
+++ b/src/common.cu
@@ -30,3 +30,21 @@ void assert_gdr(int gdr_id)
 
				 	exit(EXIT_FAILURE);
			
 
				     }
			
 
				 }
			
 
				+
			
 
				+void init_to_send(const void* dataPtr)
			
 
				+{
			
 
				+    FILE* filePtr = fopen("/home/mathiasb/sources/gpuFirstComm/to_send","wb");
			
 
				+    if( filePtr == NULL )
			
 
				+    {
			
 
				+	printf("Could not open to_send file. Exiting...\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    int errCheck;
			
 
				+    errCheck = fwrite(dataPtr,4096,1,filePtr);
			
 
				+    if( errCheck == 0 )
			
 
				+    {
			
 
				+	printf("Could not write the items. Exiting...\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    fclose(filePtr);
			
 
				+}
			
--- a/src/main.cu
+++ b/src/main.cu
@@ -11,6 +11,12 @@
 
				 #include "ipedma.h"
			
 
				 #include <unistd.h>
			
 
				 
			
 
				+#define KMEM_DEFAULT_FLAGS      (pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_HARDWARE | PCILIB_KMEM_FLAG_PERSISTENT | PCILIB_KMEM_FLAG_EXCLUSIVE)
			
 
				+
			
 
				+#define KMEM_USE_RING           PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
			
 
				+#define KMEM_USE_DEFAULT        PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 2)
			
 
				+
			
 
				+
			
 
				 
			
 
				 int main()
			
 
				 {
			
@@ -18,64 +24,59 @@ int main()
 
				     assert_cu( cuInit(0) );
			
 
				     gdr_t g = gdr_open();
			
 
				 
			
 
				-    /* First check if a NVIDIA GPU is on the system and see which to use */
			
 
				-    /* For the time being, use number 0 */
			
 
				+    /* Try some stuff... */
			
 
				+    printf("Using binary data to feed FPGA...\n");
			
 
				+    char* data=(char*)calloc(4096,sizeof(*data));
			
 
				+    memset(data,0xBB,4096);
			
 
				+    init_to_send(data);
			
 
				+    system("/home/mathiasb/sources/gpuFirstComm/launch.sh");
			
 
				+
			
 
				+    /* Manage NVIDIA GPU */
			
 
				     printf("\nInitialisation of the GPU\n");
			
 
				-    int i;
			
 
				-    int countGPU;
			
 
				     CUdevice GPU;
			
 
				-    char nameGPU[GPU_NAME_LENGTH];
			
 
				     CUdevprop GPUProp;
			
 
				-    
			
 
				-    assert_cuda( cudaGetDeviceCount(&countGPU) );
			
 
				-    for(i=0; i<countGPU; i++)
			
 
				-    {
			
 
				-	assert_cu( cuDeviceGet(&GPU,i) );
			
 
				-	assert_cu( cuDeviceGetName(nameGPU,GPU_NAME_LENGTH,GPU) );
			
 
				-	printf("GPU %d is %s\n",i,nameGPU);
			
 
				-    }
			
 
				-
			
 
				     assert_cuda( cudaSetDevice(0) );
			
 
				     assert_cu( cuDeviceGet(&GPU,0) );
			
 
				     assert_cu( cuDeviceGetProperties(&GPUProp,GPU) );
			
 
				 
			
 
				     /* Check context */
			
 
				     assert_cu( cuCtxGetDevice(&GPU) );
			
 
				-    printf("Device for this context: %d\n",GPU);
			
 
				     CUcontext cuCtx;
			
 
				-    assert_cu( cuCtxCreate(&cuCtx,0,GPU) );
			
 
				+    assert_cu( cuCtxCreate(&cuCtx,CU_CTX_MAP_HOST,GPU) );
			
 
				     assert_cu( cuCtxGetDevice(&GPU) );
			
 
				-    printf("Device for this context: %d\n",GPU);
			
 
				-    int pi;
			
 
				-    assert_cu( cuDeviceGetAttribute(&pi,CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,GPU) );
			
 
				-    printf("Support unified addressing? %d\n",pi);
			
 
				-    
			
 
				-    /* Allocate memory on the device, pin and map */
			
 
				-    printf("\nMemory mapping with the GPU\n");
			
 
				-    CUdeviceptr dptr;
			
 
				-    assert_cu( cuMemAlloc(&dptr,(size_t) GPUProp.sharedMemPerBlock) );
			
 
				-    gdr_mh_t GPUMemHandle;
			
 
				-    assert_gdr( gdr_pin_buffer(g,dptr,(size_t) GPUProp.sharedMemPerBlock,0,0,&GPUMemHandle) );
			
 
				-    void* va;
			
 
				-    assert_gdr( gdr_map(g,GPUMemHandle,&va,(size_t) GPUProp.sharedMemPerBlock) );
			
 
				-    /*CHECK THE OFFSET*/
			
 
				-    gdr_info_t GPUInfo;
			
 
				-    int offset;
			
 
				-    assert_gdr( gdr_get_info(g,GPUMemHandle,&GPUInfo) );
			
 
				-    offset = (GPUInfo.va > dptr) ? GPUInfo.va - dptr:dptr - GPUInfo.va;
			
 
				-    uint32_t *buf_ptr = (uint32_t *)((char *)va + offset);
			
 
				 
			
 
				+    /* Allocate memory on the device, pin and map */
			
 
				+    uint8_t flagValueToSet = 1;
			
 
				+    printf("\nMemory mapping with the GPU for pages\n");
			
 
				+    CUdeviceptr gpuPagePtr;
			
 
				+    assert_cu( cuMemAlloc(&gpuPagePtr,3*PAGE_SIZE) );
			
 
				+    assert_cu( cuPointerSetAttribute(&flagValueToSet,CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,gpuPagePtr) );
			
 
				+    gdr_mh_t GPUMemHandlePage;
			
 
				+    assert_gdr( gdr_pin_buffer(g,gpuPagePtr,3*PAGE_SIZE,0,0,&GPUMemHandlePage) );
			
 
				+    void* gpuPageVa;
			
 
				+    assert_gdr( gdr_map(g,GPUMemHandlePage,&gpuPageVa,3*PAGE_SIZE) );
			
 
				+    gdr_info_t pageInfo;
			
 
				+    assert_gdr( gdr_get_info(g,GPUMemHandlePage,&pageInfo) );
			
 
				+    printf("Bus ptr = %lx\nVA = 0x%lx\nSize = %lu\n",pageInfo.bus_addr,pageInfo.va,pageInfo.mapped_size);
			
 
				+    printf("Memory mapping with the GPU for descriptors\n");
			
 
				+    CUdeviceptr gpuDescPtr;
			
 
				+    assert_cu( cuMemAlloc(&gpuDescPtr,PAGE_SIZE) );
			
 
				+    assert_cu( cuPointerSetAttribute(&flagValueToSet,CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,gpuDescPtr) );
			
 
				+    gdr_mh_t GPUMemHandleDesc;
			
 
				+    assert_gdr( gdr_pin_buffer(g,gpuDescPtr,PAGE_SIZE,0,0,&GPUMemHandleDesc) );
			
 
				+    void* gpuDescVa;
			
 
				+    assert_gdr( gdr_map(g,GPUMemHandleDesc,&gpuDescVa,PAGE_SIZE) );
			
 
				+    
			
 
				     printf("All set\n");
			
 
				-    printf("va: 0x%lx\ndptr: 0x%llx\nGPUInfo.va: 0x%lx\noffset: 0x%x\nbuf_ptr: 0x%lx\n",
			
 
				-    	   (uint64_t) va,dptr,GPUInfo.va,offset,buf_ptr);
			
 
				+    gdr_info_t descInfo;
			
 
				+    assert_gdr( gdr_get_info(g,GPUMemHandleDesc,&descInfo) );
			
 
				+    printf("Bus ptr = %lx\nVA = 0x%lx\nSize = %lu\n",descInfo.bus_addr,descInfo.va,descInfo.mapped_size);
			
 
				     
			
 
				-    /* At this point the GPU's mem is mapped to a CPU buffer to enable DMA */
			
 
				-
			
 
				     /* PCI */
			
 
				     printf("\nSetting up the PCI\n");
			
 
				     pcilib_t* pciCtx;
			
 
				     char* pciVa;
			
 
				-    pciCtx = pcilib_open("/dev/fpga0",NULL);
			
 
				+    pciCtx = pcilib_open("/dev/fpga0",PCILIB_MODEL_DETECT);
			
 
				     if( pciCtx == NULL )
			
 
				     {
			
 
				 	printf("Cannot open a context for pci\n");
			
@@ -89,89 +90,108 @@ int main()
 
				     }
			
 
				 
			
 
				     CUdeviceptr dBAR;
			
 
				-    int getdBAR = 10;
			
 
				     assert_cu( cuMemHostRegister((void*)pciVa,128,CU_MEMHOSTREGISTER_IOMEMORY) );
			
 
				     assert_cu( cuMemHostGetDevicePointer(&dBAR,(void*)pciVa, 0) );
			
 
				-    printf("pciVa = %p\ndBar = 0x%llx\n",pciVa,dBAR);
			
 
				-    assert_cu( cuMemcpyDtoH(&getdBAR,dBAR,sizeof(int)) );
			
 
				-    printf("getdBAR = %d\n",getdBAR);
			
 
				     
			
 
				-    pcilib_kmem_handle_t* pciHandle;
			
 
				-    pciHandle = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_CONSISTENT,1,64,4096,PCILIB_KMEM_USE_STANDARD,PCILIB_KMEM_FLAG_REUSE);
			
 
				-    if( pciHandle == NULL )
			
 
				+    /* Config PCI for Pages*/
			
 
				+    pcilib_kmem_handle_t* pciHandlePage;
			
 
				+    pciHandlePage = pcilib_alloc_kernel_memory(pciCtx, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, 1, ((PAGE_SIZE%4096)?(4096 * (1 + PAGE_SIZE/4096)):PAGE_SIZE), 4096, KMEM_USE_DEFAULT, KMEM_DEFAULT_FLAGS);
			
 
				+    if( pciHandlePage == NULL )
			
 
				     {
			
 
				 	printf("Cannot allocate  PCI kernel memory\n");
			
 
				 	exit( EXIT_FAILURE );
			
 
				     }
			
 
				-    volatile void* pciMemPtr;
			
 
				-    uintptr_t pciBus;
			
 
				-    pciMemPtr = pcilib_kmem_get_ua(pciCtx,pciHandle);
			
 
				-    if( pciMemPtr == NULL )
			
 
				+    volatile void* pciMemPtrPage;
			
 
				+    uintptr_t pciBusPage;
			
 
				+    pciMemPtrPage = (uint32_t*) pcilib_kmem_get_block_ua(pciCtx,pciHandlePage,0);
			
 
				+    if( pciMemPtrPage == NULL )
			
 
				     {
			
 
				 	printf("Cannot get PCI pointer to kernel memory\n");
			
 
				 	exit( EXIT_FAILURE );
			
 
				     }
			
 
				-    pciBus = pcilib_kmem_get_ba(pciCtx,pciHandle);
			
 
				-    if( pciBus == 0 )
			
 
				+    pciBusPage = pcilib_kmem_get_block_ba(pciCtx,pciHandlePage,0);
			
 
				+    if( pciBusPage == 0 )
			
 
				     {
			
 
				 	printf("Cannot get PCI Bus address on kernel memory\n");
			
 
				 	exit( EXIT_FAILURE );
			
 
				     }
			
 
				 
			
 
				-    printf("pciMemPtr = %p\npciBus = %p\n",pciMemPtr,pciBus);
			
 
				-
			
 
				-    const pcilib_bar_info_t* bar_info;
			
 
				-    bar_info = pcilib_get_bar_info(pciCtx,0);
			
 
				-    if( bar_info == NULL )
			
 
				+    /* Config PCI for Desc */
			
 
				+    pcilib_kmem_handle_t* pciHandleDesc;
			
 
				+    pciHandleDesc = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, KMEM_USE_RING, KMEM_DEFAULT_FLAGS);
			
 
				+    if( pciHandleDesc == NULL )
			
 
				     {
			
 
				-	printf("Cannot get BAR info\n");
			
 
				+	printf("Cannot allocate  PCI kernel memory\n");
			
 
				 	exit( EXIT_FAILURE );
			
 
				     }
			
 
				-    else
			
 
				-	printf("bar = %d\nsize = %lu\nphys_addr = %p\nvirt_addr = %p\n",bar_info->bar,bar_info->size,bar_info->phys_addr,bar_info->virt_addr);
			
 
				-
			
 
				-    /* Try some stuff... */
			
 
				-    printf("\nRunning some tests\n");
			
 
				-    *(int*) pciMemPtr = 65;
			
 
				-    printf("Try to read pciMem in another way : %d %d\n",*(int*)pciMemPtr,*(int*)dBAR);
			
 
				-    assert_cu( cuMemcpyHtoD(dptr,(void*)pciMemPtr,sizeof(int)) );
			
 
				-    assert_cu( cuMemcpyHtoD(dBAR,(void*)pciMemPtr,sizeof(int)) );
			
 
				-    assert_cu( cuMemcpyDtoH(&getdBAR,dBAR,sizeof(int)) );
			
 
				-    printf("getdBAR = %d\n",getdBAR);    
			
 
				-    printf("Try to read pciMem in another way : %d %d\n",*(int*)pciMemPtr,*(int*)dBAR);
			
 
				-    printf("Try to read pciMem in another way : %d\n",*(int*)va);
			
 
				-    printf("A small computation\n");
			
 
				-    add_three_global<<< 1,1 >>>(dptr);
			
 
				-    assert_cuda( cudaDeviceSynchronize() );
			
 
				-    int foo;
			
 
				-    assert_gdr( gdr_copy_from_bar(&foo,va,sizeof(foo)) );
			
 
				-    printf("foo = %d\nva = %d\n",foo,*(int*) va);
			
 
				-    printf("Now the other way around\n");
			
 
				-    foo = 100;
			
 
				-    assert_gdr( gdr_copy_to_bar(va,&foo,sizeof(foo)) );
			
 
				-    add_three_global<<< 1,1 >>>(dptr);
			
 
				-    assert_cuda( cudaDeviceSynchronize() );
			
 
				-    assert_cu( cuMemcpyDtoH((void*)pciMemPtr,dptr,sizeof(int)) );
			
 
				-    printf("pciMem = %d\n",*(int*)pciMemPtr);
			
 
				-
			
 
				+    volatile void* pciMemPtrDesc;
			
 
				+    uintptr_t pciBusDesc;
			
 
				+    pciMemPtrDesc = (uint32_t*) pcilib_kmem_get_block_ua(pciCtx,pciHandleDesc,0);
			
 
				+    if( pciMemPtrDesc == NULL )
			
 
				+    {
			
 
				+	printf("Cannot get PCI pointer to kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    pciBusDesc = pcilib_kmem_get_block_ba(pciCtx,pciHandleDesc,0);
			
 
				+    if( pciBusDesc == 0 )
			
 
				+    {
			
 
				+	printf("Cannot get PCI Bus address on kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    
			
 
				     /* FPGA */
			
 
				     printf("\nWorking on the FPGA\n");
			
 
				-    WR32 (REG_RESET_DMA, 1);
			
 
				-    usleep (100000);
			
 
				-    WR32 (REG_RESET_DMA, 0);
			
 
				-    usleep (100000);
			
 
				-    
			
 
				+    WR32(REG_RESET_DMA, 1);
			
 
				+    usleep(100000);
			
 
				+    WR32(REG_RESET_DMA, 0);
			
 
				+    usleep(100000);
			
 
				+    WR32_sleep(REG_NUM_PACKETS_PER_DESCRIPTOR,16);
			
 
				+    WR32_sleep(REG_PACKET_LENGTH,64);
			
 
				+    WR32_sleep(REG_UPDATE_THRESHOLD, 0x1);
			
 
				+    /* WR64_sleep(REG_UPDATE_COUNTER,descInfo.bus_addr); */
			
 
				+    WR64_sleep(REG_UPDATE_ADDRESS,pciBusPage+DESCRIPTOR_OFFSET);
			
 
				+    WR32_sleep(REG_CONTROL,CONTROL_ENABLE_READ|CONTROL_SOURCE_RX_FIFO);
			
 
				+    WR64_sleep(REG_DESCRIPTOR_ADDRESS,descInfo.bus_addr);
			
 
				+    WR32_sleep(REG_DMA,1);
			
 
				+    WR32_sleep(REG_INTERCONNECT, 0x262);
			
 
				+    /* WR32_sleep(REG_COUNTER,1); */
			
 
				+    usleep(100000);
			
 
				+
			
 
				+    WR64_sleep(REG_DESCRIPTOR_ADDRESS,descInfo.bus_addr);
			
 
				+    printf("pciVa = %x\npciMemPtrPage = %lx\npciMemPtrDesc = %lx\n",pciVa,*(uint64_t*)pciMemPtrPage,*(uint64_t*)pciMemPtrDesc);
			
 
				 
			
 
				     
			
 
				+    char* getBack=(char*)calloc(4096,sizeof(*getBack));
			
 
				+    memcpy(getBack,(const void*)gpuDescVa,4096);
			
 
				+    int i;
			
 
				+    for(i=0;i<4096;i++)
			
 
				+    {
			
 
				+    	printf("%hhx",getBack[i]);
			
 
				+    }
			
 
				+    printf("\n");
			
 
				+    
			
 
				+    
			
 
				     /* Close everything */
			
 
				     printf("\nClosing the connections\n");
			
 
				+    WR32(REG_COUNTER, 0);
			
 
				+    WR32(REG_DMA, 0);
			
 
				+    WR32(REG_RESET_DMA, 1);
			
 
				+    usleep (100000);
			
 
				+    WR32(REG_RESET_DMA, 0);
			
 
				+    usleep (100000);
			
 
				+
			
 
				+    pcilib_free_kernel_memory(pciCtx,pciHandleDesc,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				+    pcilib_free_kernel_memory(pciCtx,pciHandlePage,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				     assert_cu( cuMemHostUnregister((void*) pciVa) );
			
 
				     pcilib_close(pciCtx);
			
 
				-    assert_gdr( gdr_unmap(g,GPUMemHandle,va,(size_t) GPUProp.sharedMemPerBlock) );
			
 
				-    assert_gdr( gdr_unpin_buffer(g,GPUMemHandle) );
			
 
				+    assert_gdr( gdr_unmap(g,GPUMemHandlePage,gpuPageVa,3*PAGE_SIZE) );
			
 
				+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandlePage) );
			
 
				+    assert_gdr( gdr_unmap(g,GPUMemHandleDesc,gpuDescVa,3*PAGE_SIZE) );
			
 
				+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandleDesc) );
			
 
				     assert_gdr( gdr_close(g) );
			
 
				-    assert_cu( cuMemFree(dptr) );
			
 
				-
			
 
				+    assert_cu( cuMemFree(gpuPagePtr) );
			
 
				+    assert_cu( cuMemFree(gpuDescPtr) );
			
 
				+    
			
 
				     printf("All Cleared\n");
			
 
				     
			
 
				     exit(EXIT_SUCCESS);
			
--- a/src/main.cu.save
+++ b/src/main.cu.save
@@ -0,0 +1,185 @@
 
				+/* The main program */
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include "common.h"
			
 
				+#include "kernels.h"
			
 
				+#include "gdrapi.h"
			
 
				+#include <pcilib.h>
			
 
				+#include <pcilib/kmem.h>
			
 
				+#include <pcilib/bar.h>
			
 
				+#include "ipedma.h"
			
 
				+#include <unistd.h>
			
 
				+
			
 
				+
			
 
				+int main()
			
 
				+{
			
 
				+    /* Initialisation of the APIs */
			
 
				+    assert_cu( cuInit(0) );
			
 
				+    gdr_t g = gdr_open();
			
 
				+
			
 
				+    /* First check if a NVIDIA GPU is on the system and see which to use */
			
 
				+    /* For the time being, use number 0 */
			
 
				+    printf("\nInitialisation of the GPU\n");
			
 
				+    int i;
			
 
				+    int countGPU;
			
 
				+    CUdevice GPU;
			
 
				+    char nameGPU[GPU_NAME_LENGTH];
			
 
				+    CUdevprop GPUProp;
			
 
				+    
			
 
				+    assert_cuda( cudaGetDeviceCount(&countGPU) );
			
 
				+    for(i=0; i<countGPU; i++)
			
 
				+    {
			
 
				+	assert_cu( cuDeviceGet(&GPU,i) );
			
 
				+	assert_cu( cuDeviceGetName(nameGPU,GPU_NAME_LENGTH,GPU) );
			
 
				+	printf("GPU %d is %s\n",i,nameGPU);
			
 
				+    }
			
 
				+
			
 
				+    assert_cuda( cudaSetDevice(0) );
			
 
				+    assert_cu( cuDeviceGet(&GPU,0) );
			
 
				+    assert_cu( cuDeviceGetProperties(&GPUProp,GPU) );
			
 
				+
			
 
				+    /* Check context */
			
 
				+    assert_cu( cuCtxGetDevice(&GPU) );
			
 
				+    printf("Device for this context: %d\n",GPU);
			
 
				+    CUcontext cuCtx;
			
 
				+    assert_cu( cuCtxCreate(&cuCtx,CU_CTX_MAP_HOST,GPU) );
			
 
				+    assert_cu( cuCtxGetDevice(&GPU) );
			
 
				+    printf("Device for this context: %d\n",GPU);
			
 
				+    int pi;
			
 
				+    assert_cu( cuDeviceGetAttribute(&pi,CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,GPU) );
			
 
				+    printf("Support unified addressing? %d\n",pi);
			
 
				+    
			
 
				+    /* Allocate memory on the device, pin and map */
			
 
				+    printf("\nMemory mapping with the GPU for pages\n");
			
 
				+    CUdeviceptr gpuPagePtr;
			
 
				+    assert_cu( cuMemAlloc(&gpuPagePtr,3*PAGE_SIZE) );
			
 
				+    gdr_mh_t GPUMemHandlePage;
			
 
				+    assert_gdr( gdr_pin_buffer(g,gpuPagePtr,3*PAGE_SIZE,0,0,&GPUMemHandlePage) );
			
 
				+    void* gpuPageVa;
			
 
				+    assert_gdr( gdr_map(g,GPUMemHandlePage,&gpuPageVa,3*PAGE_SIZE) );
			
 
				+    
			
 
				+    printf("Memory mapping with the GPU for descriptors\n");
			
 
				+    CUdeviceptr gpuDescPtr;
			
 
				+    assert_cu( cuMemAlloc(&gpuDescPtr,PAGE_SIZE) );
			
 
				+    gdr_mh_t GPUMemHandleDesc;
			
 
				+    assert_gdr( gdr_pin_buffer(g,gpuDescPtr,PAGE_SIZE,0,0,&GPUMemHandleDesc) );
			
 
				+    void* gpuDescVa;
			
 
				+    assert_gdr( gdr_map(g,GPUMemHandleDesc,&gpuDescVa,PAGE_SIZE) );
			
 
				+    
			
 
				+    printf("All set\n");
			
 
				+    printf("gpuPageVa: 0x%lx\ngpuPagePtr: 0x%llx\n",
			
 
				+    	   (uint64_t) gpuPageVa,gpuPagePtr);
			
 
				+    printf("gpuDesc: 0x%lx\ngpuDescPtr: 0x%llx\n",
			
 
				+    	   (uint64_t) gpuDescVa,gpuDescPtr);
			
 
				+    
			
 
				+    /* At this point the GPU's mem is mapped to a CPU buffer to enable DMA */
			
 
				+
			
 
				+    /* PCI */
			
 
				+    printf("\nSetting up the PCI\n");
			
 
				+    pcilib_t* pciCtx;
			
 
				+    char* pciVa;
			
 
				+    pciCtx = pcilib_open("/dev/fpga0",NULL);
			
 
				+    if( pciCtx == NULL )
			
 
				+    {
			
 
				+	printf("Cannot open a context for pci\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    pciVa = pcilib_resolve_bar_address(pciCtx,0, 0);
			
 
				+    if( pciVa == NULL )
			
 
				+    {
			
 
				+	printf("Cannot resolve PCI physical adress to virtual\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+
			
 
				+    CUdeviceptr dBAR;
			
 
				+    assert_cu( cuMemHostRegister((void*)pciVa,128,CU_MEMHOSTREGISTER_IOMEMORY) );
			
 
				+    assert_cu( cuMemHostGetDevicePointer(&dBAR,(void*)pciVa, 0) );
			
 
				+
			
 
				+    /* Config PCI for Pages*/
			
 
				+    pcilib_kmem_handle_t* pciHandlePage;
			
 
				+    pciHandlePage = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_DMA_C2S_PAGE,1,PAGE_SIZE,4096,PCILIB_KMEM_USE_DMA_PAGES,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				+    if( pciHandlePage == NULL )
			
 
				+    {
			
 
				+	printf("Cannot allocate  PCI kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    volatile void* pciMemPtrPage;
			
 
				+    uintptr_t pciBusPage;
			
 
				+    pciMemPtrPage = pcilib_kmem_get_ua(pciCtx,pciHandlePage);
			
 
				+    if( pciMemPtrPage == NULL )
			
 
				+    {
			
 
				+	printf("Cannot get PCI pointer to kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    pciBusPage = pcilib_kmem_get_ba(pciCtx,pciHandlePage);
			
 
				+    if( pciBusPage == 0 )
			
 
				+    {
			
 
				+	printf("Cannot get PCI Bus address on kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+
			
 
				+    printf("pciMemPtrPage = %p\npciBusPage = %p\n",pciMemPtrPage,pciBusPage);
			
 
				+
			
 
				+    /* Config PCI for Desc */
			
 
				+    pcilib_kmem_handle_t* pciHandleDesc;
			
 
				+    pciHandleDesc = pcilib_alloc_kernel_memory(pciCtx,PCILIB_KMEM_TYPE_CONSISTENT,1,128,4096,PCILIB_KMEM_USE_DMA_RING,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				+    if( pciHandleDesc == NULL )
			
 
				+    {
			
 
				+	printf("Cannot allocate  PCI kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    volatile void* pciMemPtrDesc;
			
 
				+    uintptr_t pciBusDesc;
			
 
				+    pciMemPtrDesc = pcilib_kmem_get_ua(pciCtx,pciHandleDesc);
			
 
				+    if( pciMemPtrDesc == NULL )
			
 
				+    {
			
 
				+	printf("Cannot get PCI pointer to kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+    pciBusDesc = pcilib_kmem_get_ba(pciCtx,pciHandleDesc);
			
 
				+    if( pciBusDesc == 0 )
			
 
				+    {
			
 
				+	printf("Cannot get PCI Bus address on kernel memory\n");
			
 
				+	exit( EXIT_FAILURE );
			
 
				+    }
			
 
				+
			
 
				+    printf("pciMemPtrDesc = %p\npciBusDesc = %p\n",pciMemPtrDesc,pciBusDesc);
			
 
				+
			
 
				+    
			
 
				+    /* Try some stuff... */
			
 
				+
			
 
				+    /* FPGA */
			
 
				+    printf("\nWorking on the FPGA\n");
			
 
				+    WR32(REG_RESET_DMA, 1);
			
 
				+    usleep(100000);
			
 
				+    WR32(REG_RESET_DMA, 0);
			
 
				+    usleep(100000);
			
 
				+    WR32_sleep(REG_NUM_PACKETS_PER_DESCRIPTOR,16);
			
 
				+    WR32_sleep(REG_PACKET_LENGTH,64);
			
 
				+    WR64_sleep(REG_UPDATE_ADDRESS,pciBusPage+DESCRIPTOR_OFFSET);
			
 
				+    WR32_sleep(REG_CONTROL,CONTROL_ENABLE_READ|CONTROL_SOURCE_RX_FIFO);
			
 
				+    WR64_sleep(REG_DESCRIPTOR_ADDRESS,pciBusDesc);
			
 
				+    WR32_sleep(REG_DMA,1);
			
 
				+    WR32_sleep(REG_INTERCONNECT, 0x262);
			
 
				+    usleep(100000);
			
 
				+
			
 
				+    
			
 
				+    /* Close everything */
			
 
				+    printf("\nClosing the connections\n");
			
 
				+    pcilib_free_kernel_memory(pciCtx,pciHandleDesc,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				+    pcilib_free_kernel_memory(pciCtx,pciHandlePage,(pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_HARDWARE));
			
 
				+    assert_cu( cuMemHostUnregister((void*) pciVa) );
			
 
				+    pcilib_close(pciCtx);
			
 
				+    assert_gdr( gdr_unmap(g,GPUMemHandlePage,gpuPageVa,3*PAGE_SIZE) );
			
 
				+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandlePage) );
			
 
				+    assert_gdr( gdr_unmap(g,GPUMemHandleDesc,gpuDescVa,3*PAGE_SIZE) );
			
 
				+    assert_gdr( gdr_unpin_buffer(g,GPUMemHandleDesc) );
			
 
				+    assert_gdr( gdr_close(g) );
			
 
				+    assert_cu( cuMemFree(gpuPagePtr) );
			
 
				+    assert_cu( cuMemFree(gpuDescPtr) );
			
 
				+
			
 
				+    printf("All Cleared\n");
			
 
				+    
			
 
				+    exit(EXIT_SUCCESS);
			
 
				+}
작성자	SHA1 메시지	날짜
mathiasb	d64f28c9f4 Taking the changes in gdrapi into account. Version of DMA from FPGA to GPU.	6 년 전
mathiasb	d802a89ff2 Working version of DMA from FPGA to CPU's RAM.	6 년 전
mathiasb	7d1340a256 Corrected some flags and completed the FPGA set-up.	6 년 전
mathiasb	0296cab89b Clean up of main. Allocation of memory for both pages and descriptors	6 년 전