123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <stdarg.h>
- #include <time.h>
- #include <sched.h>
- #include <sys/time.h>
- #include <cuda.h>
- #include <pcilib.h>
- #include <pcilib/bar.h>
- #define DEVICE "/dev/fpga0"
- #define BAR PCILIB_BAR0
- #define initAssert(ans) { initError((ans), __FILE__, __LINE__); }
- inline int initError(CUresult code, const char *file, int line)
- {
- if (code != CUDA_SUCCESS)
- {
- const char *error = NULL;
- cuGetErrorString (code, &error);
- fprintf(stderr,"GPUassert: %s (Code: %i) %s %d\n",
- error, code, file, line);
- return code;
- } else {
- return 0;
- }
- }
- #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
- inline int gpuAssert(cudaError_t code, const char *file, int line)
- {
- if (code != cudaSuccess)
- {
- fprintf(stderr,"GPUassert: %s (Code: %i) %s %d\n",
- cudaGetErrorString(code), code, file, line);
- return code;
- } else {
- return 0;
- }
- }
- int main(int argc, char *argv[]) {
- printf("\n");
- int manual_init = 1;
- if (manual_init) {
- //CUDA initialization
- initAssert (cuInit(0));
- int num_gpus;
- initAssert (cuDeviceGetCount (&num_gpus));
- printf ("Found %i GPUs on the system\n", num_gpus);
- CUdevice gpu; //will be used to find the correct GPU
- for (num_gpus--; num_gpus >= 0; num_gpus--) {
- CUdevice current_gpu;
- initAssert (cuDeviceGet (¤t_gpu, num_gpus));
- char gpu_name[30] = {0};
- initAssert (cuDeviceGetName (gpu_name, 30, current_gpu));
- printf("GPU %i: %s\n", num_gpus, gpu_name);
- if (strncmp (gpu_name, "Tesla K40", 9) == 0) {
- printf ("Found a Tesla GPU! I'll use that one.\n");
- gpu = current_gpu;
- break;
- }
- }
- //The CU_CTX_MAP_HOST is what we are interested in!
- CUcontext context;
- initAssert (cuCtxCreate (&context, CU_CTX_MAP_HOST | CU_CTX_SCHED_AUTO, gpu));
- initAssert (cuCtxSetCurrent (context));
- //NOTE: API Version 3010 is problematic
- //(see https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__DRIVER.html)
- unsigned int api_version;
- initAssert (cuCtxGetApiVersion (context, &api_version));
- printf ("CUDA API Version: %u\n", api_version);
- }
- else {
- int gpu_found = 0;
- int num_gpus;
- gpuErrchk (cudaGetDeviceCount(&num_gpus));
- for (num_gpus--; num_gpus >= 0; num_gpus--) {
- struct cudaDeviceProp dev_props;
- gpuErrchk (cudaGetDeviceProperties (&dev_props, num_gpus));
- if (dev_props.canMapHostMemory > 0) {
- printf ("%s is able to map host memory. I'll use that one.\n", dev_props.name);
- gpuErrchk (cudaSetDevice (num_gpus));
- gpuErrchk (cudaSetDeviceFlags(cudaDeviceMapHost));
- gpu_found = 1;
- break;
- }
- else {
- printf ("%s can not map host memory. Won't use it.\n", dev_props.name);
- }
- }
- if (!gpu_found) {
- printf ("None of the installed GPUs can map host memory. Aborting...\n");
- exit(1);
- }
- }
- printf ("CUDA init done\n\n");
- pcilib_t *pci;
- void* volatile bar;
- const pcilib_bar_info_t *bar_info;
- pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
- if (!pci) {
- printf("pcilib_open\n");
- exit(1);
- }
- bar = pcilib_resolve_bar_address(pci, BAR, 0);
- if (!bar) {
- pcilib_close(pci);
- printf("map bar\n");
- exit(1);
- }
- printf("BAR mapped to: %p\n", bar);
- bar_info = pcilib_get_bar_info(pci, BAR);
- printf("%p (Phys: 0x%lx, Size: 0x%x)\n", bar_info[BAR].virt_addr, bar_info[BAR].phys_addr, bar_info[BAR].size);
- //write some test data to the BAR
- int *test = ((int *)(((unsigned long)bar)+0x9100));
- /* int *test = (int *)malloc(sizeof(int)); */
- /* int *test = (int *)0xfb000000; */
- printf ("Pointing to: 0x%lx\n", test);
- *(test) = 0xdeadbeef;
- printf ("Set BAR content to: 0x%x\n", *(test));
- //try to map the BAR address as host memory
- if (manual_init) {
- /* initAssert (cuMemHostRegister (foobar, 0x1000, CU_MEMHOSTREGISTER_DEVICEMAP)); */
- /* initAssert (cuMemHostRegister (bar, 0x4, CU_MEMHOSTREGISTER_DEVICEMAP)); */
- initAssert (cuMemHostRegister (test, 0x4, CU_MEMHOSTREGISTER_IOMEMORY));
- /* *(volatile int *)(test) = 0xdeadbeef; */
- }
- else {
- /* gpuErrchk(cudaHostRegister(bar, bar_info[BAR].size, cudaHostRegisterMapped | cudaHostRegisterPortable)) */
- gpuErrchk(cudaHostRegister(test, 0x1000, cudaHostRegisterMapped | cudaHostRegisterPortable))
- /* gpuErrchk(cudaHostRegister(bar, 0x1000, cudaHostRegisterDefault)) */
- /* gpuErrchk(cudaHostRegister((void *)bar_info[BAR].phys_addr, 4, cudaHostRegisterMapped)) */
- /* gpuErrchk(cudaHostRegister(foobar, 0x4, cudaHostRegisterDefault)) */
- }
- /* get the GPU-Mapped pointer */
- void *bar_gpu_pointer;
- gpuErrchk (cudaHostGetDevicePointer (&bar_gpu_pointer, test, 0));
- /* create 'overwrite' data and load it to a normal CUDA memory */
- int *overwrite = (int *)malloc(sizeof(int));
- *overwrite = 0xdefaced;
- printf ("Set overwrite value to: 0x%x\n", *overwrite);
- void *normal_gpu_mem;
- gpuErrchk (cudaMalloc(&normal_gpu_mem, sizeof(int)));
- cudaDeviceSynchronize();
- gpuErrchk (cudaMemcpy (normal_gpu_mem, overwrite, sizeof(int), cudaMemcpyHostToDevice));
- /* read the 'overwrite' data back to mapped bar address purely in GPU address space */
- gpuErrchk (cudaMemcpy (bar_gpu_pointer, normal_gpu_mem, sizeof(int), cudaMemcpyDeviceToDevice));
- cudaDeviceSynchronize();
- /* if everything works, this should show the overwrite data */
- printf ("Read BAR as: 0x%x\n", *(test));
- free (overwrite);
- pcilib_close(pci);
- printf("PCI closed\n");
- }
|