pio_test.cu 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  #include <sched.h>
  #include <stdarg.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <time.h>
  #include <unistd.h>
  #include <sys/time.h>

  #include <cuda.h>

  #include <pcilib.h>
  #include <pcilib/bar.h>
  11. #define DEVICE "/dev/fpga0"
  12. #define BAR PCILIB_BAR0
  /*
   * Statement wrapper around initError(): reports a failed CUDA driver-API
   * call together with the call site. The status returned by initError() is
   * discarded, so execution continues after a failure.
   */
  #define initAssert(ans) { initError((ans), __FILE__, __LINE__); }

  /*
   * Report a failed CUDA driver-API call on stderr.
   *
   * code: result of the driver-API call to inspect.
   * file: source file of the call site (normally __FILE__).
   * line: source line of the call site (normally __LINE__).
   *
   * Returns 0 when code is CUDA_SUCCESS, otherwise the numeric value of code.
   */
  inline int initError(CUresult code, const char *file, int line)
  {
      if (code == CUDA_SUCCESS)
          return 0;

      const char *error = NULL;
      /*
       * cuGetErrorString() leaves the string pointer NULL when it does not
       * recognise the code; passing NULL to "%s" is undefined behaviour, so
       * substitute a fixed text in that case.
       */
      if (cuGetErrorString(code, &error) != CUDA_SUCCESS || error == NULL)
          error = "unknown error";

      fprintf(stderr, "GPUassert: %s (Code: %i) %s %d\n",
              error, (int)code, file, line);
      return (int)code;
  }
  /*
   * Statement wrapper around gpuAssert(): reports a failed CUDA runtime-API
   * call together with the call site. The status returned by gpuAssert() is
   * discarded.
   */
  #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

  /*
   * Report a failed CUDA runtime-API call on stderr.
   *
   * code: result of the runtime-API call to inspect.
   * file: source file of the call site (normally __FILE__).
   * line: source line of the call site (normally __LINE__).
   *
   * Returns 0 when code is cudaSuccess, otherwise the numeric value of code.
   */
  inline int gpuAssert(cudaError_t code, const char *file, int line)
  {
      /* Nothing to report for a successful call. */
      if (code == cudaSuccess)
          return 0;

      const char *description = cudaGetErrorString(code);
      fprintf(stderr, "GPUassert: %s (Code: %i) %s %d\n",
              description, code, file, line);
      return code;
  }
  /* Non-zero when a CUDA driver-API call failed; initError() prints the diagnostic. */
  #define DRV_FAILED(call) (initError((call), __FILE__, __LINE__) != 0)
  /* Non-zero when a CUDA runtime-API call failed; gpuAssert() prints the diagnostic. */
  #define RT_FAILED(call) (gpuAssert((call), __FILE__, __LINE__) != 0)

  /*
   * Initialise CUDA through the driver API: pick the GPU whose name starts with
   * "Tesla K40", create a context with CU_CTX_MAP_HOST on it and make it current.
   *
   * context_out: receives the created context; the caller destroys it.
   * Returns 0 on success, 1 if any driver call fails or no matching GPU exists.
   */
  static int initCudaDriver(CUcontext *context_out)
  {
      if (DRV_FAILED(cuInit(0)))
          return 1;

      int num_gpus = 0;
      if (DRV_FAILED(cuDeviceGetCount(&num_gpus)))
          return 1;
      printf("Found %i GPUs on the system\n", num_gpus);

      CUdevice gpu = 0;
      int gpu_found = 0;
      /* Walk the devices from the highest ordinal down, as the original loop did. */
      for (int idx = num_gpus - 1; idx >= 0; idx--) {
          CUdevice current_gpu;
          if (DRV_FAILED(cuDeviceGet(&current_gpu, idx)))
              return 1;

          char gpu_name[30] = {0};
          if (DRV_FAILED(cuDeviceGetName(gpu_name, (int)sizeof(gpu_name), current_gpu)))
              return 1;
          printf("GPU %i: %s\n", idx, gpu_name);

          if (strncmp(gpu_name, "Tesla K40", 9) == 0) {
              printf("Found a Tesla GPU! I'll use that one.\n");
              gpu = current_gpu;
              gpu_found = 1;
              break;
          }
      }

      /* Without this check an uninitialised device handle would reach cuCtxCreate(). */
      if (!gpu_found) {
          printf("No Tesla K40 GPU found. Aborting...\n");
          return 1;
      }

      /* The CU_CTX_MAP_HOST flag is what we are interested in! */
      if (DRV_FAILED(cuCtxCreate(context_out, CU_CTX_MAP_HOST | CU_CTX_SCHED_AUTO, gpu)))
          return 1;
      if (DRV_FAILED(cuCtxSetCurrent(*context_out)))
          return 1;

      /* NOTE: API Version 3010 is problematic
       * (see https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__DRIVER.html) */
      unsigned int api_version = 0;
      if (DRV_FAILED(cuCtxGetApiVersion(*context_out, &api_version)))
          return 1;
      printf("CUDA API Version: %u\n", api_version);
      return 0;
  }

  /*
   * Initialise CUDA through the runtime API: select the first GPU (searching
   * from the highest ordinal down) that reports canMapHostMemory and enable
   * cudaDeviceMapHost on it.
   *
   * Returns 0 on success, 1 if a runtime call fails or no GPU qualifies.
   */
  static int initCudaRuntime(void)
  {
      int num_gpus = 0;
      if (RT_FAILED(cudaGetDeviceCount(&num_gpus)))
          return 1;

      for (int idx = num_gpus - 1; idx >= 0; idx--) {
          struct cudaDeviceProp dev_props;
          if (RT_FAILED(cudaGetDeviceProperties(&dev_props, idx)))
              return 1;

          if (dev_props.canMapHostMemory > 0) {
              printf("%s is able to map host memory. I'll use that one.\n", dev_props.name);
              if (RT_FAILED(cudaSetDevice(idx)))
                  return 1;
              if (RT_FAILED(cudaSetDeviceFlags(cudaDeviceMapHost)))
                  return 1;
              return 0;
          }
          printf("%s can not map host memory. Won't use it.\n", dev_props.name);
      }

      printf("None of the installed GPUs can map host memory. Aborting...\n");
      return 1;
  }

  /*
   * Copy a known value into the registered BAR word using only GPU-side
   * addresses, then read the word back through the CPU mapping.
   *
   * test: CPU pointer to the BAR word; it must already be registered with CUDA.
   * Returns 0 on success, 1 if any runtime call fails.
   */
  static int overwriteBarFromGpu(volatile unsigned int *test)
  {
      /* Get the GPU-mapped pointer for the registered word. */
      void *bar_gpu_pointer = NULL;
      if (RT_FAILED(cudaHostGetDevicePointer(&bar_gpu_pointer, (void *)test, 0)))
          return 1;

      /* Create the 'overwrite' data and load it into normal CUDA memory. */
      unsigned int overwrite = 0xdefaced;
      printf("Set overwrite value to: 0x%x\n", overwrite);

      void *normal_gpu_mem = NULL;
      if (RT_FAILED(cudaMalloc(&normal_gpu_mem, sizeof(overwrite))))
          return 1;

      /* || short-circuits, so the calls run in order and stop at the first failure. */
      int failed =
          RT_FAILED(cudaDeviceSynchronize()) ||
          RT_FAILED(cudaMemcpy(normal_gpu_mem, &overwrite, sizeof(overwrite),
                               cudaMemcpyHostToDevice)) ||
          /* Write the data to the mapped BAR address purely in GPU address space. */
          RT_FAILED(cudaMemcpy(bar_gpu_pointer, normal_gpu_mem, sizeof(overwrite),
                               cudaMemcpyDeviceToDevice)) ||
          RT_FAILED(cudaDeviceSynchronize());

      /* If everything works, this shows the overwrite data. */
      if (!failed)
          printf("Read BAR as: 0x%x\n", *test);

      if (RT_FAILED(cudaFree(normal_gpu_mem)))
          failed = 1;
      return failed;
  }

  /*
   * Resolve the BAR, write a marker word at offset 0x9100, register that word
   * with CUDA and let the GPU overwrite it.
   *
   * pci:         open pcilib handle (not closed here).
   * manual_init: non-zero to register through the driver API, zero for the
   *              runtime API; must match how CUDA was initialised.
   * Returns 0 on success, 1 on any failure.
   */
  static int runBarTest(pcilib_t *pci, int manual_init)
  {
      void *bar = pcilib_resolve_bar_address(pci, BAR, 0);
      if (!bar) {
          printf("map bar\n");
          return 1;
      }
      printf("BAR mapped to: %p\n", bar);

      const pcilib_bar_info_t *bar_info = pcilib_get_bar_info(pci, BAR);
      if (!bar_info) {
          printf("bar info\n");
          return 1;
      }
      /* NOTE(review): indexing by BAR is kept from the original; if
       * pcilib_get_bar_info() already returns the entry for BAR this is only
       * correct while BAR is 0 - confirm against pcilib. The casts make the
       * arguments match the format specifiers whatever the field types are. */
      printf("%p (Phys: 0x%lx, Size: 0x%lx)\n",
             (void *)bar_info[BAR].virt_addr,
             (unsigned long)bar_info[BAR].phys_addr,
             (unsigned long)bar_info[BAR].size);

      /* Write some test data to the BAR. volatile keeps the compiler from
       * caching the word, which the GPU modifies behind the CPU's back. */
      volatile unsigned int *test = (volatile unsigned int *)((char *)bar + 0x9100);
      printf("Pointing to: 0x%lx\n", (unsigned long)test);
      *test = 0xdeadbeef;
      printf("Set BAR content to: 0x%x\n", *test);

      /* Try to map the BAR address as host memory. Sizes and flags are the
       * ones the original experiment used for each API. */
      if (manual_init) {
          if (DRV_FAILED(cuMemHostRegister((void *)test, 0x4, CU_MEMHOSTREGISTER_IOMEMORY)))
              return 1;
      } else {
          if (RT_FAILED(cudaHostRegister((void *)test, 0x1000,
                                         cudaHostRegisterMapped | cudaHostRegisterPortable)))
              return 1;
      }

      int status = overwriteBarFromGpu(test);

      /* Undo the registration before the BAR mapping goes away. */
      if (manual_init) {
          if (DRV_FAILED(cuMemHostUnregister((void *)test)))
              status = 1;
      } else {
          if (RT_FAILED(cudaHostUnregister((void *)test)))
              status = 1;
      }
      return status;
  }

  /*
   * Test whether a GPU can write directly into an FPGA BAR: initialise CUDA,
   * open the PCI device, run the BAR write test and release everything.
   *
   * Returns 0 when the whole sequence succeeds, 1 otherwise.
   */
  int main(int argc, char *argv[]) {
      (void)argc;
      (void)argv;

      printf("\n");
      int manual_init = 1; /* 1: driver-API initialisation, 0: runtime-API */
      CUcontext context = NULL;
      int status = 1;

      int init_failed = manual_init ? initCudaDriver(&context) : initCudaRuntime();
      if (!init_failed) {
          printf("CUDA init done\n\n");

          pcilib_t *pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
          if (!pci) {
              printf("pcilib_open\n");
          } else {
              status = runBarTest(pci, manual_init);
              pcilib_close(pci);
              printf("PCI closed\n");
          }
      }

      /* Only the driver-API path creates a context that must be destroyed. */
      if (context != NULL && DRV_FAILED(cuCtxDestroy(context)))
          status = 1;
      return status;
  }