xilinx.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. #define _BSD_SOURCE
  2. #define _POSIX_C_SOURCE 199309L
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include <unistd.h>
  7. #include <stdarg.h>
  8. #include <time.h>
  9. #include <sched.h>
  10. #include <sys/time.h>
  11. #include "pcilib.h"
  12. #include "irq.h"
  13. #include "kmem.h"
  14. #define DEVICE "/dev/fpga0"
  15. #define BAR PCILIB_BAR0
  16. #define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
  17. #define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters
  18. #define BUFFERS 1
  19. #define ITERATIONS 100
  20. #define TLP_SIZE 64
  21. #define HUGE_PAGE 4096 // number of pages per huge page
  22. #define PAGE_SIZE 4096 // other values are not supported in the kernel
  23. #define TIMEOUT 100000
  24. /* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help,
  25. overall we have only 3 checks on average. Check ready seems to be not needed and adds quite
  26. a lot of extra time */
  27. #define USE_IRQ
  28. //#define CHECK_READY
  29. //#define REALTIME
  30. //#define ADD_DELAYS
  31. #define CHECK_RESULT
  32. //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
  33. //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
  /*
   * 32-bit register accessors. Both macros expect `bar` (base pointer of the
   * mapped BAR) and `offset` (byte offset added to every register address) to
   * be in scope at the point of use.
   *
   * The access goes through a volatile-qualified lvalue so the compiler may
   * neither cache nor drop register reads/writes (e.g. in status polling
   * loops), and the address is computed with char* arithmetic instead of the
   * non-standard void* arithmetic. Arguments are parenthesized so expressions
   * can be passed safely.
   *
   * The bare-brace form (rather than do { } while (0)) is kept on purpose:
   * existing call sites invoke these macros without a trailing semicolon.
   */
  #define WR(addr, value) { *(volatile uint32_t *)((char *)bar + (addr) + offset) = (value); }
  #define RD(addr, value) { (value) = *(volatile uint32_t *)((char *)bar + (addr) + offset); }
  /*
   * Report a fatal error and terminate the process.
   *
   * msg is a printf-style format string; the remaining arguments are the
   * values it refers to. The formatted message and a trailing newline are
   * written to stdout, then the process exits with status -1. This function
   * does not return.
   */
  static void fail(const char *msg, ...) {
      va_list args;

      va_start(args, msg);
      vfprintf(stdout, msg, args);
      va_end(args);

      fputs("\n", stdout);
      exit(-1);
  }
  /*
   * Busy-wait (spin) for at least `ns` nanoseconds.
   *
   * The deadline is computed once from the current time and the function then
   * polls the clock until the deadline has passed; it never yields the CPU.
   *
   * Notes:
   *  - The deadline is normalized so that tv_nsec stays in [0, 1e9). The whole
   *    seconds contained in `ns` are carried into tv_sec, so requests of one
   *    second or more are honoured as well.
   *  - CLOCK_MONOTONIC is used so that wall-clock adjustments cannot shorten
   *    or stretch the wait.
   *  - If the clock cannot be read the function returns immediately instead
   *    of spinning on an undefined deadline.
   */
  void hpsleep(size_t ns) {
      const long nsec_per_sec = 1000000000L;
      struct timespec deadline, now;

      if (clock_gettime(CLOCK_MONOTONIC, &deadline) != 0) {
          return;
      }

      /* Split the request so the nanosecond field cannot overflow. */
      deadline.tv_sec += (time_t)(ns / (size_t)nsec_per_sec);
      deadline.tv_nsec += (long)(ns % (size_t)nsec_per_sec);

      /* Both addends are below one second, so a single carry is sufficient. */
      if (deadline.tv_nsec >= nsec_per_sec) {
          deadline.tv_sec += 1;
          deadline.tv_nsec -= nsec_per_sec;
      }

      do {
          if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) {
              return;
          }
      } while (now.tv_sec < deadline.tv_sec ||
               (now.tv_sec == deadline.tv_sec && now.tv_nsec < deadline.tv_nsec));
  }
  56. int main() {
  57. int err;
  58. long i, j;
  59. pcilib_t *pci;
  60. pcilib_kmem_handle_t *kbuf;
  61. uint32_t status;
  62. struct timeval start, end;
  63. size_t size, run_time;
  64. void* volatile bar;
  65. uintptr_t bus_addr[BUFFERS];
  66. pcilib_bar_t bar_tmp = BAR;
  67. uintptr_t offset = 0;
  68. pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE;
  69. #ifdef ADD_DELAYS
  70. long rpt = 0, rpt2 = 0;
  71. size_t best_time;
  72. best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024);
  73. #endif /* ADD_DELAYS */
  74. #ifdef REALTIME
  75. pid_t pid;
  76. struct sched_param sched = {0};
  77. pid = getpid();
  78. sched.sched_priority = sched_get_priority_min(SCHED_FIFO);
  79. if (sched_setscheduler(pid, SCHED_FIFO, &sched))
  80. printf("Warning: not able to get real-time priority\n");
  81. #endif /* REALTIME */
  82. pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
  83. if (!pci) fail("pcilib_open");
  84. bar = pcilib_map_bar(pci, BAR);
  85. if (!bar) {
  86. pcilib_close(pci);
  87. fail("map bar");
  88. }
  89. pcilib_detect_address(pci, &bar_tmp, &offset, 1);
  90. // Reset
  91. WR(0x00, 1)
  92. usleep(1000);
  93. WR(0x00, 0)
  94. pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0);
  95. pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT);
  96. pcilib_clean_kernel_memory(pci, USE, clean_flags);
  97. #ifdef STATIC_REGION
  98. kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, 0);
  99. #else /* STATIC_REGION */
  100. kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0);
  101. #endif /* STATIC_REGION */
  102. if (!kbuf) {
  103. printf("KMem allocation failed\n");
  104. exit(0);
  105. }
  106. #ifdef CHECK_RESULT
  107. volatile uint32_t *ptr0 = pcilib_kmem_get_block_ua(pci, kbuf, 0);
  108. memset((void*)ptr0, 0, (HUGE_PAGE * PAGE_SIZE));
  109. for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
  110. if (ptr0[i] != 0) break;
  111. }
  112. if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
  113. printf("Initialization error in position %lu, value = %x\n", i * 4, ptr0[i]);
  114. }
  115. #endif /* CHECK_RESULT */
  116. WR(0x04, 0)
  117. WR(0x0C, TLP_SIZE)
  118. WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE))))
  119. WR(0x14, 0x13131313)
  120. for (j = 0; j < BUFFERS; j++ ) {
  121. bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j);
  122. }
  123. gettimeofday(&start, NULL);
  124. for (i = 0; i < ITERATIONS; i++) {
  125. for (j = 0; j < BUFFERS; j++ ) {
  126. // uintptr_t ba = pcilib_kmem_get_block_ba(pci, kbuf, j);
  127. // WR(0x08, ba)
  128. WR(0x08, bus_addr[j]);
  129. WR(0x04, 0x01)
  130. #ifdef USE_IRQ
  131. err = pcilib_wait_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT, TIMEOUT, NULL);
  132. if (err) printf("Timeout waiting for IRQ, err: %i\n", err);
  133. RD(0x04, status);
  134. if ((status&0xFFFF) != 0x101) printf("Invalid status %x\n", status);
  135. // WR(0x04, 0x00);
  136. #else /* USE_IRQ */
  137. # ifdef ADD_DELAYS
  138. // hpsleep(best_time);
  139. do {
  140. rpt++;
  141. RD(0x04, status);
  142. } while (status != 0x101);
  143. # else /* ADD_DELAYS */
  144. do {
  145. RD(0x04, status);
  146. } while (status != 0x101);
  147. # endif /* ADD_DELAYS */
  148. #endif /* USE_IRQ */
  149. WR(0x00, 1)
  150. #ifdef CHECK_READY
  151. do {
  152. rpt2++;
  153. RD(0x04, status);
  154. } while (status != 0);
  155. #endif /* CHECK_READY */
  156. WR(0x00, 0)
  157. }
  158. }
  159. gettimeofday(&end, NULL);
  160. #ifdef CHECK_RESULT
  161. pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0);
  162. for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
  163. // printf("%lx ", ptr0[i]);
  164. if (ptr0[i] != 0x13131313) break;
  165. }
  166. if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
  167. printf("Error in position %lu, value = %x\n", i * 4, ptr0[i]);
  168. }
  169. #endif /* CHECK_RESULT */
  170. pcilib_free_kernel_memory(pci, kbuf, 0);
  171. pcilib_disable_irq(pci, 0);
  172. pcilib_unmap_bar(pci, BAR, bar);
  173. pcilib_close(pci);
  174. run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
  175. size = (long long int)ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE;
  176. printf("%.3lf GB/s: transfered %zu bytes in %zu us using %u buffers\n", 1000000. * size / run_time / 1024 / 1024 / 1024, size, run_time, BUFFERS);
  177. # ifdef ADD_DELAYS
  178. printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS));
  179. #endif /* USE_IRQ */
  180. }