xilinx.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #define _BSD_SOURCE
  2. #define _DEFAULT_SOURCE
  3. #define _POSIX_C_SOURCE 199309L
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include <unistd.h>
  8. #include <stdarg.h>
  9. #include <time.h>
  10. #include <sched.h>
  11. #include <sys/time.h>
  12. #include "pcilib.h"
  13. #include "irq.h"
  14. #include "kmem.h"
  15. #include "bar.h"
  16. #define DEVICE "/dev/fpga0"
  17. #define BAR PCILIB_BAR0
  18. #define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
  19. #define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters
  20. #define BUFFERS 1
  21. #define ITERATIONS 100
  22. #define TLP_SIZE 64
  23. #define HUGE_PAGE 4096 // number of pages per huge page
  24. #define PAGE_SIZE 4096 // other values are not supported in the kernel
  25. #define TIMEOUT 100000
  /* IRQs are slow for some reason, and REALTIME mode is even slower. Adding delays does not
     really help: overall, only about 3 status checks are needed on average. CHECK_READY does
     not seem to be needed and adds quite a lot of extra time. */
  29. #define USE_IRQ
  30. //#define CHECK_READY
  31. //#define REALTIME
  32. //#define ADD_DELAYS
  33. #define CHECK_RESULT
  34. //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
  35. //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
  36. #define WR(addr, value) { *(uint32_t*)(bar + addr + offset) = value; }
  37. #define RD(addr, value) { value = *(uint32_t*)(bar + addr + offset); }
  /*
   * Report a fatal error and terminate the process.
   *
   * msg is a printf-style format string; the remaining arguments are the
   * values it refers to. The formatted message is written to standard
   * output, followed by a newline, and the process exits with status -1.
   * This function never returns.
   */
  static void fail(const char *msg, ...) {
      va_list args;

      va_start(args, msg);
      vfprintf(stdout, msg, args);
      va_end(args);

      fputs("\n", stdout);
      exit(-1);
  }
  /*
   * Busy-wait for at least ns nanoseconds.
   *
   * The function spins on clock_gettime() instead of yielding the CPU, so it
   * is only meant for very short, high-precision delays.
   *
   * The deadline is computed on CLOCK_MONOTONIC so that wall-clock adjustments
   * cannot shorten or prolong the wait. Delays of one second or more are
   * supported: whole seconds and the nanosecond remainder are added separately
   * and the result is normalised so that tv_nsec stays in [0, 1e9).
   *
   * If the clock cannot be read the function returns immediately.
   */
  void hpsleep(size_t ns) {
      const long ns_per_sec = 1000000000L;
      struct timespec deadline, now;

      if (clock_gettime(CLOCK_MONOTONIC, &deadline) != 0)
          return;

      deadline.tv_sec += (time_t)(ns / (size_t)ns_per_sec);
      deadline.tv_nsec += (long)(ns % (size_t)ns_per_sec);
      if (deadline.tv_nsec >= ns_per_sec) {
          /* Carry into the seconds field; both addends were below one second,
             so a single carry is always sufficient. */
          deadline.tv_sec += 1;
          deadline.tv_nsec -= ns_per_sec;
      }

      do {
          if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
              return;
      } while ((now.tv_sec < deadline.tv_sec) ||
               ((now.tv_sec == deadline.tv_sec) && (now.tv_nsec < deadline.tv_nsec)));
  }
  58. int main() {
  59. int err;
  60. long i, j;
  61. pcilib_t *pci;
  62. pcilib_kmem_handle_t *kbuf;
  63. uint32_t status;
  64. struct timeval start, end;
  65. size_t size, run_time;
  66. void* volatile bar;
  67. uintptr_t bus_addr[BUFFERS];
  68. pcilib_bar_t bar_tmp = BAR;
  69. uintptr_t offset = 0;
  70. pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE;
  71. #ifdef ADD_DELAYS
  72. long rpt = 0, rpt2 = 0;
  73. size_t best_time;
  74. best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024);
  75. #endif /* ADD_DELAYS */
  76. #ifdef REALTIME
  77. pid_t pid;
  78. struct sched_param sched = {0};
  79. pid = getpid();
  80. sched.sched_priority = sched_get_priority_min(SCHED_FIFO);
  81. if (sched_setscheduler(pid, SCHED_FIFO, &sched))
  82. printf("Warning: not able to get real-time priority\n");
  83. #endif /* REALTIME */
  84. pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT);
  85. if (!pci) fail("pcilib_open");
  86. bar = pcilib_map_bar(pci, BAR);
  87. if (!bar) {
  88. pcilib_close(pci);
  89. fail("map bar");
  90. }
  91. pcilib_detect_address(pci, &bar_tmp, &offset, 1);
  92. // Reset
  93. WR(0x00, 1)
  94. usleep(1000);
  95. WR(0x00, 0)
  96. pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0);
  97. pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT);
  98. pcilib_clean_kernel_memory(pci, USE, clean_flags);
  99. #ifdef STATIC_REGION
  100. kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, 0);
  101. #else /* STATIC_REGION */
  102. kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0);
  103. #endif /* STATIC_REGION */
  104. if (!kbuf) {
  105. printf("KMem allocation failed\n");
  106. exit(0);
  107. }
  108. #ifdef CHECK_RESULT
  109. volatile uint32_t *ptr0 = pcilib_kmem_get_block_ua(pci, kbuf, 0);
  110. memset((void*)ptr0, 0, (HUGE_PAGE * PAGE_SIZE));
  111. for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
  112. if (ptr0[i] != 0) break;
  113. }
  114. if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
  115. printf("Initialization error in position %lu, value = %x\n", i * 4, ptr0[i]);
  116. }
  117. #endif /* CHECK_RESULT */
  118. WR(0x04, 0)
  119. WR(0x0C, TLP_SIZE)
  120. WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE))))
  121. WR(0x14, 0x13131313)
  122. for (j = 0; j < BUFFERS; j++ ) {
  123. bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j);
  124. }
  125. gettimeofday(&start, NULL);
  126. for (i = 0; i < ITERATIONS; i++) {
  127. for (j = 0; j < BUFFERS; j++ ) {
  128. // uintptr_t ba = pcilib_kmem_get_block_ba(pci, kbuf, j);
  129. // WR(0x08, ba)
  130. WR(0x08, bus_addr[j]);
  131. WR(0x04, 0x01)
  132. #ifdef USE_IRQ
  133. err = pcilib_wait_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT, TIMEOUT, NULL);
  134. if (err) printf("Timeout waiting for IRQ, err: %i\n", err);
  135. RD(0x04, status);
  136. if ((status&0xFFFF) != 0x101) printf("Invalid status %x\n", status);
  137. // WR(0x04, 0x00);
  138. #else /* USE_IRQ */
  139. # ifdef ADD_DELAYS
  140. // hpsleep(best_time);
  141. do {
  142. rpt++;
  143. RD(0x04, status);
  144. } while (status != 0x101);
  145. # else /* ADD_DELAYS */
  146. do {
  147. RD(0x04, status);
  148. } while (status != 0x101);
  149. # endif /* ADD_DELAYS */
  150. #endif /* USE_IRQ */
  151. WR(0x00, 1)
  152. #ifdef CHECK_READY
  153. do {
  154. rpt2++;
  155. RD(0x04, status);
  156. } while (status != 0);
  157. #endif /* CHECK_READY */
  158. WR(0x00, 0)
  159. }
  160. }
  161. gettimeofday(&end, NULL);
  162. #ifdef CHECK_RESULT
  163. pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0);
  164. for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) {
  165. // printf("%lx ", ptr0[i]);
  166. if (ptr0[i] != 0x13131313) break;
  167. }
  168. if (i < (HUGE_PAGE * PAGE_SIZE / 4)) {
  169. printf("Error in position %lu, value = %x\n", i * 4, ptr0[i]);
  170. }
  171. #endif /* CHECK_RESULT */
  172. pcilib_free_kernel_memory(pci, kbuf, 0);
  173. pcilib_disable_irq(pci, 0);
  174. pcilib_unmap_bar(pci, BAR, bar);
  175. pcilib_close(pci);
  176. run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
  177. size = (long long int)ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE;
  178. printf("%.3lf GB/s: transfered %zu bytes in %zu us using %u buffers\n", 1000000. * size / run_time / 1024 / 1024 / 1024, size, run_time, BUFFERS);
  179. # ifdef ADD_DELAYS
  180. printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS));
  181. #endif /* USE_IRQ */
  182. }