Browse Source

Fix writing from the GPU

Matthias Vogelgesang 8 years ago
parent
commit
8dd9aa2277
3 changed files with 255 additions and 18 deletions
  1. 3 5
      kernel.cl
  2. 220 0
      pciDriver.h
  3. 32 13
      signal.c

+ 3 - 5
kernel.cl

@@ -1,11 +1,9 @@
 kernel void
-write_to_fpga (global uint8 *buffer, global uint *check, uint addr, uint value)
+write_to_fpga (global uint *buffer, global uint *check, uint addr, uint value)
 {
     if (get_global_id (0) == 0) {
-        buffer[addr] = value >> 24;
-        buffer[addr + 1] = (value >> 16) & 0xff;
-        buffer[addr + 2] = (value >> 8) & 0xff;
-        buffer[addr + 3] = value & 0xff;
+        /* divide by four to avoid word addressing */
+        buffer[addr / 4] = value;
         check[0] = addr;
         check[1] = value;
     }

+ 220 - 0
pciDriver.h

@@ -0,0 +1,220 @@
+#ifndef PCIDRIVER_H_
+#define PCIDRIVER_H_
+
+/**
+ * This is a full rewrite of the pciDriver.
+ * New default is to support kernel 2.6, using kernel 2.6 APIs.
+ * 
+ * This header defines the interface to the outside world.
+ * 
+ * $Revision: 1.6 $
+ * $Date: 2008-01-24 14:21:36 $
+ * 
+ */
+
+/*
+ * Change History:
+ * 
+ * $Log: not supported by cvs2svn $
+ * Revision 1.5  2008-01-11 10:15:14  marcus
+ * Removed unused interrupt code.
+ * Added intSource to the wait interrupt call.
+ *
+ * Revision 1.4  2006/11/17 18:44:42  marcus
+ * Type of SG list can now be selected at runtime. Added type to sglist.
+ *
+ * Revision 1.3  2006/11/17 16:23:02  marcus
+ * Added slot number to the PCI info IOctl.
+ *
+ * Revision 1.2  2006/11/13 12:29:09  marcus
+ * Added a IOctl call, to confiure the interrupt response. (testing pending).
+ * Basic interrupts are now supported.
+ *
+ * Revision 1.1  2006/10/10 14:46:52  marcus
+ * Initial commit of the new pciDriver for kernel 2.6
+ *
+ * Revision 1.7  2006/10/06 15:18:06  marcus
+ * Updated PCI info and PCI cmd
+ *
+ * Revision 1.6  2006/09/25 16:51:07  marcus
+ * Added PCI config IOctls, and implemented basic mmap functions.
+ *
+ * Revision 1.5  2006/09/18 17:13:12  marcus
+ * backup commit.
+ *
+ * Revision 1.4  2006/09/15 15:44:41  marcus
+ * backup commit.
+ *
+ * Revision 1.3  2006/08/15 11:40:02  marcus
+ * backup commit.
+ *
+ * Revision 1.2  2006/08/12 18:28:42  marcus
+ * Sync with the laptop
+ *
+ * Revision 1.1  2006/08/11 15:30:46  marcus
+ * Sync with the laptop
+ *
+ */
+
+#include <linux/ioctl.h>
+
+/* Identifies the PCI-E Xilinx ML605 */
+#define PCIE_XILINX_VENDOR_ID 0x10ee
+#define PCIE_ML605_DEVICE_ID 0x6024
+
+/* Identifies the PCI-E IPE Hardware */
+#define PCIE_IPECAMERA_DEVICE_ID 0x6081
+#define PCIE_KAPTURE_DEVICE_ID 0x6028
+
+
+/* Possible values for ioctl commands */
+
+/* PCI mmap areas */
+#define	PCIDRIVER_BAR0			0
+#define	PCIDRIVER_BAR1			1
+#define	PCIDRIVER_BAR2			2
+#define	PCIDRIVER_BAR3			3
+#define	PCIDRIVER_BAR4			4
+#define	PCIDRIVER_BAR5			5
+
+/* mmap mode of the device */
+#define PCIDRIVER_MMAP_PCI		0
+#define PCIDRIVER_MMAP_KMEM 		1
+
+/* Direction of a DMA operation */
+#define PCIDRIVER_DMA_BIDIRECTIONAL 	0
+#define	PCIDRIVER_DMA_TODEVICE		1//PCILIB_KMEM_SYNC_TODEVICE
+#define PCIDRIVER_DMA_FROMDEVICE	2//PCILIB_KMEM_SYNC_FROMDEVICE
+
+/* Possible sizes in a PCI command */
+#define PCIDRIVER_PCI_CFG_SZ_BYTE  	1
+#define PCIDRIVER_PCI_CFG_SZ_WORD  	2
+#define PCIDRIVER_PCI_CFG_SZ_DWORD 	3
+
+/* Possible types of SG lists */
+#define PCIDRIVER_SG_NONMERGED 		0
+#define PCIDRIVER_SG_MERGED 		1
+
+/* Maximum number of interrupt sources */
+#define PCIDRIVER_INT_MAXSOURCES 	16
+
+#define KMEM_REF_HW 		0x80000000				/**< Special reference to indicate hardware access */
+#define KMEM_REF_COUNT		0x0FFFFFFF				/**< Mask of reference counter (mmap/munmap), couting in mmaped memory pages */
+
+#define KMEM_MODE_REUSABLE	0x80000000				/**< Indicates reusable buffer */
+#define KMEM_MODE_EXCLUSIVE	0x40000000				/**< Only a single process is allowed to mmap the buffer */
+#define KMEM_MODE_PERSISTENT	0x20000000				/**< Persistent mode instructs kmem_free to preserve buffer in memory */
+#define KMEM_MODE_COUNT		0x0FFFFFFF				/**< Mask of reuse counter (alloc/free) */
+
+#define KMEM_FLAG_REUSE 		PCILIB_KMEM_FLAG_REUSE		/**< Try to reuse existing buffer with the same use & item */
+#define KMEM_FLAG_EXCLUSIVE 		PCILIB_KMEM_FLAG_EXCLUSIVE	/**< Allow only a single application accessing a specified use & item */
+#define KMEM_FLAG_PERSISTENT		PCILIB_KMEM_FLAG_PERSISTENT	/**< Sets persistent mode */
+#define KMEM_FLAG_HW			PCILIB_KMEM_FLAG_HARDWARE	/**< The buffer may be accessed by hardware, the hardware access will not occur any more if passed to _free function */
+#define KMEM_FLAG_FORCE			PCILIB_KMEM_FLAG_FORCE		/**< Force memory cleanup even if references are present */
+#define KMEM_FLAG_MASS			PCILIB_KMEM_FLAG_MASS		/**< Apply to all buffers of selected use */
+#define KMEM_FLAG_TRY			PCILIB_KMEM_FLAG_TRY		/**< Do not allocate buffers, try to reuse and fail if not possible */
+
+#define KMEM_FLAG_REUSED 		PCILIB_KMEM_FLAG_REUSE		/**< Indicates if buffer with specified use & item was already allocated and reused */
+#define KMEM_FLAG_REUSED_PERSISTENT 	PCILIB_KMEM_FLAG_PERSISTENT	/**< Indicates that reused buffer was persistent before the call */
+#define KMEM_FLAG_REUSED_HW 		PCILIB_KMEM_FLAG_HARDWARE	/**< Indicates that reused buffer had a HW reference before the call */
+
+/* Types */
+typedef struct {
+    unsigned short vendor_id;
+    unsigned short device_id;
+    unsigned short bus;
+    unsigned short slot;
+    unsigned short func;
+    unsigned short devfn;
+    unsigned char interrupt_pin;
+    unsigned char interrupt_line;
+    unsigned int irq;
+    unsigned long bar_start[6];
+    unsigned long bar_length[6];
+    unsigned long bar_flags[6];
+} pcilib_board_info_t;
+
+typedef struct {
+	unsigned long type;
+	unsigned long pa;
+	unsigned long size;
+	unsigned long align;
+	unsigned long use;
+	unsigned long item;
+	int flags;
+	int handle_id;
+} kmem_handle_t;
+
+typedef struct {
+	unsigned long addr;
+	unsigned long size;
+} umem_sgentry_t;
+
+typedef struct {
+	int handle_id;
+	int type;
+	int nents;
+	umem_sgentry_t *sg;
+} umem_sglist_t;
+
+typedef struct {
+	unsigned long vma;
+	unsigned long size;
+	int handle_id;
+	int dir;
+} umem_handle_t;
+
+typedef struct {
+	kmem_handle_t handle;
+	int dir;
+} kmem_sync_t;
+
+typedef struct {
+    unsigned long count;
+    unsigned long timeout;	// microseconds
+    unsigned int source;
+} interrupt_wait_t;
+
+typedef struct {
+	int size;
+	int addr;
+	union {
+		unsigned char byte;
+		unsigned short word;
+		unsigned int dword; 	/* not strict C, but if not can have problems */
+	} val;
+} pci_cfg_cmd;
+
+/* ioctl interface */
+/* See documentation for a detailed usage explanation */
+
+/* 
+ * one of the problems of ioctl, is that requires a type definition.
+ * This type is only 8-bits wide, and half-documented in 
+ * <linux-src>/Documentation/ioctl-number.txt.
+ * previous SHL -> 'S' definition, conflicts with several devices,
+ * so I changed it to be pci -> 'p', in the range 0xA0-AF
+ */
+#define PCIDRIVER_IOC_MAGIC 'p'
+#define PCIDRIVER_IOC_BASE  0xA0
+
+#define PCIDRIVER_IOC_MMAP_MODE  _IO(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 0 )
+#define PCIDRIVER_IOC_MMAP_AREA  _IO(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 1 )
+#define PCIDRIVER_IOC_KMEM_ALLOC _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 2, kmem_handle_t * )
+#define PCIDRIVER_IOC_KMEM_FREE  _IOW ( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 3, kmem_handle_t * )
+#define PCIDRIVER_IOC_KMEM_SYNC  _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 4, kmem_sync_t * )
+#define PCIDRIVER_IOC_UMEM_SGMAP _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 5, umem_handle_t * )
+#define PCIDRIVER_IOC_UMEM_SGUNMAP _IOW(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 6, umem_handle_t * )
+#define PCIDRIVER_IOC_UMEM_SGGET _IOWR( PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 7, umem_sglist_t * )
+#define PCIDRIVER_IOC_UMEM_SYNC  _IOW(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 8, umem_handle_t * )
+#define PCIDRIVER_IOC_WAITI      _IO(   PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 9 )
+
+/* And now, the methods to access the PCI configuration area */
+#define PCIDRIVER_IOC_PCI_CFG_RD  _IOWR(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 10, pci_cfg_cmd * )
+#define PCIDRIVER_IOC_PCI_CFG_WR  _IOWR(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 11, pci_cfg_cmd * )
+#define PCIDRIVER_IOC_PCI_INFO    _IOWR(  PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 12, pcilib_board_info_t * )
+
+/* Clear interrupt queues */
+#define PCIDRIVER_IOC_CLEAR_IOQ   _IO(   PCIDRIVER_IOC_MAGIC, PCIDRIVER_IOC_BASE + 13 )
+
+#endif

+ 32 - 13
signal.c

@@ -8,10 +8,14 @@
 #include <CL/cl_ext.h>
 #include "ocl.h"
 
+/* this should actually come from the distributed pcitool sources */
+#include "pciDriver.h"
+
 typedef struct {
     /* pcilib */
     pcilib_t *pci;
     uint8_t *bar;
+    cl_ulong bar_phys;
 
     /* OpenCL */
     OclPlatform *ocl;
@@ -24,13 +28,20 @@ typedef struct {
     cl_mem check_buffer;
 } App;
 
+
 #define WR32(addr, value) *(uint32_t *) (app->bar + (addr)) = (value);
 #define RD32(addr) (*(uint32_t *) (app->bar + (addr)))
 
+
+/* declaration should actually come from a distributed header file */
+const pcilib_board_info_t *pcilib_get_board_info (pcilib_t *);
+
+
 static bool
 init_pcilib (App *app)
 {
     static const char *DEVICE = "/dev/fpga0";
+    const pcilib_board_info_t *board;
 
     app->pci = pcilib_open (DEVICE, "pci");
 
@@ -47,6 +58,9 @@ init_pcilib (App *app)
         return false;
     }
 
+    board = pcilib_get_board_info (app->pci);
+    app->bar_phys = board->bar_start[PCILIB_BAR0];
+
     return true;
 }
 
@@ -65,18 +79,17 @@ create_fpga_buffer (App *app, size_t size, cl_int *error)
     cl_bus_address_amd addr;
 
     flags = CL_MEM_EXTERNAL_PHYSICAL_AMD;
-    addr.surface_bus_address = (cl_ulong) base_addr;
-    addr.marker_bus_address = (cl_ulong) base_addr;
+    addr.surface_bus_address = (cl_ulong) app->bar_phys;
+    addr.marker_bus_address = (cl_ulong) app->bar_phys;
 
-    buffer = clCreateBuffer (app->context, flags, size, &addr, error);
-
-    return buffer;
+    return clCreateBuffer (app->context, flags, size, &addr, error);
 }
 
 static bool
 init_opencl (App *app)
 {
     cl_int error;
+    cl_platform_id platform;
 
     app->ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_GPU, 0);
     app->device = ocl_get_devices (app->ocl)[0];
@@ -113,7 +126,6 @@ check_value (App *app, uint32_t addr, uint32_t expected)
 {
     uint32_t value;
 
-    printf ("CHECK .... ");
     value = RD32 (addr);
 
     if (value != expected)
@@ -122,6 +134,13 @@ check_value (App *app, uint32_t addr, uint32_t expected)
         printf ("success\n");
 }
 
+static void
+wait_on_and_release_event (cl_event event)
+{
+    OCL_CHECK_ERROR (clWaitForEvents (1, &event));
+    OCL_CHECK_ERROR (clReleaseEvent (event));
+}
+
 static void
 launch_signal (App *app)
 {
@@ -134,12 +153,13 @@ launch_signal (App *app)
     addr = 0x9168;
 
     /* try to override defaultvalue */
-    value = 123;
+    value = 0xc001;
 
     WR32 (addr, value);
+    printf ("CPU WRITE ... ");
     check_value (app, addr, value);
 
-    value = 456;
+    value = 0xdeadf00d;
     OCL_CHECK_ERROR (clSetKernelArg (app->kernel, 0, sizeof (cl_mem), &app->fpga_buffer));
     OCL_CHECK_ERROR (clSetKernelArg (app->kernel, 1, sizeof (cl_mem), &app->check_buffer));
     OCL_CHECK_ERROR (clSetKernelArg (app->kernel, 2, sizeof (uint32_t), &addr));
@@ -150,15 +170,14 @@ launch_signal (App *app)
     OCL_CHECK_ERROR (clEnqueueNDRangeKernel (app->queue, app->kernel, 1,
                                              NULL, &global_work_size, NULL,
                                              0, NULL, &event));
-
-    OCL_CHECK_ERROR (clWaitForEvents (1, &event));
-    OCL_CHECK_ERROR (clReleaseEvent (event));
+    wait_on_and_release_event (event);
 
     /* let's see if the GPU wrote anything */
+    printf ("GPU WRITE ... ");
     check_value (app, addr, value);
 
     /* let's see if the kernel did at least something */
-    printf ("SANITY ... ");
+    printf ("SANITY ...... ");
 
     check[0] = check[1] = 0;
     OCL_CHECK_ERROR (clEnqueueReadBuffer (app->queue, app->check_buffer, CL_TRUE, 0, 8, check, 0, NULL, NULL));
@@ -166,7 +185,7 @@ launch_signal (App *app)
     if (check[0] == addr && check[1] == value)
         printf ("success\n");
     else
-        printf ("failed [0x%x != 0x%x || %u != %u]\n", check[0], addr, check[1], value);
+        printf ("failed [0x%x != 0x%x || 0x%x != 0x%x]\n", check[0], addr, check[1], value);
 }
 
 int