8 years ago · 60f5751469
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,8 @@ FIGURES=figures/intra-copy.pdf \
 
				 		figures/transf.pdf \
			
 
				 		figures/throughput.pdf \
			
 
				 		figures/latency.pdf \
			
 
				-		figures/latency-hist.pdf
			
 
				+		figures/latency-hist.pdf \
			
 
				+		figures/fpga-arch.pdf
			
 
				 
			
 
				 .PHONY: clean figures
			
 
				 
			
--- a/data/ipedirectgma/latency-hist.py
+++ b/data/ipedirectgma/latency-hist.py
@@ -12,14 +12,17 @@ gpu_data = gpu_data[gpu_data < 4.5]
 
				 
			
 
				 plt.rc('font', **dict(family='serif'))
			
 
				 
			
 
				-plt.figure(figsize=(6, 4))
			
 
				+plt.figure(figsize=(8, 3))
			
 
				+
			
 
				+cpu_weights = np.ones_like(cpu_data)/float(len(cpu_data))
			
 
				+gpu_weights = np.ones_like(gpu_data)/float(len(gpu_data))
			
 
				 
			
 
				 # divide by 2 for one-way latency
			
 
				 # plt.ylim(0.1, 10000)
			
 
				 # plt.hist(gpu_data, bins=200, label='GPU', log=True)
			
 
				 # plt.hist(cpu_data, bins=200, label='CPU', log=True)
			
 
				-plt.hist(gpu_data, bins=100, color='#3b5b92', label='GPU')
			
 
				-plt.hist(cpu_data, bins=100, color='#d54d4d', label='CPU')
			
 
				+plt.hist(gpu_data, weights=gpu_weights, bins=50, color='#3b5b92', label='GPU', linewidth=0)
			
 
				+plt.hist(cpu_data, weights=cpu_weights, bins=50, color='#d54d4d', label='CPU', linewidth=0)
			
 
				 # plt.semilogy()
			
 
				 
			
 
				 plt.xlabel(u'Latency in \u00b5s')
			
--- a/data/latency-hist.py
+++ b/data/latency-hist.py
@@ -17,8 +17,8 @@ plt.rc('font', **dict(family='serif'))
 
				 plt.figure(figsize=(4, 3))
			
 
				 
			
 
				 # divide by 2 for one-way latency
			
 
				-plt.hist(gpu_data / 2, bins=100, normed=False, color='#3b5b92', label='GPU')
			
 
				-plt.hist(cpu_data / 2, bins=100, normed=False, color='#d54d4d', label='CPU')
			
 
				+plt.hist(gpu_data / 2, bins=100, normed=True, color='#3b5b92', label='GPU', linewidth=0)
			
 
				+plt.hist(cpu_data / 2, bins=100, normed=True, color='#d54d4d', label='CPU', linewidth=0)
			
 
				 
			
 
				 plt.xlabel(u'Latency in \u00b5s')
			
 
				 plt.ylabel('Frequency')
			
--- a/data/throughput.cpu
+++ b/data/throughput.cpu
@@ -17,8 +17,7 @@
 
				 1048000000 		6472		
			
 
				 2097000000 		6528		
			
 
				 4194000000 		6561		
			
 
				-
			
 
				-
			
 
				+8388000000 		6581		
			
 
				 
			
 
				 
			
 
				 
			
--- a/data/throughput.gpu
+++ b/data/throughput.gpu
@@ -18,6 +18,3 @@
 
				 2147483648        6386.3333333333
			
 
				 4294967296        6408
			
 
				 8589934592        6393.8333333333
			
 
				-17179869184       6370.6666666667
			
 
				-34359738368       6372.1666666667
			
 
				-68719476736       6372.3333333333
			
--- a/data/throughput.py
+++ b/data/throughput.py
@@ -6,12 +6,14 @@ cpu_data = np.loadtxt('throughput.cpu')
 
				 
			
 
				 plt.rc('font', **dict(family='serif'))
			
 
				 
			
 
				-plt.figure(figsize=(8, 1))
			
 
				+plt.figure(figsize=(8, 3))
			
 
				 
			
 
				 plt.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU')
			
 
				 plt.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#d54d4d', label='CPU')
			
 
				+plt.xticks([1e4,1e6,1e8,1e10])
			
 
				+plt.yticks([0,2000,4000,6000,8000])
			
 
				 
			
 
				-plt.xlabel(u'Data size in B')
			
 
				+plt.xlabel('Data size in B')
			
 
				 plt.ylabel('Throughput in MB/s')
			
 
				 plt.legend(loc='lower right')
			
 
				 plt.savefig('throughput.pdf', dpi=300, bbox_inches='tight')
			
--- a/paper.tex
+++ b/paper.tex
@@ -162,6 +162,15 @@ friendly interfaces with the custom logic with an input bandwidth of 7.45
 
				 GB/s. The user logic and the DMA engine are configured by the host through PIO
			
 
				 registers.
			
 
				 
			
 
				+\begin{figure}[t]
			
 
				+  \centering
			
 
				+  \includegraphics[width=0.5\textwidth]{figures/fpga-arch}
			
 
				+  \caption{%
			
 
				+    FPGA AAA
			
 
				+  }
			
 
				+  \label{fig:fpga-arch}
			
 
				+\end{figure}
			
 
				+
			
 
				 The physical addresses of the host's memory buffers are stored into an internal
			
 
				 memory and are dynamically updated by the driver or user, allowing highly
			
 
				 efficient zero-copy data transfers. The maximum size associated with each
			
@@ -290,31 +299,6 @@ PCIe link (FPGA-GPU)    & x8 Gen3                        & x8 Gen3     \\
 
				 \label{fig:throughput}
			
 
				 \end{figure}
			
 
				 
			
 
				-% \begin{figure}
			
 
				-%   \centering
			
 
				-%   \begin{subfigure}[b]{.49\textwidth}
			
 
				-%     \centering
			
 
				-%     \includegraphics[width=\textwidth]{figures/throughput}
			
 
				-%     \caption{%
			
 
				-%       DMA data transfer throughput.
			
 
				-%     }
			
 
				-%     \label{fig:throughput}
			
 
				-%   \end{subfigure}
			
 
				-%   \begin{subfigure}[b]{.49\textwidth}
			
 
				-%     \includegraphics[width=\textwidth]{figures/latency}
			
 
				-%     \caption{%
			
 
				-%       Latency distribution.
			
 
				-%       % for a single 4 KB packet transferred
			
 
				-%       % from FPGA-to-CPU and FPGA-to-GPU.
			
 
				-%     }
			
 
				-%     \label{fig:latency}
			
 
				-%   \end{subfigure}
			
 
				-%   \caption{%
			
 
				-%     Measured throuhput for data transfers from FPGA to main memory
			
 
				-%     (CPU) and from FPGA to the global GPU memory (GPU). 
			
 
				-%   }
			
 
				-% \end{figure}
			
 
				-
			
 
				 The measured results for the pure data throughput are shown in
			
 
				 \figref{fig:throughput} for transfers from the FPGA to the system's main
			
 
				 memory as well as to the global memory as explained in \ref{sec:host}. 
			
@@ -359,14 +343,20 @@ latency.
 
				 
			
 
				 
			
 
				 \subsection{Latency}
			
 
				-
			
 
				-\begin{figure}
			
 
				-  \includegraphics[width=\textwidth]{figures/latency-hist}
			
 
				-  \caption{%
			
 
				-    Latency distribution for a single 1024 B packet transferred from FPGA to
			
 
				-    GPU memory and to main memory.
			
 
				-  }
			
 
				-  \label{fig:latency-distribution}
			
 
				+\begin{figure}[t]
			
 
				+  \centering
			
 
				+  \begin{subfigure}[b]{.8\textwidth}
			
 
				+    \centering
			
 
				+    \includegraphics[width=\textwidth]{figures/latency}
			
 
				+    \caption{Latency }
			
 
				+    \label{fig:latency_vs_size}
			
 
				+  \end{subfigure}
			
 
				+  \begin{subfigure}[b]{.8\textwidth}
			
 
				+    \includegraphics[width=\textwidth]{figures/latency-hist}
			
 
				+    \caption{Latency distribution.}
			
 
				+    \label{fig:latency_hist}
			
 
				+  \end{subfigure}
			
 
				+  \label{fig:latency}
			
 
				 \end{figure}
			
 
				 
			
 
				 For HEP experiments, low latencies are necessary to react in a reasonable time