8 years ago · 8b4e89656c
--- a/data/throughput.py
+++ b/data/throughput.py
@@ -18,7 +18,7 @@ ax.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU Memo
 
				 ax.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#77DD77', label='Main Memory')
			
 
				 ax.axis["right"].set_visible(False)
			
 
				 ax.axis["top"].set_visible(False)
			
 
				-plt.xlim(0, 7000)
			
 
				+plt.ylim(0, 7000)
			
 
				 plt.xticks([1e4,1e6,1e8,1e10])
			
 
				 plt.yticks([0,2000,4000,6000])
			
 
				 
			
--- a/paper.tex
+++ b/paper.tex
@@ -167,6 +167,7 @@ utilization on a Virtex 7 device is reported in Table~\ref{table:utilization}.
 
				 
			
 
				 
			
 
				 \begin{figure}[t]
			
 
				+\small
			
 
				 \begin{floatrow}
			
 
				 \ffigbox{%
			
 
				     \includegraphics[width=0.4\textwidth]{figures/fpga-arch}
			
@@ -318,6 +319,7 @@ in~\cite{rota2015dma}.
 
				 
			
 
				 \begin{table}[]
			
 
				 \centering
			
 
				+\small
			
 
				 \caption{Description of the measurement setup}
			
 
				 \label{table:setups}
			
 
				 \tabcolsep=0.11cm
			
@@ -328,8 +330,8 @@ in~\cite{rota2015dma}.
 
				 CPU           & Intel Xeon E5-1630             & Intel Atom D525   \\
			
 
				 Chipset       & Intel C612                     & Intel ICH9R Express   \\
			
 
				 GPU           & AMD FirePro W9100              & AMD FirePro W9100   \\
			
 
				-PCIe link: FPGA-System memory    & x8 Gen3                        & x4 Gen1     \\
			
 
				-PCIe link: FPGA-GPU    & x8 Gen3                        & x8 Gen3     \\
			
 
				+PCIe slot: System memory    & x8 Gen3 (same RC) & x4 Gen1 (different RC)    \\
			
 
				+PCIe slot: FPGA \& GPU    & x8 Gen3 (different RC) & x8 Gen3 (same RC)    \\
			
 
				   \bottomrule
			
 
				 \end{tabular}
			
 
				 
			
@@ -351,14 +353,13 @@ PCIe link: FPGA-GPU    & x8 Gen3                        & x8 Gen3     \\
 
				 
			
 
				 The measured results for the pure data throughput are shown in
			
 
				 \figref{fig:throughput} for transfers from the FPGA to the system's main
			
 
				-memory as well as to the global memory as explained in \ref{sec:host}. 
			
 
				-% Must ask Suren about this
			
 
				-
			
 
				-In the case of FPGA-to-GPU data transfers, the duoble buffering solution was
			
 
				-used. As one can see, in both cases the write performance is primarily limited
			
 
				-by the PCIe bus. Up until 2 MB data transfer size, the throughput to the GPU
			
 
				-is approaching slowly 100 MB/s. From there on, the throughput increases up to
			
 
				-6.4 GB/s when PCIe bus saturation sets in at about 1 GB data size. The CPU
			
 
				+memory as well as to the global memory as explained in \ref{sec:host}.  In the
			
 
				+case of FPGA-to-GPU data transfers, the double buffering solution was used:
			
 
				+data are copied from the buffer exposed to the FPGA into a different buffer.
			
 
				+As one can see, in both cases the write performance is primarily limited by
			
 
				+the PCIe bus. Up until 2 MB data transfer size, the throughput to the GPU is
			
 
				+slowly approaching 100 MB/s. From there on, the throughput increases up to 6.4
			
 
				+GB/s when PCIe bus saturation sets in at about 1 GB data size. The CPU
			
 
				 throughput saturates earlier but the maximum throughput is 6.6 GB/s.