Browse Source

Minor fixes

Lorenzo 8 years ago
parent
commit
c31762c733
4 changed files with 45 additions and 35 deletions
  1. 6 4
      data/ipedirectgma/latency-hist.py
  2. 6 5
      data/ipedirectgma/plot.py
  3. 2 2
      data/throughput.py
  4. 31 24
      paper.tex

+ 6 - 4
data/ipedirectgma/latency-hist.py

@@ -7,12 +7,12 @@ g = np.loadtxt('ipedirectgma.1024.gpu.txt')
 cpu_data = c[:,1]
 gpu_data = g[:,1]
 
-cpu_data = cpu_data[cpu_data < 4.5]
-gpu_data = gpu_data[gpu_data < 4.5]
+cpu_data = cpu_data[cpu_data < 5]
+gpu_data = gpu_data[gpu_data < 5]
 
 plt.rc('font', **dict(family='serif'))
 
-plt.figure(figsize=(8, 3))
+plt.figure(figsize=(4, 4.2))
 
 cpu_weights = np.ones_like(cpu_data)/float(len(cpu_data))
 gpu_weights = np.ones_like(gpu_data)/float(len(gpu_data))
@@ -23,9 +23,11 @@ gpu_weights = np.ones_like(gpu_data)/float(len(gpu_data))
 # plt.hist(cpu_data, bins=200, label='CPU', log=True)
 plt.hist(gpu_data, weights=gpu_weights, bins=50, color='#3b5b92', label='GPU', linewidth=0)
 plt.hist(cpu_data, weights=cpu_weights, bins=50, color='#d54d4d', label='CPU', linewidth=0)
+plt.xticks([2.0, 3.0, 4.0, 5.0])
+plt.yticks([0,0.25,0.5])
 # plt.semilogy()
 
 plt.xlabel(u'Latency in \u00b5s')
 plt.ylabel('Frequency')
-plt.legend(loc='upper right')
+plt.legend(loc='upper right',frameon=False)
 plt.savefig('latency-hist.pdf', dpi=300, bbox_inches='tight')

+ 6 - 5
data/ipedirectgma/plot.py

@@ -31,16 +31,17 @@ for i in range(128, 4096 + 128, 128):
 
 
 plt.rc('font', **dict(family='serif'))
-plt.figure(figsize=(10, 4))
+plt.figure(figsize=(4, 4))
 plt.xlabel('Packet size in (B)')
 plt.ylabel('Latency (us)')
 # plt.errorbar(xs, yscm, yerr=yscs, label='CPU')
 # plt.errorbar(xs, ysgm, ls='dotted', yerr=ysgs, label='GPU')
-plt.plot(xs, yscm, 'o-', markersize=4, label='Main memory')
-plt.plot(xs, ysgm, 'x-', label='GPU memory')
+plt.plot(xs, yscm, 'o-', markersize=4, label='Main memory', color='#d54d4d')
+plt.plot(xs, ysgm, 'x-', label='GPU memory', color='#3b5b92')
 plt.xticks([128, 1024, 2048, 2048+1024, 4096])
-plt.xlim(128, 4096)
-plt.legend(loc='upper left')
+plt.yticks([2,4,6,8])
+plt.xlim(0, 4200)
+plt.legend(loc='upper left', frameon=False)
 plt.savefig('latency.pdf', dpi=300, bbox_inches='tight')
 
 A = np.vstack([xs, np.ones(len(xs))]).T

+ 2 - 2
data/throughput.py

@@ -6,7 +6,7 @@ cpu_data = np.loadtxt('throughput.cpu')
 
 plt.rc('font', **dict(family='serif'))
 
-plt.figure(figsize=(8, 3))
+plt.figure(figsize=(8, 4))
 
 plt.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU')
 plt.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#d54d4d', label='CPU')
@@ -15,5 +15,5 @@ plt.yticks([0,2000,4000,6000,8000])
 
 plt.xlabel('Data size in B')
 plt.ylabel('Throughput in MB/s')
-plt.legend(loc='lower right')
+plt.legend(loc='lower right',frameon=False)
 plt.savefig('throughput.pdf', dpi=300, bbox_inches='tight')

+ 31 - 24
paper.tex

@@ -7,6 +7,7 @@
 \usepackage{subcaption}
 \usepackage{textcomp}
 \usepackage{booktabs}
+\usepackage{floatrow}
 
 \newboolean{draft}
 \setboolean{draft}{true}
@@ -164,7 +165,7 @@ registers.
 
 \begin{figure}[t]
   \centering
-  \includegraphics[width=0.5\textwidth]{figures/fpga-arch}
+  \includegraphics[width=0.75\textwidth]{figures/fpga-arch}
   \caption{%
     FPGA AAA
   }
@@ -174,24 +175,7 @@ registers.
 The physical addresses of the host's memory buffers are stored into an internal
 memory and are dynamically updated by the driver or user, allowing highly
 efficient zero-copy data transfers. The maximum size associated with each
-address is 2 GB. The resource utilization
-on a Virtex 7 device is reported in \ref{table:utilization}.
-
-\begin{table}[]
-\centering
-\caption{Resource utilization on a Virtex7 device X240VT}
-\label{table:utilization}
-\begin{tabular}{@{}llll@{}}
-  \toprule
-Resource & Utilization & Available & Utilization \% \\
-  \midrule
-LUT      & 5331        & 433200    & 1.23           \\
-LUTRAM   & 56          & 174200    & 0.03           \\
-FF       & 5437        & 866400    & 0.63           \\
-BRAM     & 20.50       & 1470      & 1.39           \\
-  \bottomrule
-\end{tabular}
-\end{table}
+address is 2 GB. 
 
 \subsection{OpenCL management on host side}
 \label{sec:host}
@@ -224,7 +208,7 @@ Using the \texttt{cl\-Enqueue\-Copy\-Buffer} function call it is possible to
 write entire memory regions in DMA fashion to the FPGA. In this case, the GPU
 acts as bus master and pushes data to the FPGA.
 
-\begin{figure}
+\begin{figure}[t]
   \centering
   \includegraphics[width=0.75\textwidth]{figures/opencl-setup}
   \caption{The FPGA writes to GPU memory by mapping the physical address of a
@@ -264,10 +248,32 @@ cases, a Xilinx VC709 evaluation board was plugged into a PCIe 3.0 x8 slots.
 In case of FPGA-to-CPU data transfers, the software implementation is the one
 described in~\cite{rota2015dma}.
 
+The resource utilization
+on a Virtex 7 device is reported in Table~\ref{table:utilization}.
+
+\begin{table}[]
+\centering
+\caption{Resource utilization on a Virtex7 device X240VT}
+\label{table:utilization}
+\tabcolsep=0.11cm
+\small
+\begin{tabular}{@{}llll@{}}
+  \toprule
+Resource & Utilization & Available & Utilization \% \\
+  \midrule
+LUT      & 5331        & 433200    & 1.23           \\
+LUTRAM   & 56          & 174200    & 0.03           \\
+FF       & 5437        & 866400    & 0.63           \\
+BRAM     & 20.50       & 1470      & 1.39           \\
+  \bottomrule
+\end{tabular}
+\end{table}
+
 \begin{table}[b]
 \centering
 \caption{Hardware used for throughput and latency measurements}
 \label{table:setups}
+\tabcolsep=0.11cm
 \begin{tabular}{@{}llll@{}}
   \toprule
 Component & Setup 1 & Setup 2 \\
@@ -279,6 +285,7 @@ PCIe link (FPGA-System memory)    & x8 Gen3                        & x4 Gen1
 PCIe link (FPGA-GPU)    & x8 Gen3                        & x8 Gen3     \\
   \bottomrule
 \end{tabular}
+
 \end{table}
 
 
@@ -290,8 +297,8 @@ PCIe link (FPGA-GPU)    & x8 Gen3                        & x8 Gen3     \\
 % requirements, this result makes smaller acquisition system a cost-effective
 % alternative to larger workstations.
 
-\begin{figure}
-  \includegraphics[width=\textwidth]{figures/throughput}
+\begin{figure}[t]
+  \includegraphics[width=0.85\textwidth]{figures/throughput}
   \caption{%
     Measured results for data transfers from FPGA to main memory
     (CPU) and from FPGA to the global GPU memory (GPU).
@@ -345,13 +352,13 @@ latency.
 \subsection{Latency}
 \begin{figure}[t]
   \centering
-  \begin{subfigure}[b]{.8\textwidth}
+  \begin{subfigure}[b]{.45\textwidth}
     \centering
     \includegraphics[width=\textwidth]{figures/latency}
     \caption{Latency }
     \label{fig:latency_vs_size}
   \end{subfigure}
-  \begin{subfigure}[b]{.8\textwidth}
+  \begin{subfigure}[b]{.45\textwidth}
     \includegraphics[width=\textwidth]{figures/latency-hist}
     \caption{Latency distribution.}
     \label{fig:latency_hist}