vor 8 Jahren · 60f5751469
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,8 @@ FIGURES=figures/intra-copy.pdf \
 
															 		figures/transf.pdf \
														
 
															 		figures/throughput.pdf \
														
 
															 		figures/latency.pdf \
														
 
															-		figures/latency-hist.pdf
														
 
															+		figures/latency-hist.pdf \
														
 
															+		figures/fpga-arch.pdf
														
 
															 .PHONY: clean figures
														
--- a/data/ipedirectgma/latency-hist.py
+++ b/data/ipedirectgma/latency-hist.py
@@ -12,14 +12,17 @@ gpu_data = gpu_data[gpu_data < 4.5]
 
															 plt.rc('font', **dict(family='serif'))
														
 
															-plt.figure(figsize=(6, 4))
														
 
															+plt.figure(figsize=(8, 3))
														
 
															+
														
 
															+cpu_weights = np.ones_like(cpu_data)/float(len(cpu_data))
														
 
															+gpu_weights = np.ones_like(gpu_data)/float(len(gpu_data))
														
 
															 # divide by 2 for one-way latency
														
 
															 # plt.ylim(0.1, 10000)
														
 
															 # plt.hist(gpu_data, bins=200, label='GPU', log=True)
														
 
															 # plt.hist(cpu_data, bins=200, label='CPU', log=True)
														
 
															-plt.hist(gpu_data, bins=100, color='#3b5b92', label='GPU')
														
 
															-plt.hist(cpu_data, bins=100, color='#d54d4d', label='CPU')
														
 
															+plt.hist(gpu_data, weights=gpu_weights, bins=50, color='#3b5b92', label='GPU', linewidth=0)
														
 
															+plt.hist(cpu_data, weights=cpu_weights, bins=50, color='#d54d4d', label='CPU', linewidth=0)
														
 
															 # plt.semilogy()
														
 
															 plt.xlabel(u'Latency in \u00b5s')
														
--- a/data/latency-hist.py
+++ b/data/latency-hist.py
@@ -17,8 +17,8 @@ plt.rc('font', **dict(family='serif'))
 
															 plt.figure(figsize=(4, 3))
														
 
															 # divide by 2 for one-way latency
														
 
															-plt.hist(gpu_data / 2, bins=100, normed=False, color='#3b5b92', label='GPU')
														
 
															-plt.hist(cpu_data / 2, bins=100, normed=False, color='#d54d4d', label='CPU')
														
 
															+plt.hist(gpu_data / 2, bins=100, normed=True, color='#3b5b92', label='GPU', linewidth=0)
														
 
															+plt.hist(cpu_data / 2, bins=100, normed=True, color='#d54d4d', label='CPU', linewidth=0)
														
 
															 plt.xlabel(u'Latency in \u00b5s')
														
 
															 plt.ylabel('Frequency')
														
--- a/data/throughput.cpu
+++ b/data/throughput.cpu
@@ -17,8 +17,7 @@
 
															 1048000000 		6472		
														
 
															 2097000000 		6528		
														
 
															 4194000000 		6561		
														
 
															-
														
 
															-
														
 
															+8388000000 		6581		
														
--- a/data/throughput.gpu
+++ b/data/throughput.gpu
@@ -18,6 +18,3 @@
 
															 2147483648        6386.3333333333
														
 
															 4294967296        6408
														
 
															 8589934592        6393.8333333333
														
 
															-17179869184       6370.6666666667
														
 
															-34359738368       6372.1666666667
														
 
															-68719476736       6372.3333333333
														
--- a/data/throughput.py
+++ b/data/throughput.py
@@ -6,12 +6,14 @@ cpu_data = np.loadtxt('throughput.cpu')
 
															 plt.rc('font', **dict(family='serif'))
														
 
															-plt.figure(figsize=(8, 1))
														
 
															+plt.figure(figsize=(8, 3))
														
 
															 plt.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU')
														
 
															 plt.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#d54d4d', label='CPU')
														
 
															+plt.xticks([1e4,1e6,1e8,1e10])
														
 
															+plt.yticks([0,2000,4000,6000,8000])
														
 
															-plt.xlabel(u'Data size in B')
														
 
															+plt.xlabel('Data size in B')
														
 
															 plt.ylabel('Throughput in MB/s')
														
 
															 plt.legend(loc='lower right')
														
 
															 plt.savefig('throughput.pdf', dpi=300, bbox_inches='tight')
														
--- a/paper.tex
+++ b/paper.tex
@@ -162,6 +162,15 @@ friendly interfaces with the custom logic with an input bandwidth of 7.45
 
															 GB/s. The user logic and the DMA engine are configured by the host through PIO
														
 
															 registers.
														
 
															+\begin{figure}[t]
														
 
															+  \centering
														
 
															+  \includegraphics[width=0.5\textwidth]{figures/fpga-arch}
														
 
															+  \caption{%
														
 
															+    FPGA AAA
														
 
															+  }
														
 
															+  \label{fig:fpga-arch}
														
 
															+\end{figure}
														
 
															+
														
 
															 The physical addresses of the host's memory buffers are stored into an internal
														
 
															 memory and are dynamically updated by the driver or user, allowing highly
														
 
															 efficient zero-copy data transfers. The maximum size associated with each
														
@@ -290,31 +299,6 @@ PCIe link (FPGA-GPU)    & x8 Gen3                        & x8 Gen3     \\
 
															 \label{fig:throughput}
														
 
															 \end{figure}
														
 
															-% \begin{figure}
														
 
															-%   \centering
														
 
															-%   \begin{subfigure}[b]{.49\textwidth}
														
 
															-%     \centering
														
 
															-%     \includegraphics[width=\textwidth]{figures/throughput}
														
 
															-%     \caption{%
														
 
															-%       DMA data transfer throughput.
														
 
															-%     }
														
 
															-%     \label{fig:throughput}
														
 
															-%   \end{subfigure}
														
 
															-%   \begin{subfigure}[b]{.49\textwidth}
														
 
															-%     \includegraphics[width=\textwidth]{figures/latency}
														
 
															-%     \caption{%
														
 
															-%       Latency distribution.
														
 
															-%       % for a single 4 KB packet transferred
														
 
															-%       % from FPGA-to-CPU and FPGA-to-GPU.
														
 
															-%     }
														
 
															-%     \label{fig:latency}
														
 
															-%   \end{subfigure}
														
 
															-%   \caption{%
														
 
															-%     Measured throughput for data transfers from FPGA to main memory
														
 
															-%     (CPU) and from FPGA to the global GPU memory (GPU). 
														
 
															-%   }
														
 
															-% \end{figure}
														
 
															-
														
 
															 The measured results for the pure data throughput are shown in
														
 
															 \figref{fig:throughput} for transfers from the FPGA to the system's main
														
 
															 memory as well as to the global memory as explained in \ref{sec:host}. 
														
@@ -359,14 +343,20 @@ latency.
 
															 \subsection{Latency}
														
 
															-
														
 
															-\begin{figure}
														
 
															-  \includegraphics[width=\textwidth]{figures/latency-hist}
														
 
															-  \caption{%
														
 
															-    Latency distribution for a single 1024 B packet transferred from FPGA to
														
 
															-    GPU memory and to main memory.
														
 
															-  }
														
 
															-  \label{fig:latency-distribution}
														
 
															+\begin{figure}[t]
														
 
															+  \centering
														
 
															+  \begin{subfigure}[b]{.8\textwidth}
														
 
															+    \centering
														
 
															+    \includegraphics[width=\textwidth]{figures/latency}
														
 
															+    \caption{Latency }
														
 
															+    \label{fig:latency_vs_size}
														
 
															+  \end{subfigure}
														
 
															+  \begin{subfigure}[b]{.8\textwidth}
														
 
															+    \includegraphics[width=\textwidth]{figures/latency-hist}
														
 
															+    \caption{Latency distribution.}
														
 
															+    \label{fig:latency_hist}
														
 
															+  \end{subfigure}
														
 
															+  \label{fig:latency}
														
 
															 \end{figure}
														
 
															 For HEP experiments, low latencies are necessary to react in a reasonable time