Quellcode durchsuchen

Added placeholder for fpga-arch figure, minor changes to plots

Lorenzo vor 8 Jahren
Ursprung
Commit
60f5751469
7 geänderte Dateien mit 38 neuen und 46 gelöschten Zeilen
  1. 2 1
      Makefile
  2. 6 3
      data/ipedirectgma/latency-hist.py
  3. 2 2
      data/latency-hist.py
  4. 1 2
      data/throughput.cpu
  5. 0 3
      data/throughput.gpu
  6. 4 2
      data/throughput.py
  7. 23 33
      paper.tex

+ 2 - 1
Makefile

@@ -3,7 +3,8 @@ FIGURES=figures/intra-copy.pdf \
 		figures/transf.pdf \
 		figures/transf.pdf \
 		figures/throughput.pdf \
 		figures/throughput.pdf \
 		figures/latency.pdf \
 		figures/latency.pdf \
-		figures/latency-hist.pdf
+		figures/latency-hist.pdf \
+		figures/fpga-arch.pdf
 
 
 .PHONY: clean figures
 .PHONY: clean figures
 
 

+ 6 - 3
data/ipedirectgma/latency-hist.py

@@ -12,14 +12,17 @@ gpu_data = gpu_data[gpu_data < 4.5]
 
 
 plt.rc('font', **dict(family='serif'))
 plt.rc('font', **dict(family='serif'))
 
 
-plt.figure(figsize=(6, 4))
+plt.figure(figsize=(8, 3))
+
+cpu_weights = np.ones_like(cpu_data)/float(len(cpu_data))
+gpu_weights = np.ones_like(gpu_data)/float(len(gpu_data))
 
 
 # divide by 2 for one-way latency
 # divide by 2 for one-way latency
 # plt.ylim(0.1, 10000)
 # plt.ylim(0.1, 10000)
 # plt.hist(gpu_data, bins=200, label='GPU', log=True)
 # plt.hist(gpu_data, bins=200, label='GPU', log=True)
 # plt.hist(cpu_data, bins=200, label='CPU', log=True)
 # plt.hist(cpu_data, bins=200, label='CPU', log=True)
-plt.hist(gpu_data, bins=100, color='#3b5b92', label='GPU')
-plt.hist(cpu_data, bins=100, color='#d54d4d', label='CPU')
+plt.hist(gpu_data, weights=gpu_weights, bins=50, color='#3b5b92', label='GPU', linewidth=0)
+plt.hist(cpu_data, weights=cpu_weights, bins=50, color='#d54d4d', label='CPU', linewidth=0)
 # plt.semilogy()
 # plt.semilogy()
 
 
 plt.xlabel(u'Latency in \u00b5s')
 plt.xlabel(u'Latency in \u00b5s')

+ 2 - 2
data/latency-hist.py

@@ -17,8 +17,8 @@ plt.rc('font', **dict(family='serif'))
 plt.figure(figsize=(4, 3))
 plt.figure(figsize=(4, 3))
 
 
 # divide by 2 for one-way latency
 # divide by 2 for one-way latency
-plt.hist(gpu_data / 2, bins=100, normed=False, color='#3b5b92', label='GPU')
-plt.hist(cpu_data / 2, bins=100, normed=False, color='#d54d4d', label='CPU')
+plt.hist(gpu_data / 2, bins=100, normed=True, color='#3b5b92', label='GPU', linewidth=0)
+plt.hist(cpu_data / 2, bins=100, normed=True, color='#d54d4d', label='CPU', linewidth=0)
 
 
 plt.xlabel(u'Latency in \u00b5s')
 plt.xlabel(u'Latency in \u00b5s')
 plt.ylabel('Frequency')
 plt.ylabel('Frequency')

+ 1 - 2
data/throughput.cpu

@@ -17,8 +17,7 @@
 1048000000 		6472		
 1048000000 		6472		
 2097000000 		6528		
 2097000000 		6528		
 4194000000 		6561		
 4194000000 		6561		
-
-
+8388000000 		6581		
 
 
 
 
 
 

+ 0 - 3
data/throughput.gpu

@@ -18,6 +18,3 @@
 2147483648        6386.3333333333
 2147483648        6386.3333333333
 4294967296        6408
 4294967296        6408
 8589934592        6393.8333333333
 8589934592        6393.8333333333
-17179869184       6370.6666666667
-34359738368       6372.1666666667
-68719476736       6372.3333333333

+ 4 - 2
data/throughput.py

@@ -6,12 +6,14 @@ cpu_data = np.loadtxt('throughput.cpu')
 
 
 plt.rc('font', **dict(family='serif'))
 plt.rc('font', **dict(family='serif'))
 
 
-plt.figure(figsize=(8, 1))
+plt.figure(figsize=(8, 3))
 
 
 plt.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU')
 plt.semilogx(gpu_data[:,0], gpu_data[:,1], '*-', color='#3b5b92', label='GPU')
 plt.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#d54d4d', label='CPU')
 plt.semilogx(cpu_data[:,0], cpu_data[:,1], 'o-', color='#d54d4d', label='CPU')
+plt.xticks([1e4,1e6,1e8,1e10])
+plt.yticks([0,2000,4000,6000,8000])
 
 
-plt.xlabel(u'Data size in B')
+plt.xlabel('Data size in B')
 plt.ylabel('Throughput in MB/s')
 plt.ylabel('Throughput in MB/s')
 plt.legend(loc='lower right')
 plt.legend(loc='lower right')
 plt.savefig('throughput.pdf', dpi=300, bbox_inches='tight')
 plt.savefig('throughput.pdf', dpi=300, bbox_inches='tight')

+ 23 - 33
paper.tex

@@ -162,6 +162,15 @@ friendly interfaces with the custom logic with an input bandwidth of 7.45
 GB/s. The user logic and the DMA engine are configured by the host through PIO
 GB/s. The user logic and the DMA engine are configured by the host through PIO
 registers.
 registers.
 
 
+\begin{figure}[t]
+  \centering
+  \includegraphics[width=0.5\textwidth]{figures/fpga-arch}
+  \caption{%
+    FPGA AAA
+  }
+  \label{fig:fpga-arch}
+\end{figure}
+
 The physical addresses of the host's memory buffers are stored into an internal
 The physical addresses of the host's memory buffers are stored into an internal
 memory and are dynamically updated by the driver or user, allowing highly
 memory and are dynamically updated by the driver or user, allowing highly
 efficient zero-copy data transfers. The maximum size associated with each
 efficient zero-copy data transfers. The maximum size associated with each
@@ -290,31 +299,6 @@ PCIe link (FPGA-GPU)    & x8 Gen3                        & x8 Gen3     \\
 \label{fig:throughput}
 \label{fig:throughput}
 \end{figure}
 \end{figure}
 
 
-% \begin{figure}
-%   \centering
-%   \begin{subfigure}[b]{.49\textwidth}
-%     \centering
-%     \includegraphics[width=\textwidth]{figures/throughput}
-%     \caption{%
-%       DMA data transfer throughput.
-%     }
-%     \label{fig:throughput}
-%   \end{subfigure}
-%   \begin{subfigure}[b]{.49\textwidth}
-%     \includegraphics[width=\textwidth]{figures/latency}
-%     \caption{%
-%       Latency distribution.
-%       % for a single 4 KB packet transferred
-%       % from FPGA-to-CPU and FPGA-to-GPU.
-%     }
-%     \label{fig:latency}
-%   \end{subfigure}
-%   \caption{%
-%     Measured throuhput for data transfers from FPGA to main memory
-%     (CPU) and from FPGA to the global GPU memory (GPU). 
-%   }
-% \end{figure}
-
 The measured results for the pure data throughput are shown in
 The measured results for the pure data throughput are shown in
 \figref{fig:throughput} for transfers from the FPGA to the system's main
 \figref{fig:throughput} for transfers from the FPGA to the system's main
 memory as well as to the global memory as explained in \ref{sec:host}. 
 memory as well as to the global memory as explained in \ref{sec:host}. 
@@ -359,14 +343,20 @@ latency.
 
 
 
 
 \subsection{Latency}
 \subsection{Latency}
-
-\begin{figure}
-  \includegraphics[width=\textwidth]{figures/latency-hist}
-  \caption{%
-    Latency distribution for a single 1024 B packet transferred from FPGA to
-    GPU memory and to main memory.
-  }
-  \label{fig:latency-distribution}
+\begin{figure}[t]
+  \centering
+  \begin{subfigure}[b]{.8\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{figures/latency}
+    \caption{Latency.}
+    \label{fig:latency_vs_size}
+  \end{subfigure}
+  \begin{subfigure}[b]{.8\textwidth}
+    \includegraphics[width=\textwidth]{figures/latency-hist}
+    \caption{Latency distribution.}
+    \label{fig:latency_hist}
+  \end{subfigure}
+  \label{fig:latency}
 \end{figure}
 \end{figure}
 
 
 For HEP experiments, low latencies are necessary to react in a reasonable time
 For HEP experiments, low latencies are necessary to react in a reasonable time