This commit is contained in:
Armin Friedl 2016-06-24 23:12:26 +02:00
parent 0cecff4079
commit 3c79104312
14 changed files with 1652 additions and 42 deletions

View file

@ -30,6 +30,8 @@ basepeak = 54.4
ymem = [] ymem = []
ypeak = [] ypeak = []
ybasepeak = [] ybasepeak = []
yeight = []
ysix = []
for i in np.arange(0,64,0.1): for i in np.arange(0,64,0.1):
if bandwidth*i < peak: if bandwidth*i < peak:
@ -39,6 +41,13 @@ for i in np.arange(0,64,0.1):
i=1/32 i=1/32
while i<=64: while i<=64:
if i < 16 and i > 4:
yeight.append(21.78)
ysix.append(peak)
else:
yeight.append(None)
ysix.append(None)
if bandwidth*i < peak and bandwidth*i*2 < peak: if bandwidth*i < peak and bandwidth*i*2 < peak:
ymem.append(bandwidth*i) ymem.append(bandwidth*i)
ypeak.append(None) ypeak.append(None)
@ -55,7 +64,7 @@ while i<=64:
#plot data #plot data
#data = pd.Series(data=values, name='Peak Memory Bandwidth', index=np.arange(0,64,0.1)) #data = pd.Series(data=values, name='Peak Memory Bandwidth', index=np.arange(0,64,0.1))
data = {'Peak Memory Bandwidth': pd.Series(ymem, index=xlbl), 'Peak Floating-Point Performance (Turbo)': pd.Series(ypeak, index=xlbl)} data = {'Peak Memory Bandwidth': pd.Series(ymem, index=xlbl), 'Peak Floating-Point Performance (Turbo)': pd.Series(ypeak, index=xlbl), 'Best 8 OI Kernel': pd.Series(yeight, index=xlbl), 'Best 1/16 OI Kernel': pd.Series(yeight, index=xlbl)}
df = pd.DataFrame(data) df = pd.DataFrame(data)
ax = df.plot() ax = df.plot()

210
reduce/size.log Normal file
View file

@ -0,0 +1,210 @@
1,0.015060, 0.012820, 0.006505, 0.000924
1,0.013010, 0.008138, 0.006707, 0.000929
1,0.013959, 0.018517, 0.012665, 0.010040
1,0.017139, 0.008266, 0.006602, 0.000933
1,0.012186, 0.009795, 0.011971, 0.000928
1,0.011320, 0.011973, 0.006519, 0.000926
1,0.019044, 0.008140, 0.008787, 0.000913
1,0.008048, 0.026552, 0.006572, 0.000934
1,0.014868, 0.008198, 0.006745, 0.000927
1,0.018982, 0.012490, 0.006542, 0.000935
1,0.014860, 0.010456, 0.015956, 0.000929
1,0.015556, 0.008380, 0.006694, 0.000931
1,0.011853, 0.008191, 0.006520, 0.000935
1,0.017231, 0.016554, 0.006573, 0.000938
1,0.019388, 0.008211, 0.006490, 0.000940
1,0.017562, 0.008292, 0.006570, 0.000940
1,0.008377, 0.008385, 0.008122, 0.016472
1,0.016700, 0.008092, 0.006615, 0.000940
1,0.008701, 0.008410, 0.006554, 0.000936
1,0.021345, 0.008315, 0.006538, 0.000935
1,0.012839, 0.010863, 0.015722, 0.006039
1,0.010784, 0.012452, 0.006464, 0.000943
1,0.013310, 0.009412, 0.006499, 0.000939
1,0.015488, 0.008262, 0.006519, 0.000957
1,0.016392, 0.008346, 0.006548, 0.000946
1,0.022343, 0.008379, 0.006572, 0.000951
1,0.009021, 0.008462, 0.006554, 0.000950
1,0.019012, 0.008520, 0.006710, 0.000960
1,0.011137, 0.014027, 0.010778, 0.014246
1,0.016490, 0.008227, 0.006638, 0.010030
10,0.015200, 0.008364, 0.006514, 0.000942
10,0.018563, 0.019307, 0.006683, 0.000939
10,0.015394, 0.010670, 0.010332, 0.000954
10,0.007525, 0.008310, 0.007347, 0.000953
10,0.012634, 0.010634, 0.006643, 0.000956
10,0.014186, 0.008059, 0.006486, 0.000947
10,0.016306, 0.009247, 0.006542, 0.000958
10,0.009274, 0.008334, 0.006675, 0.000944
10,0.015010, 0.008208, 0.006568, 0.000956
10,0.007146, 0.008374, 0.009934, 0.000952
10,0.015609, 0.008445, 0.006520, 0.000954
10,0.015018, 0.008323, 0.006675, 0.000961
10,0.015876, 0.008303, 0.006915, 0.020554
10,0.009362, 0.008383, 0.006504, 0.000963
10,0.010540, 0.013352, 0.011971, 0.000955
10,0.016023, 0.008194, 0.011112, 0.010048
10,0.008929, 0.008427, 0.006597, 0.000962
10,0.011935, 0.008371, 0.006765, 0.000969
10,0.012526, 0.008457, 0.006569, 0.000962
10,0.014575, 0.008361, 0.006579, 0.000950
10,0.016678, 0.021072, 0.006916, 0.000959
10,0.019073, 0.012327, 0.009451, 0.000967
10,0.016419, 0.008326, 0.007186, 0.000963
10,0.009798, 0.008325, 0.006630, 0.000959
10,0.007363, 0.016486, 0.006578, 0.000957
10,0.017285, 0.009254, 0.021329, 0.000991
10,0.015300, 0.008662, 0.006476, 0.000973
10,0.008793, 0.008317, 0.006629, 0.000961
10,0.016962, 0.008466, 0.006804, 0.000973
10,0.011350, 0.008511, 0.006569, 0.000978
100,0.011944, 0.008318, 0.006828, 0.000974
100,0.013588, 0.009846, 0.006560, 0.000975
100,0.009091, 0.008647, 0.010100, 0.010636
100,0.016221, 0.008714, 0.011530, 0.010031
100,0.014302, 0.008184, 0.006780, 0.006496
100,0.016849, 0.008263, 0.006687, 0.000981
100,0.012719, 0.008396, 0.006918, 0.000976
100,0.014992, 0.008173, 0.006631, 0.004980
100,0.024122, 0.008202, 0.006504, 0.000982
100,0.014949, 0.008518, 0.006513, 0.000981
100,0.005903, 0.013908, 0.006588, 0.000970
100,0.018213, 0.008668, 0.006526, 0.000983
100,0.012493, 0.018007, 0.006539, 0.000981
100,0.014951, 0.008529, 0.012096, 0.000977
100,0.020768, 0.008437, 0.006678, 0.000983
100,0.014121, 0.008367, 0.006633, 0.000984
100,0.017092, 0.008173, 0.006618, 0.000984
100,0.014134, 0.008309, 0.006683, 0.007205
100,0.021945, 0.016707, 0.010962, 0.005524
100,0.017444, 0.008253, 0.006535, 0.000998
100,0.018334, 0.010120, 0.014758, 0.008821
100,0.013700, 0.008144, 0.006571, 0.000988
100,0.018027, 0.014722, 0.006547, 0.000987
100,0.016101, 0.008572, 0.006593, 0.000992
100,0.013204, 0.008207, 0.006552, 0.000990
100,0.015996, 0.008204, 0.006574, 0.000989
100,0.031931, 0.009827, 0.006538, 0.000990
100,0.016046, 0.008238, 0.006489, 0.000992
100,0.008523, 0.013016, 0.006636, 0.000994
100,0.016014, 0.008304, 0.006663, 0.001004
1000,0.013056, 0.022506, 0.006647, 0.001072
1000,0.008438, 0.008262, 0.006646, 0.001060
1000,0.010373, 0.008534, 0.006729, 0.001073
1000,0.025268, 0.008545, 0.006731, 0.001070
1000,0.016263, 0.008354, 0.006659, 0.001078
1000,0.014329, 0.008434, 0.006751, 0.001071
1000,0.017146, 0.010863, 0.006634, 0.001073
1000,0.018675, 0.008679, 0.012135, 0.021216
1000,0.014633, 0.008818, 0.006577, 0.001090
1000,0.023196, 0.011840, 0.006592, 0.001076
1000,0.015533, 0.008400, 0.006624, 0.001078
1000,0.008505, 0.008295, 0.006727, 0.001077
1000,0.014966, 0.008623, 0.006701, 0.001077
1000,0.019072, 0.009632, 0.006680, 0.009179
1000,0.018672, 0.008424, 0.006804, 0.003599
1000,0.017340, 0.009541, 0.007161, 0.001083
1000,0.014347, 0.008685, 0.006701, 0.001082
1000,0.011189, 0.008424, 0.006667, 0.001084
1000,0.015610, 0.008340, 0.006706, 0.001082
1000,0.010836, 0.008901, 0.006614, 0.001098
1000,0.012571, 0.011788, 0.007889, 0.007866
1000,0.019443, 0.009326, 0.006642, 0.001086
1000,0.021744, 0.008486, 0.006620, 0.001093
1000,0.022898, 0.018285, 0.031074, 0.009292
1000,0.012526, 0.008323, 0.006738, 0.001088
1000,0.010241, 0.008389, 0.006595, 0.001090
1000,0.015677, 0.011739, 0.008517, 0.001094
1000,0.013255, 0.008391, 0.006764, 0.001085
1000,0.014460, 0.008330, 0.007717, 0.001111
1000,0.013080, 0.013283, 0.006677, 0.001104
10000,0.018514, 0.009322, 0.007049, 0.001389
10000,0.013228, 0.009583, 0.007220, 0.001445
10000,0.011737, 0.008952, 0.007170, 0.001427
10000,0.014161, 0.013269, 0.007176, 0.001614
10000,0.009160, 0.009017, 0.012309, 0.001398
10000,0.025428, 0.008998, 0.006959, 0.001429
10000,0.012735, 0.011255, 0.007631, 0.024461
10000,0.008205, 0.012843, 0.007041, 0.001425
10000,0.024209, 0.008824, 0.007011, 0.001421
10000,0.012522, 0.010046, 0.007270, 0.001418
10000,0.017439, 0.012809, 0.007340, 0.001598
10000,0.018774, 0.009051, 0.007153, 0.001395
10000,0.014256, 0.008903, 0.007264, 0.001412
10000,0.017022, 0.009033, 0.007169, 0.001419
10000,0.021680, 0.020549, 0.014481, 0.008859
10000,0.019863, 0.011795, 0.007531, 0.001393
10000,0.022391, 0.006808, 0.007279, 0.001422
10000,0.015490, 0.009025, 0.007194, 0.001431
10000,0.014665, 0.010363, 0.007027, 0.001424
10000,0.015784, 0.009613, 0.007224, 0.001418
10000,0.023222, 0.010015, 0.023339, 0.030465
10000,0.018942, 0.025240, 0.011122, 0.001415
10000,0.011123, 0.009108, 0.007154, 0.001444
10000,0.019147, 0.008789, 0.006974, 0.001461
10000,0.013667, 0.012343, 0.015495, 0.001466
10000,0.015624, 0.009109, 0.009302, 0.001402
10000,0.019067, 0.013941, 0.016134, 0.001437
10000,0.011807, 0.012808, 0.007084, 0.001426
10000,0.014506, 0.011155, 0.007108, 0.001466
10000,0.012350, 0.009386, 0.007166, 0.001446
100000,0.022371, 0.035526, 0.023131, 0.027448
100000,0.013378, 0.023856, 0.013041, 0.024717
100000,0.010871, 0.029044, 0.015932, 0.017511
100000,0.012205, 0.032383, 0.017835, 0.015721
100000,0.012234, 0.024945, 0.017423, 0.025463
100000,0.014343, 0.035935, 0.016265, 0.022189
100000,0.012570, 0.023612, 0.016249, 0.023553
100000,0.014353, 0.027281, 0.019160, 0.019438
100000,0.014454, 0.024914, 0.024764, 0.030581
100000,0.013496, 0.029832, 0.014719, 0.023770
100000,0.018304, 0.024292, 0.014693, 0.021875
100000,0.028399, 0.023915, 0.027137, 0.025491
100000,0.010145, 0.022987, 0.015735, 0.021145
100000,0.012533, 0.032294, 0.028012, 0.019748
100000,0.013240, 0.027931, 0.014247, 0.026066
100000,0.013468, 0.025389, 0.014612, 0.017768
100000,0.014450, 0.030766, 0.021046, 0.023127
100000,0.013543, 0.030922, 0.017547, 0.022809
100000,0.013719, 0.023514, 0.016720, 0.028750
100000,0.011630, 0.027707, 0.017063, 0.024498
100000,0.013445, 0.028254, 0.016900, 0.016538
100000,0.013426, 0.026310, 0.014950, 0.025251
100000,0.016215, 0.020926, 0.023538, 0.022055
100000,0.012278, 0.023712, 0.015968, 0.014424
100000,0.012335, 0.022143, 0.014598, 0.014518
100000,0.013790, 0.053877, 0.016792, 0.021680
100000,0.015594, 0.020772, 0.017987, 0.020527
100000,0.012329, 0.029190, 0.015424, 0.021301
100000,0.012230, 0.024379, 0.017230, 0.017684
100000,0.013589, 0.040487, 0.027573, 0.022245
1000000,0.020727, 0.124509, 0.112477, 0.108229
1000000,0.032473, 0.126158, 0.118394, 0.098192
1000000,0.021973, 0.125135, 0.114933, 0.097082
1000000,0.018970, 0.122516, 0.115200, 0.095380
1000000,0.020313, 0.127562, 0.111321, 0.101456
1000000,0.021555, 0.127997, 0.114297, 0.101177
1000000,0.022448, 0.123110, 0.109968, 0.100089
1000000,0.022387, 0.123137, 0.115687, 0.143453
1000000,0.020858, 0.126027, 0.112245, 0.097318
1000000,0.021095, 0.129142, 0.114619, 0.120646
1000000,0.021187, 0.134334, 0.117879, 0.103980
1000000,0.022047, 0.124922, 0.110836, 0.120224
1000000,0.020579, 0.132466, 0.117571, 0.098158
1000000,0.022642, 0.124157, 0.113329, 0.111869
1000000,0.020913, 0.120785, 0.115758, 0.097079
1000000,0.021255, 0.129725, 0.114023, 0.115193
1000000,0.022199, 0.130058, 0.111212, 0.100436
1000000,0.022980, 0.126308, 0.113577, 0.102413
1000000,0.022109, 0.126565, 0.111826, 0.098784
1000000,0.021139, 0.123117, 0.113357, 0.139509
1000000,0.021675, 0.121999, 0.116524, 0.115985
1000000,0.022481, 0.125771, 0.114231, 0.098526
1000000,0.021303, 0.128222, 0.119330, 0.114650
1000000,0.020648, 0.122423, 0.113259, 0.102972
1000000,0.020828, 0.124934, 0.113054, 0.095407
1000000,0.021351, 0.131414, 0.114445, 0.103877
1000000,0.020946, 0.130915, 0.113755, 0.097917
1000000,0.020846, 0.121703, 0.117466, 0.097012
1000000,0.021515, 0.124847, 0.117515, 0.105351
1000000,0.020116, 0.119978, 0.112513, 0.099434

View file

@ -114,6 +114,7 @@ for(size_t i=0; i<size; i++){
\bigskip \bigskip
\subsubsection{Some Further 8/1 Kernel} \subsubsection{Some Further 8/1 Kernel}
\label{sec:advanced-kernels}
Since some effort was put in getting results near peak performance \verb|-Ofast -ffast-math| was used to stretch compiler optimization to the maximum. Unfortunately \verb|-ffast-math| does not preserve strict IEEE compliance. It is therefore allowed to ignore non-associativity of floating point operations. For example $x = x*x*x*x*x*x*x*x$ can be optimized to $x~*=~x; x~*=~x; x~*=~x;$. Clearly this has an effect on the OI of the kernel. To test fastmath the kernel in~\prettyref{lst:8-1-fma-fastmath} was introduced. Mind that a[i] is written out only once and held in registers during a single iteration. Since some effort was put in getting results near peak performance \verb|-Ofast -ffast-math| was used to stretch compiler optimization to the maximum. Unfortunately \verb|-ffast-math| does not preserve strict IEEE compliance. It is therefore allowed to ignore non-associativity of floating point operations. For example $x = x*x*x*x*x*x*x*x$ can be optimized to $x~*=~x; x~*=~x; x~*=~x;$. Clearly this has an effect on the OI of the kernel. To test fastmath the kernel in~\prettyref{lst:8-1-fma-fastmath} was introduced. Mind that a[i] is written out only once and held in registers during a single iteration.
\bigskip \bigskip

View file

@ -1,5 +1,7 @@
The best results for various kernels are given in~\prettyref{tbl:res-kernels}. The optimization binary \verb|roofline_full_manpack| was used for these results. This is the binary with all optimizations and the intrinsics kernel enabled. The following parameters were used: \verb|roofline_full_manpack -s 150000000 -r 5|. One double array was therefore 1144.41 MB big -- clearly too big for the cache. The best results for various kernels are given in~\prettyref{tbl:res-kernels}. The optimization binary \verb|roofline_full_manpack| was used for these results. This is the binary with all optimizations and the intrinsics kernel enabled. The following parameters were used: \verb|roofline_full_manpack -s 150000000 -r 5|. One double array was therefore 1144.41 MB big -- clearly too big for the cache.
Note how \verb|simple8| is clearly flawed with \verb|-ffast-math| enabled. This is due to the non-IEEE-compliant optimization described in~\prettyref{sec:advanced-kernels}. At this level of optimization, only \verb|simple8fastmath| (which is fastmath-safe but flawed at lower optimization levels) should be considered a \emph{replacement} for \verb|simple8|.
\begin{table}[h!] \begin{table}[h!]
\centering \centering
\begin{tabular}{ll} \begin{tabular}{ll}
@ -18,6 +20,17 @@ The best results for various kernels are given in~\prettyref{tbl:res-kernels}. T
\label{tbl:res-kernels} \label{tbl:res-kernels}
\end{table} \end{table}
The roofline graph with the best runs of the two best kernels of each category (\verb|simple16| and \verb|fma8|) is depicted in~\prettyref{fig:roofline-withres}.
\begin{figure}
\begin{adjustbox}{center}
\includegraphics[width=0.8\linewidth]{res/rooftop_res}
\end{adjustbox}
\caption{Roofline graph with kernel results}
\label{fig:roofline-withres}
\end{figure}
%%% Local Variables: %%% Local Variables:
%%% mode: latex %%% mode: latex
%%% TeX-master: "../report" %%% TeX-master: "../report"

View file

@ -69,10 +69,17 @@
\newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}} \newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}}
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}}
\newlabel{sec:advanced-kernels}{{3.3.1}{8}{Some Further 8/1 Kernel}{subsubsection.3.3.1}{}}
\newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}} \newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}}
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}}
\newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}} \newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}}
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}}
\newlabel{LastPage}{{}{9}{}{page.9}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{8}{section.4}}
\xdef\lastpage@lastpage{9} \newlabel{sec:results}{{4}{8}{Results}{section.4}{}}
\xdef\lastpage@lastpageHy{9} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Roofline graph with kernel results\relax }}{9}{figure.caption.5}}
\newlabel{fig:roofline-withres}{{2}{9}{Roofline graph with kernel results\relax }{figure.caption.5}{}}
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results for various kernels\relax }}{9}{table.caption.4}}
\newlabel{tbl:res-kernels}{{2}{9}{Results for various kernels\relax }{table.caption.4}{}}
\newlabel{LastPage}{{}{10}{}{page.10}{}}
\xdef\lastpage@lastpage{10}
\xdef\lastpage@lastpageHy{10}

View file

@ -1,11 +1,11 @@
# Fdb version 3 # Fdb version 3
["biber report"] 1466704438 "report.bcf" "report.bbl" "report" 1466801023 ["biber report"] 1466704438 "report.bcf" "report.bbl" "report" 1466802710
"report.bcf" 1466799693 92382 2683b542d57d2326e3b37a6a44222b52 "" "report.bcf" 1466802710 92382 2683b542d57d2326e3b37a6a44222b52 ""
"roofline.bib" 1466704433 4157 226e47c750579a202f66b6f0e4df67bb "" "roofline.bib" 1466704433 4157 226e47c750579a202f66b6f0e4df67bb ""
(generated) (generated)
"report.bbl" "report.bbl"
"report.blg" "report.blg"
["pdflatex"] 1466799692 "report.tex" "report.pdf" "report" 1466801023 ["pdflatex"] 1466802708 "report.tex" "report.pdf" "report" 1466802710
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-t1.enc" 1136849721 2971 def0b6c1f0b107b3b936def894055589 "" "/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-t1.enc" 1136849721 2971 def0b6c1f0b107b3b936def894055589 ""
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc" 1136849721 2900 1537cc8184ad1792082cd229ecc269f4 "" "/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc" 1136849721 2900 1537cc8184ad1792082cd229ecc269f4 ""
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1272929888 3287 e6b82fe08f5336d4d5ebc73fb1152e87 "" "/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1272929888 3287 e6b82fe08f5336d4d5ebc73fb1152e87 ""
@ -196,22 +196,25 @@
"/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1455657841 31706 2be2b4306fae7fc20493e3b90c2ad04d "" "/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1455657841 31706 2be2b4306fae7fc20493e3b90c2ad04d ""
"/usr/share/texlive/texmf-var/web2c/pdftex/pdflatex.fmt" 1457104667 3492982 6abaa3262ef9227a797168d32888676c "" "/usr/share/texlive/texmf-var/web2c/pdftex/pdflatex.fmt" 1457104667 3492982 6abaa3262ef9227a797168d32888676c ""
"inputs/introduction.tex" 1466184626 76 eaf0f76fa74815989416f6f6d1c36f8b "" "inputs/introduction.tex" 1466184626 76 eaf0f76fa74815989416f6f6d1c36f8b ""
"inputs/kernels.tex" 1466800173 12285 7459a5d3d19f8cfbe2ace9512c674169 "" "inputs/kernels.tex" 1466801470 12314 221246c7ddebe8abc5c09e1cb5aad74e ""
"inputs/results.tex" 1466802707 1673 adc8885edd6ff1eb3675420597af7bc0 ""
"inputs/roofline.tex" 1466710567 5525 b96d99208485f5095cd10d50a150dff7 "" "inputs/roofline.tex" 1466710567 5525 b96d99208485f5095cd10d50a150dff7 ""
"report.aux" 1466799693 6920 efd026f088aa74618447caae8f088925 "" "report.aux" 1466802710 7761 28af04e5431bab6872f175a15a5dc18b ""
"report.bbl" 1466704439 7655 4b5f697a70789470cde9f922b6440ee7 "biber report" "report.bbl" 1466704439 7655 4b5f697a70789470cde9f922b6440ee7 "biber report"
"report.out" 1466799693 649 906e25252ab8cb90aead774c66de15bf "" "report.out" 1466802710 692 d200a3569f21ef27e8eb52d8f9418124 ""
"report.run.xml" 1466799693 2317 80d7743117fafc51b1e42b536d793f68 "" "report.run.xml" 1466802710 2317 80d7743117fafc51b1e42b536d793f68 ""
"report.tex" 1466709836 4497 1f64f8ce17913e2b9dd71c7d6e896da8 "" "report.tex" 1466801348 4578 16354a75d3cd97ebd3d125891b5c9668 ""
"report.toc" 1466799693 1343 b579331b0ae5f9f743ca0ceca6f78889 "" "report.toc" 1466802710 1440 6115e52d8ddc79b94f2a327fe162c5c8 ""
"res/rooftop-eps-converted-to.pdf" 1466670002 22114 f6f2c1d53d8b6a5f4042e202648c7b36 "" "res/rooftop-eps-converted-to.pdf" 1466802686 22110 decdc6a1508d6bf8c0204fef73ba9cf2 ""
"res/rooftop.eps" 1466669975 36013 2a6358f72820d80a6e87ee15e92d5669 "" "res/rooftop.eps" 1466669975 36013 2a6358f72820d80a6e87ee15e92d5669 ""
"res/rooftop_res-eps-converted-to.pdf" 1466802710 17422 91f1483c067e8da1e6fdf5b17b69522d ""
"res/rooftop_res.eps" 1466802522 48087 c1aa608548cdfb805d27f4cbfcd7c8ad ""
(generated) (generated)
"report-blx.bib"
"report.log"
"report.out"
"report.aux"
"report.bcf"
"report.run.xml" "report.run.xml"
"report.toc" "report.toc"
"report.log"
"report.out"
"report-blx.bib"
"report.pdf" "report.pdf"
"report.bcf"
"report.aux"

View file

@ -348,6 +348,16 @@ INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmex10.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/stmaryrd/stmary10.tfm INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/stmaryrd/stmary10.tfm
INPUT inputs/results.tex
INPUT inputs/results.tex
INPUT res/rooftop_res.eps
INPUT ./res/rooftop_res.eps
INPUT ./res/rooftop_res.eps
INPUT ./res/rooftop_res.eps
INPUT ./res/rooftop_res-eps-converted-to.pdf
INPUT ./res/rooftop_res-eps-converted-to.pdf
INPUT ./res/rooftop_res-eps-converted-to.pdf
INPUT ./res/rooftop_res-eps-converted-to.pdf
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/eccc1095.tfm INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/eccc1095.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/tcti1095.tfm INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/tcti1095.tfm
INPUT report.aux INPUT report.aux

View file

@ -1,4 +1,4 @@
This is pdfTeX, Version 3.14159265-2.6-1.40.15 (TeX Live 2014) (preloaded format=pdflatex 2016.3.4) 24 JUN 2016 22:21 This is pdfTeX, Version 3.14159265-2.6-1.40.15 (TeX Live 2014) (preloaded format=pdflatex 2016.3.4) 24 JUN 2016 23:11
entering extended mode entering extended mode
restricted \write18 enabled. restricted \write18 enabled.
%&-line parsing enabled. %&-line parsing enabled.
@ -1352,14 +1352,14 @@ Package epstopdf Info: Source file: <res/rooftop.eps>
(epstopdf) date: 2016-06-23 10:19:35 (epstopdf) date: 2016-06-23 10:19:35
(epstopdf) size: 36013 bytes (epstopdf) size: 36013 bytes
(epstopdf) Output file: <res/rooftop-eps-converted-to.pdf> (epstopdf) Output file: <res/rooftop-eps-converted-to.pdf>
(epstopdf) date: 2016-06-23 10:20:02 (epstopdf) date: 2016-06-24 23:11:26
(epstopdf) size: 22114 bytes (epstopdf) size: 22110 bytes
(epstopdf) Command: <repstopdf --outfile=res/rooftop-eps-converted- (epstopdf) Command: <repstopdf --outfile=res/rooftop-eps-converted-
to.pdf res/rooftop.eps> to.pdf res/rooftop.eps>
(epstopdf) \includegraphics on input line 70. (epstopdf) \includegraphics on input line 70.
Package epstopdf Info: Output file is already uptodate. Package epstopdf Info: Output file is already uptodate.
<res/rooftop-eps-converted-to.pdf, id=103, 587.19376pt x 442.65375pt> <res/rooftop-eps-converted-to.pdf, id=108, 587.19376pt x 442.65375pt>
File: res/rooftop-eps-converted-to.pdf Graphic file (type pdf) File: res/rooftop-eps-converted-to.pdf Graphic file (type pdf)
<use res/rooftop-eps-converted-to.pdf> <use res/rooftop-eps-converted-to.pdf>
@ -1379,32 +1379,60 @@ Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding):
Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding): Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding):
(hyperref) removing `math shift' on input line 14. (hyperref) removing `math shift' on input line 14.
[5] [6] [7]) [5] [6] [7]) (./inputs/results.tex
Overfull \hbox (19.7725pt too wide) in paragraph at lines 116--116 Overfull \hbox (40.25502pt too wide) in paragraph at lines 1--2
\T1/cmr/m/n/10.95 (-20) The best re-sults for var-i-ous ker-nels are given in [
][]Ta-ble 2[][][]. The op-ti-miza-tion bi-nary []\T1/cmtt/m/n/10.95 roofline_fu
ll_manpack
[]
[8]
Package epstopdf Info: Source file: <res/rooftop_res.eps>
(epstopdf) date: 2016-06-24 23:08:42
(epstopdf) size: 48087 bytes
(epstopdf) Output file: <res/rooftop_res-eps-converted-to.pdf>
(epstopdf) Command: <repstopdf --outfile=res/rooftop_res-eps-conver
ted-to.pdf res/rooftop_res.eps>
(epstopdf) \includegraphics on input line 27.
runsystem(repstopdf --outfile=res/rooftop_res-eps-converted-to.pdf res/rooftop_
res.eps)...executed safely (allowed).
Package epstopdf Info: Result file: <res/rooftop_res-eps-converted-to.pdf>
(epstopdf) date: 2016-06-24 23:11:50
(epstopdf) size: 17422 bytes.
<res/rooftop_res-eps-converted-to.pdf, id=289, 587.19376pt x 442.65375pt>
File: res/rooftop_res-eps-converted-to.pdf Graphic file (type pdf)
<use res/rooftop_res-eps-converted-to.pdf>
Package pdftex.def Info: res/rooftop_res-eps-converted-to.pdf used on input lin
e 27.
(pdftex.def) Requested size: 358.50612pt x 270.25478pt.
) [9 <./res/rooftop_res-eps-converted-to.pdf>]
Overfull \hbox (19.7725pt too wide) in paragraph at lines 122--122
\T1/cmtt/m/n/10.95 blob / e5aa9ca4a77623ff6f1c2d5daa7995565b944506 / stream . c \T1/cmtt/m/n/10.95 blob / e5aa9ca4a77623ff6f1c2d5daa7995565b944506 / stream . c
# L286$[][] \T1/cmr/m/n/10.95 (-20) (vis-ited on 06/20/2016). # L286$[][] \T1/cmr/m/n/10.95 (-20) (vis-ited on 06/20/2016).
[] []
[8]
AED: lastpage setting LastPage AED: lastpage setting LastPage
[9] [10]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 117. Package atveryend Info: Empty hook `BeforeClearDocument' on input line 123.
Package atveryend Info: Empty hook `AfterLastShipout' on input line 117. Package atveryend Info: Empty hook `AfterLastShipout' on input line 123.
(./report.aux) (./report.aux)
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 117. Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 123.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 117. Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 123.
Package rerunfilecheck Info: File `report.out' has not changed. Package rerunfilecheck Info: File `report.out' has not changed.
(rerunfilecheck) Checksum: 906E25252AB8CB90AEAD774C66DE15BF;649. (rerunfilecheck) Checksum: D200A3569F21EF27E8EB52D8F9418124;692.
Package logreq Info: Writing requests to 'report.run.xml'. Package logreq Info: Writing requests to 'report.run.xml'.
\openout1 = `report.run.xml'. \openout1 = `report.run.xml'.
Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 117. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 123.
) )
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
21477 strings out of 493339 21497 strings out of 493339
339286 string characters out of 6141383 339717 string characters out of 6141383
879545 words of memory out of 5000000 879761 words of memory out of 5000000
24321 multiletter control sequences out of 15000+600000 24333 multiletter control sequences out of 15000+600000
30053 words of font info for 136 fonts, out of 8000000 for 9000 30053 words of font info for 136 fonts, out of 8000000 for 9000
953 hyphenation exceptions out of 8191 953 hyphenation exceptions out of 8191
48i,8n,76p,1001b,1880s stack positions out of 5000i,500n,10000p,200000b,80000s 48i,8n,76p,1001b,1880s stack positions out of 5000i,500n,10000p,200000b,80000s
@ -1427,10 +1455,10 @@ t/fonts/type1/public/cm-super/sfrm1440.pfb></usr/share/texlive/texmf-dist/fonts
/type1/public/cm-super/sfti0900.pfb></usr/share/texlive/texmf-dist/fonts/type1/ /type1/public/cm-super/sfti0900.pfb></usr/share/texlive/texmf-dist/fonts/type1/
public/cm-super/sfti1095.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/ public/cm-super/sfti1095.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/
cm-super/sftt1095.pfb> cm-super/sftt1095.pfb>
Output written on report.pdf (9 pages, 336309 bytes). Output written on report.pdf (10 pages, 353185 bytes).
PDF statistics: PDF statistics:
390 PDF objects out of 1000 (max. 8388607) 411 PDF objects out of 1000 (max. 8388607)
313 compressed objects within 4 object streams 330 compressed objects within 4 object streams
104 named destinations out of 1000 (max. 500000) 108 named destinations out of 1000 (max. 500000)
26198 words of extra memory for PDF output out of 29859 (max. 10000000) 26211 words of extra memory for PDF output out of 29859 (max. 10000000)

Binary file not shown.

View file

@ -112,6 +112,12 @@
\label{sec:kernels} \label{sec:kernels}
\input{inputs/kernels.tex} \input{inputs/kernels.tex}
\FloatBarrier
\section{Results}
\label{sec:results}
\input{inputs/results.tex}
\printbibliography \printbibliography
\end{document} \end{document}

View file

@ -21,3 +21,5 @@
\contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3} \contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1} \contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4}Results}{8}{section.4}

Binary file not shown.

File diff suppressed because it is too large Load diff