95 lines
8.9 KiB
TeX
95 lines
8.9 KiB
TeX
\relax
|
|
\providecommand\hyper@newdestlabel[2]{}
|
|
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
|
|
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
|
|
\global\let\oldcontentsline\contentsline
|
|
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
|
|
\global\let\oldnewlabel\newlabel
|
|
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
|
|
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
|
|
\AtEndDocument{\ifx\hyper@anchor\@undefined
|
|
\let\contentsline\oldcontentsline
|
|
\let\newlabel\oldnewlabel
|
|
\fi}
|
|
\fi}
|
|
\global\let\hyper@last\relax
|
|
\gdef\HyperFirstAtBeginDocument#1{#1}
|
|
\providecommand\HyField@AuxAddToFields[1]{}
|
|
\providecommand\HyField@AuxAddToCoFields[2]{}
|
|
\abx@aux@sortscheme{nty}
|
|
\providecommand \oddpage@label [2]{}
|
|
\@writefile{toc}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
|
|
\@writefile{lof}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
|
|
\@writefile{lot}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
|
|
\select@language{english}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\select@language{english}}
|
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\select@language{english}}
|
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\select@language{english}}
|
|
\abx@aux@cite{williams2009}
|
|
\abx@aux@cite{berstrom}
|
|
\abx@aux@cite{ark4210}
|
|
\abx@aux@cite{intel2016}
|
|
\abx@aux@cite{intelvfmadd132pd}
|
|
\abx@aux@cite{shimpi2012}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}{section.1}}
|
|
\newlabel{sec:introduction}{{1}{2}{Introduction}{section.1}{}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {2}Roofline Model}{2}{section.2}}
|
|
\newlabel{sec:roofline}{{2}{2}{Roofline Model}{section.2}{}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Theoretical Peak Performance}{2}{subsection.2.1}}
|
|
\newlabel{sec:peak}{{2.1}{2}{Theoretical Peak Performance}{subsection.2.1}{}}
|
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Relevant processor specifications\relax }}{2}{table.caption.2}}
|
|
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
|
|
\newlabel{tbl:spec-4210}{{1}{2}{Relevant processor specifications\relax }{table.caption.2}{}}
|
|
\abx@aux@cite{bergstrom2}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Memory Bandwidth}{3}{subsection.2.2}}
|
|
\newlabel{sec:memory}{{2.2}{3}{Memory Bandwidth}{subsection.2.2}{}}
|
|
\newlabel{lst:numa-stream-results}{{1}{3}{NUMA-STREAM results for two threads}{lstlisting.1}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}NUMA-STREAM results for two threads}{3}{lstlisting.1}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Graph}{3}{subsection.2.3}}
|
|
\newlabel{sec:model}{{2.3}{3}{Graph}{subsection.2.3}{}}
|
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Roofline graph from the values obtained in~\hyperref [sec:peak]{Section~\ref *{sec:peak}} and~\hyperref [sec:memory]{Section~\ref *{sec:memory}}\relax }}{4}{figure.caption.3}}
|
|
\newlabel{fig:roofline}{{1}{4}{Roofline graph from the values obtained in~\prettyref {sec:peak} and~\prettyref {sec:memory}\relax }{figure.caption.3}{}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {3}Kernels}{4}{section.3}}
|
|
\newlabel{sec:kernels}{{3}{4}{Kernels}{section.3}{}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}1/16 $\not =$ 1/16. Or: The Fancy Arithmetics of a Compiler}{5}{subsection.3.1}}
|
|
\newlabel{lst:1-16-simple-dangerous}{{2}{5}{Simple $\rfrac {1}{16}$ kernel with questionable compiled form}{lstlisting.2}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Simple ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ kernel with questionable compiled form}{5}{lstlisting.2}}
|
|
\abx@aux@cite{intelvfmadd132sd}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}The 1/16 OI Kernel}{6}{subsection.3.2}}
|
|
\newlabel{sec:1-16}{{3.2}{6}{The 1/16 OI Kernel}{subsection.3.2}{}}
|
|
\newlabel{lst:1-16-simple}{{3}{6}{Simple $\rfrac {1}{16}$ OI kernel}{lstlisting.3}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Simple ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ OI kernel}{6}{lstlisting.3}}
|
|
\newlabel{lst:1-16-fma}{{4}{6}{FMA aware $\rfrac {1}{16}$ OI kernel}{lstlisting.4}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}FMA aware ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ OI kernel}{6}{lstlisting.4}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3}}
|
|
\newlabel{lst:8-1-macros}{{5}{7}{Macros for bulk repeating instructions}{lstlisting.5}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Macros for bulk repeating instructions}{7}{lstlisting.5}}
|
|
\newlabel{lst:8-1-simple}{{6}{7}{Simple $8$ OI kernel}{lstlisting.6}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Simple $8$ OI kernel}{7}{lstlisting.6}}
|
|
\newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}}
|
|
\newlabel{sec:advanced-kernels}{{3.3.1}{8}{Some Further 8/1 Kernel}{subsubsection.3.3.1}{}}
|
|
\newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}}
|
|
\newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}}
|
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{8}{section.4}}
|
|
\newlabel{sec:results}{{4}{8}{Results}{section.4}{}}
|
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results for various kernels\relax }}{9}{table.caption.4}}
|
|
\newlabel{tbl:res-kernels}{{2}{9}{Results for various kernels\relax }{table.caption.4}{}}
|
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Roofline graph with kernel results\relax }}{10}{figure.caption.5}}
|
|
\newlabel{fig:roofline-withres}{{2}{10}{Roofline graph with kernel results\relax }{figure.caption.5}{}}
|
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Best results for 100000000\relax }}{10}{table.caption.6}}
|
|
\newlabel{tbl:res-kernels-10}{{3}{10}{Best results for 100000000\relax }{table.caption.6}{}}
|
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Best results for 250000000\relax }}{10}{table.caption.8}}
|
|
\newlabel{tbl:res-kernels-25}{{4}{10}{Best results for 250000000\relax }{table.caption.8}{}}
|
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Roofline graph with best results for 100000000\relax }}{11}{figure.caption.7}}
|
|
\newlabel{fig:roofline-withres-10}{{3}{11}{Roofline graph with best results for 100000000\relax }{figure.caption.7}{}}
|
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Roofline graph with best results for 250000000\relax }}{11}{figure.caption.9}}
|
|
\newlabel{fig:roofline-withres-25}{{4}{11}{Roofline graph with best results for 250000000\relax }{figure.caption.9}{}}
|
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{12}{section.5}}
|
|
\newlabel{sec:discussion}{{5}{12}{Discussion}{section.5}{}}
|
|
\newlabel{LastPage}{{}{12}{}{page.12}{}}
|
|
\xdef\lastpage@lastpage{12}
|
|
\xdef\lastpage@lastpageHy{12}
|