\relax \providecommand\hyper@newdestlabel[2]{} \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined \global\let\oldcontentsline\contentsline \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global\let\oldnewlabel\newlabel \gdef\newlabel#1#2{\newlabelxx{#1}#2} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\ifx\hyper@anchor\@undefined \let\contentsline\oldcontentsline \let\newlabel\oldnewlabel \fi} \fi} \global\let\hyper@last\relax \gdef\HyperFirstAtBeginDocument#1{#1} \providecommand\HyField@AuxAddToFields[1]{} \providecommand\HyField@AuxAddToCoFields[2]{} \abx@aux@sortscheme{nty} \providecommand \oddpage@label [2]{} \@writefile{toc}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } \@writefile{lof}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } \@writefile{lot}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } \select@language{english} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\select@language{english}} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\select@language{english}} \@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\select@language{english}} \abx@aux@cite{williams2009} \abx@aux@cite{berstrom} \abx@aux@cite{ark4210} \abx@aux@cite{intel2016} \abx@aux@cite{intelvfmadd132pd} \abx@aux@cite{shimpi2012} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}{section.1}} \newlabel{sec:introduction}{{1}{2}{Introduction}{section.1}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {2}Roofline Model}{2}{section.2}} \newlabel{sec:roofline}{{2}{2}{Roofline Model}{section.2}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Theoretical Peak Performance}{2}{subsection.2.1}} \newlabel{sec:peak}{{2.1}{2}{Theoretical Peak Performance}{subsection.2.1}{}} \@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Relevant processor specifications\relax }}{2}{table.caption.2}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} \newlabel{tbl:spec-4210}{{1}{2}{Relevant processor specifications\relax }{table.caption.2}{}} \abx@aux@cite{bergstrom2} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Memory Bandwidth}{3}{subsection.2.2}} \newlabel{sec:memory}{{2.2}{3}{Memory Bandwidth}{subsection.2.2}{}} \newlabel{lst:numa-stream-results}{{1}{3}{NUMA-STREAM results for two threads}{lstlisting.1}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}NUMA-STREAM results for two threads}{3}{lstlisting.1}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Graph}{3}{subsection.2.3}} \newlabel{sec:model}{{2.3}{3}{Graph}{subsection.2.3}{}} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Roofline graph from the values obtained in~\hyperref [sec:peak]{Section~\ref *{sec:peak}} and~\hyperref [sec:memory]{Section~\ref *{sec:memory}}\relax }}{4}{figure.caption.3}} \newlabel{fig:roofline}{{1}{4}{Roofline graph from the values obtained in~\prettyref {sec:peak} and~\prettyref {sec:memory}\relax }{figure.caption.3}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {3}Kernels}{4}{section.3}} \newlabel{sec:kernels}{{3}{4}{Kernels}{section.3}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}1/16 $\not =$ 1/16. Or: The Fancy Arithmetics of a Compiler}{5}{subsection.3.1}} \newlabel{lst:1-16-simple-dangerous}{{2}{5}{Simple $\rfrac {1}{16}$ kernel with questionable compiled form}{lstlisting.2}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Simple ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ kernel with questionable compiled form}{5}{lstlisting.2}} \abx@aux@cite{intelvfmadd132sd} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}The 1/16 OI Kernel}{6}{subsection.3.2}} \newlabel{sec:1-16}{{3.2}{6}{The 1/16 OI Kernel}{subsection.3.2}{}} \newlabel{lst:1-16-simple}{{3}{6}{Simple $\rfrac {1}{16}$ OI kernel}{lstlisting.3}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Simple ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ OI kernel}{6}{lstlisting.3}} \newlabel{lst:1-16-fma}{{4}{6}{FMA aware $\rfrac {1}{16}$ OI kernel}{lstlisting.4}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}FMA aware ${}^{1}\tmspace -\thinmuskip {.1667em}/_{16}$ OI kernel}{6}{lstlisting.4}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3}} \newlabel{lst:8-1-macros}{{5}{7}{Macros for bulk repeating instructions}{lstlisting.5}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Macros for bulk repeating instructions}{7}{lstlisting.5}} \newlabel{lst:8-1-simple}{{6}{7}{Simple $8$ OI kernel}{lstlisting.6}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Simple $8$ OI kernel}{7}{lstlisting.6}} \newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}} \newlabel{sec:advanced-kernels}{{3.3.1}{8}{Some Further 8/1 Kernel}{subsubsection.3.3.1}{}} \newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}} \newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}} \@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{8}{section.4}} \newlabel{sec:results}{{4}{8}{Results}{section.4}{}} \@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results for various kernels\relax }}{9}{table.caption.4}} \newlabel{tbl:res-kernels}{{2}{9}{Results for various kernels\relax }{table.caption.4}{}} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Roofline graph with kernel results\relax }}{10}{figure.caption.5}} \newlabel{fig:roofline-withres}{{2}{10}{Roofline graph with kernel results\relax }{figure.caption.5}{}} \@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Best results for 100000000\relax }}{10}{table.caption.6}} \newlabel{tbl:res-kernels-10}{{3}{10}{Best results for 100000000\relax }{table.caption.6}{}} \@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Best results for 250000000\relax }}{10}{table.caption.8}} \newlabel{tbl:res-kernels-25}{{4}{10}{Best results for 250000000\relax }{table.caption.8}{}} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Roofline graph with best results for 100000000\relax }}{11}{figure.caption.7}} \newlabel{fig:roofline-withres-10}{{3}{11}{Roofline graph with best results for 100000000\relax }{figure.caption.7}{}} \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Roofline graph with best results for 250000000\relax }}{11}{figure.caption.9}} \newlabel{fig:roofline-withres-25}{{4}{11}{Roofline graph with best results for 250000000\relax }{figure.caption.9}{}} \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{12}{section.5}} \newlabel{sec:discussion}{{5}{12}{Discussion}{section.5}{}} \newlabel{LastPage}{{}{12}{}{page.12}{}} \xdef\lastpage@lastpage{12} \xdef\lastpage@lastpageHy{12}