diff --git a/reduce/report/report.tex b/reduce/report/report.tex new file mode 100755 index 0000000..bfb788e --- /dev/null +++ b/reduce/report/report.tex @@ -0,0 +1,225 @@ +\documentclass[a4paper, DIV=12]{scrartcl} +\usepackage[english]{babel} +\usepackage[utf8]{inputenc} +\usepackage[dvipsnames]{xcolor} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{stmaryrd} +\usepackage{graphicx} +\usepackage{pdflscape} +\usepackage{listingsutf8} +\usepackage{spverbatim} +\usepackage{placeins} +\usepackage{lmodern} +%\usepackage{helvet} +\usepackage{booktabs} +\usepackage[T1]{fontenc} +\usepackage{microtype} +\usepackage{framed} +\usepackage[colorlinks=true, + linkcolor=blue, + urlcolor=blue, + breaklinks=true, + citecolor=blue]{hyperref} +\usepackage{prettyref} +\usepackage{lastpage} +\usepackage{subcaption} +\usepackage{tabularx} +\usepackage{adjustbox} +\usepackage{pdfpages} +\usepackage{xspace} +\usepackage[inline]{enumitem} +\usepackage[abbreviate=false,maxbibnames=99,backend=biber]{biblatex} +\usepackage{textcomp} +\usepackage{tikz} +\usepackage[ruled,linesnumbered]{algorithm2e} + +\setkomafont{disposition}{\normalfont\bfseries} + +\setlist[itemize]{itemsep=0.1em} +\setlist[enumerate]{itemsep=0.1em} + + +\newrefformat{tbl}{\hyperref[#1]{Table~\ref*{#1}}} +\newrefformat{fig}{\hyperref[#1]{Figure~\ref*{#1}}} +\newrefformat{lst}{\hyperref[#1]{Listing~\ref*{#1}}} +\newrefformat{equ}{\hyperref[#1]{Equation~\ref*{#1}}} +\newrefformat{sec}{\hyperref[#1]{Section~\ref*{#1}}} +\newrefformat{alg}{\hyperref[#1]{Algorithm~\ref*{#1}}} +\renewcommand{\arraystretch}{1.2} + +\newcommand\bigforall{\mbox{\Large $\mathsurround0pt\forall$}} +\everymath{\displaystyle} + +\lstset{ % + backgroundcolor=\color{white}, % choose the background color; you must add \usepackage{color} or + basicstyle=\ttfamily, % the size of the fonts that are used for the code + breakatwhitespace=true, % sets if automatic breaks should only happen at whitespace + breaklines=true, % sets automatic line 
breaking + captionpos=b, % sets the caption-position to bottom + escapeinside={(*}{*)}, % if you want to add LaTeX within your code + extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8 + frame=single, % adds a frame around the code + keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible) + language=TeX, % the language of the code + numbers=left, % where to put the line-numbers; possible values are (none, left, right) + numbersep=5pt, % how far the line-numbers are from the code + numberstyle=\tiny\color{gray}, % the style that is used for the line-numbers + rulecolor=\color{black}, % if not set, the frame-color may be changed on line-breaks within not-black text (e.g. comments (green here)) + showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces' + showstringspaces=false, % underline spaces within strings only + showtabs=false, % show tabs within strings adding particular underscores + stepnumber=1, % the step between two line-numbers. 
If it's 1, each line will be numbered + tabsize=2, % sets default tabsize to 2 spaces + title=\lstname, % show the filename of files included with \lstinputlisting; also try caption instead of title + emph=[3]{int:,array,set,of,int,if,then,else,constraint,var,union,endif,function,where,in,div,predicate,let,opt,full,format,def,for,True,False,return,or}, + emphstyle=[3]\color{ForestGreen}, + emph=[2]{length,max,forall,startEmptyBuffer,fix,startEmptyBufferShow,exactly,cumulative,occurs,deopt,sum,,all}, + emphstyle=[2]\color{blue}, + commentstyle=\color{BrickRed}, + stringstyle =\color{red}, +} + +\begin{document} + +\subject{High Performance Computing} +\title{Reduction trees for MPI Reductions} +\subtitle{Project 2} + +\author{Johannes Winklehner\\1226104 \and Armin Friedl\\1053597} +\date{\today} + +\maketitle + +\tableofcontents + +\newpage + +\section{Problem Description} +\label{sec:description} + +The purpose of this project is to compare different implementations of the collective communication call MPI\_Reduce. +The compared implementations should all use different forms of Tree Reduction algorithms. +A given implementation of the MPI standard, in our case NEC MPI, serves as the baseline for the comparison. +\begin{description} + \item[Binomial Tree] + A binomial tree has a non-fixed degree where each tree $B_i$ has exactly $i$ subtrees of size $B_0$ to $B_{i-1}$. + The number of nodes in such a tree is equal to $2^i$ and the depth is $i$. + \item[Fibonacci Tree] + The Fibonacci tree uses a fixed degree of $2$ where a tree of size $F_i$ has one subtree of size $F_{i-1}$ and one of $F_{i-2}$. + Therefore the number of nodes in this kind of tree is $\mathit{fib}(i+3)-1$, using the Fibonacci function $\mathit{fib}(x) = \mathit{fib}(x-1)+\mathit{fib}(x-2)$, and its depth is also $i$. + \item[Binary Tree] + The binary tree used for reduction is a common complete binary tree where a tree $T_i$ has two subtrees $T_{i-1}$. 
+ Such a tree has $2^{i+1}-1$ nodes and its depth is, as for the other types, $i$. +\end{description} + +\begin{center} +\begin{minipage}{.4\textwidth} +\begin{tikzpicture} +\node [circle,draw]{$B_i$} +child { node [circle,draw]{$B_{i-1}$}} +child {node [circle,draw] {$B_{i-2}$}} +child {node {\dots} edge from parent[draw=none]} +child {node [circle,draw] {$B_0$}}; +\end{tikzpicture} +%\caption{Binomial Tree of size $i$} +\end{minipage} +\begin{minipage}{.2\textwidth} +\begin{tikzpicture} +\node [circle,draw]{$F_i$} +child { node [circle,draw]{$F_{i-1}$}} +child {node [circle,draw] {$F_{i-2}$}}; +\end{tikzpicture} +%\caption{Fibonacci Tree of size $i$} +\end{minipage} +\begin{minipage}{.2\textwidth} +\begin{tikzpicture} +\node [circle,draw]{$T_i$} +child { node [circle,draw]{$T_{i-1}$}} +child {node [circle,draw] {$T_{i-1}$}}; +\end{tikzpicture} +%\caption{Complete Binary Tree of size $i$} +\end{minipage} +\end{center} + +All three implementations of the reduce function must use exactly the same interface as defined by the MPI standard. +This interface is shown in \prettyref{lst:reduce}. +This requires that all implementations support arbitrary MPI datatypes as well as operations. +The standard also provides some constraints regarding the associativity and commutativity of executable operations. +Every MPI operation must be associative, but does not necessarily have to be commutative. +This means that all results of the operation must be computed in the MPI rank order of all processes. + +\begin{lstlisting}[language=C, caption=MPI Reduce interface, label=lst:reduce] +int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) +\end{lstlisting} + +The standard also defines additional features of the reduce function, for example an in-place operator for the root process. +However, since those details were not mentioned in the assignment description, we did not consider them as part of the project. 
+ +The basic algorithm for a tree reduction, which will be shown in the next section, is very similar for all kinds of trees and uses Point-to-Point communication between tree nodes. +The assumption for our implementations to be efficient is that the underlying communication network is fully connected and allows for bidirectional communication. + +\FloatBarrier + +\section{Implemented Algorithms} +\label{sec:algorithms} + +The basic algorithm for a tree reduction is very simple and is shown in \prettyref{alg:reduce}. +At first the parent and all child nodes have to be determined to know the communication partners of each process. +Then each process receives the partial results from all of its children and calculates its own result from the received data. +To ensure the correctness of the result for non-commutative operations the iteration of child nodes has to be done in rank order. +Processes which are leaf nodes in the tree have no children and therefore skip the receiving part of the algorithm. +If a process has a parent and is therefore not the root process, it sends its result to the determined parent node. +However, if the process is the root process, the reduction is finished and the result can be returned. + +\begin{algorithm} + \caption{Tree Reduce} + \label{alg:reduce} + \KwIn{An array $\vec{a}$ of a given $datatype$ with size $count$ for each process} + \KwOut{The result of the reduction on the $root$ process} + determine $parent$ and $children$\; + $result = \vec{a}$\; + \ForAll{child in children}{ + receive $received$ from $child$\; + $result =$ local reduce of $received$ and $result$\; + } + \eIf{parent exists}{ + send $result$ to $parent$\; + }{ + $output = result$\; + } +\end{algorithm} + +The calculation of the parent and child nodes is the only aspect which has to be adapted for each kind of tree. +However, there are of course certain optimizations possible which use some knowledge of a concrete tree. 
+ +\FloatBarrier + +\section{Implementation Details} +\label{sec:kernels} + +\FloatBarrier + +\section{Results} +\label{sec:results} + +\FloatBarrier + +\section{Analysis} +\label{sec:analysis} + +\section{Appendix} + +\lstinputlisting[language=C]{../binom_reduce.c} + +\lstinputlisting[language=C]{../fib_reduce.c} + +\lstinputlisting[language=C]{../bin_reduce.c} + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: