müll von einem fehlgeschlagenen scp
This commit is contained in:
parent
7ae92ef98b
commit
088d5a2a95
11 changed files with 0 additions and 1907 deletions
|
@ -1,7 +0,0 @@
|
||||||
# Builds the reduction benchmark with the MPI compiler wrapper.
# `all` and `clean` name actions, not files, so they are declared phony.
.PHONY: all clean

all: reduce

# The headers are prerequisites too, so an interface change triggers a rebuild.
reduce: hpc_mpi.c binom_reduce.c fib_reduce.c bin_reduce.c binom_reduce.h fib_reduce.h bin_reduce.h
	mpicc -Wall -Wextra -O3 -o reduce bin_reduce.c fib_reduce.c binom_reduce.c hpc_mpi.c

# -f keeps `make clean` from failing when the binary was never built.
clean:
	rm -f reduce
|
|
|
@ -1,86 +0,0 @@
|
||||||
/*
|
|
||||||
* bin_reduce.c
|
|
||||||
*
|
|
||||||
* Created on: 16 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <mpi.h>
|
|
||||||
#include "bin_reduce.h"
|
|
||||||
|
|
||||||
/**
 * Integer base-2 logarithm, rounded down.
 *
 * @param x value whose logarithm is wanted
 * @return floor(log2(x)) for x >= 1, and 0 for x <= 0
 *
 * Non-positive values have no logarithm and are mapped to 0. Without the
 * guard a negative argument would never terminate on platforms where the
 * right shift of a negative int is arithmetic (the value settles at -1).
 */
int int_log2(int x) {
	int r = 0;

	if (x <= 0) {
		return 0;
	}

	while (x >>= 1) {
		r++;
	}
	return r;
}
|
|
||||||
|
|
||||||
int Bin_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) {
|
|
||||||
if (root != 0) {
|
|
||||||
fprintf(stderr, "Sorry, root!=0 not allowed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int r, p, size;
|
|
||||||
MPI_Status status;
|
|
||||||
|
|
||||||
MPI_Comm_rank(comm, &r);
|
|
||||||
MPI_Comm_size(comm, &p);
|
|
||||||
MPI_Type_size(datatype, &size);
|
|
||||||
|
|
||||||
int tree_depth = int_log2(p) + 1;
|
|
||||||
int i = 0;
|
|
||||||
int depth;
|
|
||||||
int parent = 0;
|
|
||||||
|
|
||||||
// maximum possible number of nodes in a subtree with current depth
|
|
||||||
int max_nodes = ((1 << tree_depth) - 1) / 2;
|
|
||||||
|
|
||||||
void *recv_left;
|
|
||||||
void *recv_right;
|
|
||||||
|
|
||||||
if (r == root) {
|
|
||||||
recv_right = recvbuf;
|
|
||||||
} else {
|
|
||||||
MPI_Alloc_mem(count * size, MPI_INFO_NULL, &recv_right);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (depth = 1; i != r; depth++) {
|
|
||||||
parent = i;
|
|
||||||
if (r > i + max_nodes) {
|
|
||||||
i += max_nodes + 1;
|
|
||||||
} else {
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
max_nodes /= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (depth != tree_depth && r + 1 < p) {
|
|
||||||
MPI_Alloc_mem(count * size, MPI_INFO_NULL, &recv_left);
|
|
||||||
MPI_Recv(recv_left, count, datatype, r + 1, 0, comm, &status);
|
|
||||||
MPI_Reduce_local(sendbuf, recv_left, count, datatype, op);
|
|
||||||
|
|
||||||
if (r + max_nodes + 1 < p) {
|
|
||||||
MPI_Recv(recv_right, count, datatype, r + max_nodes + 1, 0, comm,
|
|
||||||
&status);
|
|
||||||
MPI_Reduce_local(recv_left, recv_right, count, datatype, op);
|
|
||||||
} else {
|
|
||||||
memcpy(recv_right, recv_left, count * size);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(recv_left);
|
|
||||||
} else {
|
|
||||||
memcpy(recv_right, sendbuf, count * size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r != root) {
|
|
||||||
MPI_Send(recv_right, count, datatype, parent, 0, comm);
|
|
||||||
MPI_Free_mem(recv_right);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
/*
|
|
||||||
* bin_reduce.h
|
|
||||||
*
|
|
||||||
* Created on: 16 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef BIN_REDUCE_H_
|
|
||||||
#define BIN_REDUCE_H_
|
|
||||||
|
|
||||||
int Bin_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
|
|
||||||
|
|
||||||
#endif /* BIN_REDUCE_H_ */
|
|
|
@ -1,62 +0,0 @@
|
||||||
/*
|
|
||||||
* binom_reduce.c
|
|
||||||
*
|
|
||||||
* Created on: 18 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <mpi.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include "binom_reduce.h"
|
|
||||||
|
|
||||||
/**
 * Exchanges the two pointers stored at a and b.
 *
 * @param a address of the first pointer
 * @param b address of the second pointer
 */
void swap(void **a, void **b) {
	void *const first = *a;
	void *const second = *b;

	*a = second;
	*b = first;
}
|
|
||||||
|
|
||||||
int Binom_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) {
|
|
||||||
|
|
||||||
if (root != 0) {
|
|
||||||
fprintf(stderr, "Sorry, root!=0 not allowed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int r, p, size;
|
|
||||||
MPI_Status status;
|
|
||||||
void *recv;
|
|
||||||
void *reduced;
|
|
||||||
|
|
||||||
MPI_Comm_rank(comm, &r);
|
|
||||||
MPI_Comm_size(comm, &p);
|
|
||||||
MPI_Type_size(datatype, &size);
|
|
||||||
|
|
||||||
MPI_Alloc_mem(count * size, MPI_INFO_NULL, &recv);
|
|
||||||
MPI_Alloc_mem(count * size, MPI_INFO_NULL, &reduced);
|
|
||||||
|
|
||||||
memcpy(reduced, sendbuf, count * size);
|
|
||||||
|
|
||||||
int i = 1;
|
|
||||||
while ((r + i) % (2 * i) != 0 && i < p) {
|
|
||||||
if (r + i < p) {
|
|
||||||
MPI_Recv(recv, count, datatype, r + i, i, comm, &status);
|
|
||||||
MPI_Reduce_local(reduced, recv, count, datatype, op);
|
|
||||||
swap(&reduced, &recv);
|
|
||||||
}
|
|
||||||
i <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r != root) {
|
|
||||||
MPI_Send(reduced, count, datatype, r - i, i, comm);
|
|
||||||
} else {
|
|
||||||
memcpy(recvbuf, reduced, count * size);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(reduced);
|
|
||||||
MPI_Free_mem(recv);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
/*
|
|
||||||
* binom_reduce.h
|
|
||||||
*
|
|
||||||
* Created on: 18 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef BINOM_REDUCE_H_
|
|
||||||
#define BINOM_REDUCE_H_
|
|
||||||
|
|
||||||
int Binom_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
|
|
||||||
|
|
||||||
#endif /* BINOM_REDUCE_H_ */
|
|
|
@ -1,86 +0,0 @@
|
||||||
/*
|
|
||||||
* fib_reduce.c
|
|
||||||
*
|
|
||||||
* Created on: 18 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <mpi.h>
|
|
||||||
#include "fib_reduce.h"
|
|
||||||
|
|
||||||
int Fib_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) {
|
|
||||||
if (root != 0) {
|
|
||||||
fprintf(stderr, "Sorry, root!=0 not allowed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int r, p, size;
|
|
||||||
MPI_Status status;
|
|
||||||
void *recv_right;
|
|
||||||
|
|
||||||
MPI_Comm_rank(comm, &r);
|
|
||||||
MPI_Comm_size(comm, &p);
|
|
||||||
MPI_Type_size(datatype, &size);
|
|
||||||
|
|
||||||
int temp;
|
|
||||||
int right = 1;
|
|
||||||
int fib = 1;
|
|
||||||
|
|
||||||
while (fib - 1 < p) {
|
|
||||||
temp = fib;
|
|
||||||
fib += right;
|
|
||||||
right = temp;
|
|
||||||
}
|
|
||||||
|
|
||||||
int left = fib - right;
|
|
||||||
int i = 0;
|
|
||||||
int parent = 0;
|
|
||||||
|
|
||||||
while (i != r) {
|
|
||||||
parent = i;
|
|
||||||
if (r >= i + left) {
|
|
||||||
i += left;
|
|
||||||
temp = left;
|
|
||||||
left = right - left;
|
|
||||||
right = temp;
|
|
||||||
} else {
|
|
||||||
i++;
|
|
||||||
right -= left;
|
|
||||||
left -= right;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r == root) {
|
|
||||||
recv_right = recvbuf;
|
|
||||||
} else {
|
|
||||||
MPI_Alloc_mem(size * count, MPI_INFO_NULL, &recv_right);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (right - 1 > 0 && r + 1 < p) {
|
|
||||||
void *recv_left;
|
|
||||||
MPI_Alloc_mem(count * size, MPI_INFO_NULL, &recv_left);
|
|
||||||
MPI_Recv(recv_left, count, datatype, r + 1, 0, comm, &status);
|
|
||||||
MPI_Reduce_local(sendbuf, recv_left, count, datatype, op);
|
|
||||||
|
|
||||||
if (left - 1 > 0 && r + left < p) {
|
|
||||||
MPI_Recv(recv_right, count, datatype, r + left, 0, comm, &status);
|
|
||||||
MPI_Reduce_local(recv_left, recv_right, count, datatype, op);
|
|
||||||
} else {
|
|
||||||
memcpy(recv_right, recv_left, count * size);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(recv_left);
|
|
||||||
} else {
|
|
||||||
memcpy(recv_right, sendbuf, count * size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r != root) {
|
|
||||||
MPI_Send(recv_right, count, datatype, parent, 0, comm);
|
|
||||||
MPI_Free_mem(recv_right);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
/*
|
|
||||||
* fib_reduce.h
|
|
||||||
*
|
|
||||||
* Created on: 18 Jun 2016
|
|
||||||
* Author: johannes
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef FIB_REDUCE_H_
|
|
||||||
#define FIB_REDUCE_H_
|
|
||||||
|
|
||||||
int Fib_Reduce(const void *sendbuf, void *recvbuf, int count,
|
|
||||||
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
|
|
||||||
|
|
||||||
#endif /* FIB_REDUCE_H_ */
|
|
|
@ -1,192 +0,0 @@
|
||||||
/*
|
|
||||||
============================================================================
|
|
||||||
Name : hpc_mpi.c
|
|
||||||
Author :
|
|
||||||
Version :
|
|
||||||
Copyright : Your copyright notice
|
|
||||||
Description : Hello MPI World in C
|
|
||||||
============================================================================
|
|
||||||
*/
|
|
||||||
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <mpi.h>

#include "bin_reduce.h"
#include "binom_reduce.h"
#include "fib_reduce.h"
|
|
||||||
|
|
||||||
/**
 * Prints the command-line synopsis to stderr on rank 0, finalizes MPI and
 * terminates the calling process with EXIT_SUCCESS. Does not return.
 *
 * @param progname name shown in the synopsis (normally argv[0])
 * @param rank     rank of the calling process; only rank 0 prints
 */
void usage(char *progname, int rank) {
	const int is_printing_rank = (rank == 0);

	if (is_printing_rank) {
		fprintf(stderr,
				"USAGE: %s [-b] [-o operation] size\n"
				" supported operations:\n"
				" 0 MAX\n"
				" 1 MIN\n"
				" 2 SUM\n"
				" 3 PROD\n"
				" 4 LAND\n"
				" 5 BAND\n"
				" 6 LOR\n"
				" 7 BOR\n"
				" 8 LXOR\n"
				" 9 BXOR\n",
				progname);
	}

	MPI_Finalize();
	exit(EXIT_SUCCESS);
}
|
|
||||||
|
|
||||||
/**
 * Fills the first count elements of a with pseudo-random values.
 *
 * The generator is seeded with the current time plus rank.
 *
 * @param a     destination array with room for at least count ints
 * @param count number of elements to write; nothing is written if <= 0
 * @param rank  value added to the time-based seed
 */
void fill(int *a, int count, int rank) {
	int *slot = a;
	int remaining;

	srand(time(NULL) + rank);

	for (remaining = count; remaining > 0; remaining--) {
		*slot = rand();
		slot++;
	}
}
|
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
|
||||||
int r;
|
|
||||||
int p;
|
|
||||||
|
|
||||||
MPI_Init(&argc, &argv);
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &r);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &p);
|
|
||||||
|
|
||||||
int opt;
|
|
||||||
int benchmark = 0;
|
|
||||||
char oparg = '0';
|
|
||||||
|
|
||||||
while ((opt = getopt(argc, argv, "bo:")) != -1) {
|
|
||||||
switch (opt) {
|
|
||||||
case 'b':
|
|
||||||
benchmark = 1;
|
|
||||||
break;
|
|
||||||
case 'o':
|
|
||||||
oparg = optarg[0];
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
usage(argv[0], r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (optind >= argc) {
|
|
||||||
usage(argv[0], r);
|
|
||||||
}
|
|
||||||
|
|
||||||
int size = atoi(argv[optind]);
|
|
||||||
MPI_Op op;
|
|
||||||
|
|
||||||
switch (oparg) {
|
|
||||||
case '0':
|
|
||||||
op = MPI_MAX;
|
|
||||||
break;
|
|
||||||
case '1':
|
|
||||||
op = MPI_MIN;
|
|
||||||
break;
|
|
||||||
case '2':
|
|
||||||
op = MPI_SUM;
|
|
||||||
break;
|
|
||||||
case '3':
|
|
||||||
op = MPI_PROD;
|
|
||||||
break;
|
|
||||||
case '4':
|
|
||||||
op = MPI_LAND;
|
|
||||||
break;
|
|
||||||
case '5':
|
|
||||||
op = MPI_BAND;
|
|
||||||
break;
|
|
||||||
case '6':
|
|
||||||
op = MPI_LOR;
|
|
||||||
break;
|
|
||||||
case '7':
|
|
||||||
op = MPI_BOR;
|
|
||||||
break;
|
|
||||||
case '8':
|
|
||||||
op = MPI_LXOR;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
op = MPI_BXOR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
int *a;
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &a);
|
|
||||||
fill(a, size, r);
|
|
||||||
|
|
||||||
if (benchmark) {
|
|
||||||
int *red;
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &red);
|
|
||||||
double start, end, global_end;
|
|
||||||
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
start = MPI_Wtime();
|
|
||||||
MPI_Reduce(a, red, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
end = MPI_Wtime();
|
|
||||||
MPI_Reduce(&end, &global_end, 1, MPI_DOUBLE, MPI_MAX, 0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
if (r == 0) {
|
|
||||||
printf("%f, ", global_end - start);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
start = MPI_Wtime();
|
|
||||||
Fib_Reduce(a, red, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
end = MPI_Wtime();
|
|
||||||
MPI_Reduce(&end, &global_end, 1, MPI_DOUBLE, MPI_MAX, 0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
if (r == 0) {
|
|
||||||
printf("%f, ", global_end - start);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
start = MPI_Wtime();
|
|
||||||
Bin_Reduce(a, red, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
end = MPI_Wtime();
|
|
||||||
MPI_Reduce(&end, &global_end, 1, MPI_DOUBLE, MPI_MAX, 0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
if (r == 0) {
|
|
||||||
printf("%f, ", global_end - start);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
|
||||||
start = MPI_Wtime();
|
|
||||||
Binom_Reduce(a, red, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
end = MPI_Wtime();
|
|
||||||
MPI_Reduce(&end, &global_end, 1, MPI_DOUBLE, MPI_MAX, 0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
if (r == 0) {
|
|
||||||
printf("%f\n", global_end - start);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(red);
|
|
||||||
} else {
|
|
||||||
int *red;
|
|
||||||
int *rfib;
|
|
||||||
int *rbin;
|
|
||||||
int *rbinom;
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &red);
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rfib);
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rbin);
|
|
||||||
MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rbinom);
|
|
||||||
|
|
||||||
MPI_Reduce(a, red, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
Fib_Reduce(a, rfib, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
Bin_Reduce(a, rbin, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
Binom_Reduce(a, rbinom, size, MPI_INT, op, 0, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
if (r == 0) {
|
|
||||||
if (memcmp(red, rfib, size * sizeof(int))) {
|
|
||||||
printf("Fib_Reduce does not match\n");
|
|
||||||
}
|
|
||||||
if (memcmp(red, rbin, size * sizeof(int))) {
|
|
||||||
printf("Bin_Reduce does not match\n");
|
|
||||||
}
|
|
||||||
if (memcmp(red, rbinom, size * sizeof(int))) {
|
|
||||||
printf("Binom_Reduce does not match\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("All checks done\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(red);
|
|
||||||
MPI_Free_mem(rfib);
|
|
||||||
MPI_Free_mem(rbin);
|
|
||||||
MPI_Free_mem(rbinom);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Free_mem(a);
|
|
||||||
|
|
||||||
MPI_Finalize();
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
BIN
jupiter/reduce
BIN
jupiter/reduce
Binary file not shown.
1081
jupiter/report/nodes
1081
jupiter/report/nodes
File diff suppressed because it is too large
Load diff
|
@ -1,351 +0,0 @@
|
||||||
\documentclass[a4paper, DIV=12]{scrartcl}
|
|
||||||
\usepackage[english]{babel}
|
|
||||||
\usepackage[utf8]{inputenc}
|
|
||||||
\usepackage[dvipsnames]{xcolor}
|
|
||||||
\usepackage{amsmath}
|
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{stmaryrd}
|
|
||||||
\usepackage{graphicx}
|
|
||||||
\usepackage{pdflscape}
|
|
||||||
\usepackage{listingsutf8}
|
|
||||||
\usepackage{spverbatim}
|
|
||||||
\usepackage{placeins}
|
|
||||||
\usepackage{lmodern}
|
|
||||||
%\usepackage{helvet}
|
|
||||||
\usepackage{booktabs}
|
|
||||||
\usepackage[T1]{fontenc}
|
|
||||||
\usepackage{microtype}
|
|
||||||
\usepackage{framed}
|
|
||||||
\usepackage[colorlinks=true,
|
|
||||||
linkcolor=blue,
|
|
||||||
urlcolor=blue,
|
|
||||||
breaklinks=true,
|
|
||||||
citecolor=blue]{hyperref}
|
|
||||||
\usepackage{prettyref}
|
|
||||||
\usepackage{lastpage}
|
|
||||||
\usepackage{subcaption}
|
|
||||||
\usepackage{tabularx}
|
|
||||||
\usepackage{adjustbox}
|
|
||||||
\usepackage{pdfpages}
|
|
||||||
\usepackage{xspace}
|
|
||||||
\usepackage[inline]{enumitem}
|
|
||||||
\usepackage[abbreviate=false,maxbibnames=99,backend=biber]{biblatex}
|
|
||||||
\usepackage{textcomp}
|
|
||||||
\usepackage{tikz}
|
|
||||||
\usepackage[ruled,linesnumbered]{algorithm2e}
|
|
||||||
|
|
||||||
\setkomafont{disposition}{\normalfont\bfseries}
|
|
||||||
|
|
||||||
\setlist[itemize]{itemsep=0.1em}
|
|
||||||
\setlist[enumerate]{itemsep=0.1em}
|
|
||||||
|
|
||||||
|
|
||||||
\newrefformat{tbl}{\hyperref[#1]{Table~\ref*{#1}}}
|
|
||||||
\newrefformat{fig}{\hyperref[#1]{Figure~\ref*{#1}}}
|
|
||||||
\newrefformat{lst}{\hyperref[#1]{Listing~\ref*{#1}}}
|
|
||||||
\newrefformat{equ}{\hyperref[#1]{Equation~\ref*{#1}}}
|
|
||||||
\newrefformat{sec}{\hyperref[#1]{Section~\ref*{#1}}}
|
|
||||||
\newrefformat{alg}{\hyperref[#1]{Algorithm~\ref*{#1}}}
|
|
||||||
\renewcommand{\arraystretch}{1.2}
|
|
||||||
|
|
||||||
\newcommand\bigforall{\mbox{\Large $\mathsurround0pt\forall$}}
|
|
||||||
\everymath{\displaystyle}
|
|
||||||
|
|
||||||
\lstset{ %
|
|
||||||
backgroundcolor=\color{white}, % choose the background color; you must add \usepackage{color} or
|
|
||||||
basicstyle=\ttfamily, % the size of the fonts that are used for the code
|
|
||||||
breakatwhitespace=true, % sets if automatic breaks should only happen at whitespace
|
|
||||||
breaklines=true, % sets automatic line breaking
|
|
||||||
captionpos=b, % sets the caption-position to bottom
|
|
||||||
escapeinside={(*}{*)}, % if you want to add LaTeX within your code
|
|
||||||
extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8
|
|
||||||
frame=single, % adds a frame around the code
|
|
||||||
keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible)
|
|
||||||
language=TeX, % the language of the code
|
|
||||||
numbers=left, % where to put the line-numbers; possible values are (none, left, right)
|
|
||||||
numbersep=5pt, % how far the line-numbers are from the code
|
|
||||||
numberstyle=\tiny\color{gray}, % the style that is used for the line-numbers
|
|
||||||
rulecolor=\color{black}, % if not set, the frame-color may be changed on line-breaks within not-black text (e.g. comments (green here))
|
|
||||||
showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces'
|
|
||||||
showstringspaces=false, % underline spaces within strings only
|
|
||||||
showtabs=false, % show tabs within strings adding particular underscores
|
|
||||||
stepnumber=1, % the step between two line-numbers. If it's 1, each line will be numbered
|
|
||||||
tabsize=2, % sets default tabsize to 2 spaces
|
|
||||||
title=\lstname, % show the filename of files included with \lstinputlisting; also try caption instead of title
|
|
||||||
emph=[3]{int:,array,set,of,int,if,then,else,constraint,var,union,endif,function,where,in,div,predicate,let,opt,full,format,def,for,True,False,return,or},
|
|
||||||
emphstyle=[3]\color{ForestGreen},
|
|
||||||
emph=[2]{length,max,forall,startEmptyBuffer,fix,startEmptyBufferShow,exactly,cumulative,occurs,deopt,sum,,all},
|
|
||||||
emphstyle=[2]\color{blue},
|
|
||||||
commentstyle=\color{BrickRed},
|
|
||||||
stringstyle =\color{red},
|
|
||||||
}
|
|
||||||
|
|
||||||
\begin{document}
|
|
||||||
|
|
||||||
\subject{High Performance Computing}
|
|
||||||
\title{Reduction trees for MPI Reductions}
|
|
||||||
\subtitle{Project 2}
|
|
||||||
|
|
||||||
\author{Johannes Winklehner\\1226104 \and Armin Friedl\\1053597}
|
|
||||||
\date{\today}
|
|
||||||
|
|
||||||
\maketitle
|
|
||||||
|
|
||||||
\tableofcontents
|
|
||||||
|
|
||||||
\newpage
|
|
||||||
|
|
||||||
\section{Problem Description}
|
|
||||||
\label{sec:description}
|
|
||||||
|
|
||||||
The purpose of this project is to compare different implementations of the collective communication call MPI\_Reduce.
|
|
||||||
The compared implementations should all use different forms of Tree Reduction algorithms.
|
|
||||||
A given implementation of the MPI standard, in our case NEC MPI, serves as the baseline for the comparison.
|
|
||||||
\begin{description}
|
|
||||||
\item[Binomial Tree]
|
|
||||||
A binomial tree has a non-fixed degree where each tree $B_i$ has exactly $i$ subtrees of size $B_0$ to $B_{i-1}$.
|
|
||||||
The number of nodes in such a tree is equal to $2^i$ and the depth is $i$.
|
|
||||||
\item[Fibonacci Tree]
|
|
||||||
The Fibonacci tree uses a fixed degree of $2$ where a tree $F_i$ has one subtree $F_{i-1}$ and one subtree $F_{i-2}$.
|
|
||||||
Therefore the number of nodes in this kind of tree is $fib(i+3)-1$ using the Fibonacci function $fib(x) = fib(x-1)+fib(x-2)$ and its depth is as well $i$.
|
|
||||||
\item[Binary Tree]
|
|
||||||
The binary tree used for reduction is a common complete binary tree where a tree $T_i$ has two subtrees $T_{i-1}$.
|
|
||||||
Such a tree has $2^{i+1}-1$ nodes and its depth is, as for the other types, $i$.
|
|
||||||
\end{description}
|
|
||||||
|
|
||||||
\begin{center}
|
|
||||||
\begin{minipage}{.4\textwidth}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\node [circle,draw]{$B_i$}
|
|
||||||
child { node [circle,draw]{$B_{i-1}$}}
|
|
||||||
child {node [circle,draw] {$B_{i-2}$}}
|
|
||||||
child {node {\dots} edge from parent[draw=none]}
|
|
||||||
child {node [circle,draw] {$B_0$}};
|
|
||||||
\end{tikzpicture}
|
|
||||||
%\caption{Binomial Tree of size $i$}
|
|
||||||
\end{minipage}
|
|
||||||
\begin{minipage}{.2\textwidth}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\node [circle,draw]{$F_i$}
|
|
||||||
child { node [circle,draw]{$F_{i-1}$}}
|
|
||||||
child {node [circle,draw] {$F_{i-2}$}};
|
|
||||||
\end{tikzpicture}
|
|
||||||
%\caption{Fibonacci Tree of size $i$}
|
|
||||||
\end{minipage}
|
|
||||||
\begin{minipage}{.2\textwidth}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\node [circle,draw]{$T_i$}
|
|
||||||
child { node [circle,draw]{$T_{i-1}$}}
|
|
||||||
child {node [circle,draw] {$T_{i-1}$}};
|
|
||||||
\end{tikzpicture}
|
|
||||||
%\caption{Complete Binary Tree of size $i$}
|
|
||||||
\end{minipage}
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
All three implementations of the reduce function must use exactly the same interface as the MPI standard defines it.
|
|
||||||
This interface is shown in \prettyref{lst:reduce}.
|
|
||||||
This requires that all implementations support any arbitrary MPI datatype as well as operations.
|
|
||||||
The standard also provides some constraints regarding the associativity and commutativity of executable operations.
|
|
||||||
Every MPI operation must be associative, but does not necessarily have to be commutative.
|
|
||||||
This means that all results of the operation must be computed in the MPI rank order of all processes.
|
|
||||||
|
|
||||||
\begin{lstlisting}[language=C, caption=MPI Reduce interface, label=lst:reduce]
|
|
||||||
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
|
|
||||||
\end{lstlisting}
|
|
||||||
|
|
||||||
The standard also defines additional features of the reduce function, for example an in place operator for the root process.
|
|
||||||
However, since those details were not mentioned in the assignment description, we did not consider them as part of the project.
|
|
||||||
|
|
||||||
The basic algorithm for a tree reduction, which will be shown in the next section, is very similar for all kinds of trees and uses Point-to-Point communication between tree nodes.
|
|
||||||
The assumption for our implementations to be efficient is that the underlying communication network is fully connected and allows for bidirectional communication.
|
|
||||||
|
|
||||||
\FloatBarrier
|
|
||||||
|
|
||||||
\section{Implemented Algorithms}
|
|
||||||
\label{sec:algorithms}
|
|
||||||
|
|
||||||
The basic algorithm for a tree reduction is very simple and is shown in \prettyref{alg:reduce}.
|
|
||||||
At first the parent and all child nodes have to be determined to know the communication partners of each process.
|
|
||||||
Then each process receives the partial results from all of its children and calculates its own result from the received data.
|
|
||||||
To ensure the correctness of the result for non-commutative operations, the iteration over the child nodes has to be done in rank order.
|
|
||||||
Processes which are leaf nodes in the tree have no children and therefore skip the receiving part of the algorithm.
|
|
||||||
If a process has a parent and is therefore not the root process, it sends its result to the determined parent node.
|
|
||||||
However if the process is the root process the reduction is finished and can be returned.
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Tree Reduce}
|
|
||||||
\label{alg:reduce}
|
|
||||||
\KwIn{An array $\vec{a}$ of a given $datatype$ with size $count$ for each process}
|
|
||||||
\KwOut{The result of the reduction on the $root$ process}
|
|
||||||
determine $parent$ and $children$\;
|
|
||||||
$result = \vec{a}$\;
|
|
||||||
\ForAll{child in children}{
|
|
||||||
receive $result$ from $child$\;
|
|
||||||
$result =$ local reduce of received array and $result$\;
|
|
||||||
}
|
|
||||||
\eIf{parent exists}{
|
|
||||||
send $result$ to $parent$\;
|
|
||||||
}{
|
|
||||||
$output = result$\;
|
|
||||||
}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
The calculation of the parent and child nodes is the only aspect which has to be changed for all possible kinds of trees.
|
|
||||||
However there are of course certain optimizations possible where some knowledge about the structure of the tree can be used.
|
|
||||||
Such implementation details will be shown in the following part.
|
|
||||||
The code for all our implementations can be found in the Appendix in \prettyref{sec:appendix}.
|
|
||||||
|
|
||||||
\subsection{Binomial Tree Reduce}
|
|
||||||
|
|
||||||
The first of the three implementations we completed was the binomial tree reduction.
|
|
||||||
Since there were already some examples and explanations on how reductions and broadcasts work on binomial trees presented during the lectures, this was probably the most straightforward part of the project.
|
|
||||||
When looking at some trees of different sizes we quickly noticed that the position of each node is static and the tree only grows in one direction.
|
|
||||||
This fact can be used in a sense that the children do not have to be precomputed but instead can be calculated during the loop before the corresponding receive operation.
|
|
||||||
A comparison between a $B_2$ and a $B_3$ tree is shown in \prettyref{fig:binomtrees}.
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\begin{center}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\begin{scope}
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}}
|
|
||||||
child {node [circle,draw] {$2$}
|
|
||||||
child {node [circle,draw] {$3$}}};
|
|
||||||
\end{scope}
|
|
||||||
\begin{scope}[shift={(5,0)}]
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}}
|
|
||||||
child {node [circle,draw] {$2$}
|
|
||||||
child {node [circle,draw] {$3$}}}
|
|
||||||
child {node [circle,draw] {$4$}
|
|
||||||
child {node [circle,draw] {$5$}}
|
|
||||||
child {node [circle,draw] {$6$}
|
|
||||||
child {node [circle,draw] {$7$}}}};
|
|
||||||
\end{scope}
|
|
||||||
\end{tikzpicture}
|
|
||||||
\caption{Comparison between a $B_2$ and a $B_3$}
|
|
||||||
\label{fig:binomtrees}
|
|
||||||
\end{center}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
From some of those trees we then determined that for the node with rank $r$ the child in each iteration is $r+i$ where $i=1$ at the start and is multiplied by $2$ after each iteration.
|
|
||||||
Before each iteration there is an additional condition, which checks if the node has any children left or if it should send the result.
|
|
||||||
|
|
||||||
\subsection{Fibonacci Tree Reduce}
|
|
||||||
|
|
||||||
The core difference of a Fibonacci tree compared to a binomial tree is the fixed degree of $2$.
|
|
||||||
To guarantee the correct order of the computed operation the position of a node inside the tree is not only dependent on the rank of the process, but also on the total size of the tree.
|
|
||||||
This is due to the fact that all ranks in one subtree must be lower than the ranks in the second subtree.
|
|
||||||
Therefore the position of a node with a certain rank changes depending on the tree size.
|
|
||||||
This can be seen in the comparison of the trees $F_2$ and $F_3$ in \prettyref{fig:fibtrees}.
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\begin{center}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\begin{scope}
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}}
|
|
||||||
child {node [circle,draw] {$2$}
|
|
||||||
child {node [circle,draw] {$3$}}};
|
|
||||||
\end{scope}
|
|
||||||
\begin{scope}[shift={(5,0)}]
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}
|
|
||||||
child { node [circle,draw]{$2$}}}
|
|
||||||
child {node [circle,draw] {$3$}
|
|
||||||
child {node [circle,draw] {$4$}}
|
|
||||||
child {node [circle,draw] {$5$}
|
|
||||||
child {node [circle,draw] {$6$}}}};
|
|
||||||
\end{scope}
|
|
||||||
\end{tikzpicture}
|
|
||||||
\caption{Comparison between a $F_2$ and a $F_3$}
|
|
||||||
\label{fig:fibtrees}
|
|
||||||
\end{center}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
During the receiving step of the algorithm we do not need a loop any more, since there are always two or fewer children for each node.
|
|
||||||
On the other hand the calculation of the children now has to be done in a loop.
|
|
||||||
As a first step we have to determine the size of the tree which can contain all processes.
|
|
||||||
This can be done by searching the Fibonacci numbers for the first value which is greater than the number of processes.
|
|
||||||
Since we know the size of both subtrees using the Fibonacci numbers we can determine whether a node is supposed to be in the left or right subtree.
|
|
||||||
When doing this recursively the position of a node and its children can be calculated.
|
|
||||||
The runtime of this part depends on the size of the tree and is therefore bound by the Fibonacci numbers.
|
|
||||||
|
|
||||||
Now that all communication partners have been determined each process has to execute at most two receives and afterwards one send command.
|
|
||||||
When comparing this technique to the binomial tree, it is noticeable that a tree $F_3$ already has one node fewer than a $B_3$.
|
|
||||||
This means that the binomial tree can handle more processes in the same number of rounds.
|
|
||||||
|
|
||||||
\subsection{Binary Tree Reduce}
|
|
||||||
|
|
||||||
The reduction using a binary tree can be implemented in a way very similar to the Fibonacci tree, since the degree is also two.
|
|
||||||
The key difference is of course the structure of the trees and therefore the calculation of the children.
|
|
||||||
Again the position of certain nodes changes depending on the size of the tree since the lower ranks must be in the left subtree and the higher ones in the right subtree.
|
|
||||||
The structure of such trees can be shown rather nicely because they are simply complete binary trees.
|
|
||||||
There is again a comparison between a tree $T_2$ and $T_3$ which is shown in \prettyref{fig:bintrees}.
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\begin{center}
|
|
||||||
\begin{tikzpicture}
|
|
||||||
\begin{scope}
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}
|
|
||||||
child { node [circle,draw]{$2$}}
|
|
||||||
child { node [circle,draw]{$3$}}}
|
|
||||||
child { node [circle,draw]{$4$}
|
|
||||||
child { node [circle,draw]{$5$}}
|
|
||||||
child { node [circle,draw]{$6$}}};
|
|
||||||
\end{scope}
|
|
||||||
\begin{scope}[shift={(5,0)}]
|
|
||||||
\node [circle,draw]{$0$}
|
|
||||||
child { node [circle,draw]{$1$}
|
|
||||||
child { node [circle,draw]{$2$}
|
|
||||||
child { node [circle,draw]{$3$}}
|
|
||||||
child { node [circle,draw]{$4$}}}
|
|
||||||
child { node [circle,draw]{$5$}
|
|
||||||
child { node [circle,draw]{$6$}}
|
|
||||||
child { node [circle,draw]{$7$}}}}
|
|
||||||
child { node [circle,draw]{$8$}
|
|
||||||
child { node [circle,draw]{$9$}
|
|
||||||
child { node [circle,draw]{$10$}}
|
|
||||||
child { node [circle,draw]{$11$}}}
|
|
||||||
child { node [circle,draw]{$12$}
|
|
||||||
child { node [circle,draw]{$13$}}
|
|
||||||
child { node [circle,draw]{$14$}}}};
|
|
||||||
\end{scope}
|
|
||||||
\end{tikzpicture}
|
|
||||||
\caption{Comparison between a $T_2$ and a $T_3$}
|
|
||||||
\label{fig:bintrees}
|
|
||||||
\end{center}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
The tree size as well as the computation of the child nodes can be done using a logarithmic function on the number of processes.
|
|
||||||
The rest works in exactly the same way as the previously explained algorithms.
|
|
||||||
When structuring the tree like this the drawback is that in each round a node receives data from both children.
|
|
||||||
As a result the number of rounds for this algorithm is the size of the tree plus an additional round.
|
|
||||||
|
|
||||||
\FloatBarrier
|
|
||||||
|
|
||||||
\section{Results}
|
|
||||||
\label{sec:results}
|
|
||||||
|
|
||||||
\FloatBarrier
|
|
||||||
|
|
||||||
\section{Analysis}
|
|
||||||
\label{sec:analysis}
|
|
||||||
|
|
||||||
\section{Appendix}
|
|
||||||
\label{sec:appendix}
|
|
||||||
|
|
||||||
\lstinputlisting[language=C]{../binom_reduce.c}
|
|
||||||
|
|
||||||
\lstinputlisting[language=C]{../fib_reduce.c}
|
|
||||||
|
|
||||||
\lstinputlisting[language=C]{../bin_reduce.c}
|
|
||||||
|
|
||||||
\end{document}
|
|
||||||
|
|
||||||
|
|
||||||
%%% Local Variables:
|
|
||||||
%%% mode: latex
|
|
||||||
%%% TeX-master: t
|
|
||||||
%%% End:
|
|
Loading…
Reference in a new issue