343 lines
7.3 KiB
C
343 lines
7.3 KiB
C
|
# include <stdlib.h>
|
||
|
# include <stdio.h>
|
||
|
# include <unistd.h>
|
||
|
# include <ctype.h>
|
||
|
# include <sys/time.h>
|
||
|
# include <sys/stat.h>
|
||
|
# include <errno.h>
|
||
|
# include <string.h>
|
||
|
# include <stdint.h>
|
||
|
# include <getopt.h>
|
||
|
# include <stdarg.h>
|
||
|
# include <limits.h>
|
||
|
|
||
|
# include "aikern.h"
|
||
|
|
||
|
|
||
|
/* === Macros === */
|
||
|
|
||
|
/*
 * DEBUG(...) takes printf-style arguments. When ENDEBUG is defined it writes
 * the formatted message followed by a newline to stderr; otherwise it
 * expands to nothing.
 */
#ifndef ENDEBUG
#define DEBUG(...)
#else
#define DEBUG(...) do { fprintf(stderr, __VA_ARGS__); fputs("\n", stderr); } while(0)
#endif
|
||
|
|
||
|
/* === Constants === */
|
||
|
|
||
|
/* === Global Variables === */
|
||
|
char* prog_name;
|
||
|
|
||
|
/* === Prototypes === */
|
||
|
|
||
|
/**
|
||
|
* @brief print usage message
|
||
|
*/
|
||
|
static void usage(void);
|
||
|
|
||
|
/**
|
||
|
* @brief terminate program on program error
|
||
|
* @param fmt printf-style format string of the message to print (may be NULL)
|
||
|
* @param ... values consumed by the format string; the exit value is always EXIT_FAILURE
|
||
|
*/
|
||
|
static void bail_out(char* fmt, ...);
|
||
|
|
||
|
/**
|
||
|
* @brief converts the argument to size_t if possible.
|
||
|
* bails out on error.
|
||
|
* @param oparg the argument to convert
|
||
|
*/
|
||
|
static size_t get_size(char* oparg);
|
||
|
|
||
|
/**
|
||
|
* @brief converts the argument to int if possible.
|
||
|
* bails out on error.
|
||
|
* @param oparg the argument to convert
|
||
|
*/
|
||
|
static int get_int(char* oparg);
|
||
|
|
||
|
/**
|
||
|
* @brief seconds since epoch as a double (microsecond resolution)
|
||
|
*/
|
||
|
static double pin_time(void);
|
||
|
|
||
|
/**
|
||
|
* @brief a simple test kernel with ai of 1/16
|
||
|
*/
|
||
|
static void testkern(double* a, double* b, double* c, size_t size);
|
||
|
|
||
|
/**
|
||
|
* @brief pretty prints a kern_result
|
||
|
*/
|
||
|
static void print_kernresult(kern_result* result, const char* logname);
|
||
|
|
||
|
int main(int argc, char* argv[]) {
|
||
|
prog_name = argv[0];
|
||
|
|
||
|
int opt;
|
||
|
char *size_arg = NULL;
|
||
|
char *runs_arg = NULL;
|
||
|
|
||
|
while((opt = getopt(argc, argv, "s:r:")) != -1)
|
||
|
{
|
||
|
switch(opt)
|
||
|
{
|
||
|
case 's':
|
||
|
size_arg = optarg;
|
||
|
break;
|
||
|
case 'r':
|
||
|
runs_arg = optarg;
|
||
|
break;
|
||
|
case '?':
|
||
|
usage();
|
||
|
default:
|
||
|
usage();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if(optind < argc)
|
||
|
{
|
||
|
|
||
|
for (int index = optind; index < argc; index++)
|
||
|
bail_out ("Non-option argument %s\n", argv[index]);
|
||
|
|
||
|
usage();
|
||
|
}
|
||
|
|
||
|
if(size_arg == NULL || runs_arg == NULL)
|
||
|
usage();
|
||
|
|
||
|
size_t size = get_size(size_arg);
|
||
|
int runs = get_int(runs_arg);
|
||
|
|
||
|
// Allocating arrays
|
||
|
printf("Will run with array sizes of %zu elements\n", size);
|
||
|
printf("Will calculate min, max, avg for %d runs\n", runs);
|
||
|
double* a = malloc(sizeof(double)*(size));
|
||
|
double* b = malloc(sizeof(double)*(size));
|
||
|
double* c = malloc(sizeof(double)*(size));
|
||
|
|
||
|
if(a==NULL || b==NULL || c == NULL)
|
||
|
bail_out("One of the mallocs failed\n. a = %p, b=%p, c=%p", a, b, c);
|
||
|
|
||
|
printf("Allocated 3 arrays (3*%.2f MB = %.2f GB)\n", (sizeof(double)*(size)/1024.0/1024.0), (sizeof(double)*(size)*3/1024.0/1024.0/1024));
|
||
|
printf("Filling arrays with dummy values. This will also warm the cache\n");
|
||
|
|
||
|
// Filling arrays with arbitrary numbers
|
||
|
#pragma omp parallel for
|
||
|
for (size_t j=0; j<size; j++)
|
||
|
{
|
||
|
a[j] = 1.0;
|
||
|
b[j] = 2.0;
|
||
|
c[j] = 3.0;
|
||
|
}
|
||
|
|
||
|
double t;
|
||
|
printf("Heating up machine\n");
|
||
|
t = pin_time();
|
||
|
testkern(a,b,c, size);
|
||
|
t = pin_time() - t;
|
||
|
printf("Machine heating took %.4f microseconds = %.4f seconds (with test OI kernel)\n", (t*1.0E6), t);
|
||
|
printf("Starting tests...\n\n\n");
|
||
|
|
||
|
// Executing kernels
|
||
|
kern_result simple16 = kernel_dispatch(SIMPLE_1_16, a, b, c, size, runs);
|
||
|
kern_result fma16 = kernel_dispatch(FMA_1_16, a, b, c, size, runs);
|
||
|
kern_result simple8 = kernel_dispatch(SIMPLE_8_1, a, b, c, size, runs);
|
||
|
kern_result fma8 = kernel_dispatch(FMA_8_1, a, b, c, size, runs);
|
||
|
kern_result simple8fm = kernel_dispatch(SIMPLE_8_1_FASTMATH, a, b, c, size, runs);
|
||
|
|
||
|
#ifdef INTRINS
|
||
|
DEBUG("Running manpack now");
|
||
|
kern_result fma8manpack = kernel_dispatch(FMA_8_1_MANPACK, a, b, c, size, runs);
|
||
|
DEBUG("manpack run successful");
|
||
|
#endif
|
||
|
|
||
|
// Freeing arrays
|
||
|
free(a);
|
||
|
free(b);
|
||
|
free(c);
|
||
|
|
||
|
// Printing results
|
||
|
print_kernresult(&simple16, "simple16");
|
||
|
print_kernresult(&fma16, "fma16");
|
||
|
print_kernresult(&simple8, "simple8");
|
||
|
print_kernresult(&fma8, "fma8");
|
||
|
print_kernresult(&simple8fm, "simple8fastmath");
|
||
|
|
||
|
#ifdef INTRINS
|
||
|
print_kernresult(&fma8manpack, "fma8manpack");
|
||
|
#endif
|
||
|
|
||
|
|
||
|
printf("\n\n\n");
|
||
|
printf("Please refer to the log files in the log/ folder for details about the GFLOP/s of every kernel.");
|
||
|
printf("\n");
|
||
|
printf("Exiting...");
|
||
|
exit(EXIT_SUCCESS);
|
||
|
}
|
||
|
|
||
|
/**
 * @brief warm-up kernel: doubles every element of the three arrays in place.
 * @param a first array, at least size elements
 * @param b second array, at least size elements
 * @param c third array, at least size elements
 * @param size number of elements processed in each array
 */
static void testkern(double* a, double* b, double* c, size_t size)
{
    const double factor = 2.0E0;

    #pragma omp parallel for
    for (size_t idx = 0; idx < size; idx++)
    {
        /* per element: 3*8 bytes read + 3*8 bytes written, 3 FLOPs
         * -> AI = 3/(2*3*8) = 1/16 */
        a[idx] *= factor;
        b[idx] *= factor;
        c[idx] *= factor;
    }
}
|
||
|
|
||
|
/* === Helper Functions === */
|
||
|
|
||
|
/**
 * @brief reads the wall clock through gettimeofday().
 *
 * Bails out if gettimeofday() reports an error.
 *
 * @return seconds since the epoch; the microsecond part is the fraction
 */
static double pin_time(void)
{
    struct timeval now;

    if(gettimeofday(&now, NULL) != 0)
    {
        bail_out("Time measurement impossible. gettimeofday error");
    }

    const double whole_seconds = (double) now.tv_sec;
    const double fraction = (double) now.tv_usec * 1.e-6;

    return whole_seconds + fraction;
}
|
||
|
|
||
|
/**
 * @brief converts the argument to size_t if possible.
 *
 * The whole string must be a base-10 integer in the range 1..SIZE_MAX.
 * Calls usage() when the text is not a number, has trailing characters or
 * is not positive; bails out when the value is out of range.
 *
 * @param oparg the argument to convert
 * @return the parsed value
 */
static size_t get_size(char *oparg)
{
    char *end = NULL;

    /* strtoll signals overflow only through errno, so clear it first */
    errno = 0;
    long long int llsize = strtoll(oparg, &end, 10);

    /* the value did not fit into long long and was clamped */
    if(errno == ERANGE)
    {
        bail_out("Only size between 1 to %zu allowed.", (size_t) SIZE_MAX);
    }

    /* no digits at all, trailing garbage, or a non-positive value */
    if(end == oparg || *end != '\0' || llsize <= 0)
        usage();

    if((unsigned long long int) llsize > SIZE_MAX)
    {
        bail_out("Only size between 1 to %zu allowed.", (size_t) SIZE_MAX);
    }

    return (size_t) llsize;
}
|
||
|
|
||
|
/**
 * @brief converts the argument to int if possible.
 *
 * The whole string must be a base-10 integer in the range 1..INT_MAX.
 * Calls usage() when the text is not a number, has trailing characters or
 * is not positive; bails out when the value is out of range.
 *
 * @param oparg the argument to convert
 * @return the parsed value
 */
static int get_int(char *oparg)
{
    char *end = NULL;

    /* strtoll signals overflow only through errno, so clear it first */
    errno = 0;
    long long int llvalue = strtoll(oparg, &end, 10);

    /* the value did not fit into long long and was clamped */
    if(errno == ERANGE)
    {
        bail_out("Only size between 1 to %d allowed.", INT_MAX);
    }

    /* no digits at all, trailing garbage, or a non-positive value */
    if(end == oparg || *end != '\0' || llvalue <= 0)
        usage();

    if(llvalue > INT_MAX)
    {
        bail_out("Only size between 1 to %d allowed.", INT_MAX);
    }

    return (int) llvalue;
}
|
||
|
|
||
|
static void usage()
|
||
|
{
|
||
|
fprintf(stderr, "USAGE: ./roofline -s <size> -r <runs> \n");
|
||
|
fprintf(stderr, "e.g.: ./roofline -s 100000 -r 5 \n");
|
||
|
bail_out("Invalid paramers");
|
||
|
}
|
||
|
|
||
|
static void bail_out(char* fmt, ...)
|
||
|
{
|
||
|
if(fmt != NULL)
|
||
|
{
|
||
|
char msgbuf[150];
|
||
|
|
||
|
va_list vl;
|
||
|
va_start(vl, fmt);
|
||
|
|
||
|
if(vsnprintf(msgbuf, sizeof(msgbuf), fmt, vl) < 0)
|
||
|
msgbuf[0] = '\0';
|
||
|
|
||
|
va_end( vl);
|
||
|
|
||
|
if(strlen(msgbuf) > 0)
|
||
|
(void)fprintf(stderr, "%s: %s \n", prog_name, msgbuf);
|
||
|
|
||
|
}
|
||
|
|
||
|
if(errno != 0)
|
||
|
(void)fprintf(stderr, "%s: %s\n", prog_name, strerror(errno));
|
||
|
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
/**
 * @brief pretty prints a kern_result
 *
 * Creates the directory "log" if stat() cannot find it, writes one CSV line
 * per run (run index, start, end, delta, GFLOP/s) to log/<logname> and
 * prints the minimum, maximum and average delta to stdout. Every run,
 * including run 0, is logged. Bails out on any I/O error.
 *
 * @param result  kernel result to report; reads kern_name, runs, flops,
 *                size, starts[] and ends[]
 *                (NOTE(review): starts/ends are presumably timestamps in
 *                seconds as produced by pin_time() - confirm in aikern.h)
 * @param logname file name of the log inside the "log" directory
 */
static void print_kernresult(kern_result* result, const char* logname)
{
    struct stat st = {0};

    if (stat("log", &st) == -1)
    {
        if(mkdir("log", 0700))
        {
            bail_out("Couldn't create log directory for %s", result->kern_name);
        }

        /* the failed stat() left errno set; the directory exists now */
        errno = 0;
    }

    /* min/max/avg are undefined without at least one run */
    if(result->runs < 1)
        bail_out("No runs recorded for %s", result->kern_name);

    char logpath[256];
    int path_len = snprintf(logpath, sizeof(logpath), "%s/%s", "log", logname);

    /* refuse to open a silently truncated path */
    if(path_len < 0 || (size_t) path_len >= sizeof(logpath))
        bail_out("Log file name for %s is too long", result->kern_name);

    FILE* log = fopen(logpath, "w");
    if(log == NULL)
        bail_out("Couldn't open log file for %s", result->kern_name);

    if(fputs("run,start,end,delta,GFLOP/s\n", log) == EOF)
    {
        /* keep the errno of the failed write across fclose() */
        int write_errno = errno;
        fclose(log);
        errno = write_errno;
        bail_out("Couldn't write header to log file");
    }

    printf("=== %s ===\n", result->kern_name);

    double min = 0.0;
    double max = 0.0;
    double sum = 0.0;

    for(size_t i=0; i<result->runs; i++)
    {
        double delta = result->ends[i] - result->starts[i];
        sum += delta;

        /* the first run seeds both extremes */
        if(i == 0 || delta < min) min = delta;
        if(i == 0 || delta > max) max = delta;

        double gflops = ((result->flops * result->size) / delta) / 1.0E9;

        /* fprintf reports failure with a negative value */
        if(fprintf(log, "%zu,%.4f,%.4f,%.4f,%.4f\n",
                    i, result->starts[i],
                    result->ends[i], delta,
                    gflops) < 0)
        {
            int write_errno = errno;
            fclose(log);
            errno = write_errno;
            bail_out("Couldn't write to log file");
        }
    }

    printf("%d flop(s) per run\t %zu run(s)\n\n", result->flops, result->runs);
    printf("Min: %.4f \t Max: %.4f \t Avg: %.4f\n", min, max, (sum/result->runs));

    printf("\n\n\n");

    if(fclose(log))
    {
        bail_out("Couldn't close log file for %s", result->kern_name);
    }
}
|