i5-roofline/roofline.c

343 lines
7.3 KiB
C
Raw Permalink Normal View History

2020-09-03 16:49:50 +00:00
# include <stdlib.h>
# include <stdio.h>
# include <unistd.h>
# include <ctype.h>
# include <sys/time.h>
# include <sys/stat.h>
# include <errno.h>
# include <string.h>
# include <stdint.h>
# include <getopt.h>
# include <stdarg.h>
# include <limits.h>
# include "aikern.h"
/* === Macros === */
/*
 * DEBUG(fmt, ...): when ENDEBUG is defined, prints the printf-style message
 * followed by a newline to stderr; otherwise expands to nothing.
 */
#ifdef ENDEBUG
#define DEBUG(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
#else
#define DEBUG(...)
#endif
/* === Constants === */
/* === Global Variables === */
/* Program name (argv[0], set in main); used as prefix for error messages. */
char* prog_name;
/* === Prototypes === */
/**
* @brief prints the usage message to stderr and terminates the program
* through bail_out(); does not return.
*/
static void usage(void);
/**
* @brief terminate program on program error.
* Prints the formatted message (if any) and, when errno is non-zero, the
* matching strerror() text to stderr, then exits with EXIT_FAILURE.
* @param fmt printf-style format string of the message; may be NULL
* @param ... arguments consumed by fmt
*/
static void bail_out(char* fmt, ...);
/**
* @brief converts the argument to size_t if possible.
* The value must be at least 1 and fit into a size_t;
* terminates the program otherwise.
* @param oparg the argument to convert (decimal string)
* @return the converted value
*/
static size_t get_size(char* oparg);
/**
* @brief converts the argument to int if possible.
* The value must be between 1 and INT_MAX;
* terminates the program otherwise.
* @param oparg the argument to convert (decimal string)
* @return the converted value
*/
static int get_int(char* oparg);
/**
* @brief current wall-clock time from gettimeofday()
* @return seconds since the epoch as a double, including the
* microsecond part as fraction
*/
static double pin_time(void);
/**
* @brief a simple test kernel with an arithmetic intensity (ai) of 1/16.
* Multiplies every element of a, b and c by 2.0 in place.
* @param a first array
* @param b second array
* @param c third array
* @param size number of elements in each array
*/
static void testkern(double* a, double* b, double* c, size_t size);
/**
* @brief pretty prints a kern_result.
* Writes per-run CSV rows to the file log/<logname> and prints the
* minimum, maximum and average run time to stdout.
* @param result the kernel result to report
* @param logname file name of the log inside the log/ directory
*/
static void print_kernresult(kern_result* result, const char* logname);
/**
 * @brief entry point of the roofline benchmark.
 * Parses the mandatory options -s <size> and -r <runs>, allocates and fills
 * three double arrays of <size> elements, runs one warm-up kernel, executes
 * every benchmark kernel through kernel_dispatch() and finally reports the
 * results with print_kernresult().
 * @param argc number of command line arguments
 * @param argv command line arguments; argv[0] is stored in prog_name
 * @return EXIT_SUCCESS; all error paths terminate via usage()/bail_out()
 */
int main(int argc, char* argv[]) {
    prog_name = argv[0];

    int opt;
    char *size_arg = NULL;
    char *runs_arg = NULL;
    while((opt = getopt(argc, argv, "s:r:")) != -1)
    {
        switch(opt)
        {
            case 's':
                size_arg = optarg;
                break;
            case 'r':
                runs_arg = optarg;
                break;
            case '?':
            default:
                /* unknown option or missing option argument */
                usage();
                break;
        }
    }

    /* bail_out() terminates the program, so only the first stray argument
     * can ever be reported. */
    if(optind < argc)
        bail_out ("Non-option argument %s\n", argv[optind]);

    if(size_arg == NULL || runs_arg == NULL)
        usage();

    size_t size = get_size(size_arg);
    int runs = get_int(runs_arg);

    /* sizeof(double) * size must not wrap around, otherwise malloc() would
     * hand out buffers that are smaller than the loops below assume. */
    if(size > SIZE_MAX / sizeof(double))
        bail_out("Array size too large. At most %zu elements allowed.", SIZE_MAX / sizeof(double));

    // Allocating arrays
    printf("Will run with array sizes of %zu elements\n", size);
    printf("Will calculate min, max, avg for %d runs\n", runs);

    double* a = malloc(sizeof(double)*(size));
    double* b = malloc(sizeof(double)*(size));
    double* c = malloc(sizeof(double)*(size));
    if(a==NULL || b==NULL || c == NULL)
        bail_out("One of the mallocs failed. a = %p, b = %p, c = %p", (void*)a, (void*)b, (void*)c);

    printf("Allocated 3 arrays (3*%.2f MB = %.2f GB)\n", (sizeof(double)*(size)/1024.0/1024.0), (sizeof(double)*(size)*3/1024.0/1024.0/1024));
    printf("Filling arrays with dummy values. This will also warm the cache\n");

    // Filling arrays with arbitrary numbers
    #pragma omp parallel for
    for (size_t j=0; j<size; j++)
    {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 3.0;
    }

    double t;
    printf("Heating up machine\n");
    t = pin_time();
    testkern(a,b,c, size);
    t = pin_time() - t;
    /* pin_time() yields seconds, hence the factor 1.0E6 for microseconds */
    printf("Machine heating took %.4f microseconds = %.4f seconds (with test OI kernel)\n", (t*1.0E6), t);
    printf("Starting tests...\n\n\n");

    // Executing kernels
    kern_result simple16 = kernel_dispatch(SIMPLE_1_16, a, b, c, size, runs);
    kern_result fma16 = kernel_dispatch(FMA_1_16, a, b, c, size, runs);
    kern_result simple8 = kernel_dispatch(SIMPLE_8_1, a, b, c, size, runs);
    kern_result fma8 = kernel_dispatch(FMA_8_1, a, b, c, size, runs);
    kern_result simple8fm = kernel_dispatch(SIMPLE_8_1_FASTMATH, a, b, c, size, runs);
    #ifdef INTRINS
    DEBUG("Running manpack now");
    kern_result fma8manpack = kernel_dispatch(FMA_8_1_MANPACK, a, b, c, size, runs);
    DEBUG("manpack run successful");
    #endif

    // Freeing arrays
    free(a);
    free(b);
    free(c);

    // Printing results
    print_kernresult(&simple16, "simple16");
    print_kernresult(&fma16, "fma16");
    print_kernresult(&simple8, "simple8");
    print_kernresult(&fma8, "fma8");
    print_kernresult(&simple8fm, "simple8fastmath");
    #ifdef INTRINS
    print_kernresult(&fma8manpack, "fma8manpack");
    #endif

    printf("\n\n\n");
    printf("Please refer to the log files in the log/ folder for details about the GFLOP/s of every kernel.");
    printf("\n");
    printf("Exiting...\n");
    return EXIT_SUCCESS;
}
/**
 * @brief warm-up kernel: scales every element of the three arrays by 2.0
 * in place.
 * Per element: 3*8 bytes read + 3*8 bytes written, 3 FLOPs,
 * i.e. an arithmetic intensity of 3/(2*3*8) = 1/16.
 * @param a first array, modified in place
 * @param b second array, modified in place
 * @param c third array, modified in place
 * @param size number of elements in each array
 */
static void testkern(double* a, double* b, double* c, size_t size)
{
    const double factor = 2.0E0;

    #pragma omp parallel for
    for (size_t idx = 0; idx < size; ++idx)
    {
        a[idx] *= factor;
        b[idx] *= factor;
        c[idx] *= factor;
    }
}
/* === Helper Functions === */
/**
 * @brief reads the wall clock through gettimeofday().
 * Terminates the program via bail_out() when the clock cannot be read.
 * @return seconds since the epoch; the microsecond part is the fraction
 */
static double pin_time(void)
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0)
        bail_out("Time measurement impossible. gettimeofday error");

    const double whole_seconds = (double) now.tv_sec;
    const double fraction = (double) now.tv_usec * 1.e-6;
    return whole_seconds + fraction;
}
/**
 * @brief converts a decimal string to a size_t.
 * The whole string must be a number of at least 1 that fits into a size_t.
 * Input that is empty, not fully numeric, out of range for strtoll() or
 * not positive ends the program via usage(); a value above SIZE_MAX ends
 * it via bail_out().
 * @param oparg the argument to convert
 * @return the converted value
 */
static size_t get_size(char *oparg)
{
    char *end = NULL;

    /* strtoll() reports overflow only through errno, so clear it first */
    errno = 0;
    long long int llsize = strtoll(oparg, &end, 10);

    /* no digits at all, trailing garbage ("12abc") or overflow */
    if(end == oparg || *end != '\0' || errno == ERANGE)
        usage();
    if(llsize <= 0)
        usage();

    unsigned long long int u_llsize = (unsigned long long int) llsize;
    if(u_llsize > SIZE_MAX)
    {
        bail_out("Only size between 1 to %zu allowed.", (size_t) SIZE_MAX);
    }
    return (size_t) llsize;
}
/**
 * @brief converts a decimal string to an int.
 * The whole string must be a number between 1 and INT_MAX.
 * Input that is empty, not fully numeric, out of range for strtoll() or
 * not positive ends the program via usage(); a value above INT_MAX ends
 * it via bail_out().
 * @param oparg the argument to convert
 * @return the converted value
 */
static int get_int(char *oparg)
{
    char *end = NULL;

    /* strtoll() reports overflow only through errno, so clear it first */
    errno = 0;
    long long int llvalue = strtoll(oparg, &end, 10);

    /* no digits at all, trailing garbage ("12abc") or overflow */
    if(end == oparg || *end != '\0' || errno == ERANGE)
        usage();
    if(llvalue <= 0)
        usage();

    if(llvalue > INT_MAX)
    {
        /* this helper is not specific to sizes, so the message is generic */
        bail_out("Only values between 1 and %d allowed.", INT_MAX);
    }
    return (int) llvalue;
}
/**
 * @brief prints the usage message to stderr and terminates the program.
 * Does not return: bail_out() exits with EXIT_FAILURE.
 */
static void usage(void)
{
    (void)fprintf(stderr, "USAGE: ./roofline -s <size> -r <runs> \n");
    (void)fprintf(stderr, "e.g.: ./roofline -s 100000 -r 5 \n");
    bail_out("Invalid parameters");
}
/**
 * @brief terminates the program after reporting an error on stderr.
 * Prints "<prog_name>: <message>" when a non-empty message results from
 * fmt, then "<prog_name>: <strerror(errno)>" when errno was non-zero on
 * entry, and exits with EXIT_FAILURE. Messages longer than the internal
 * 150 byte buffer are truncated.
 * @param fmt printf-style format string of the message; may be NULL
 * @param ... arguments consumed by fmt
 */
static void bail_out(char* fmt, ...)
{
    /* Capture errno first: the stdio calls below are allowed to change it,
     * which would make the strerror() line report the wrong cause. */
    const int saved_errno = errno;
    /* argv[0] may be NULL; never hand a NULL pointer to %s */
    const char *name = (prog_name != NULL) ? prog_name : "roofline";

    if(fmt != NULL)
    {
        char msgbuf[150];
        va_list vl;
        va_start(vl, fmt);
        if(vsnprintf(msgbuf, sizeof(msgbuf), fmt, vl) < 0)
            msgbuf[0] = '\0';
        va_end(vl);
        if(msgbuf[0] != '\0')
            (void)fprintf(stderr, "%s: %s \n", name, msgbuf);
    }
    if(saved_errno != 0)
        (void)fprintf(stderr, "%s: %s\n", name, strerror(saved_errno));
    exit(EXIT_FAILURE);
}
/**
 * @brief reports one kernel result on stdout and in a CSV log file.
 * Creates the directory log/ when it does not exist, writes one CSV row
 * (run, start, end, delta, GFLOP/s) per run to log/<logname> and prints
 * the kernel name, the flop count, the number of runs and the minimum,
 * maximum and average run time to stdout. GFLOP/s is computed as
 * flops * size / delta / 1e9. Any I/O failure ends the program through
 * bail_out().
 * @param result the kernel result; reads kern_name, runs, starts, ends,
 * flops and size
 * @param logname file name of the log inside the log/ directory
 */
static void print_kernresult(kern_result* result, const char* logname)
{
    struct stat st = {0};
    if (stat("log", &st) == -1)
    {
        if(mkdir("log", 0700))
        {
            bail_out("Couldn't create log directory for %s", result->kern_name);
        }
        /* the failed stat() left ENOENT behind; do not let it show up in
         * unrelated error messages later on */
        errno = 0;
    }

    char logpath[256];
    int pathlen = snprintf(logpath, sizeof(logpath), "%s/%s", "log", logname);
    /* refuse to silently write to a truncated file name */
    if(pathlen < 0 || (size_t) pathlen >= sizeof(logpath))
        bail_out("Log file name too long for %s", result->kern_name);

    FILE* log = fopen(logpath, "w");
    if(log == NULL)
        bail_out("Couldn't open log file for %s", result->kern_name);
    if(fputs("run,start,end,delta,GFLOP/s\n", log) == EOF)
    {
        int saved_errno = errno;
        fclose(log);
        errno = saved_errno; /* report the write error, not fclose()'s */
        bail_out("Couldn't write header to log file");
    }

    printf("=== %s ===\n", result->kern_name);

    double min = 0.0;
    double max = 0.0;
    double sum = 0.0;
    /* Start at run 0 so that the first run is logged as well; the
     * statistics below have always included it. */
    for(size_t i=0; i<result->runs; i++)
    {
        double delta = result->ends[i] - result->starts[i];
        sum+=delta;
        if(i == 0 || delta < min) min=delta;
        if(i == 0 || delta > max) max=delta;
        double gflops = ((result->flops * result->size) / delta) / 1.0E9;
        /* fprintf() signals failure with any negative value */
        if(fprintf(log, "%zu,%.4f,%.4f,%.4f,%.4f\n",
                   i, result->starts[i],
                   result->ends[i], delta,
                   gflops) < 0)
        {
            int saved_errno = errno;
            fclose(log);
            errno = saved_errno;
            bail_out("Couldn't write to log file");
        }
    }

    /* avoid a division by zero when there are no runs at all */
    double avg = (result->runs > 0) ? (sum/result->runs) : 0.0;
    printf("%d flop(s) per run\t %zu run(s)\n\n", result->flops, result->runs);
    printf("Min: %.4f \t Max: %.4f \t Avg: %.4f\n", min, max, avg);
    printf("\n\n\n");
    if(fclose(log))
    {
        bail_out("Couldn't close log file for %s", result->kern_name);
    }
}