diff --git a/plot/plot.py b/plot/plot.py index 14a5fd8..198739c 100644 --- a/plot/plot.py +++ b/plot/plot.py @@ -21,6 +21,7 @@ while i<=64: xlbl.append(repr(i)) i *= 2 +print(xlbl) # memory values = [] bandwidth = 10.6 diff --git a/roofline/src/Makefile b/roofline/src/Makefile index adb4fb0..845fe45 100644 --- a/roofline/src/Makefile +++ b/roofline/src/Makefile @@ -1,7 +1,7 @@ -all: bin lib +all: clean bin lib # Roofline Binary -bin: roofline roofline_o3 roofline_fma roofline_fma_o3 roofline_fma_fast_o3 roofline_fma_fast_fastmath_o3 +bin: roofline roofline_o3 roofline_fma roofline_fma_o3 roofline_fma_fast_o3 roofline_fma_fast_o2 roofline_fma_fast_fastmath_o3 mkdir bin mv $^ bin @@ -20,12 +20,15 @@ roofline_fma_o3: roofline.c aikern_fma_o3.a roofline_fma_fast_o3: roofline.c aikern_fma_fast_o3.a gcc -Wall -Wextra -std=c99 -fopenmp $^ -o $@ +roofline_fma_fast_o2: roofline.c aikern_fma_fast_o2.a + gcc -Wall -Wextra -std=c99 -fopenmp $^ -o $@ + roofline_fma_fast_fastmath_o3: roofline.c aikern_fma_fast_fastmath_o3.a gcc -Wall -Wextra -std=c99 -fopenmp $^ -o $@ # Static Libraries -lib: aikern.a aikern_o3.a aikern_fma.a aikern_fma_o3.a aikern_fma_fast_o3.a aikern_fma_fast_fastmath_o3.a +lib: aikern.a aikern_o3.a aikern_fma.a aikern_fma_o3.a aikern_fma_fast_o2.a aikern_fma_fast_o3.a aikern_fma_fast_fastmath_o3.a mkdir lib mv $^ lib @@ -49,6 +52,11 @@ aikern_fma_o3.a: aikern.c aikern.h ar rcs $@ aikern_fma_o3.o rm aikern_fma_o3.o +aikern_fma_fast_o2.a: aikern.c aikern.h + gcc -Wall -Wextra -Wno-unused -O2 -mavx -mfma -fopenmp -Ofast -c -o aikern_fma_fast_o2.o $< + ar rcs $@ aikern_fma_fast_o2.o + rm aikern_fma_fast_o2.o + aikern_fma_fast_o3.a: aikern.c aikern.h gcc -Wall -Wextra -Wno-unused -O3 -mavx -mfma -fopenmp -Ofast -c -o aikern_fma_fast_o3.o $< ar rcs $@ aikern_fma_fast_o3.o @@ -61,6 +69,9 @@ aikern_fma_fast_fastmath_o3.a: aikern.c aikern.h # Cleanup clean: + rm -f *.a + rm -f *.o + rm -f roofline roofline_o3 roofline_fma roofline_fma_o3 roofline_fma_fast_o3 roofline_fma_fast_o2 roofline_fma_fast_fastmath_o3 rm -fR bin rm -fR lib diff --git a/roofline/src/aikern.c b/roofline/src/aikern.c index f043d01..1068a41 100644 --- a/roofline/src/aikern.c +++ b/roofline/src/aikern.c @@ -25,10 +25,11 @@ kern_result kernel_dispatch(kernel_t kernel, size_t size, size_t runs) { - kern_result result; + kern_result result = {0}; result.runs = runs; result.starts = malloc(sizeof(double)*(runs)); result.ends = malloc(sizeof(double)*(runs)); + result.size = size; if(result.starts==NULL || result.ends==NULL) { @@ -41,6 +42,7 @@ kern_result kernel_dispatch(kernel_t kernel, case SIMPLE_1_16: result.flops = 1; + result.kern_name = "Simple 1/16"; for(size_t r=0; r # include # include +# include # include # include # include @@ -67,7 +68,7 @@ static void testkern(double* a, double* b, double* c, size_t size); /** * @brief pretty prints a kern_result */ -static void print_kernresult(kern_result* result); +static void print_kernresult(kern_result* result, const char* logname); int main(int argc, char* argv[]) { prog_name = argv[0]; @@ -108,6 +109,7 @@ int main(int argc, char* argv[]) { size_t size = get_size(size_arg); int runs = get_int(runs_arg); + // Allocating arrays printf("Will run with array sizes of %zu elements\n", size); printf("Will calculate min, max, avg for %d runs\n", runs); double* a = malloc(sizeof(double)*(size)); @@ -119,7 +121,8 @@ int main(int argc, char* argv[]) { printf("Allocated 3 arrays (3*%.2f MB = %.2f GB)\n", (sizeof(double)*(size)/1024.0/1024.0), (sizeof(double)*(size)*3/1024.0/1024.0/1024)); printf("Filling arrays with dummy values. This will also warm the cache\n"); - + + // Filling arrays with arbitrary numbers #pragma omp parallel for for (size_t j=0; jkern_name); + } + } + + char logpath[20]; + snprintf(logpath, sizeof(logpath), "%s/%s", "log", logname); + FILE* log = fopen(logpath, "w"); + if(log == NULL) + bail_out("Couldn't open log file for %s", result->kern_name); + + if(fputs("run,start,end,delta,GFLOP/s\n", log) == EOF) + { + fclose(log); + bail_out("Couldn't write header to log file"); + } + + printf("=== %s ===\n", result->kern_name); + + double min; + double max; + double sum = 0.0; + double deltas[result->runs]; + + deltas[0] = result->ends[0] - result->starts[0]; + min=deltas[0]; + max=deltas[0]; + sum+=deltas[0]; + + for(size_t i=1; iruns; i++) + { + deltas[i] = result->ends[i] - result->starts[i]; + sum+=deltas[i]; + + if(deltas[i] < min) min=deltas[i]; + if(deltas[i] > max) max=deltas[i]; + + double gflops = ((result->flops * result->size) / deltas[i]) / 1.0E9; + + if(fprintf(log, "%zu,%.4f,%.4f,%.4f,%.4f\n", + i, result->starts[i], + result->ends[i], deltas[i], + gflops) == EOF) + { + fclose(log); + bail_out("Couldn't write to log file"); + } + } + + + printf("%d flop(s) per run\t %zu run(s)\n\n", result->flops, result->runs); + printf("Min: %.4f \t Max: %.4f \t Avg: %.4f\n", min, max, (sum/result->runs)); + + printf("\n\n\n"); + + + + if(fclose(log)) + { + bail_out("Couldn't close log file for %s", result->kern_name); + } } diff --git a/roofline/src/roofline_fma b/roofline/src/roofline_fma deleted file mode 100755 index 3d5832b..0000000 Binary files a/roofline/src/roofline_fma and /dev/null differ diff --git a/roofline/src/roofline_fma_fast_fastmath_o3 b/roofline/src/roofline_fma_fast_fastmath_o3 deleted file mode 100755 index 1bd004a..0000000 Binary files a/roofline/src/roofline_fma_fast_fastmath_o3 and /dev/null differ diff --git a/roofline/src/roofline_fma_fast_o3 b/roofline/src/roofline_fma_fast_o3 deleted file mode 100755 index 1bd004a..0000000 Binary files a/roofline/src/roofline_fma_fast_o3 and /dev/null differ diff --git a/roofline/src/roofline_fma_o3 b/roofline/src/roofline_fma_o3 deleted file mode 100755 index bc3903e..0000000 Binary files a/roofline/src/roofline_fma_o3 and /dev/null differ diff --git a/roofline/src/roofline_o3 b/roofline/src/roofline_o3 deleted file mode 100755 index 43a7d01..0000000 Binary files a/roofline/src/roofline_o3 and /dev/null differ