diff --git a/roofline/src/Makefile b/roofline/src/Makefile index 742bf38..ac89864 100644 --- a/roofline/src/Makefile +++ b/roofline/src/Makefile @@ -31,7 +31,7 @@ aikern_o3avx.a: aikern.c aikern.h gcc -O3 -mavx -c -o aikern_o3avx.o aikern.c ar rcs aikern_o3avx.a aikern_o3avx.o -# This is the only option that actually uses fma without optimizing the hell out of the kernel +# This is the only version that actually uses FMA aikern_avxfma.a: aikern.c aikern.h gcc -O2 -mavx -mfma -c -o aikern_avxfma.o aikern.c ar rcs aikern_avxfma.a aikern_avxfma.o diff --git a/roofline/src/aikern.c b/roofline/src/aikern.c index 64e5c59..e66d7f0 100644 --- a/roofline/src/aikern.c +++ b/roofline/src/aikern.c @@ -98,15 +98,6 @@ void kernel_8_1_fuseaware(double* a, double* b, double* c, size_t size) } } -void kernel_1_8_vo(double* a, double* b, double* c, size_t size) -{ - double tmp=0.0; - for(size_t i=0; i unpredictable. + */ + + volatile double tmp=0.0; + for(size_t i=0; i