# include # include # include # include # include # include # include # include "aikern.h" /* === Macros === */ #ifdef ENDEBUG #define DEBUG(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while(0) #else #define DEBUG(...) #endif /** * @brief terminate program on program error * @param msg additional message to print * @param ret exit value */ static void bail_out(char* fmt, ...); /** * @brief microseconds since epoch */ static double pin_time(void); kern_result kernel_dispatch(kernel_t kernel, double* a, double* b, double* c, size_t size, size_t runs) { kern_result result = {0}; result.runs = runs; result.starts = malloc(sizeof(double)*(runs)); result.ends = malloc(sizeof(double)*(runs)); result.size = size; if(result.starts==NULL || result.ends==NULL) { bail_out("One of the mallocs failed\n. starts = %p, ends=%p", result.starts, result.ends); } switch(kernel) { case SIMPLE_1_16: result.flops = 1; result.kern_name = "Simple 1/16"; for(size_t r=0; r inline void kernel_8_1_fuseaware_manpack(double* a, size_t size) { #pragma omp parallel for for(size_t i=0; i<(size-4); i+=4) { // pack doubles __m256d packvec = _mm256_set_pd(a[i], a[i+1], a[i+2], a[i+3]); REP60(packvec = _mm256_fmadd_pd(packvec, packvec, packvec);); REP4(packvec = _mm256_fmadd_pd(packvec, packvec, packvec);); a[i] = packvec[0]; a[i+1] = packvec[1]; a[i+2] = packvec[2]; a[i+3] = packvec[3]; } } #endif /* INTRINS */ /******************************************** * Kernels which potentially compile to * * different operational intensities than * * specified * ********************************************/ void kernel_1_16_simple_dangerous(double* a, double* b, size_t size) { register volatile double tmp = 0.1; #pragma omp parallel for for(size_t i=0; i 0) (void)fprintf(stderr, "%s: %s \n", prog_name, msgbuf); } if(errno != 0) (void)fprintf(stderr, "%s: %s\n", prog_name, strerror(errno)); exit(EXIT_FAILURE); }