From 4d407ada15889c6eafa7f2aebc803fe44cfe1722 Mon Sep 17 00:00:00 2001 From: Armin Friedl Date: Mon, 20 Jun 2016 01:13:23 +0200 Subject: [PATCH] nix wichtiges --- roofline/src/Makefile | 2 +- roofline/src/aikern.c | 29 ++++++++++++++++++++--------- roofline/src/aikern.h | 5 ++++- roofline/src/roofline | Bin 18168 -> 18136 bytes roofline/src/roofline.c | 9 +++++---- roofline/src/roofline_avx | Bin 18168 -> 18136 bytes roofline/src/roofline_avxfma | Bin 14072 -> 14040 bytes roofline/src/roofline_o3 | Bin 18168 -> 18136 bytes roofline/src/roofline_o3avx | Bin 18168 -> 18136 bytes 9 files changed, 30 insertions(+), 15 deletions(-) diff --git a/roofline/src/Makefile b/roofline/src/Makefile index 742bf38..ac89864 100644 --- a/roofline/src/Makefile +++ b/roofline/src/Makefile @@ -31,7 +31,7 @@ aikern_o3avx.a: aikern.c aikern.h gcc -O3 -mavx -c -o aikern_o3avx.o aikern.c ar rcs aikern_o3avx.a aikern_o3avx.o -# This is the only option that actually uses fma without optimizing the hell out of the kernel +# This is the only version that actually uses FMA aikern_avxfma.a: aikern.c aikern.h gcc -O2 -mavx -mfma -c -o aikern_avxfma.o aikern.c ar rcs aikern_avxfma.a aikern_avxfma.o diff --git a/roofline/src/aikern.c b/roofline/src/aikern.c index 64e5c59..e66d7f0 100644 --- a/roofline/src/aikern.c +++ b/roofline/src/aikern.c @@ -98,15 +98,6 @@ void kernel_8_1_fuseaware(double* a, double* b, double* c, size_t size) } } -void kernel_1_8_vo(double* a, double* b, double* c, size_t size) -{ - double tmp=0.0; - for(size_t i=0; i unpredictable. + */ + + volatile double tmp=0.0; + for(size_t i=0; iaV?uMfmXDKX9Z75+LSTmU+LQgH* zRfOIhWs>>&D*dB2`$Y{+Is0m;XRCO6K^ z{eI5(obUOZ?~n7n_fB?lXFIur9x-Z{35wAYvZ3qVcWpfqnh5EUx`vPEL^F+EG*i}< zX*^(<$pQK)_jNjV?9$U4NJ-t99nt8nf}?vb4hxURUgb~SA?@+WL{Yy}N433-v9?Lp zlG8TX++u5++}`5Q9ig!Dox=+uV%)^TBDzt)3kPGIV0%u*A$B=qO zQAfvxFecGai+qpG&evlFz99I*x6Lp*3*iri{^IZ#@ebgIiYwuu5Y-<->O}<#@2pXH5*Uv|35Vkw41)lM z&4ohtKK&*L*((VNK;;cRu0UMdrLpRo%E7*BU{nVCFWikVDBED}gTF9#W3xWK_rsz( zvQAmh4WLlFqcy!NS0bcUT&Jj)wXKltu`ewz(3c&hw9ol*R7Bla%fHh8FouB=Iz_NAroGEH|Ewb%~q@h1u&`NH`DqS4UKQuu@>E64_?}Tbs zg#1*$(l+237dG^N5zm_&doKOuITpMUuhS0ScCH{cYV?pOM>m_T!8p+rTX7W^`D`8FDq?VtxEr`ByDsgwlH%iAMPCaI6Cp=M`bWI z64_T6JaQQ2({j$2v>e|{aMUF76JuuqqgNS=;{3-L>jog#86(gy8fVM{SO-`H7`ee% z1aJWG6yS=Vp~3)%0IhJ9uS_tO2e{%FHjgQw9D`$g67UT_s4{;*8Saf#M;Pqm0eRajPAXB-36C z=!7Zf0g{ytTp0<$(*jT4b$TRcjb#ATN%}*MFXa+ykZdir=Vn_{f$9bg{b(lS9Tp zQcZ6zSkKq^Vh?OL40dA~KkY||Wtpzy>_ zob*_+o2#Z@6|d%+s93UkW7N#A?>vk;ZI1j_kU@i8)>}f(03ri^5Kz3Z7L?2Ipj~pw z*ws%G@fxi!cGJ3Ik9Puk91%&kB%@wkP~TeiQAhCo5Ya}VlAwj@uN~ew=Ho8 z%5<+As`hqo4cJ|f8{aZ0=9EYS-InC$_R=AUeGSG)#6zM9vG19&YrRISe!ReOQjh%? zww2dlj1ywtN{Ia#>>}}!KGU9z$w5+J0%#vI?NDF3q|vTFx&sTr>mm-4c(x_F-A)?J z9o%U~DHz4o>g}_7`&XK?)Xg2$^*FZPf0!9hC1ljlYq6PSx9e1`PL)K@r9xo)w3e%1+bd*NDh3iGdW$tjj>D_92^(z4*c7ojr`!boZ zk1v?xr~kw9O}2K^e%>SN7QF2!M@{>-G+FOw!1_@BX7K0#?x6b20Q~Tvtc5BP`~ww#^z^Qx6{>SRk5RGXf635rx9F`}X@MpHXW zTxi+Bol1}O(Z&v$agt`5v~jjg)n-`M2ck8JIB7ymMI~x(su)}!tZB&Y|KEEz5A@8O z^Zn<4eCI#s{P*tNz5o*qFuP2~bIBaRri_GaZO?mMS2s!4k-ACk7dmsaR%NEODo!!n zt*FUvdW88`>cn4fsdKH=em+#xv*#DJk-mQt-`e0lP0v{Ct63c3{r&zg#T}9)`J$31 z#~0n~(fOhc9=*6>EN#+Rm^-vXx6u2chM;SxBby`kF7c&rB*}k^4@|v?ONDwA?HbVX zz;;$am;a;~j)^D4cR+Nv4D$Yy?CxXkmaFXUF6}Y4r3RT>2HECH5JH`hl6Z4Ol0vx_ zkOn~F10I{#U_Wg?A3CSUgez?Z!Cz-H3H~)UD-Tkx_y^z?+UjfuzOB+`3iaZ~U}_cG z)`0Z@u!#G#untfuOx6^RL|Ee@n^xvS7KIO)bH&BLC)PqlEt}gDT4M2{w;|!;I6@9X znlPypY}yF>v#_`clO4dcXX7%35G44&5c~r?9FYH#vWQp(YH|E`gbH8a5qvqwcMJJG z{~GuP#v-5hA;MN9>;|Knp+Y$S5rn-!6BwQ0qD$w2rK7=N&1_ag|e z0F7M|UjSadt>}2@HP}29?Swd}0QqVJ|E*9|so4Zjc0%=gLt7QaUASKsYR^9C^7pRf z;U0Zghs;ZsxlVP}0}p>HOk_C4=^*31v^#lcYV|}bA&ti6y#JcG0mxQ#C;Wug>gRZ! z<<6fvmphj&e}?zn)bec>zU5Q4+0S4iBjR7af~sA8S*!)LpXXQi@cus6@Lk@~+c1Xr zbupl0{|d_Y`9PA656si@jt8u7F+>Y(%e6wA+rS5w6>5aGADiG&d=DQ*K0FZT3KSXL zfy{h&z&ka?70AqU2W+|SK#@Uk^gG!-r(BMU?4D!J#E%8Xbr1x{9gd~liIGJ)U-7=^ zM2_9t%{52Z<|y#F9oIN^Pq*7~ho#3j?MH0$0B8k&KOeq6j}PA};T_#PTQ(4ubS;U{lF{y|5ozOnJ!ddBD;ausgI$q0K7GbJ}ERzyG zS;c8GLP(|@Zj-~OLU@?r9UnBlcjFz_Lim?&ZiO3a?&AG_qi^cpsQGwUl3Kd9RyS?8 zSxIA>_&Utnid&?{DIxG9;&V|f)3hZPE4;JjI}ieb&Dyz3sah?VZ=4Y~DPH-C*Ej@! zxO2bab9I;=WwXX#%eIn`70USNkAS#SW@n;wkS)}$fJL#?(uRY121cVPyho>|n8%kt z1{>ws{3IKkfhodPgpY^#?BKI0d&bZm6DsM`W)t%wy>Dt@;({-j;~9JHF1e)r5>~Yk znA~r$lI4708sal9|9zML-$LFsx!A)aapDIzf^5b%jTh2^HdLY@;S|(zfx*9b40pf1 z0Ow831x`gD^NV53KZaUP@zZj@-;eD`36l{w7k~AP&`n=R(q+Kn|4LF6&Y)*d@=9-m zqUV+*X+d##NRlc5#oLlp4`_-=QY&EbT}kQ#+zx2f5ONQY1N7gMq*Z`_gfnbHJv^=* zfHm-%=>q%&@G@W&-ZunZC(pwhoCm~~Dvxwl_5_V#kS@(GD3~q+JFG(Vh9~u=r`X3{ z)Hacl33Kyj*e0U003FJY93GUUL5B8cPcGlj==8--{dA{3w^VN})tgH7hElz@3@%Su z0k>Z9r6f&8-ZFg?E{-d|3B4Y6n+r%5=?w_Te*${z5WSH7R6;6*RM0$2Mq&|2#gN=i z>b6)CDq*&aZn9)d-VV}Ra74HWaq11f(kx6KqxntJSop=r1l22G&!LAb842%#G@kZC z%zr>~L&etrAZFdSF<*oI0KIL=csvsh?lyR4f#zHGI1_`ecntHvKJkYvKqLq+LK6Fps2#&l5Q%rY;2d|(^IOrf2Jj-Fix4yQyue85kkqkhQ9 zfkcRW_<&cSqaHs!u$f^y*zQ7?S4`z_D(@O`1bRHVEujpJhGyo_Rq^p@3}pZOBu-Yw zhE)1s<`ZVQtVjxpPFoc-$Y`g|S?T6Ea^5LqXq44WH-U9ESW_T7{Efq=CPy0RrnvNk zcCcD$FEfsLi}r%|Vb%LehT<*X9z=bJ-pqA*9HI8v>E?JKG7>ZCn!XC`xFe;RX!Y!L zW+DyGPPZ=rpA@1e9(8Tig)EZw$)s65nQf|}9HW?Z^4Nl-`5<^2h>3KltHc-%$v*GP z<7f2g;&f&h<%A@7Lgf8aXMkkXu%va=+PSDFP)DjhmiKJX<2k*S&dH0E!yKES>ha?M zTa7#tQb~Iq>E3#<8sHS7qw;H1qhcTX`D)a5Rli<6UyrKauj<>=I9Zbh`^NTXmH*%< z{(mU`1q44QH$89 z9jpt$iU%{IUY7j20LEn)%hbu^4%!ZKD}7_mh3V_7*KR1N+PJ>Hs&+%cbn^4+wRKe+ z8$G14q58QR;;CuytgG4lorgVmaPBWP)UiNFMtx0PmA$ICYRg8-C8W{)3#8!hN+!i= z3dhl-r3JcJA$hWBq;ytF><*i<r!^+8?9abga gZ1Bj!kS3WVDpRDHL0g`jqU)QZXkLk@M@zE*4)YOTi%vC39hf6RRtc+tA9U(I)I)JMZ5>jkJ5Wl{+yUCO@ zZ)U%r_kHjCKJWYEeRp@VlRMkV?emCHyG&4wmXHly_q}54kn3j6!4CxrHVV6JK(5q9T*58B!bko)Bn~ zqSpHJv<^t9JqHn+g}hVoLf{)0oR$kBA-#|aI<+CcbzIokuk~P>6PUUgvs7Ug^;}5( zHl$uw)X{Mvj7hYo!L9`o3HQxHSc`-oAz>~O-m)YT>U@7gzAwRd+XUrV_`<13cpeGQ z`~ZZn%})3=5_TmLjyZq6;(r56{4(VZJjw9h>B(%>? zSYZdkYsm>KuAYT&F0cfcb`(I_QxtwL-T~ZDaZ5QUMD?4H`jrBOch)F835>^~gv0R- zhCu+s<^mymw|*0Z>i@C zeZ8=btWy?t11Qw)Xie|Rl?Z7S*D2~HZ7XDZ_^e>KL+UI<;apDGJ?R`(xN1pLZ zq%}hug#Mo8?W}b{9Ogo_=cjNX8BIA7QTD9dh|n4GOUIjxW#xhUyZ>li#;7&Ms3iz$ z|M4BhnB*xsEN0$i0c)JVI^VgMz_NAroGEH|Ew=51q@mwm(3akaRk}Eye{h0^(!F<; z-3isM2>GdgrES17E^O$3FP=9y_Q&)CbCPd5Eicgy-*#?6Y|59+(SNwpspzW>Y~B}K zAN+MtWx;j9no#HbA6*jcM(0ArhbqeB-$Q``4gp%>CSRUlEDvzyO*W4ypd5o!d=&5_D4st7%5ZtQ0p9=| z{%o{$QD6aAJkEFR);u5x(R2zUO$8X;wOm*g+;=i-dg;epyAlcS8y$8oD2 zktEYz3+RL?=K+!m2d<2S;Aw#;?;1Unv&J$2>LmRp$Cq*uHAuFW+Hx5)nw1z{2riqVJ#?^ z;6c0OlCi6wB;r+CU*x8BMIP@2^f)4tZb?SRj993+Y!N5Y;4-&27dD9_8N^ypH{(ZT z0o}ID9VpekZm8PZy)|HWL2i7>pqNu44Rl+Qo7+W)Aoehfk%)&x6Jq~q#;)}mvHI}> z$4Nc*Gi)ob!WbvS{yib~d$5bdOZrTEGA0K}feD~}%(O#&>5@jfe&G%*0MdL^Dz0#c>rL-g)2m+z7_kHF zPS}^pgnfM096$XVmT$7PoA&b_S-0SAM>%5Jx24H?KLgf>@-Krw|7Qo)X9nPR2W2hv zQBKmK6>jbvPMRChTc)>ZG*IleWWz4#+vZ&X8g9dR^Jw%p2*m%2b``t1N3iwo)W^Wv zKSf6B8EK>a#l!T~5{WB}eOR)ScR$|PytQ%5rog7MIiWAhZu(TYQ=I-Uo&I0lOAnSW q5(~3rJ)khlht~}1$HM27D-*z!Qx@v)~uhKxndNzYtet`c{d{vU9Kuq%G z24Y)%`arD7XAn0{#4UOobq5Fa<^GQ~h`5G>ISg`ki!Xm8N&GEVm~oLT74B2C>p{x` z+ZhEt{0T7<7mthYf#_|Eviu2p_c3qVHF|fq?ik%xN0{59bZa#T;Vwu?yg4RG;XE5i zgCMbj&*3*ZPdU$p&l*U=)ea-aH#jUDzusYILCO<<58PZwgTu&nR68u;ezGx`+PRMP zU_AgV;yxX$15^r=-OQj7+O))>llcgX!bh0%#AU!I)Q_T5wQl;V)}Q43ZL)@zFfk0i}3yD zYv3E4Ncj8@5w;V;UNEW|Dujz3LD&y8;W0wEmk|C&CVYtR4B>lmJYUqTY}o((9lt^d zC%~196@;+t(S)6PAbfm0;Q^KK`+QEqXC{23{{y~}2@jv3-=P!tr2ye4VEn<3y^kQg z0yIuZd;xgbj^dv18?bpO+Q|t}0rJ&w{H<_IsaY*3yP*30;cbfIF5E8-wdag_`2J^D zxW~Y?Ve9gho|D~;z{8%5AQ?_FddPS$?oHX5T06y#P_t26aV}bRPDOUVm+Yq9J{WM<9`KBkrh@JX}FFbTi{XrfINz9WH7-a6q~$4 zR)JUW&oFz0tbDKF$ny%tM$R?hruUrmxGvIrj=7UQ0iV}3#L#d6Ac|rKn?KA74fl{!;)JSiocG~U)6Rki{o+eo&o+4v%9~(HXTfN3z?Ox-4#$D4>PurG|RE^mdC48ES zGi8L3EIHgEhtGiU2*tWSYX0EHd$bL)uiV@QH`LnA@_)r|8{Vq>bX1bsy0_J~>~Ppo zbDH=j%sWcjq~_^i@I&IWF;b>!%WZaeXDxCO5IBduYnM{BIxycjEpAb~3KXwN1o%kT ze#Ph7C_YMOrw`@WQP>V;eDp^^+$FP9F&w3f^lM;IQfg_Vp?o8y(G=aoGtAcX%Ew@% zEM1W7fHSa+&{dJ1Fq;!vZQeTp2PUt^PgyP0OZdK}iAo5)U`?c)^}FPf<|R_qoG@*` zXh*A9VJ5++J^XzS|1U0oNG|rsSc3S`jS!u=UE_yzpbeKQh!_QRj1c`>aJ2W`#bn;n zTIg2valbgm{o|-(6h9sFyZxjcDWx(K7LZ>(6LiZLl5`ocgEQ!hN`C2GQ1soB zBpoOY4@*)Npmz70A}B zzLcbCgtx-bLKY`0zYV<+cAE!?mKcl#2>)^D?Zfy&&QfhEgjC^tTV_%*NF|WmPVBYW zwACYztWVaOwjx~c_RE`WP$26u;=1KwoL8&Af@Aei1{~2 zUZ~i{AH;0;URU>|;fSJ=yvT0ykH*=s_t z+p#_=XkZ9CYQYa?R%(3Np%sM#iIfl5&Q439K?|5=MDcGT*;O49piYAJ(95%PC@WG~ z;`TxiG$;iz_)u{M)q+nIS5QH0np5E?A0MPeNv6=wK_{NwMutq0eE5K$Lnr!S69W=L z^5Fwsfll<~(}OfCY=^YF(B&0V88Vf3O>hi)BDyUR85$1H$;E3E6VoWj{<*1)tc(w- z_`#efta4c)Gm6bz8#l=4#O}En*7cv~Yx(=*n$d3HRk)|exn(&r{3~eV^ z?YN(sM7@Li!TYf4eI--zmTwQDJ|^DOb$J}a&UqQuL?ALDE9#!L7VKn4O0(eFc^T9c z9GRElTns*DWI#T3ZPf)Wk@YF4RXv&Qs-coZQ5)p3jf~cV;Aun_6jWD&6BQ}z2*eP3L>?1y?feX@1xb8t}@&Rpmxg{Pht3E43SMFMq-mv@^12(5?iW g2^}d9Yf?~>G9@&#aodyA_2=g+n%5HX(bAm%1L%7cX8-^I diff --git a/roofline/src/roofline_avxfma b/roofline/src/roofline_avxfma index cc9fee9291ae0cba809ee15fbd0bffd01b4e93da..73dad9afb1d3e12885f6a1ca92470180c869ffc9 100755 GIT binary patch delta 2895 zcmZuz4NM%@5q^7cHe3w5$KP^b%t9^>h6e6TiR`A17sZMf_h@W4X>ExFP=Q#ge=Y>v zB&~hKn6xXK*65|JT~*c9N|ZEd)Kr&5q^h_hF*)2s3U(XSMr!0lxZ1SA!88enf4co< zch7RpjC612ee>p<*?Di@ZEDZCJ;zGD(L9xr*dt>*M%KLG$u6@nmR&Y7JysQv?O8y! z7p3)48)irOo5H(A7r*se@fNF0(?8TdGY*-i zIhZp0ss>ZNeV)P8<9+$Yx7~ceQz?w{sOP@O2^V9b;jjIZm=O|}N>mn17`^{9P5n}Q zojvQqECjaqY^~`N#<*>^=%|3ovGvc#}oVUcGS}?Z`&5DPsV8rWD zlJV(v(~KVnr|~ga&Bbbj8RuXcv1Ld<+~=2qr-NtXOj6mr=-uSc(e&MZDWrG#OGB^} zO>gtp7|$S~@iZcO#G1)?0(|qL$@vxagp){VC-(NIRpfJ|&eMr+U8herL;dk5z`7iUo!4uN!-3yDRIh1&!bKu8* zh4mL3Nz9L|8#4Rr;;4lgKe<8=QXKXwOgpk+C#m6(Um0U|!8LII=dZ?yy=sfSs*#jF zK9*~*$-cs;y%i;Hpz&jX{(L8h$1L`$D?G1Yt>*xehQ5EnXj+W56bLTY_5~g)@|C=g zhR+%f)ke6%?BqoO9kJ#kgaJH{u9Hws(1yQFnP56O+rX+x*A(iIt=eLcGH&a3wl^}yl!=3k#6d2^#so^=mhU=n_Fmre5= z+5a`oC>}KYrD?v1d3eDz8RpTHX_i7yK?k7c(7pj^727xo9frOD9feLoOE^su-u;ei z=Iv$;j3@w=E(5$?JD^qQF!Za?qb`2Mx7wBXf>)Q-H%ll*0R_vK?$0mXBHs4HoB_76 z?4H_2e>G56gda{B+l3PEpmm$`2eKTz4SIloR95KjgLRxQmVIm4DOkG}`Ihps+!-=K zzm4xHuXL|KaF~yj7gqRT{S-SIfIlsi=9|Eye7wA{`$1SK+Viv0w?2KFQawyw80N)?DoAmHUKPbmdkWR3SH&!G%?Ws2Z@hP0OTTmgewIqO|AVTeTjGa zW!~X0^(|l~pf*;flH-QBxp#Gg;N{J$WnVQ~C9qrwZ-Z^!83zvHzg4t|d z`~|P7k%g0%9jDRvcPHayMn;3Y?(6U>{ESf8xgradxkQ%AI8Q#pZ#rk`C#eBWomHLg zq+gZ|4o>$!Nw+oPV}yIx$dOLR8*scq$NMC334KW^9S&lvgUt6#7{@PnMD&Yjs{}jnK%toBqwj#V2H!Kes^hiQARn)t<}cMLLLl~T-2s>UgYMqF-Mia^ z?d#h2?`!Ys-1}&E@4o)W`R4l8*va}rAvW5eWVudy`1IO(Px_F|6!C(_yM5_D%Hg#Rue^Cuc5Z~8_^f8QSECmI{P3Fj*}ftQD5Q|x?W+?8iIC$j?3TK@dqb)JON M`(I`A>4sJR2L_BNWdHyG delta 3530 zcmZu!dr(tX8b3D>4HR+{UP%xn*q|X=B!I2)#SPl!rrvaPt1CN2fhrVN#20jJr`ym5 zX2@V>w1-;V-Ol=lGp)1jE**A#O~)jn_}Fb%9NMz&YAvho-BB#X2eB@_`+fJ(TJ_A_ z^ZV}a`_Aut=bZ1}+zf4uZ`^4$pG)NkHl-(I`>w)wle=?dj&$ej`r=rAqfNPMv?)Ov zb}KaLrbn2+W$d=~rM%X#b&R96Yhc1S`5#Nv`DHxYp z@`LfMEy=<7wict~L!)Nq{bs7K|F5w^V=jL1|-NxVKGNs&&_ z6o=ZCl4NKZVnjz#t*?Um-Ni!v=-EQYH~M04-+yD5&U-q&%#bk9yCd z-hciFdc%`ZFYu7yQWWe5r<$Q6_|78)2S6sW8U^1*!8c{WhxBHm-b)Ya`BlBZ{VRSQ z1shPX5(O(BEhr~A{XxNr#~xm>8}+uL-uOMx8=Lg-4h9?sQ5O6RoIhFd2agc^8f4s( z_$$!myUO|^lRz7aHah_-Kt38FbR$xw)T|!JV^IBp$dD3s3(l9#g8?JH(7+lV&M|m- z)V6Ge?^N_$7L?VgSgj1FxMbW?+Ml{3qb|)(NUM1zAG#togV^eR{j;>eI4|H`<^6?s zrFX^3=lI}t1K*X$cYMjVhZsy`Lj2QrP_=6>i4B16^ZeS=d}zowev9`EY@5UfqYQBD zpMm^@52qya;ROcXbC(S+h0#LSN`uhlH}T;WB|4$&X)8R6ALFCQ$A%Mp;WD#7oa6F` z1GCb6;haK$*x~et%S?jjqLU;McIMmEyh=5#OzHmmN zZ{V81ob(0z81SS{2hY-Q&J?iqF0lFFDaOkdW|Xn!loEK1=K#|W>-%c!6`uJg?-^=w zVFH0eP}N}QRZNu;=3j6Yvq(IHeadM*c*5}fYHzi7wRerTrmum`TOtq3-L4ETQ1J;F zVMvZV+#wI21;b+u@A*gT$JY+Bd4zxc`gS;>_9!3v3w_^sp#Jl5N$QAhuWNhNVJEGb z;=6F)HMc`*of&~RBEA^MGR<7Z+ToqG(1Q>b9QI?QP-alI25?_HD{fJuTuM|rLMV3Z zt`c*3oE~L!v#uEJB+>?Dd~^l~ACuLYI2~b2l2^l|SZbN$y@e)5rz^QlXQkP)4kf}u zdDfNUfIYB|u~o6Y$a|LF=hNmenK%0yuoba+?Vv`;$$O^MJ@l>ga;A!Yn(n7BWaQ+( zHrY;A^5G{CpYw(8_(Gowg;(TSk4+?qo5p(oknyrEAde@bm6 z-_~B_Rd_8;oN42X%x=}~KTQW=;-kj@Tz!J4P9#=(@arRTPQWxXp& z#Xx8c<#qsHOx?Eh`u$*aLIftOHH`QO_$ctO3qLgE2Z28feCZFYlHhgVF9E;&J{}jJ z2m(%ez_wT~fVHgmTiYfkX`hjk!`kTG+*P{4tlpnby_opSV3xwlF11S_EV=YZ-bA!- zq@Tt^;bix|^7vH7M$}m}e-Riul1e!Gp~J$o(KC)pW*;>dR0i;41ltHK0^51GFfwZ5 z@RrD@0r(|c$d8&iuwY?wPsrDR$m6F7Hk-O@x4@QX%;1ogs{wHWu0-;!gf=u9Dafa_ ziHVsEl>N>ePA0W3gWfHeZIkPaq>*@Tt>%!?O}*1Cws~^iX=L;Pub*xK?^^JtL3a3? zhfO=uZFEb5MZXKYb~?bMGat|ah(4@F6E#{p&_Rr5uFCxcbi_;;z^S5-c$&Z0d?dsTi^-H~-_{2zfw{r&b@ zl~Lf35?@5{;|1FeAh3O;I7p8=0qmdL&H}F-^hHdZ8I$`&TIaIZtgr`Wot3<2uT`TJ za8NgR7l9WyCaR{Uy-il&Sf)wt-=Mp|?xY7?UlebwTi;w%vuWeznug}0V)8=W`lgyq ztu3T=TiuKGq@{ja%ZB=`KSsEF_s{%|j(UoOROIWMYTPw*YhKz!IejMGTO{?qHT&@d z-J2=&Xh~7BR!D}A60P!7ytkO diff --git a/roofline/src/roofline_o3 b/roofline/src/roofline_o3 index 8dd3a95ae6947b09a7228f5c9dbbda21d0cfb038..1457ee0ee238cf5f80780b00bdcb0f828bea0fbe 100755 GIT binary patch delta 2923 zcmZuz4Qx}_6~51j!7+)SogZu`1V3l)mIeqhaWY6$^0J`v5;&B-2u)3YnX~jG}E;2^Z zMTSvU?lWMrkG?6qQ@UYtX)5$$%ZKByH8fv3IdpZ!;ltlb{KRrT&IlP?gwhyLv)A@OEeHvUnh6ds~dvfVnd^EAI9~N$$jw zv}+F-OKRZME^%$VT#gX!JX9?)58=m#1J2;-;JGAm$|Nni)&y*dIuLM%)hz)}7@DN0 zJ%MWNB?Q#I509;4^{J#2NlJ@OYsyK;1VUlZ6AswY;-*RMH$2QC9(ssJ>Euz=3t{!Y zVfEjNdLu1Hco6L+;94-3aBvR79!~f>PFTeWA6Rn=;QJTm9$KI@2VbO+6TZa> z&wT{K-_A`q&j!NrT*7JbvpfDXSdu3>-!9Jg^xS;EC;(q8<{}TRFggd}04F@h30H8! zqPYn-6oT+pe$I~T=HOcdmLSt!g;4f}BFB5m*uH06Aw|L#4;VyPGUqAvy{ zGE$~ZVZ3)`jEyWwVl71b!F|4wvO&K@l)aB_;$kcukdAg4+p5O&)xT)thE<1QRnLVw zd32LulYgJixGEP~fhI2k`qLK(wym@K@6)2<74{tn8vgo4t>Ipxy;!hVp14cHrQU@; zJE~m~YYGF(=y6Y4+&FnGX{$(luQWE#iYrmHpO#g$2wk+hVqN5utSPb6%G8=b20^ga z{J#!Y8i|3&d}VaXrmV`~@}%?&m>GHR{bQfF>IrOps&zxF%38Zy*N4X*SzIb1n`6tk ze6n5rpk4iY_|YrM=rx-%`5;ddZ|E>KgBCy%V=p`*KNTVFoN+nX&y zMP4(1x#5P8_%+7PL#AdKiz5lrEyfOGETtJE7H}3kNhuywAn{;{k8r!Sj%c2V_dwQR$<>f#jf3xugz*gH zQPQ+S>X>&Ns^>O+St>8M%vEfB9eqpkEiMF?bepj>&}>@@B>>!Zhu)WJD%V5JV8{RA zVa>*uGl20lZS|LQ?uO>U-s`i|klf;@z-8dwIr!_q_W%!m3g??U1N;bm#oty?iX@$f zg(Y#z&lbE^b-+?E=#X{uE_!d-QiKkNtuRPk4Qn^W&G>ruVRgo|oEBwhgy`9aJcoxf z@GlE*UTlT8D|k3A70Z06pC*^rX&o#P(tMyqo%HiQzeuTw{&0kl%nM)#D=yQ2)VHZ6M-=D!oA|4XY z@%@$QE8!i-TlO$d=)V8pefxFTIN|&A9N#;@UBpXLCZ4Y|5K@2zaQvo;+f2NKj(^b| zT#ngd;vl(a+o0n`WYElDwq_OXCG_b1^Lqb7>R;&=4(WQ5_ujvm5l`hr)cCCR0cIt7 zRw(H!#HDgsM55%>kC)@0s;lfys)6&&MiN#?GxXQZl2Dg;r+J6P{muTNZT$nAgPT`we*W3bTl%&=(?9g=$acDR zb!Xzx>JlL_(IVwp(juMtQms8(B%yLz+}7yL-g~*+pqI__AoLa4Oq%T_OInIFvobllg1-9r5__u2F!*skooQM8e~(r%mjD0& delta 3540 zcmZu!4^R}>8Gn0#9v8U1J3;RBxI66G0_ueWjGX`2LmKwtUI(j0I>8`Sh>661fHh7! zhdD086?dw6RyOymYq58>KWRSaC*!ry~hHtX`%+E-u$QGGh z4J-?4|B}%cJ}pF5^OSG|On+#I4WFiWo$!aQ(!08~C+JWuQ4S5!EmdGd)k;K_VKL|lbugoj&hCvMTYz{Ql6VtB zM~}U5S*F}f;}*(WGD*`8J|qB4`d?m5y3q~@GVL33BBt?Z}WJ)cND$A z<15}q1dkBGG9p;={eo>zf#46v3;Mq&K{wGmLG(ty0lnCSCwDO5)CzB>g5XCm_J@@J0VwlafRcN_3zk%hKhS|`) z#u@O4%*J|1A{eE>UhwMx+{C=Jv4D*Hirmn~d4wo*vN^*nUw;?5MUJoh)_o5SHN6=; zAlFVjs z_Cm=4PFHZ2gfPV_O?OJuXTtO-#d^*+eQ^BkiRaP4 z4ynq+A}Mr^pYAd_PzB4+BDmBSzUK@7o68xL>OVS`AlQdGZ&_Z`1SCx~Tr4AEWYjYJ z(BC>ndk!rm_suOXugt4{Dyse}Y8ly2%lvjPX~&AGsR{GQ4;uXNw>!KKk zZ86^x1L7ed%)_Fn1wtDW#Y(`U5mDR#xCXElu=TDe_5=0JSC#4bSS$ z&(Tl)Lfedrr_9SOa84u699&S5NP9~ZhbY{YIeqzFN~bUK>hrw%>=M1BL~kz98%p%r zGI;6A5y;Z_!|J9J-7E&$_XceV=wR(kMU$PtiT(v(Y`2oHCRRP3e|Mh*b`EcJM@g? z+KlhdTCQ^%cu@)oLv?mA0?y>DNDWxHu3B>FoZ_Z2>^Zy7vYDE3|I(BCjj^bT!bf| zB&1oCow)&3<{}L}L%AiJxos@uv1a+kFYTk2zC;QZ4W~3lXAu?&4!jq2%(rTq1p(@5&vV7KA}b3e&@wdUn@uIlx#Zm6zX=gLDruX(w?dVNzPYHFx?sTMWXHZ-oS z-SnM@yK~>XUuv*tA(u+{+WKmDby4-^^_WRY$9orwoxdsmL4sz$h>w@JbZQ|vtvFWl zj7dGiJgbx$YSn<34OcEQ7}eK<`WI^#er?gy#&Y;BB(2;o0Xo=uY*9p$is0Lsv`n}P UaOl|?y8gKmfqs{Wj~Cni2OgdjxBvhE diff --git a/roofline/src/roofline_o3avx b/roofline/src/roofline_o3avx index 1b23d6ff0a5391211f52dc62f41ff1fb4cdb45b4..e3288a16823de59e9d741e5c23995d8be4adae09 100755 GIT binary patch delta 2867 zcmZ`*4Nz276uxg^bJ>8m%VNt`un$-!L`7GT6bE^TqYqsQO{bipfD&`cF(Rm}Y)Pbg zqfF4%!ltRs$?0cm!dYvmY22kmpqdGtevm208m(uE68(r0-M;(wtz#>l8SXjvyXSo8 z+<5*D-PPeezJS)6_pr`XOB=ih4pluWe_HwT!dI%$D(nMr+IX zLygJWEoRzmbqn1zU@i0SH4)-#oAo_sMo8#nhD^L+t>GWW)N>(=Tc2&f%m=nFB3e;< zwVsGst{&DtfT^^Ol&ihs?oTROPl>z3mQTdiJTOM+7L`Fcn#9GHF|8`0q?O8vB_jiv@i2m6m_%5=2JI#(tNNaMXmDWYEL7f_7oyE zin;xvcJPgfHnW^cNHDkK+B-Z=7Ph1{@GKQPi+bFr{^C=AQq=QfqMs+x zo(8UYV+ltcR})ro!hBAc#R(_Pv4lF`3)8^&1J?diU|&f|nhQxkq8g7CNam>qYe zTz!H!g2l_U{ZPuz0{^F>pU@fTxCH_lQ9a{Re^k(T3nPsu0X&2j_J_Jr0lY*28}mi? z4*g3I-OC8^qVta4r68{5)M(|}^1g5vZ~wj?Syz3s6}9(GF!skr-M{<2xiw^+GW$&^ zg(f98rFLXVgf!XLDe7r$3$opL#QY?EA-RZ#ZSU7zxX4&*_|e+tCp;2qny&R>zH?zK zYnmBCEkt|f65q)5`b>!^JB!zI)#md^hXy0t%Ej`ppJ^Qtuayz6`CO?zhmJ>l;xEx* zd-`=|prIP<^P`&xwym>gUZP1U^Q_yD)OXKGZNbH0d5T~%t+_ybsjll@O+vRTVqT(0 zX*rxWCf?EWY3SLE;Nz)gD{aq+7uxBbj1r-eew|TKyphZ0lT%s;l)h!2aU?>$@n1Hk zM-HcV!ZuB5>9Z({#(O8$hkGZ`&*M#JsN3z%e8nc=+}dVyeOtNuSGoGNZ^mh*<*Y^N znT*q(`!!hY-fHrcxCV`%x+rHp3=>HrHhQ4IiM|{tSRvF?`+^ng z14cdi5yR^X!*fuSVG)w6kAxh9<_!E7f{P1#<8m5*9G6mLKGm;`>vOcWz)5Qg(p(oX z6NsJUX3KoxvY4s8aE@T7rG-vc77kM&(}^X&=JGl>X3_@>o!%w-s*?~r99PO(;3>$> zpB*knNn`_kFwQA#r$-U{3VbAzMglRhZyB+H8zZsN>+RKJzvW~3Ir!KR`+7|5pTO|HO=@g!1joZuR;b5VGdP^FI#>*F4p zxzH)>)9nx+U0)d)`(rX{eAPUORf!%I9JPsPbh}9;{W1OI;`kMP7TrlMaDK8jm#o(k z{ZNVxoWJl~>hux+ZczIor@z)%CB|y>)7T2!hI1K@Imagr|L8wvc?Mgnf!7#!GvC1X z8F+ZQtRvB?4o3L%uDYnO5x{?mxU9yUOD{dT$SEA>MGGVP$n?=cy+uy58>gTjn`;p) zehh;SS&ut!T+OYtqsS@T%SYEX-RD}|e#A$2#Lz-}iiYXiizQ)h@bksnOwRl38n)DJ zuJ%?htbX``>J7D9?yYNhpy?sHVo61C-x7xq>@JbwObre?yfoh$ZIXlOG^MQ26@3T7 z=@EJvH>(SMO?J{p%WU?`FM{G5dfMO1Vp zP!Vi@QMEg8L<&dE!_tQ^z3n4n;0V9%ptt=zzpc-3kZ-SN%PMD|Rrx3dT47AMlM$x_<|Kc7v2=0pTeO{?CeE zdjR2=pmE6355X&TmK+M5LQ$b;r^lcIl%w$ku7+|^UjX$051{&eprs&>U$sR7z?Qe1IV3=F%+Z-~yJ zO_M}_9|w&89l(#p;1r`6eAFO1Z}a|znCjF>vj;v3zTUY zqB=cC)3X$fDe1~|yE1(yriVGv`Pb$Xmp|mQi1@~p7MxI9pBVT9eb@9({ioxy+}_tx z*ZQj6N}7|Tx6$t`ZI_#8gy2V{7b2`olNM!Jac4c|WGd*fTMzW9RcpZT<>S%@)hl21 zn#xoleBhYsbAFue<1?mR$h4BsB9!sI?}GS%!p=qL2w!AehM=s}lE!=U%$!bFbd%0Z zNS*dpJQgbQ`BUuJgOoA8I(#V9V(oo4L2sms(i6EZTAyCdRny<3dudTdy6t_Fl`Ij1 zvzUIy9k}HV{LPbhL8Ex(nKX zTO$H(gjlhC;e zZF(}hBCZE!gi-d|9k%EvxAn2cZ#n4HAJJp0T7 z>k{P*pikpr_^3sIg*{hVLcWA%IQ!sWl8WUrxq(NCm?*HWQUgri#S=^ZEi)TJL$hsk zd2DPFhwS&{2nrZ&66x*PkEANqg{w0%Yk71)(GKdGlbkwF$vc6JP8jvl4H#X4(FA12 ze%Y94$&pQTLrk)MD@Lt!h?~m2M~C3ONAn)cRK268`U&&qE-LL^>X@6H8Ve!|NhN)S z%NZ?Jq)G~{o14sK)9~D6M;Uw)h>1)*TUIkHr!CsfJguyNkfm%Ew_0i2S)0mA$4pX4 zw-!YULOxJ@K33X)(5Fh1xp8Ler=YHt_0*-ovgmlSXF~ntGW?#pisn5RILoOpO;CbF z-w;m}*Ldo+6l&;@GugKuqh{R|)v~FH<$~I;+Nl=)sX@|9#&E*=E_D#AKb_acD-e-kt>Rw!1P`hr;`r3xI1%>4Kx)&R3 z*EKhh=1p}k)RU(AO--xoH-78k=)9Hme#(sjtcrDy0c8~{b~8b zF}mlI=)Q^qW3-TZvglaFoP_Azuscg#WQPu3nY6mnY>8eE(LYez=*yLlSO&AzH4J8} j6Rz~`tqken3BI(MWZ)^L?T^ne4$o5n@{3ryuRQa=3y=dQ