size.log
This commit is contained in:
parent
0cecff4079
commit
3c79104312
14 changed files with 1652 additions and 42 deletions
11
plot/plot.py
11
plot/plot.py
|
@ -30,6 +30,8 @@ basepeak = 54.4
|
||||||
ymem = []
|
ymem = []
|
||||||
ypeak = []
|
ypeak = []
|
||||||
ybasepeak = []
|
ybasepeak = []
|
||||||
|
yeight = []
|
||||||
|
ysix = []
|
||||||
|
|
||||||
for i in np.arange(0,64,0.1):
|
for i in np.arange(0,64,0.1):
|
||||||
if bandwidth*i < peak:
|
if bandwidth*i < peak:
|
||||||
|
@ -39,6 +41,13 @@ for i in np.arange(0,64,0.1):
|
||||||
|
|
||||||
i=1/32
|
i=1/32
|
||||||
while i<=64:
|
while i<=64:
|
||||||
|
if i < 16 and i > 4:
|
||||||
|
yeight.append(21.78)
|
||||||
|
ysix.append(peak)
|
||||||
|
else:
|
||||||
|
yeight.append(None)
|
||||||
|
ysix.append(None)
|
||||||
|
|
||||||
if bandwidth*i < peak and bandwidth*i*2 < peak:
|
if bandwidth*i < peak and bandwidth*i*2 < peak:
|
||||||
ymem.append(bandwidth*i)
|
ymem.append(bandwidth*i)
|
||||||
ypeak.append(None)
|
ypeak.append(None)
|
||||||
|
@ -55,7 +64,7 @@ while i<=64:
|
||||||
#plot data
|
#plot data
|
||||||
#data = pd.Series(data=values, name='Peak Memory Bandwidth', index=np.arange(0,64,0.1))
|
#data = pd.Series(data=values, name='Peak Memory Bandwidth', index=np.arange(0,64,0.1))
|
||||||
|
|
||||||
data = {'Peak Memory Bandwidth': pd.Series(ymem, index=xlbl), 'Peak Floating-Point Performance (Turbo)': pd.Series(ypeak, index=xlbl)}
|
# One pandas Series per plotted curve, all sharing the xlbl index.
# The 1/16 OI kernel curve must use ysix (filled next to yeight in the loop
# above); it previously reused yeight, which drew the 8 OI curve twice and
# left ysix unused.
data = {
    'Peak Memory Bandwidth': pd.Series(ymem, index=xlbl),
    'Peak Floating-Point Performance (Turbo)': pd.Series(ypeak, index=xlbl),
    'Best 8 OI Kernel': pd.Series(yeight, index=xlbl),
    'Best 1/16 OI Kernel': pd.Series(ysix, index=xlbl),
}
|
||||||
df = pd.DataFrame(data)
|
df = pd.DataFrame(data)
|
||||||
|
|
||||||
ax = df.plot()
|
ax = df.plot()
|
||||||
|
|
210
reduce/size.log
Normal file
210
reduce/size.log
Normal file
|
@ -0,0 +1,210 @@
|
||||||
|
1,0.015060, 0.012820, 0.006505, 0.000924
|
||||||
|
1,0.013010, 0.008138, 0.006707, 0.000929
|
||||||
|
1,0.013959, 0.018517, 0.012665, 0.010040
|
||||||
|
1,0.017139, 0.008266, 0.006602, 0.000933
|
||||||
|
1,0.012186, 0.009795, 0.011971, 0.000928
|
||||||
|
1,0.011320, 0.011973, 0.006519, 0.000926
|
||||||
|
1,0.019044, 0.008140, 0.008787, 0.000913
|
||||||
|
1,0.008048, 0.026552, 0.006572, 0.000934
|
||||||
|
1,0.014868, 0.008198, 0.006745, 0.000927
|
||||||
|
1,0.018982, 0.012490, 0.006542, 0.000935
|
||||||
|
1,0.014860, 0.010456, 0.015956, 0.000929
|
||||||
|
1,0.015556, 0.008380, 0.006694, 0.000931
|
||||||
|
1,0.011853, 0.008191, 0.006520, 0.000935
|
||||||
|
1,0.017231, 0.016554, 0.006573, 0.000938
|
||||||
|
1,0.019388, 0.008211, 0.006490, 0.000940
|
||||||
|
1,0.017562, 0.008292, 0.006570, 0.000940
|
||||||
|
1,0.008377, 0.008385, 0.008122, 0.016472
|
||||||
|
1,0.016700, 0.008092, 0.006615, 0.000940
|
||||||
|
1,0.008701, 0.008410, 0.006554, 0.000936
|
||||||
|
1,0.021345, 0.008315, 0.006538, 0.000935
|
||||||
|
1,0.012839, 0.010863, 0.015722, 0.006039
|
||||||
|
1,0.010784, 0.012452, 0.006464, 0.000943
|
||||||
|
1,0.013310, 0.009412, 0.006499, 0.000939
|
||||||
|
1,0.015488, 0.008262, 0.006519, 0.000957
|
||||||
|
1,0.016392, 0.008346, 0.006548, 0.000946
|
||||||
|
1,0.022343, 0.008379, 0.006572, 0.000951
|
||||||
|
1,0.009021, 0.008462, 0.006554, 0.000950
|
||||||
|
1,0.019012, 0.008520, 0.006710, 0.000960
|
||||||
|
1,0.011137, 0.014027, 0.010778, 0.014246
|
||||||
|
1,0.016490, 0.008227, 0.006638, 0.010030
|
||||||
|
10,0.015200, 0.008364, 0.006514, 0.000942
|
||||||
|
10,0.018563, 0.019307, 0.006683, 0.000939
|
||||||
|
10,0.015394, 0.010670, 0.010332, 0.000954
|
||||||
|
10,0.007525, 0.008310, 0.007347, 0.000953
|
||||||
|
10,0.012634, 0.010634, 0.006643, 0.000956
|
||||||
|
10,0.014186, 0.008059, 0.006486, 0.000947
|
||||||
|
10,0.016306, 0.009247, 0.006542, 0.000958
|
||||||
|
10,0.009274, 0.008334, 0.006675, 0.000944
|
||||||
|
10,0.015010, 0.008208, 0.006568, 0.000956
|
||||||
|
10,0.007146, 0.008374, 0.009934, 0.000952
|
||||||
|
10,0.015609, 0.008445, 0.006520, 0.000954
|
||||||
|
10,0.015018, 0.008323, 0.006675, 0.000961
|
||||||
|
10,0.015876, 0.008303, 0.006915, 0.020554
|
||||||
|
10,0.009362, 0.008383, 0.006504, 0.000963
|
||||||
|
10,0.010540, 0.013352, 0.011971, 0.000955
|
||||||
|
10,0.016023, 0.008194, 0.011112, 0.010048
|
||||||
|
10,0.008929, 0.008427, 0.006597, 0.000962
|
||||||
|
10,0.011935, 0.008371, 0.006765, 0.000969
|
||||||
|
10,0.012526, 0.008457, 0.006569, 0.000962
|
||||||
|
10,0.014575, 0.008361, 0.006579, 0.000950
|
||||||
|
10,0.016678, 0.021072, 0.006916, 0.000959
|
||||||
|
10,0.019073, 0.012327, 0.009451, 0.000967
|
||||||
|
10,0.016419, 0.008326, 0.007186, 0.000963
|
||||||
|
10,0.009798, 0.008325, 0.006630, 0.000959
|
||||||
|
10,0.007363, 0.016486, 0.006578, 0.000957
|
||||||
|
10,0.017285, 0.009254, 0.021329, 0.000991
|
||||||
|
10,0.015300, 0.008662, 0.006476, 0.000973
|
||||||
|
10,0.008793, 0.008317, 0.006629, 0.000961
|
||||||
|
10,0.016962, 0.008466, 0.006804, 0.000973
|
||||||
|
10,0.011350, 0.008511, 0.006569, 0.000978
|
||||||
|
100,0.011944, 0.008318, 0.006828, 0.000974
|
||||||
|
100,0.013588, 0.009846, 0.006560, 0.000975
|
||||||
|
100,0.009091, 0.008647, 0.010100, 0.010636
|
||||||
|
100,0.016221, 0.008714, 0.011530, 0.010031
|
||||||
|
100,0.014302, 0.008184, 0.006780, 0.006496
|
||||||
|
100,0.016849, 0.008263, 0.006687, 0.000981
|
||||||
|
100,0.012719, 0.008396, 0.006918, 0.000976
|
||||||
|
100,0.014992, 0.008173, 0.006631, 0.004980
|
||||||
|
100,0.024122, 0.008202, 0.006504, 0.000982
|
||||||
|
100,0.014949, 0.008518, 0.006513, 0.000981
|
||||||
|
100,0.005903, 0.013908, 0.006588, 0.000970
|
||||||
|
100,0.018213, 0.008668, 0.006526, 0.000983
|
||||||
|
100,0.012493, 0.018007, 0.006539, 0.000981
|
||||||
|
100,0.014951, 0.008529, 0.012096, 0.000977
|
||||||
|
100,0.020768, 0.008437, 0.006678, 0.000983
|
||||||
|
100,0.014121, 0.008367, 0.006633, 0.000984
|
||||||
|
100,0.017092, 0.008173, 0.006618, 0.000984
|
||||||
|
100,0.014134, 0.008309, 0.006683, 0.007205
|
||||||
|
100,0.021945, 0.016707, 0.010962, 0.005524
|
||||||
|
100,0.017444, 0.008253, 0.006535, 0.000998
|
||||||
|
100,0.018334, 0.010120, 0.014758, 0.008821
|
||||||
|
100,0.013700, 0.008144, 0.006571, 0.000988
|
||||||
|
100,0.018027, 0.014722, 0.006547, 0.000987
|
||||||
|
100,0.016101, 0.008572, 0.006593, 0.000992
|
||||||
|
100,0.013204, 0.008207, 0.006552, 0.000990
|
||||||
|
100,0.015996, 0.008204, 0.006574, 0.000989
|
||||||
|
100,0.031931, 0.009827, 0.006538, 0.000990
|
||||||
|
100,0.016046, 0.008238, 0.006489, 0.000992
|
||||||
|
100,0.008523, 0.013016, 0.006636, 0.000994
|
||||||
|
100,0.016014, 0.008304, 0.006663, 0.001004
|
||||||
|
1000,0.013056, 0.022506, 0.006647, 0.001072
|
||||||
|
1000,0.008438, 0.008262, 0.006646, 0.001060
|
||||||
|
1000,0.010373, 0.008534, 0.006729, 0.001073
|
||||||
|
1000,0.025268, 0.008545, 0.006731, 0.001070
|
||||||
|
1000,0.016263, 0.008354, 0.006659, 0.001078
|
||||||
|
1000,0.014329, 0.008434, 0.006751, 0.001071
|
||||||
|
1000,0.017146, 0.010863, 0.006634, 0.001073
|
||||||
|
1000,0.018675, 0.008679, 0.012135, 0.021216
|
||||||
|
1000,0.014633, 0.008818, 0.006577, 0.001090
|
||||||
|
1000,0.023196, 0.011840, 0.006592, 0.001076
|
||||||
|
1000,0.015533, 0.008400, 0.006624, 0.001078
|
||||||
|
1000,0.008505, 0.008295, 0.006727, 0.001077
|
||||||
|
1000,0.014966, 0.008623, 0.006701, 0.001077
|
||||||
|
1000,0.019072, 0.009632, 0.006680, 0.009179
|
||||||
|
1000,0.018672, 0.008424, 0.006804, 0.003599
|
||||||
|
1000,0.017340, 0.009541, 0.007161, 0.001083
|
||||||
|
1000,0.014347, 0.008685, 0.006701, 0.001082
|
||||||
|
1000,0.011189, 0.008424, 0.006667, 0.001084
|
||||||
|
1000,0.015610, 0.008340, 0.006706, 0.001082
|
||||||
|
1000,0.010836, 0.008901, 0.006614, 0.001098
|
||||||
|
1000,0.012571, 0.011788, 0.007889, 0.007866
|
||||||
|
1000,0.019443, 0.009326, 0.006642, 0.001086
|
||||||
|
1000,0.021744, 0.008486, 0.006620, 0.001093
|
||||||
|
1000,0.022898, 0.018285, 0.031074, 0.009292
|
||||||
|
1000,0.012526, 0.008323, 0.006738, 0.001088
|
||||||
|
1000,0.010241, 0.008389, 0.006595, 0.001090
|
||||||
|
1000,0.015677, 0.011739, 0.008517, 0.001094
|
||||||
|
1000,0.013255, 0.008391, 0.006764, 0.001085
|
||||||
|
1000,0.014460, 0.008330, 0.007717, 0.001111
|
||||||
|
1000,0.013080, 0.013283, 0.006677, 0.001104
|
||||||
|
10000,0.018514, 0.009322, 0.007049, 0.001389
|
||||||
|
10000,0.013228, 0.009583, 0.007220, 0.001445
|
||||||
|
10000,0.011737, 0.008952, 0.007170, 0.001427
|
||||||
|
10000,0.014161, 0.013269, 0.007176, 0.001614
|
||||||
|
10000,0.009160, 0.009017, 0.012309, 0.001398
|
||||||
|
10000,0.025428, 0.008998, 0.006959, 0.001429
|
||||||
|
10000,0.012735, 0.011255, 0.007631, 0.024461
|
||||||
|
10000,0.008205, 0.012843, 0.007041, 0.001425
|
||||||
|
10000,0.024209, 0.008824, 0.007011, 0.001421
|
||||||
|
10000,0.012522, 0.010046, 0.007270, 0.001418
|
||||||
|
10000,0.017439, 0.012809, 0.007340, 0.001598
|
||||||
|
10000,0.018774, 0.009051, 0.007153, 0.001395
|
||||||
|
10000,0.014256, 0.008903, 0.007264, 0.001412
|
||||||
|
10000,0.017022, 0.009033, 0.007169, 0.001419
|
||||||
|
10000,0.021680, 0.020549, 0.014481, 0.008859
|
||||||
|
10000,0.019863, 0.011795, 0.007531, 0.001393
|
||||||
|
10000,0.022391, 0.006808, 0.007279, 0.001422
|
||||||
|
10000,0.015490, 0.009025, 0.007194, 0.001431
|
||||||
|
10000,0.014665, 0.010363, 0.007027, 0.001424
|
||||||
|
10000,0.015784, 0.009613, 0.007224, 0.001418
|
||||||
|
10000,0.023222, 0.010015, 0.023339, 0.030465
|
||||||
|
10000,0.018942, 0.025240, 0.011122, 0.001415
|
||||||
|
10000,0.011123, 0.009108, 0.007154, 0.001444
|
||||||
|
10000,0.019147, 0.008789, 0.006974, 0.001461
|
||||||
|
10000,0.013667, 0.012343, 0.015495, 0.001466
|
||||||
|
10000,0.015624, 0.009109, 0.009302, 0.001402
|
||||||
|
10000,0.019067, 0.013941, 0.016134, 0.001437
|
||||||
|
10000,0.011807, 0.012808, 0.007084, 0.001426
|
||||||
|
10000,0.014506, 0.011155, 0.007108, 0.001466
|
||||||
|
10000,0.012350, 0.009386, 0.007166, 0.001446
|
||||||
|
100000,0.022371, 0.035526, 0.023131, 0.027448
|
||||||
|
100000,0.013378, 0.023856, 0.013041, 0.024717
|
||||||
|
100000,0.010871, 0.029044, 0.015932, 0.017511
|
||||||
|
100000,0.012205, 0.032383, 0.017835, 0.015721
|
||||||
|
100000,0.012234, 0.024945, 0.017423, 0.025463
|
||||||
|
100000,0.014343, 0.035935, 0.016265, 0.022189
|
||||||
|
100000,0.012570, 0.023612, 0.016249, 0.023553
|
||||||
|
100000,0.014353, 0.027281, 0.019160, 0.019438
|
||||||
|
100000,0.014454, 0.024914, 0.024764, 0.030581
|
||||||
|
100000,0.013496, 0.029832, 0.014719, 0.023770
|
||||||
|
100000,0.018304, 0.024292, 0.014693, 0.021875
|
||||||
|
100000,0.028399, 0.023915, 0.027137, 0.025491
|
||||||
|
100000,0.010145, 0.022987, 0.015735, 0.021145
|
||||||
|
100000,0.012533, 0.032294, 0.028012, 0.019748
|
||||||
|
100000,0.013240, 0.027931, 0.014247, 0.026066
|
||||||
|
100000,0.013468, 0.025389, 0.014612, 0.017768
|
||||||
|
100000,0.014450, 0.030766, 0.021046, 0.023127
|
||||||
|
100000,0.013543, 0.030922, 0.017547, 0.022809
|
||||||
|
100000,0.013719, 0.023514, 0.016720, 0.028750
|
||||||
|
100000,0.011630, 0.027707, 0.017063, 0.024498
|
||||||
|
100000,0.013445, 0.028254, 0.016900, 0.016538
|
||||||
|
100000,0.013426, 0.026310, 0.014950, 0.025251
|
||||||
|
100000,0.016215, 0.020926, 0.023538, 0.022055
|
||||||
|
100000,0.012278, 0.023712, 0.015968, 0.014424
|
||||||
|
100000,0.012335, 0.022143, 0.014598, 0.014518
|
||||||
|
100000,0.013790, 0.053877, 0.016792, 0.021680
|
||||||
|
100000,0.015594, 0.020772, 0.017987, 0.020527
|
||||||
|
100000,0.012329, 0.029190, 0.015424, 0.021301
|
||||||
|
100000,0.012230, 0.024379, 0.017230, 0.017684
|
||||||
|
100000,0.013589, 0.040487, 0.027573, 0.022245
|
||||||
|
1000000,0.020727, 0.124509, 0.112477, 0.108229
|
||||||
|
1000000,0.032473, 0.126158, 0.118394, 0.098192
|
||||||
|
1000000,0.021973, 0.125135, 0.114933, 0.097082
|
||||||
|
1000000,0.018970, 0.122516, 0.115200, 0.095380
|
||||||
|
1000000,0.020313, 0.127562, 0.111321, 0.101456
|
||||||
|
1000000,0.021555, 0.127997, 0.114297, 0.101177
|
||||||
|
1000000,0.022448, 0.123110, 0.109968, 0.100089
|
||||||
|
1000000,0.022387, 0.123137, 0.115687, 0.143453
|
||||||
|
1000000,0.020858, 0.126027, 0.112245, 0.097318
|
||||||
|
1000000,0.021095, 0.129142, 0.114619, 0.120646
|
||||||
|
1000000,0.021187, 0.134334, 0.117879, 0.103980
|
||||||
|
1000000,0.022047, 0.124922, 0.110836, 0.120224
|
||||||
|
1000000,0.020579, 0.132466, 0.117571, 0.098158
|
||||||
|
1000000,0.022642, 0.124157, 0.113329, 0.111869
|
||||||
|
1000000,0.020913, 0.120785, 0.115758, 0.097079
|
||||||
|
1000000,0.021255, 0.129725, 0.114023, 0.115193
|
||||||
|
1000000,0.022199, 0.130058, 0.111212, 0.100436
|
||||||
|
1000000,0.022980, 0.126308, 0.113577, 0.102413
|
||||||
|
1000000,0.022109, 0.126565, 0.111826, 0.098784
|
||||||
|
1000000,0.021139, 0.123117, 0.113357, 0.139509
|
||||||
|
1000000,0.021675, 0.121999, 0.116524, 0.115985
|
||||||
|
1000000,0.022481, 0.125771, 0.114231, 0.098526
|
||||||
|
1000000,0.021303, 0.128222, 0.119330, 0.114650
|
||||||
|
1000000,0.020648, 0.122423, 0.113259, 0.102972
|
||||||
|
1000000,0.020828, 0.124934, 0.113054, 0.095407
|
||||||
|
1000000,0.021351, 0.131414, 0.114445, 0.103877
|
||||||
|
1000000,0.020946, 0.130915, 0.113755, 0.097917
|
||||||
|
1000000,0.020846, 0.121703, 0.117466, 0.097012
|
||||||
|
1000000,0.021515, 0.124847, 0.117515, 0.105351
|
||||||
|
1000000,0.020116, 0.119978, 0.112513, 0.099434
|
|
@ -114,6 +114,7 @@ for(size_t i=0; i<size; i++){
|
||||||
\bigskip
|
\bigskip
|
||||||
|
|
||||||
\subsubsection{Some Further 8/1 Kernel}
|
\subsubsection{Some Further 8/1 Kernel}
|
||||||
|
\label{sec:advanced-kernels}
|
||||||
Since some effort was put in getting results near peak performance \verb|-Ofast -ffast-math| was used to stretch compiler optimization to the maximum. Unfortunately \verb|-ffast-math| does not preserve strict IEEE compliance. It is therefore allowed to ignore non-associativity of floating point operations. For example $x = x*x*x*x*x*x*x*x$ can be optimized to $x~*=~x; x~*=~x; x~*=~x;$. Clearly this has an effect on the OI of the kernel. To test fastmath the kernel in~\prettyref{lst:8-1-fma-fastmath} was introduced. Mind that a[i] is written out only once and held in registers during a single iteration.
|
Since some effort was put in getting results near peak performance \verb|-Ofast -ffast-math| was used to stretch compiler optimization to the maximum. Unfortunately \verb|-ffast-math| does not preserve strict IEEE compliance. It is therefore allowed to ignore non-associativity of floating point operations. For example $x = x*x*x*x*x*x*x*x$ can be optimized to $x~*=~x; x~*=~x; x~*=~x;$. Clearly this has an effect on the OI of the kernel. To test fastmath the kernel in~\prettyref{lst:8-1-fma-fastmath} was introduced. Mind that a[i] is written out only once and held in registers during a single iteration.
|
||||||
|
|
||||||
\bigskip
|
\bigskip
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
The best results for various kernels are given in~\prettyref{tbl:res-kernels}. The optimization binary \verb|roofline_full_manpack| was used for these results. This is the binary with all optimizations and the intrinsics kernel enabled. The following parameters were used: \verb|roofline_full_manpack -s 150000000 -r 5|. One double array was therefore 1144.41 MB big -- clearly too big for the cache.
|
The best results for various kernels are given in~\prettyref{tbl:res-kernels}. The optimization binary \verb|roofline_full_manpack| was used for these results. This is the binary with all optimizations and the intrinsics kernel enabled. The following parameters were used: \verb|roofline_full_manpack -s 150000000 -r 5|. One double array was therefore 1144.41 MB big -- clearly too big for the cache.
|
||||||
|
|
||||||
|
Note how \verb|simple8| is clearly flawed with \verb|-ffast-math| enabled. This is due to the non-IEEE-compliant optimization described in~\prettyref{sec:advanced-kernels}. At this level of optimization, only \verb|simple8fastmath| (which is fastmath-safe but flawed at lower optimization levels) should be considered as a \emph{replacement} for \verb|simple8|.
|
||||||
|
|
||||||
\begin{table}[h!]
|
\begin{table}[h!]
|
||||||
\centering
|
\centering
|
||||||
\begin{tabular}{ll}
|
\begin{tabular}{ll}
|
||||||
|
@ -18,6 +20,17 @@ The best results for various kernels are given in~\prettyref{tbl:res-kernels}. T
|
||||||
\label{tbl:res-kernels}
|
\label{tbl:res-kernels}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
|
The roofline graph with the best runs of the two best kernels of each category (\verb|simple16| and \verb|fma8|) is depicted in~\prettyref{fig:roofline-withres}.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\begin{adjustbox}{center}
|
||||||
|
\includegraphics[width=0.8\linewidth]{res/rooftop_res}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{Roofline graph with kernel results}
|
||||||
|
\label{fig:roofline-withres}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
%%% Local Variables:
|
%%% Local Variables:
|
||||||
%%% mode: latex
|
%%% mode: latex
|
||||||
%%% TeX-master: "../report"
|
%%% TeX-master: "../report"
|
||||||
|
|
|
@ -69,10 +69,17 @@
|
||||||
\newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}}
|
\newlabel{lst:8-1-fma}{{7}{7}{FMA aware $8$ OI kernel}{lstlisting.7}{}}
|
||||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}}
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}FMA aware $8$ OI kernel}{7}{lstlisting.7}}
|
||||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}}
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}}
|
||||||
|
\newlabel{sec:advanced-kernels}{{3.3.1}{8}{Some Further 8/1 Kernel}{subsubsection.3.3.1}{}}
|
||||||
\newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}}
|
\newlabel{lst:8-1-fma-fastmath}{{8}{8}{FMA aware $8$ OI kernel with fastmath correctness}{lstlisting.8}{}}
|
||||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}}
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}FMA aware $8$ OI kernel with fastmath correctness}{8}{lstlisting.8}}
|
||||||
\newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}}
|
\newlabel{lst:8-1-intrinsics}{{9}{8}{FMA aware $8$ OI kernel with intrinsics}{lstlisting.9}{}}
|
||||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}}
|
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}FMA aware $8$ OI kernel with intrinsics}{8}{lstlisting.9}}
|
||||||
\newlabel{LastPage}{{}{9}{}{page.9}{}}
|
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{8}{section.4}}
|
||||||
\xdef\lastpage@lastpage{9}
|
\newlabel{sec:results}{{4}{8}{Results}{section.4}{}}
|
||||||
\xdef\lastpage@lastpageHy{9}
|
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Roofline graph with kernel results\relax }}{9}{figure.caption.5}}
|
||||||
|
\newlabel{fig:roofline-withres}{{2}{9}{Roofline graph with kernel results\relax }{figure.caption.5}{}}
|
||||||
|
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results for various kernels\relax }}{9}{table.caption.4}}
|
||||||
|
\newlabel{tbl:res-kernels}{{2}{9}{Results for various kernels\relax }{table.caption.4}{}}
|
||||||
|
\newlabel{LastPage}{{}{10}{}{page.10}{}}
|
||||||
|
\xdef\lastpage@lastpage{10}
|
||||||
|
\xdef\lastpage@lastpageHy{10}
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
# Fdb version 3
|
# Fdb version 3
|
||||||
["biber report"] 1466704438 "report.bcf" "report.bbl" "report" 1466801023
|
["biber report"] 1466704438 "report.bcf" "report.bbl" "report" 1466802710
|
||||||
"report.bcf" 1466799693 92382 2683b542d57d2326e3b37a6a44222b52 ""
|
"report.bcf" 1466802710 92382 2683b542d57d2326e3b37a6a44222b52 ""
|
||||||
"roofline.bib" 1466704433 4157 226e47c750579a202f66b6f0e4df67bb ""
|
"roofline.bib" 1466704433 4157 226e47c750579a202f66b6f0e4df67bb ""
|
||||||
(generated)
|
(generated)
|
||||||
"report.bbl"
|
"report.bbl"
|
||||||
"report.blg"
|
"report.blg"
|
||||||
["pdflatex"] 1466799692 "report.tex" "report.pdf" "report" 1466801023
|
["pdflatex"] 1466802708 "report.tex" "report.pdf" "report" 1466802710
|
||||||
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-t1.enc" 1136849721 2971 def0b6c1f0b107b3b936def894055589 ""
|
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-t1.enc" 1136849721 2971 def0b6c1f0b107b3b936def894055589 ""
|
||||||
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc" 1136849721 2900 1537cc8184ad1792082cd229ecc269f4 ""
|
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc" 1136849721 2900 1537cc8184ad1792082cd229ecc269f4 ""
|
||||||
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1272929888 3287 e6b82fe08f5336d4d5ebc73fb1152e87 ""
|
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1272929888 3287 e6b82fe08f5336d4d5ebc73fb1152e87 ""
|
||||||
|
@ -196,22 +196,25 @@
|
||||||
"/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1455657841 31706 2be2b4306fae7fc20493e3b90c2ad04d ""
|
"/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1455657841 31706 2be2b4306fae7fc20493e3b90c2ad04d ""
|
||||||
"/usr/share/texlive/texmf-var/web2c/pdftex/pdflatex.fmt" 1457104667 3492982 6abaa3262ef9227a797168d32888676c ""
|
"/usr/share/texlive/texmf-var/web2c/pdftex/pdflatex.fmt" 1457104667 3492982 6abaa3262ef9227a797168d32888676c ""
|
||||||
"inputs/introduction.tex" 1466184626 76 eaf0f76fa74815989416f6f6d1c36f8b ""
|
"inputs/introduction.tex" 1466184626 76 eaf0f76fa74815989416f6f6d1c36f8b ""
|
||||||
"inputs/kernels.tex" 1466800173 12285 7459a5d3d19f8cfbe2ace9512c674169 ""
|
"inputs/kernels.tex" 1466801470 12314 221246c7ddebe8abc5c09e1cb5aad74e ""
|
||||||
|
"inputs/results.tex" 1466802707 1673 adc8885edd6ff1eb3675420597af7bc0 ""
|
||||||
"inputs/roofline.tex" 1466710567 5525 b96d99208485f5095cd10d50a150dff7 ""
|
"inputs/roofline.tex" 1466710567 5525 b96d99208485f5095cd10d50a150dff7 ""
|
||||||
"report.aux" 1466799693 6920 efd026f088aa74618447caae8f088925 ""
|
"report.aux" 1466802710 7761 28af04e5431bab6872f175a15a5dc18b ""
|
||||||
"report.bbl" 1466704439 7655 4b5f697a70789470cde9f922b6440ee7 "biber report"
|
"report.bbl" 1466704439 7655 4b5f697a70789470cde9f922b6440ee7 "biber report"
|
||||||
"report.out" 1466799693 649 906e25252ab8cb90aead774c66de15bf ""
|
"report.out" 1466802710 692 d200a3569f21ef27e8eb52d8f9418124 ""
|
||||||
"report.run.xml" 1466799693 2317 80d7743117fafc51b1e42b536d793f68 ""
|
"report.run.xml" 1466802710 2317 80d7743117fafc51b1e42b536d793f68 ""
|
||||||
"report.tex" 1466709836 4497 1f64f8ce17913e2b9dd71c7d6e896da8 ""
|
"report.tex" 1466801348 4578 16354a75d3cd97ebd3d125891b5c9668 ""
|
||||||
"report.toc" 1466799693 1343 b579331b0ae5f9f743ca0ceca6f78889 ""
|
"report.toc" 1466802710 1440 6115e52d8ddc79b94f2a327fe162c5c8 ""
|
||||||
"res/rooftop-eps-converted-to.pdf" 1466670002 22114 f6f2c1d53d8b6a5f4042e202648c7b36 ""
|
"res/rooftop-eps-converted-to.pdf" 1466802686 22110 decdc6a1508d6bf8c0204fef73ba9cf2 ""
|
||||||
"res/rooftop.eps" 1466669975 36013 2a6358f72820d80a6e87ee15e92d5669 ""
|
"res/rooftop.eps" 1466669975 36013 2a6358f72820d80a6e87ee15e92d5669 ""
|
||||||
|
"res/rooftop_res-eps-converted-to.pdf" 1466802710 17422 91f1483c067e8da1e6fdf5b17b69522d ""
|
||||||
|
"res/rooftop_res.eps" 1466802522 48087 c1aa608548cdfb805d27f4cbfcd7c8ad ""
|
||||||
(generated)
|
(generated)
|
||||||
"report-blx.bib"
|
|
||||||
"report.log"
|
|
||||||
"report.out"
|
|
||||||
"report.aux"
|
|
||||||
"report.bcf"
|
|
||||||
"report.run.xml"
|
"report.run.xml"
|
||||||
"report.toc"
|
"report.toc"
|
||||||
|
"report.log"
|
||||||
|
"report.out"
|
||||||
|
"report-blx.bib"
|
||||||
"report.pdf"
|
"report.pdf"
|
||||||
|
"report.bcf"
|
||||||
|
"report.aux"
|
||||||
|
|
|
@ -348,6 +348,16 @@ INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmex10.tfm
|
||||||
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm
|
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm
|
||||||
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm
|
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm
|
||||||
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/stmaryrd/stmary10.tfm
|
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/stmaryrd/stmary10.tfm
|
||||||
|
INPUT inputs/results.tex
|
||||||
|
INPUT inputs/results.tex
|
||||||
|
INPUT res/rooftop_res.eps
|
||||||
|
INPUT ./res/rooftop_res.eps
|
||||||
|
INPUT ./res/rooftop_res.eps
|
||||||
|
INPUT ./res/rooftop_res.eps
|
||||||
|
INPUT ./res/rooftop_res-eps-converted-to.pdf
|
||||||
|
INPUT ./res/rooftop_res-eps-converted-to.pdf
|
||||||
|
INPUT ./res/rooftop_res-eps-converted-to.pdf
|
||||||
|
INPUT ./res/rooftop_res-eps-converted-to.pdf
|
||||||
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/eccc1095.tfm
|
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/eccc1095.tfm
|
||||||
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/tcti1095.tfm
|
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/jknappen/ec/tcti1095.tfm
|
||||||
INPUT report.aux
|
INPUT report.aux
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
This is pdfTeX, Version 3.14159265-2.6-1.40.15 (TeX Live 2014) (preloaded format=pdflatex 2016.3.4) 24 JUN 2016 22:21
|
This is pdfTeX, Version 3.14159265-2.6-1.40.15 (TeX Live 2014) (preloaded format=pdflatex 2016.3.4) 24 JUN 2016 23:11
|
||||||
entering extended mode
|
entering extended mode
|
||||||
restricted \write18 enabled.
|
restricted \write18 enabled.
|
||||||
%&-line parsing enabled.
|
%&-line parsing enabled.
|
||||||
|
@ -1352,14 +1352,14 @@ Package epstopdf Info: Source file: <res/rooftop.eps>
|
||||||
(epstopdf) date: 2016-06-23 10:19:35
|
(epstopdf) date: 2016-06-23 10:19:35
|
||||||
(epstopdf) size: 36013 bytes
|
(epstopdf) size: 36013 bytes
|
||||||
(epstopdf) Output file: <res/rooftop-eps-converted-to.pdf>
|
(epstopdf) Output file: <res/rooftop-eps-converted-to.pdf>
|
||||||
(epstopdf) date: 2016-06-23 10:20:02
|
(epstopdf) date: 2016-06-24 23:11:26
|
||||||
(epstopdf) size: 22114 bytes
|
(epstopdf) size: 22110 bytes
|
||||||
(epstopdf) Command: <repstopdf --outfile=res/rooftop-eps-converted-
|
(epstopdf) Command: <repstopdf --outfile=res/rooftop-eps-converted-
|
||||||
to.pdf res/rooftop.eps>
|
to.pdf res/rooftop.eps>
|
||||||
(epstopdf) \includegraphics on input line 70.
|
(epstopdf) \includegraphics on input line 70.
|
||||||
Package epstopdf Info: Output file is already uptodate.
|
Package epstopdf Info: Output file is already uptodate.
|
||||||
|
|
||||||
<res/rooftop-eps-converted-to.pdf, id=103, 587.19376pt x 442.65375pt>
|
<res/rooftop-eps-converted-to.pdf, id=108, 587.19376pt x 442.65375pt>
|
||||||
File: res/rooftop-eps-converted-to.pdf Graphic file (type pdf)
|
File: res/rooftop-eps-converted-to.pdf Graphic file (type pdf)
|
||||||
|
|
||||||
<use res/rooftop-eps-converted-to.pdf>
|
<use res/rooftop-eps-converted-to.pdf>
|
||||||
|
@ -1379,32 +1379,60 @@ Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding):
|
||||||
Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding):
|
Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding):
|
||||||
(hyperref) removing `math shift' on input line 14.
|
(hyperref) removing `math shift' on input line 14.
|
||||||
|
|
||||||
[5] [6] [7])
|
[5] [6] [7]) (./inputs/results.tex
|
||||||
Overfull \hbox (19.7725pt too wide) in paragraph at lines 116--116
|
Overfull \hbox (40.25502pt too wide) in paragraph at lines 1--2
|
||||||
|
\T1/cmr/m/n/10.95 (-20) The best re-sults for var-i-ous ker-nels are given in [
|
||||||
|
][]Ta-ble 2[][][]. The op-ti-miza-tion bi-nary []\T1/cmtt/m/n/10.95 roofline_fu
|
||||||
|
ll_manpack
|
||||||
|
[]
|
||||||
|
|
||||||
|
[8]
|
||||||
|
Package epstopdf Info: Source file: <res/rooftop_res.eps>
|
||||||
|
(epstopdf) date: 2016-06-24 23:08:42
|
||||||
|
(epstopdf) size: 48087 bytes
|
||||||
|
(epstopdf) Output file: <res/rooftop_res-eps-converted-to.pdf>
|
||||||
|
(epstopdf) Command: <repstopdf --outfile=res/rooftop_res-eps-conver
|
||||||
|
ted-to.pdf res/rooftop_res.eps>
|
||||||
|
(epstopdf) \includegraphics on input line 27.
|
||||||
|
runsystem(repstopdf --outfile=res/rooftop_res-eps-converted-to.pdf res/rooftop_
|
||||||
|
res.eps)...executed safely (allowed).
|
||||||
|
|
||||||
|
Package epstopdf Info: Result file: <res/rooftop_res-eps-converted-to.pdf>
|
||||||
|
(epstopdf) date: 2016-06-24 23:11:50
|
||||||
|
(epstopdf) size: 17422 bytes.
|
||||||
|
<res/rooftop_res-eps-converted-to.pdf, id=289, 587.19376pt x 442.65375pt>
|
||||||
|
File: res/rooftop_res-eps-converted-to.pdf Graphic file (type pdf)
|
||||||
|
|
||||||
|
<use res/rooftop_res-eps-converted-to.pdf>
|
||||||
|
Package pdftex.def Info: res/rooftop_res-eps-converted-to.pdf used on input lin
|
||||||
|
e 27.
|
||||||
|
(pdftex.def) Requested size: 358.50612pt x 270.25478pt.
|
||||||
|
) [9 <./res/rooftop_res-eps-converted-to.pdf>]
|
||||||
|
Overfull \hbox (19.7725pt too wide) in paragraph at lines 122--122
|
||||||
\T1/cmtt/m/n/10.95 blob / e5aa9ca4a77623ff6f1c2d5daa7995565b944506 / stream . c
|
\T1/cmtt/m/n/10.95 blob / e5aa9ca4a77623ff6f1c2d5daa7995565b944506 / stream . c
|
||||||
# L286$[][] \T1/cmr/m/n/10.95 (-20) (vis-ited on 06/20/2016).
|
# L286$[][] \T1/cmr/m/n/10.95 (-20) (vis-ited on 06/20/2016).
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[8]
|
|
||||||
AED: lastpage setting LastPage
|
AED: lastpage setting LastPage
|
||||||
[9]
|
[10]
|
||||||
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 117.
|
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 123.
|
||||||
Package atveryend Info: Empty hook `AfterLastShipout' on input line 117.
|
Package atveryend Info: Empty hook `AfterLastShipout' on input line 123.
|
||||||
(./report.aux)
|
(./report.aux)
|
||||||
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 117.
|
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 123.
|
||||||
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 117.
|
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 123.
|
||||||
Package rerunfilecheck Info: File `report.out' has not changed.
|
Package rerunfilecheck Info: File `report.out' has not changed.
|
||||||
(rerunfilecheck) Checksum: 906E25252AB8CB90AEAD774C66DE15BF;649.
|
(rerunfilecheck) Checksum: D200A3569F21EF27E8EB52D8F9418124;692.
|
||||||
Package logreq Info: Writing requests to 'report.run.xml'.
|
Package logreq Info: Writing requests to 'report.run.xml'.
|
||||||
\openout1 = `report.run.xml'.
|
\openout1 = `report.run.xml'.
|
||||||
|
|
||||||
Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 117.
|
Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 123.
|
||||||
)
|
)
|
||||||
Here is how much of TeX's memory you used:
|
Here is how much of TeX's memory you used:
|
||||||
21477 strings out of 493339
|
21497 strings out of 493339
|
||||||
339286 string characters out of 6141383
|
339717 string characters out of 6141383
|
||||||
879545 words of memory out of 5000000
|
879761 words of memory out of 5000000
|
||||||
24321 multiletter control sequences out of 15000+600000
|
24333 multiletter control sequences out of 15000+600000
|
||||||
30053 words of font info for 136 fonts, out of 8000000 for 9000
|
30053 words of font info for 136 fonts, out of 8000000 for 9000
|
||||||
953 hyphenation exceptions out of 8191
|
953 hyphenation exceptions out of 8191
|
||||||
48i,8n,76p,1001b,1880s stack positions out of 5000i,500n,10000p,200000b,80000s
|
48i,8n,76p,1001b,1880s stack positions out of 5000i,500n,10000p,200000b,80000s
|
||||||
|
@ -1427,10 +1455,10 @@ t/fonts/type1/public/cm-super/sfrm1440.pfb></usr/share/texlive/texmf-dist/fonts
|
||||||
/type1/public/cm-super/sfti0900.pfb></usr/share/texlive/texmf-dist/fonts/type1/
|
/type1/public/cm-super/sfti0900.pfb></usr/share/texlive/texmf-dist/fonts/type1/
|
||||||
public/cm-super/sfti1095.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/
|
public/cm-super/sfti1095.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/
|
||||||
cm-super/sftt1095.pfb>
|
cm-super/sftt1095.pfb>
|
||||||
Output written on report.pdf (9 pages, 336309 bytes).
|
Output written on report.pdf (10 pages, 353185 bytes).
|
||||||
PDF statistics:
|
PDF statistics:
|
||||||
390 PDF objects out of 1000 (max. 8388607)
|
411 PDF objects out of 1000 (max. 8388607)
|
||||||
313 compressed objects within 4 object streams
|
330 compressed objects within 4 object streams
|
||||||
104 named destinations out of 1000 (max. 500000)
|
108 named destinations out of 1000 (max. 500000)
|
||||||
26198 words of extra memory for PDF output out of 29859 (max. 10000000)
|
26211 words of extra memory for PDF output out of 29859 (max. 10000000)
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -112,6 +112,12 @@
|
||||||
\label{sec:kernels}
|
\label{sec:kernels}
|
||||||
\input{inputs/kernels.tex}
|
\input{inputs/kernels.tex}
|
||||||
|
|
||||||
|
\FloatBarrier
|
||||||
|
|
||||||
|
\section{Results}
|
||||||
|
\label{sec:results}
|
||||||
|
\input{inputs/results.tex}
|
||||||
|
|
||||||
\printbibliography
|
\printbibliography
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
|
@ -21,3 +21,5 @@
|
||||||
\contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3}
|
\contentsline {subsection}{\numberline {3.3}The 8 OI Kernel}{6}{subsection.3.3}
|
||||||
\defcounter {refsection}{0}\relax
|
\defcounter {refsection}{0}\relax
|
||||||
\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}
|
\contentsline {subsubsection}{\numberline {3.3.1}Some Further 8/1 Kernel}{8}{subsubsection.3.3.1}
|
||||||
|
\defcounter {refsection}{0}\relax
|
||||||
|
\contentsline {section}{\numberline {4}Results}{8}{section.4}
|
||||||
|
|
Binary file not shown.
BIN
roofline/report/res/rooftop_res-eps-converted-to.pdf
Normal file
BIN
roofline/report/res/rooftop_res-eps-converted-to.pdf
Normal file
Binary file not shown.
1321
roofline/report/res/rooftop_res.eps
Normal file
1321
roofline/report/res/rooftop_res.eps
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue