Commit 691ed21c authored by kuhnm's avatar kuhnm

blatt 2 daxpy

parent 34064cae
# Blatt 2
## Aufgabe 1
``` build/ARM/gem5.opt --debug-flags=O3PipeView --debug-start=1 --debug-file=trace.out configs/example/se.py --cpu-type=detailed --caches -c ~/advanced_computer_architecture/exercises/blatt02/exec/daxpy ```
all:
arm-linux-gnueabihf-gcc -static daxpy.c -o daxpy
clean:
rm daxpy
FILE1 = basicmath_small.c rad2deg.c cubic.c isqrt.c
FILE2 = basicmath_large.c rad2deg.c cubic.c isqrt.c
all: basicmath_small basicmath_large
basicmath_small: ${FILE1} Makefile
arm-linux-gnueabihf-gcc -static -O3 ${FILE1} -o basicmath_small -lm
basicmath_large: ${FILE2} Makefile
arm-linux-gnueabihf-gcc -static -O3 ${FILE2} -o basicmath_large -lm
clean:
rm -rf basicmath_small basicmath_large output*
#include<stdio.h>
#define N 8 // size of vector
#define R 10 // number of repetitions
// daxpy: Y = aX + Y (n size vector, a constant, X,Y vectors)
void daxpy(double* x, double* y, double a, int n)
{
int i;
for (i = 0; i < n; i++) {
y[i] = a * x[i] + y[i];
}
}
// pretty print vector
void print_vector(double* vec, int n)
{
int i;
for (i=0; i<n-1; i++) {
printf("%f,\t",vec[i]);
}
printf("%f",vec[n-1]);
printf("\n");
}
void main()
{
static double x[N], y[N];
double a = 5.5;
int i;
for (i = 0; i<N; i++) {
x[i] = 7.833667;
y[i] = 0;
}
print_vector(y, N);
for (i = 0; i<R; i++) {
daxpy(x,y,a,N);
}
print_vector(y,N);
}
[root]
type=Root
children=system
eventq_index=0
full_system=false
sim_quantum=0
time_sync_enable=false
time_sync_period=100000000000
time_sync_spin_threshold=100000000
[system]
type=System
children=clk_domain cpu cpu_clk_domain cpu_voltage_domain dvfs_handler mem_ctrls membus voltage_domain
boot_osflags=a
cache_line_size=64
clk_domain=system.clk_domain
eventq_index=0
exit_on_work_items=false
init_param=0
kernel=
kernel_addr_check=true
load_addr_mask=1099511627775
load_offset=0
mem_mode=timing
mem_ranges=0:536870911
memories=system.mem_ctrls
mmap_using_noreserve=false
multi_thread=false
num_work_ids=16
readfile=
symbolfile=
work_begin_ckpt_count=0
work_begin_cpu_id_exit=-1
work_begin_exit_count=0
work_cpus_ckpt_count=0
work_end_ckpt_count=0
work_end_exit_count=0
work_item_id=-1
system_port=system.membus.slave[0]
[system.clk_domain]
type=SrcClockDomain
clock=1000
domain_id=-1
eventq_index=0
init_perf_level=0
voltage_domain=system.voltage_domain
[system.cpu]
type=DerivO3CPU
children=branchPred dcache dstage2_mmu dtb fuPool icache interrupts isa istage2_mmu itb tracer workload
LFSTSize=1024
LQEntries=32
LSQCheckLoads=true
LSQDepCheckShift=4
SQEntries=32
SSITSize=1024
activity=0
backComSize=5
branchPred=system.cpu.branchPred
cachePorts=200
checker=Null
clk_domain=system.cpu_clk_domain
commitToDecodeDelay=1
commitToFetchDelay=1
commitToIEWDelay=1
commitToRenameDelay=1
commitWidth=8
cpu_id=0
decodeToFetchDelay=1
decodeToRenameDelay=1
decodeWidth=8
dispatchWidth=8
do_checkpoint_insts=true
do_quiesce=true
do_statistics_insts=true
dstage2_mmu=system.cpu.dstage2_mmu
dtb=system.cpu.dtb
eventq_index=0
fetchBufferSize=64
fetchQueueSize=32
fetchToDecodeDelay=1
fetchTrapLatency=1
fetchWidth=8
forwardComSize=5
fuPool=system.cpu.fuPool
function_trace=false
function_trace_start=0
iewToCommitDelay=1
iewToDecodeDelay=1
iewToFetchDelay=1
iewToRenameDelay=1
interrupts=system.cpu.interrupts
isa=system.cpu.isa
issueToExecuteDelay=1
issueWidth=8
istage2_mmu=system.cpu.istage2_mmu
itb=system.cpu.itb
max_insts_all_threads=0
max_insts_any_thread=0
max_loads_all_threads=0
max_loads_any_thread=0
needsTSO=false
numIQEntries=64
numPhysCCRegs=1280
numPhysFloatRegs=256
numPhysIntRegs=256
numROBEntries=192
numRobs=1
numThreads=1
profile=0
progress_interval=0
renameToDecodeDelay=1
renameToFetchDelay=1
renameToIEWDelay=2
renameToROBDelay=1
renameWidth=8
simpoint_start_insts=
smtCommitPolicy=RoundRobin
smtFetchPolicy=SingleThread
smtIQPolicy=Partitioned
smtIQThreshold=100
smtLSQPolicy=Partitioned
smtLSQThreshold=100
smtNumFetchingThreads=1
smtROBPolicy=Partitioned
smtROBThreshold=100
socket_id=0
squashWidth=8
store_set_clear_period=250000
switched_out=false
system=system
tracer=system.cpu.tracer
trapLatency=13
wbWidth=8
workload=system.cpu.workload
dcache_port=system.cpu.dcache.cpu_side
icache_port=system.cpu.icache.cpu_side
[system.cpu.branchPred]
type=TournamentBP
BTBEntries=4096
BTBTagSize=16
RASSize=16
choiceCtrBits=2
choicePredictorSize=8192
eventq_index=0
globalCtrBits=2
globalPredictorSize=8192
instShiftAmt=2
localCtrBits=2
localHistoryTableSize=2048
localPredictorSize=2048
numThreads=1
[system.cpu.dcache]
type=Cache
children=tags
addr_ranges=0:18446744073709551615
assoc=2
clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
hit_latency=2
is_read_only=false
max_miss_count=0
mshrs=4
prefetch_on_access=false
prefetcher=Null
response_latency=2
sequential_access=false
size=65536
system=system
tags=system.cpu.dcache.tags
tgts_per_mshr=20
write_buffers=8
writeback_clean=false
cpu_side=system.cpu.dcache_port
mem_side=system.membus.slave[2]
[system.cpu.dcache.tags]
type=LRU
assoc=2
block_size=64
clk_domain=system.cpu_clk_domain
eventq_index=0
hit_latency=2
sequential_access=false
size=65536
[system.cpu.dstage2_mmu]
type=ArmStage2MMU
children=stage2_tlb
eventq_index=0
stage2_tlb=system.cpu.dstage2_mmu.stage2_tlb
sys=system
tlb=system.cpu.dtb
[system.cpu.dstage2_mmu.stage2_tlb]
type=ArmTLB
children=walker
eventq_index=0
is_stage2=true
size=32
walker=system.cpu.dstage2_mmu.stage2_tlb.walker
[system.cpu.dstage2_mmu.stage2_tlb.walker]
type=ArmTableWalker
clk_domain=system.cpu_clk_domain
eventq_index=0
is_stage2=true
num_squash_per_cycle=2
sys=system
[system.cpu.dtb]
type=ArmTLB
children=walker
eventq_index=0
is_stage2=false
size=64
walker=system.cpu.dtb.walker
[system.cpu.dtb.walker]
type=ArmTableWalker
clk_domain=system.cpu_clk_domain
eventq_index=0
is_stage2=false
num_squash_per_cycle=2
sys=system
port=system.membus.slave[4]
[system.cpu.fuPool]
type=FUPool
children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7 FUList8
FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 system.cpu.fuPool.FUList8
eventq_index=0
[system.cpu.fuPool.FUList0]
type=FUDesc
children=opList
count=6
eventq_index=0
opList=system.cpu.fuPool.FUList0.opList
[system.cpu.fuPool.FUList0.opList]
type=OpDesc
eventq_index=0
opClass=IntAlu
opLat=1
pipelined=true
[system.cpu.fuPool.FUList1]
type=FUDesc
children=opList0 opList1
count=2
eventq_index=0
opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
[system.cpu.fuPool.FUList1.opList0]
type=OpDesc
eventq_index=0
opClass=IntMult
opLat=3
pipelined=true
[system.cpu.fuPool.FUList1.opList1]
type=OpDesc
eventq_index=0
opClass=IntDiv
opLat=20
pipelined=false
[system.cpu.fuPool.FUList2]
type=FUDesc
children=opList0 opList1 opList2
count=4
eventq_index=0
opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
[system.cpu.fuPool.FUList2.opList0]
type=OpDesc
eventq_index=0
opClass=FloatAdd
opLat=2
pipelined=true
[system.cpu.fuPool.FUList2.opList1]
type=OpDesc
eventq_index=0
opClass=FloatCmp
opLat=2
pipelined=true
[system.cpu.fuPool.FUList2.opList2]
type=OpDesc
eventq_index=0
opClass=FloatCvt
opLat=2
pipelined=true
[system.cpu.fuPool.FUList3]
type=FUDesc
children=opList0 opList1 opList2
count=2
eventq_index=0
opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
[system.cpu.fuPool.FUList3.opList0]
type=OpDesc
eventq_index=0
opClass=FloatMult
opLat=4
pipelined=true
[system.cpu.fuPool.FUList3.opList1]
type=OpDesc
eventq_index=0
opClass=FloatDiv
opLat=12
pipelined=false
[system.cpu.fuPool.FUList3.opList2]
type=OpDesc
eventq_index=0
opClass=FloatSqrt
opLat=24
pipelined=false
[system.cpu.fuPool.FUList4]
type=FUDesc
children=opList
count=0
eventq_index=0
opList=system.cpu.fuPool.FUList4.opList
[system.cpu.fuPool.FUList4.opList]
type=OpDesc
eventq_index=0
opClass=MemRead
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5]
type=FUDesc
children=opList00 opList01 opList02 opList03 opList04 opList05 opList06 opList07 opList08 opList09 opList10 opList11 opList12 opList13 opList14 opList15 opList16 opList17 opList18 opList19
count=4
eventq_index=0
opList=system.cpu.fuPool.FUList5.opList00 system.cpu.fuPool.FUList5.opList01 system.cpu.fuPool.FUList5.opList02 system.cpu.fuPool.FUList5.opList03 system.cpu.fuPool.FUList5.opList04 system.cpu.fuPool.FUList5.opList05 system.cpu.fuPool.FUList5.opList06 system.cpu.fuPool.FUList5.opList07 system.cpu.fuPool.FUList5.opList08 system.cpu.fuPool.FUList5.opList09 system.cpu.fuPool.FUList5.opList10 system.cpu.fuPool.FUList5.opList11 system.cpu.fuPool.FUList5.opList12 system.cpu.fuPool.FUList5.opList13 system.cpu.fuPool.FUList5.opList14 system.cpu.fuPool.FUList5.opList15 system.cpu.fuPool.FUList5.opList16 system.cpu.fuPool.FUList5.opList17 system.cpu.fuPool.FUList5.opList18 system.cpu.fuPool.FUList5.opList19
[system.cpu.fuPool.FUList5.opList00]
type=OpDesc
eventq_index=0
opClass=SimdAdd
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList01]
type=OpDesc
eventq_index=0
opClass=SimdAddAcc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList02]
type=OpDesc
eventq_index=0
opClass=SimdAlu
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList03]
type=OpDesc
eventq_index=0
opClass=SimdCmp
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList04]
type=OpDesc
eventq_index=0
opClass=SimdCvt
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList05]
type=OpDesc
eventq_index=0
opClass=SimdMisc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList06]
type=OpDesc
eventq_index=0
opClass=SimdMult
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList07]
type=OpDesc
eventq_index=0
opClass=SimdMultAcc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList08]
type=OpDesc
eventq_index=0
opClass=SimdShift
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList09]
type=OpDesc
eventq_index=0
opClass=SimdShiftAcc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList10]
type=OpDesc
eventq_index=0
opClass=SimdSqrt
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList11]
type=OpDesc
eventq_index=0
opClass=SimdFloatAdd
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList12]
type=OpDesc
eventq_index=0
opClass=SimdFloatAlu
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList13]
type=OpDesc
eventq_index=0
opClass=SimdFloatCmp
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList14]
type=OpDesc
eventq_index=0
opClass=SimdFloatCvt
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList15]
type=OpDesc
eventq_index=0
opClass=SimdFloatDiv
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList16]
type=OpDesc
eventq_index=0
opClass=SimdFloatMisc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList17]
type=OpDesc
eventq_index=0
opClass=SimdFloatMult
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList18]
type=OpDesc
eventq_index=0
opClass=SimdFloatMultAcc
opLat=1
pipelined=true
[system.cpu.fuPool.FUList5.opList19]
type=OpDesc
eventq_index=0
opClass=SimdFloatSqrt
opLat=1
pipelined=true
[system.cpu.fuPool.FUList6]
type=FUDesc
children=opList
count=0
eventq_index=0
opList=system.cpu.fuPool.FUList6.opList
[system.cpu.fuPool.FUList6.opList]
type=OpDesc
eventq_index=0
opClass=MemWrite
opLat=1
pipelined=true
[system.cpu.fuPool.FUList7]
type=FUDesc
children=opList0 opList1
count=4
eventq_index=0
opList=system.cpu.fuPool.FUList7.opList0 system.cpu.fuPool.FUList7.opList1
[system.cpu.fuPool.FUList7.opList0]
type=OpDesc
eventq_index=0
opClass=MemRead
opLat=1
pipelined=true
[system.cpu.fuPool.FUList7.opList1]
type=OpDesc
eventq_index=0
opClass=MemWrite
opLat=1
pipelined=true
[system.cpu.fuPool.FUList8]
type=FUDesc
children=opList
count=1
eventq_index=0
opList=system.cpu.fuPool.FUList8.opList
[system.cpu.fuPool.FUList8.opList]
type=OpDesc
eventq_index=0
opClass=IprAccess
opLat=3
pipelined=false
[system.cpu.icache]
type=Cache
children=tags
addr_ranges=0:18446744073709551615
assoc=2
clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
hit_latency=2
is_read_only=true
max_miss_count=0
mshrs=4
prefetch_on_access=false
prefetcher=Null
response_latency=2
sequential_access=false
size=32768
system=system
tags=system.cpu.icache.tags
tgts_per_mshr=20
write_buffers=8
writeback_clean=true
cpu_side=system.cpu.icache_port
mem_side=system.membus.slave[1]
[system.cpu.icache.tags]
type=LRU
assoc=2
block_size=64
clk_domain=system.cpu_clk_domain
eventq_index=0
hit_latency=2
sequential_access=false
size=32768
[system.cpu.interrupts]
type=ArmInterrupts
eventq_index=0
[system.cpu.isa]
type=ArmISA
decoderFlavour=Generic
eventq_index=0
fpsid=1090793632
id_aa64afr0_el1=0