Files
gem5-ilp-experiments/integratedAnalysis/run_integrated.sh
Carlos Gutierrez cd69096346 initial commit
2025-09-21 01:17:26 -04:00

105 lines
3.9 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
set -eu
###############################################################################
# Integrated ILP experiment: Branch Prediction × Superscalar Width × SMT
# Layout matches your environment.
###############################################################################
# --- Paths (adapt to your tree if needed) ------------------------------------
GEM5=/home/carlos/projects/gem5/gem5src/gem5
BIN="$GEM5/build/X86/gem5.opt"
SE="$GEM5/configs/deprecated/example/se.py"
# Workloads for SMT threads (use your binaries/args here).
# For SMT>1 we pass them joined with ';' so se.py creates multiple thread contexts
CMD1=/home/carlos/projects/gem5/gem5-run/memtouch/memtouch
CMD2=/home/carlos/projects/gem5/gem5-run/memtouch/memtouch
CMD3=/bin/ls
CMD4=/bin/ls
# Fail fast with a readable message: during the sweep each run's stderr is
# redirected to a per-run file, so a wrong path would otherwise be easy to miss.
for required in "$BIN" "$CMD1" "$CMD2" "$CMD3" "$CMD4"; do
  if [ ! -f "$required" ] || [ ! -x "$required" ]; then
    echo "error: not an executable file: $required" >&2
    exit 1
  fi
done
if [ ! -f "$SE" ]; then
  echo "error: gem5 config script not found: $SE" >&2
  exit 1
fi
ROOT=/home/carlos/projects/gem5/gem5-data/results/integrated
mkdir -p "$ROOT"
# --- Global constants (kept fixed across runs to be comparable) --------------
MAXI=20000000 # limit committed instructions per run (finish in reasonable time)
L1I=32kB; L1D=32kB; L2=1MB # keep memory hierarchy constant across runs
# NOTE: se.py is a gem5 config script, so it must be launched through the gem5
# binary: run `"$BIN" "$SE" --list-bp-types` to confirm these names in your build.
BP_LIST="LocalBP BiModeBP TournamentBP LTAGE"
W_LIST="1 2 4" # superscalar widths (fetch/decode/rename/issue/commit)
T_LIST="1 2 4" # SMT hardware threads on ONE physical core
# --- Helper: build command string for T threads -------------------------------
# Prints the ';'-joined workload list for the requested SMT thread count.
# Globals:   CMD1, CMD2, CMD3, CMD4 (read)
# Arguments: $1 - number of hardware threads (1, 2 or 4)
# Outputs:   the command string on stdout; for any other count a warning goes
#            to stderr and the single-workload string is printed instead
mk_cmds () {
  # local: a direct call must not overwrite the caller's own T loop variable
  local T="$1"
  case "$T" in
    1) printf '%s\n' "$CMD1" ;;
    2) printf '%s\n' "$CMD1;$CMD2" ;;
    4) printf '%s\n' "$CMD1;$CMD2;$CMD3;$CMD4" ;;
    *)
      # Keep the historical fallback, but make it visible: the caller would
      # otherwise label a single-workload run with the requested thread count.
      printf 'mk_cmds: unsupported thread count "%s", using one workload\n' "$T" >&2
      printf '%s\n' "$CMD1"
      ;;
  esac
}
# --- Sweep: one gem5 run per (branch predictor, width, SMT threads) ----------
# A failing run is recorded and the sweep continues; the script exits non-zero
# at the end if any run failed, so callers can still detect the failure.
FAILED=0 # number of runs that exited non-zero or left no stats.txt
for BP in $BP_LIST; do
  for W in $W_LIST; do
    # Scale the core buffers with width (simple heuristic).
    ROB=$((W*64)) # Reorder Buffer entries
    IQ=$((W*32)) # Issue Queue entries
    LQ=$((W*32)) # Load Queue
    SQ=$((W*32)) # Store Queue
    for T in $T_LIST; do
      # Directory name encodes the three dimensions
      OUT="$ROOT/BP-${BP}/W${W}/SMT${T}"
      mkdir -p "$OUT"
      echo "[*] BP=$BP W=$W SMT=$T -> $OUT"
      # Build per-run command list for thread contexts
      CMDS="$(mk_cmds "$T")"
      # ------------------------- RUN -----------------------------------------
      # Key flags explained (use these lines in your report):
      # --bp-type=<BP> choose branch predictor implementation
      # --caches --l2cache enable private L1I/L1D and a unified L2
      # --l1i_size/--l1d_size/--l2_size keep memory fixed across runs
      # --maxinsts=<N> stop after N committed insts (fairness)
      # --smt --num-cpus=1 single O3 core exposing T HW threads
      # --param system.cpu[0].* set per-CPU (index 0) microarch widths
      # fetch/decode/rename/issue/commitWidth = W (superscalar width)
      # ROB/IQ/LQ/SQ entries scaled with W to avoid artificial stalls
      # -----------------------------------------------------------------------
      # The command line is assembled in an array so the optional --smt flag
      # can be appended without relying on unquoted word splitting.
      RUN=(
        "$BIN" --outdir="$OUT"
        "$SE"
        --cmd="$CMDS"
        --cpu-type=DerivO3CPU
        --caches --l2cache
        --l1i_size="$L1I" --l1d_size="$L1D" --l2_size="$L2"
        --bp-type="$BP"
        --maxinsts="$MAXI"
        --num-cpus=1
      )
      if [ "$T" -gt 1 ]; then
        RUN+=(--smt)
      fi
      RUN+=(
        --param "system.cpu[0].fetchWidth=$W"
        --param "system.cpu[0].decodeWidth=$W"
        --param "system.cpu[0].renameWidth=$W"
        --param "system.cpu[0].issueWidth=$W"
        --param "system.cpu[0].commitWidth=$W"
        --param "system.cpu[0].numROBEntries=$ROB"
        --param "system.cpu[0].numIQEntries=$IQ"
        --param "system.cpu[0].LQEntries=$LQ"
        --param "system.cpu[0].SQEntries=$SQ"
      )
      # Capture the exit status instead of letting `set -e` abort the script:
      # one failing combination must not discard the rest of the sweep.
      RC=0
      "${RUN[@]}" > "$OUT/simout" 2> "$OUT/simerr" || RC=$?
      # Require a clean exit as well as a non-empty stats file, so a stats.txt
      # left behind by an earlier sweep is never reported as a success.
      if [ "$RC" -eq 0 ] && [ -s "$OUT/stats.txt" ]; then
        echo " ok: $OUT/stats.txt"
      else
        echo " FAILED/RUNNING — see $OUT/simerr (gem5 exit status $RC)"
        FAILED=$((FAILED+1))
      fi
    done
  done
done
echo "[*] Integrated sweep complete."
if [ "$FAILED" -gt 0 ]; then
  echo "[!] $FAILED run(s) failed or produced no stats.txt" >&2
  exit 1
fi