#!/bin/bash
set -eu

###############################################################################
# Integrated ILP experiment: Branch Prediction × Superscalar Width × SMT
# Layout matches your environment.
#
# Runs gem5 (se.py, DerivO3CPU) once per combination of branch predictor,
# superscalar width and SMT thread count. Each run writes its output to
#   $ROOT/BP-<predictor>/W<width>/SMT<threads>/
# with gem5's stdout in "simout" and stderr in "simerr".
#
# Exit status: 0 if every run succeeded, 1 if at least one run failed or the
# gem5 binary / se.py script could not be found.
###############################################################################

# --- Paths (adapt to your tree if needed) ------------------------------------
GEM5=/home/carlos/projects/gem5/gem5src/gem5
BIN="$GEM5/build/X86/gem5.opt"
SE="$GEM5/configs/deprecated/example/se.py"

# Workloads for SMT threads (use your binaries/args here).
# For SMT>1 we pass them joined with ';' so se.py creates multiple thread contexts
CMD1=/home/carlos/projects/gem5/gem5-run/memtouch/memtouch
CMD2=/home/carlos/projects/gem5/gem5-run/memtouch/memtouch
CMD3=/bin/ls
CMD4=/bin/ls

ROOT=/home/carlos/projects/gem5/gem5-data/results/integrated

# Fail fast instead of producing one failed run per sweep point.
if [ ! -x "$BIN" ]; then
  echo "error: gem5 binary not found or not executable: $BIN" >&2
  exit 1
fi
if [ ! -f "$SE" ]; then
  echo "error: se.py config script not found: $SE" >&2
  exit 1
fi

mkdir -p "$ROOT"

# --- Global constants (kept fixed across runs to be comparable) --------------
MAXI=20000000   # limit committed instructions per run (finish in reasonable time)
L1I=32kB        # keep memory hierarchy constant across runs
L1D=32kB
L2=1MB

# NOTE: Use `$SE --list-bp-types` to confirm these names in your build.
BP_LIST=(LocalBP BiModeBP TournamentBP LTAGE)
W_LIST=(1 2 4)  # superscalar widths (fetch/decode/rename/issue/commit)
T_LIST=(1 2 4)  # SMT hardware threads on ONE physical core

# --- Helper: build command string for T threads -------------------------------
# Arguments: $1 - number of SMT threads
# Outputs:   the ';'-joined workload list for that thread count on stdout
# Thread counts other than 1, 2 or 4 fall back to the single workload CMD1;
# a warning is written to stderr because --smt would then get one workload.
mk_cmds () {
  local threads="$1"
  case "$threads" in
    1) echo "$CMD1" ;;
    2) echo "$CMD1;$CMD2" ;;
    4) echo "$CMD1;$CMD2;$CMD3;$CMD4" ;;
    *)
      echo "warning: no workload set for $threads threads; using CMD1 only" >&2
      echo "$CMD1"
      ;;
  esac
}

failures=0      # number of sweep points whose gem5 run did not succeed

for BP in "${BP_LIST[@]}"; do
  for W in "${W_LIST[@]}"; do
    # Scale the core buffers with width (simple heuristic).
    ROB=$((W * 64))   # Reorder Buffer entries
    IQ=$((W * 32))    # Issue Queue entries
    LQ=$((W * 32))    # Load Queue
    SQ=$((W * 32))    # Store Queue

    for T in "${T_LIST[@]}"; do
      # Directory name encodes the three dimensions
      OUT="$ROOT/BP-${BP}/W${W}/SMT${T}"
      mkdir -p "$OUT"
      echo "[*] BP=$BP W=$W SMT=$T -> $OUT"

      # Build per-run command list for thread contexts
      CMDS="$(mk_cmds "$T")"

      # ------------------------- RUN -----------------------------------------
      # Key flags explained (use these lines in your report):
      #   --bp-type=                       choose branch predictor implementation
      #   --caches --l2cache               enable private L1I/L1D and a unified L2
      #   --l1i_size/--l1d_size/--l2_size  keep memory fixed across runs
      #   --maxinsts=                      stop after N committed insts (fairness)
      #   --smt --num-cpus=1               single O3 core exposing T HW threads
      #   --param system.cpu[0].*          set per-CPU (index 0) microarch widths
      #     fetch/decode/rename/issue/commitWidth = W (superscalar width)
      #     ROB/IQ/LQ/SQ entries scaled with W to avoid artificial stalls
      # -----------------------------------------------------------------------
      # The command is assembled as an array so that the optional --smt flag
      # can be appended without relying on unquoted word-splitting.
      gem5_cmd=(
        "$BIN" --outdir="$OUT"
        "$SE"
        --cmd="$CMDS"
        --cpu-type=DerivO3CPU
        --caches --l2cache
        --l1i_size="$L1I" --l1d_size="$L1D" --l2_size="$L2"
        --bp-type="$BP"
        --maxinsts="$MAXI"
        --num-cpus=1
      )
      if [ "$T" -gt 1 ]; then
        gem5_cmd+=(--smt)
      fi
      gem5_cmd+=(
        --param "system.cpu[0].fetchWidth=$W"
        --param "system.cpu[0].decodeWidth=$W"
        --param "system.cpu[0].renameWidth=$W"
        --param "system.cpu[0].issueWidth=$W"
        --param "system.cpu[0].commitWidth=$W"
        --param "system.cpu[0].numROBEntries=$ROB"
        --param "system.cpu[0].numIQEntries=$IQ"
        --param "system.cpu[0].LQEntries=$LQ"
        --param "system.cpu[0].SQEntries=$SQ"
      )

      # Capture the exit status explicitly: under `set -e` an unguarded
      # failure would abort the whole sweep before the check below runs.
      rc=0
      "${gem5_cmd[@]}" > "$OUT/simout" 2> "$OUT/simerr" || rc=$?

      # Require both a clean exit and a non-empty stats file, so a stats.txt
      # left over from an earlier run cannot make a failed run look fine.
      if [ "$rc" -eq 0 ] && [ -s "$OUT/stats.txt" ]; then
        echo " ok: $OUT/stats.txt"
      else
        echo " FAILED/RUNNING — see $OUT/simerr (exit=$rc)"
        failures=$((failures + 1))
      fi
    done
  done
done

echo "[*] Integrated sweep complete."

if [ "$failures" -gt 0 ]; then
  echo "[!] $failures run(s) failed" >&2
  exit 1
fi