From 63656bd1abd4f558547aa36716fc66c78f24407a Mon Sep 17 00:00:00 2001 From: Joshua Mashburn Date: Sat, 13 Jun 2026 18:22:53 -0500 Subject: [PATCH 1/3] Initial insertion of new register allocator to WP --- Makefile | 2 +- config/instantiation_file.py | 5 +- config/parse.py | 2 +- configs/champsim_config.json | 1 + inc/defaults.hpp | 1 + inc/instruction.h | 5 +- inc/ooo_cpu.h | 22 ++-- inc/options.h | 3 +- inc/register_allocator.h | 45 ++++++++ src/ooo_cpu.cc | 105 +++++++++--------- src/options.cc | 4 +- src/register_allocator.cc | 119 +++++++++++++++++++++ test/config/compile-only/core-options.json | 2 +- vcpkg | 2 +- 14 files changed, 244 insertions(+), 74 deletions(-) create mode 100644 inc/register_allocator.h create mode 100644 src/register_allocator.cc diff --git a/Makefile b/Makefile index edb9e64dc9..7e6839502d 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CXXFLAGS += --std=c++17 -O3 -Wall -Wextra -Wshadow -Wpedantic -I./libbf/ TRIPLET_DIR = $(patsubst %/,%,$(firstword $(filter-out $(ROOT_DIR)/vcpkg_installed/vcpkg/, $(wildcard $(ROOT_DIR)/vcpkg_installed/*/)))) CPPFLAGS += -isystem $(TRIPLET_DIR)/include LDFLAGS += -L$(TRIPLET_DIR)/lib -L$(TRIPLET_DIR)/lib/manual-link -LDLIBS += -llzma -lz -lbz2 -lfmt ./libbf/build/lib/libbf.a +LDLIBS += -llzma -lz -lbz2 -lfmt -lCLI11 ./libbf/build/lib/libbf.a .phony: all all_execs clean configclean test makedirs diff --git a/config/instantiation_file.py b/config/instantiation_file.py index 9a064f7344..3106504c60 100644 --- a/config/instantiation_file.py +++ b/config/instantiation_file.py @@ -25,8 +25,9 @@ core_builder_parts = { 'ifetch_buffer_size': '.ifetch_buffer_size({ifetch_buffer_size})', - 'decode_buffer_size': '.decode_buffer_size({dispatch_buffer_size})', - 'dispatch_buffer_size': '.dispatch_buffer_size({decode_buffer_size})', + 'decode_buffer_size': '.decode_buffer_size({decode_buffer_size})', + 'dispatch_buffer_size': '.dispatch_buffer_size({dispatch_buffer_size})', + 'register_file_size': '.register_file_size({register_file_size})', 'rob_size': '.rob_size({rob_size})', 'lq_size': '.lq_size({lq_size})', 'sq_size': '.sq_size({sq_size})', diff --git a/config/parse.py b/config/parse.py index 7328b6738c..0a2443a651 100644 --- a/config/parse.py +++ b/config/parse.py @@ -72,7 +72,7 @@ def normalize_config(config_file): # Default core elements # Give cores numeric indices - core_keys_to_copy = ('frequency', 'ifetch_buffer_size', 'decode_buffer_size', 'dispatch_buffer_size', 'rob_size', 'lq_size', 'sq_size', 'fetch_width', 'decode_width', 'dispatch_width', 'execute_width', 'lq_width', 'sq_width', 'retire_width', 'mispredict_penalty', 'scheduler_size', 'decode_latency', 'dispatch_latency', 'schedule_latency', 'execute_latency', 'branch_predictor', 'btb', 'DIB') + core_keys_to_copy = ('frequency', 'ifetch_buffer_size', 'decode_buffer_size', 'dispatch_buffer_size', 'register_file_size', 'rob_size', 'lq_size', 'sq_size', 'fetch_width', 'decode_width', 'dispatch_width', 'execute_width', 'lq_width', 'sq_width', 'retire_width', 'mispredict_penalty', 'scheduler_size', 'decode_latency', 'dispatch_latency', 'schedule_latency', 'execute_latency', 'branch_predictor', 'btb', 'DIB') cores = [util.chain(cpu, util.subdict(config_file, core_keys_to_copy), {'name': 'cpu'+str(i), '_index': i}) for i,cpu in enumerate(cores)] pinned_cache_names = ('L1I', 'L1D', 'ITLB', 'DTLB', 'L2C', 'STLB') diff --git a/configs/champsim_config.json b/configs/champsim_config.json index 419e89d4e7..77134cf348 100644 --- a/configs/champsim_config.json +++ b/configs/champsim_config.json @@ -11,6 +11,7 @@ "ifetch_buffer_size":512, "decode_buffer_size":128, "dispatch_buffer_size":128, + "register_file_size":128, "rob_size": 512, "lq_size": 144, "sq_size": 112, diff --git a/inc/defaults.hpp b/inc/defaults.hpp index 7e3632571f..77b3ed626e 100644 --- a/inc/defaults.hpp +++ b/inc/defaults.hpp @@ -31,6 +31,7 @@ const auto default_core = O3_CPU::Builder{} .ifetch_buffer_size(64) .decode_buffer_size(32) .dispatch_buffer_size(32) + .register_file_size(128) .rob_size(352) .lq_size(128) .sq_size(72) diff --git a/inc/instruction.h b/inc/instruction.h index b1674ef86a..09515520a0 100644 --- a/inc/instruction.h +++ b/inc/instruction.h @@ -40,6 +40,7 @@ enum branch_type { NOT_BRANCH }; +using PHYSICAL_REGISTER_ID = int16_t; // signed to use -1 to indicate no physical register enum flags { NON_SPEC = 0, SERIAL, SERIAL_AFTER, SERIAL_BEFORE, READ_BARRIER, WRITE_BARRIER, SQUASH_AFTER, SQUASHED }; struct ooo_model_instr { @@ -83,8 +84,8 @@ struct ooo_model_instr { unsigned completed_mem_ops = 0; int num_reg_dependent = 0; - std::vector destination_registers = {}; // output registers - std::vector source_registers = {}; // input registers + std::vector destination_registers = {}; // output registers + std::vector source_registers = {}; // input registers std::vector destination_memory = {}; std::vector source_memory = {}; diff --git a/inc/ooo_cpu.h b/inc/ooo_cpu.h index 757592c453..4baf2a2d11 100644 --- a/inc/ooo_cpu.h +++ b/inc/ooo_cpu.h @@ -40,6 +40,7 @@ #include "instruction.h" #include "module_impl.h" #include "operable.h" +#include "register_allocator.h" #include "util/lru_table.h" #include @@ -253,29 +254,28 @@ class O3_CPU : public champsim::operable std::vector> LQ; std::deque SQ; - // std::array>, std::numeric_limits::max() + 1> reg_producers; - std::array, std::numeric_limits::max() + 1> reg_producers; - struct saved_instr { uint64_t ip = 0; uint64_t instr_id = 0; std::vector source_memory = {}; - std::vector source_registers = {}; + std::vector source_registers = {}; std::vector destination_memory = {}; - std::vector destination_registers = {}; + std::vector destination_registers = {}; }; std::map imap_counter; std::map> instruction_map; // Constants - std::size_t IFETCH_BUFFER_SIZE, DECODE_BUFFER_SIZE, DISPATCH_BUFFER_SIZE, ROB_SIZE, LQ_SIZE, SQ_SIZE; + std::size_t IFETCH_BUFFER_SIZE, DECODE_BUFFER_SIZE, DISPATCH_BUFFER_SIZE, REGISTER_FILE_SIZE, ROB_SIZE, LQ_SIZE, SQ_SIZE; long int FETCH_WIDTH, DECODE_WIDTH, DISPATCH_WIDTH, EXEC_WIDTH; long int LQ_WIDTH, SQ_WIDTH; long int RETIRE_WIDTH; unsigned BRANCH_MISPREDICT_PENALTY, SCHEDULER_SIZE, DECODE_LATENCY, DISPATCH_LATENCY, SCHEDULING_LATENCY, EXEC_LATENCY; long int L1I_BANDWIDTH, L1D_BANDWIDTH; + RegisterAllocator reg_allocator{REGISTER_FILE_SIZE}; + // branch uint64_t fetch_resume_cycle = 0; @@ -388,6 +388,7 @@ class O3_CPU : public champsim::operable std::size_t m_ifetch_buffer_size{}; std::size_t m_decode_buffer_size{}; std::size_t m_dispatch_buffer_size{}; + std::size_t m_register_file_size{}; std::size_t m_rob_size{}; std::size_t m_lq_size{}; std::size_t m_sq_size{}; @@ -416,7 +417,7 @@ class O3_CPU : public champsim::operable template Builder(builder_conversion_tag, const Builder& other) : m_cpu(other.m_cpu), m_freq_scale(other.m_freq_scale), m_dib_set(other.m_dib_set), m_dib_way(other.m_dib_way), m_dib_window(other.m_dib_window), - m_ifetch_buffer_size(other.m_ifetch_buffer_size), m_decode_buffer_size(other.m_decode_buffer_size), + m_ifetch_buffer_size(other.m_ifetch_buffer_size), m_decode_buffer_size(other.m_decode_buffer_size), m_register_file_size(other.m_register_file_size), m_dispatch_buffer_size(other.m_dispatch_buffer_size), m_rob_size(other.m_rob_size), m_lq_size(other.m_lq_size), m_sq_size(other.m_sq_size), m_fetch_width(other.m_fetch_width), m_decode_width(other.m_decode_width), m_dispatch_width(other.m_dispatch_width), m_schedule_width(other.m_schedule_width), m_execute_width(other.m_execute_width), m_lq_width(other.m_lq_width), m_sq_width(other.m_sq_width), @@ -469,6 +470,11 @@ class O3_CPU : public champsim::operable m_dispatch_buffer_size = dispatch_buffer_size_; return *this; } + self_type& register_file_size(std::size_t register_file_size_) + { + m_register_file_size = register_file_size_; + return *this; + } self_type& rob_size(std::size_t rob_size_) { m_rob_size = rob_size_; @@ -590,7 +596,7 @@ class O3_CPU : public champsim::operable template explicit O3_CPU(Builder b) : champsim::operable(b.m_freq_scale), cpu(b.m_cpu), DIB(b.m_dib_set, b.m_dib_way, {champsim::lg2(b.m_dib_window)}, {champsim::lg2(b.m_dib_window)}), - LQ(b.m_lq_size), IFETCH_BUFFER_SIZE(b.m_ifetch_buffer_size), DISPATCH_BUFFER_SIZE(b.m_dispatch_buffer_size), DECODE_BUFFER_SIZE(b.m_decode_buffer_size), + LQ(b.m_lq_size), IFETCH_BUFFER_SIZE(b.m_ifetch_buffer_size), DISPATCH_BUFFER_SIZE(b.m_dispatch_buffer_size), DECODE_BUFFER_SIZE(b.m_decode_buffer_size), REGISTER_FILE_SIZE(b.m_register_file_size), ROB_SIZE(b.m_rob_size), LQ_SIZE(b.m_lq_size), SQ_SIZE(b.m_sq_size), FETCH_WIDTH(b.m_fetch_width), DECODE_WIDTH(b.m_decode_width), DISPATCH_WIDTH(b.m_dispatch_width), SCHEDULER_SIZE(b.m_schedule_width), EXEC_WIDTH(b.m_execute_width), LQ_WIDTH(b.m_lq_width), SQ_WIDTH(b.m_sq_width), RETIRE_WIDTH(b.m_retire_width), BRANCH_MISPREDICT_PENALTY(b.m_mispredict_penalty), DISPATCH_LATENCY(b.m_dispatch_latency), DECODE_LATENCY(b.m_decode_latency), diff --git a/inc/options.h b/inc/options.h index 090932bec5..7e0a29d6ad 100644 --- a/inc/options.h +++ b/inc/options.h @@ -15,6 +15,7 @@ class Options std::size_t decode_buffer_size; std::size_t dispatch_buffer_size; std::size_t rob_size; + std::size_t register_file_size; std::size_t lq_size; std::size_t sq_size; @@ -48,4 +49,4 @@ class Options } // namespace champsim -#endif \ No newline at end of file +#endif diff --git a/inc/register_allocator.h b/inc/register_allocator.h new file mode 100644 index 0000000000..257a631a4d --- /dev/null +++ b/inc/register_allocator.h @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifndef REG_ALLOC_H +#define REG_ALLOC_H + +#include "instruction.h" + +struct physical_register { + uint16_t arch_reg_index; + uint64_t producing_instruction_id; + bool valid; // has the producing instruction committed yet? + bool busy; // is this register in use anywhere in the pipeline? +}; + +class RegisterAllocator +{ +private: + std::array::max() + 1> frontend_RAT, backend_RAT, snapshot_frontend_RAT; + std::queue free_registers; + std::vector wp_issued_registers; + std::vector physical_register_file; + bool inWrongPath = false; + +public: + RegisterAllocator(size_t num_physical_registers); + PHYSICAL_REGISTER_ID rename_dest_register(int16_t reg, uint64_t producer_id); + PHYSICAL_REGISTER_ID rename_src_register(int16_t reg); + void complete_dest_register(PHYSICAL_REGISTER_ID physreg); + void retire_dest_register(PHYSICAL_REGISTER_ID physreg); + void free_register(PHYSICAL_REGISTER_ID physreg); + bool isValid(PHYSICAL_REGISTER_ID physreg) const; + bool isAllocated(PHYSICAL_REGISTER_ID archreg) const; + unsigned long count_free_registers() const; + int count_reg_dependencies(const ooo_model_instr& instr) const; + void save_frontend_RAT(); + void restore_frontend_RAT(); + void print_deadlock(); +}; +#endif diff --git a/src/ooo_cpu.cc b/src/ooo_cpu.cc index fee1ef8052..7d969ab676 100644 --- a/src/ooo_cpu.cc +++ b/src/ooo_cpu.cc @@ -338,8 +338,9 @@ void O3_CPU::modify_instruction(ooo_model_instr& inst) assert(imap_counter[inst.ip] < instruction_map[inst.ip].size()); inst.source_memory = instruction_map[inst.ip][imap_counter[inst.ip]].source_memory; - inst.source_registers = instruction_map[inst.ip][imap_counter[inst.ip]].source_registers; inst.destination_memory = instruction_map[inst.ip][imap_counter[inst.ip]].destination_memory; + + inst.source_registers = instruction_map[inst.ip][imap_counter[inst.ip]].source_registers; inst.destination_registers = instruction_map[inst.ip][imap_counter[inst.ip]].destination_registers; if ((imap_counter[inst.ip] + 1) == instruction_map[inst.ip].size()) @@ -725,6 +726,12 @@ long O3_CPU::schedule_instruction() } for (auto rob_it = std::begin(ROB); rob_it != std::end(ROB) && search_bw > 0; ++rob_it) { + // Ensure there are enough registers free + unsigned long sources_to_allocate = std::count_if(rob_it->source_registers.begin(), rob_it->source_registers.end(), + [&alloc = std::as_const(reg_allocator)](auto srcreg) { return !alloc.isAllocated(srcreg); }); + if (reg_allocator.count_free_registers() < (sources_to_allocate + rob_it->destination_registers.size())) { + break; + } if (rob_it->scheduled == 0) { do_scheduling(*rob_it); ++progress; @@ -752,29 +759,14 @@ long O3_CPU::schedule_instruction() void O3_CPU::do_scheduling(ooo_model_instr& instr) { // Mark register dependencies - for (auto src_reg : instr.source_registers) { - if (!std::empty(reg_producers[src_reg])) { - ooo_model_instr& prior = *reg_producers.at(src_reg).back(); - if (prior.registers_instrs_depend_on_me.empty() || prior.registers_instrs_depend_on_me.back().get().instr_id != instr.instr_id) { - if (prior.instr_id > instr.instr_id) { - if constexpr (champsim::wp_debug_print) { - fmt::print("something is wrong! src_reg: {} prior: instr_id {} cur: instr_id {}\n", src_reg, prior.instr_id, instr.instr_id); - } - print_deadlock(); - } - prior.registers_instrs_depend_on_me.push_back(instr); - instr.num_reg_dependent++; - } - } + for (auto& src_reg : instr.source_registers) { + // rename source register + src_reg = reg_allocator.rename_src_register(src_reg); } - for (auto dreg : instr.destination_registers) { - // auto begin = std::begin(reg_producers[dreg]); - // auto end = std::end(reg_producers[dreg]); - // auto ins = std::lower_bound(begin, end, instr, [](const ooo_model_instr& lhs, const ooo_model_instr& rhs) { return lhs.instr_id < rhs.instr_id; }); - reg_producers.at(dreg).clear(); - reg_producers.at(dreg).push_back(&instr); - // reg_producers[dreg].insert(ins, std::ref(instr)); + for (auto& dreg : instr.destination_registers) { + // rename destination register + dreg = reg_allocator.rename_dest_register(dreg, instr.instr_id); } instr.scheduled = true; @@ -797,15 +789,19 @@ long O3_CPU::execute_instruction() bool cp_executed = false; for (auto rob_it = std::begin(ROB); rob_it != std::end(ROB) && exec_bw > 0; ++rob_it) { - if (rob_it->scheduled && rob_it->executed == 0 && rob_it->num_reg_dependent == 0 && rob_it->event_cycle <= current_cycle) { - do_execution(*rob_it); - --exec_bw; - if(rob_it->is_wrong_path) { - wp_executed = true; - } else { - cp_executed = true; + if (rob_it->scheduled && rob_it->executed == 0 && rob_it->event_cycle <= current_cycle) { + bool ready = std::all_of(std::begin(rob_it->source_registers), std::end(rob_it->source_registers), + [&alloc = std::as_const(reg_allocator)](auto srcreg) { return alloc.isValid(srcreg); }); + if (ready) { + do_execution(*rob_it); + --exec_bw; + if(rob_it->is_wrong_path) { + wp_executed = true; + } else { + cp_executed = true; + } + sim_stats.total_execute_instructions++; } - sim_stats.total_execute_instructions++; } } @@ -1071,26 +1067,12 @@ bool O3_CPU::execute_load(const LSQ_ENTRY& lq_entry) void O3_CPU::do_complete_execution(ooo_model_instr& instr) { for (auto dreg : instr.destination_registers) { - // auto begin = std::begin(reg_producers[dreg]); - // auto end = std::end(reg_producers[dreg]); - // auto elem = std::find_if(begin, end, [id = instr.instr_id](ooo_model_instr& x) { return x.instr_id == id; }); - // assert(elem != end); - // reg_producers[dreg].erase(elem); - if (!reg_producers.at(dreg).empty() && reg_producers.at(dreg).back()->instr_id == instr.instr_id) { - reg_producers.at(dreg).clear(); - } + // mark physical register's data as valid + reg_allocator.complete_dest_register(dreg); } instr.completed = true; - for (ooo_model_instr& dependent : instr.registers_instrs_depend_on_me) { - dependent.num_reg_dependent--; - assert(dependent.num_reg_dependent >= 0); - - if (dependent.num_reg_dependent == 0) - dependent.scheduled = true; - } - bool pay_penalty = false; if (instr.branch_mispredicted && !instr.is_wrong_path && !instr.squashed) { update_branch_stats(instr); @@ -1142,14 +1124,14 @@ void O3_CPU::do_complete_execution(ooo_model_instr& instr) std::cout << std::flush; // Remove dependences by wrong path instructions which // are tracked by reg_producers - for (auto dreg : x.destination_registers) { - auto begin = std::begin(reg_producers.at(dreg)); - auto end = std::end(reg_producers.at(dreg)); - auto elem = std::find_if(begin, end, [wp_id = x.instr_id](ooo_model_instr* y) { return y->instr_id == wp_id; }); - if (elem != end) { - reg_producers.at(dreg).erase(elem); - } - } + // for (auto dreg : x.destination_registers) { + // auto begin = std::begin(reg_producers.at(dreg)); + // auto end = std::end(reg_producers.at(dreg)); + // auto elem = std::find_if(begin, end, [wp_id = x.instr_id](ooo_model_instr* y) { return y->instr_id == wp_id; }); + // if (elem != end) { + // reg_producers.at(dreg).erase(elem); + // } + // } } else { auto first_wp_inst = find_if(std::begin(x.registers_instrs_depend_on_me), std::end(x.registers_instrs_depend_on_me), [id = id](auto& y) { @@ -1307,6 +1289,14 @@ long O3_CPU::retire_rob() std::for_each(retire_begin, retire_end, [](const auto& x) { fmt::print("[ROB] retire_rob instr_id: {} is retired\n", x.instr_id); }); } + // commit register writes to backend RAT + // and recycle the old physical registers + for (auto rob_it = retire_begin; rob_it != retire_end; ++rob_it) { + for (auto dreg : rob_it->destination_registers) { + reg_allocator.retire_dest_register(dreg); + } + } + if (std::empty(ROB) && !in_repair_mode) { sim_stats.retire_starve_cycles++; } @@ -1456,7 +1446,7 @@ void O3_CPU::print_deadlock() { fmt::print("DEADLOCK! CPU {} cycle {}\n", cpu, current_cycle); - auto instr_pack = [](const auto& entry) { + auto instr_pack = [this](const auto& entry) { return std::tuple{entry.instr_id, entry.ip, entry.fetch_issued, @@ -1464,7 +1454,7 @@ void O3_CPU::print_deadlock() entry.scheduled, entry.executed, entry.completed, - +entry.num_reg_dependent, + reg_allocator.count_reg_dependencies(entry), entry.num_mem_ops() - entry.completed_mem_ops, entry.event_cycle, entry.is_wrong_path}; @@ -1476,6 +1466,9 @@ void O3_CPU::print_deadlock() champsim::range_print_deadlock(DISPATCH_BUFFER, "cpu" + std::to_string(cpu) + "_DISPATCH", instr_fmt, instr_pack); champsim::range_print_deadlock(ROB, "cpu" + std::to_string(cpu) + "_ROB", instr_fmt, instr_pack); + // print occupied physical registers + reg_allocator.print_deadlock(); + // print LSQ entries auto lq_pack = [](const auto& entry) { std::string depend_id{"-"}; diff --git a/src/options.cc b/src/options.cc index ac1540a8b8..bfbb9e21c5 100644 --- a/src/options.cc +++ b/src/options.cc @@ -52,6 +52,8 @@ void Options::update(environment& env) cpu.DISPATCH_BUFFER_SIZE = dispatch_buffer_size; if (rob_size) cpu.ROB_SIZE = rob_size; + if (register_file_size) + cpu.REGISTER_FILE_SIZE = register_file_size; if (lq_size) cpu.LQ_SIZE = lq_size; if (sq_size) @@ -96,4 +98,4 @@ void Options::update(environment& env) set(env); } -} // namespace champsim \ No newline at end of file +} // namespace champsim diff --git a/src/register_allocator.cc b/src/register_allocator.cc new file mode 100644 index 0000000000..b0e8a8f636 --- /dev/null +++ b/src/register_allocator.cc @@ -0,0 +1,119 @@ +#include "register_allocator.h" + +#include + +RegisterAllocator::RegisterAllocator(size_t num_physical_registers) +{ + assert(num_physical_registers <= std::numeric_limits::max()); + for (size_t i = 0; i < num_physical_registers; ++i) { + free_registers.push(static_cast(i)); + } + physical_register_file = std::vector(num_physical_registers, {0, 0, false, false}); + frontend_RAT.fill(-1); // default value for no mapping + backend_RAT.fill(-1); +} + +PHYSICAL_REGISTER_ID RegisterAllocator::rename_dest_register(int16_t reg, uint64_t producer_id) +{ + assert(!free_registers.empty()); + + PHYSICAL_REGISTER_ID phys_reg = free_registers.front(); + free_registers.pop(); + frontend_RAT[reg] = phys_reg; + physical_register_file.at(phys_reg) = {(uint16_t)reg, producer_id, false, true}; // arch_reg_index, valid, busy + if(inWrongPath) { + wp_issued_registers.push_back(phys_reg); + } + return phys_reg; +} + +PHYSICAL_REGISTER_ID RegisterAllocator::rename_src_register(int16_t reg) +{ + PHYSICAL_REGISTER_ID phys = frontend_RAT[reg]; + + if (phys < 0) { + // allocate the register if it hasn't yet been mapped + // (common due to the traces being slices in the middle of a program) + phys = free_registers.front(); + free_registers.pop(); + frontend_RAT[reg] = phys; + backend_RAT[reg] = phys; // we assume this register's last write has been committed + physical_register_file.at(phys) = {(uint16_t)reg, 0, true, true}; // arch_reg_index, producing_inst_id, valid, busy + } + + return phys; +} + +void RegisterAllocator::complete_dest_register(PHYSICAL_REGISTER_ID physreg) +{ + // mark the physical register as valid + physical_register_file.at(physreg).valid = true; +} + +void RegisterAllocator::retire_dest_register(PHYSICAL_REGISTER_ID physreg) +{ + // grab the arch reg index, find old phys reg in backend RAT + uint16_t arch_reg = physical_register_file.at(physreg).arch_reg_index; + PHYSICAL_REGISTER_ID old_phys_reg = backend_RAT[arch_reg]; + + // update the backend RAT with the new phys reg + backend_RAT[arch_reg] = physreg; + + // free the old phys reg + if (old_phys_reg != -1) { + free_register(old_phys_reg); + } +} + +void RegisterAllocator::free_register(PHYSICAL_REGISTER_ID physreg) +{ + physical_register_file.at(physreg) = {255, 0, false, false}; // arch_reg_index, producing_inst_id, valid, busy + free_registers.push(physreg); +} + +bool RegisterAllocator::isValid(PHYSICAL_REGISTER_ID physreg) const { return physical_register_file.at(physreg).valid; } + +bool RegisterAllocator::isAllocated(PHYSICAL_REGISTER_ID archreg) const { return frontend_RAT[archreg] != -1; } + +unsigned long RegisterAllocator::count_free_registers() const { return std::size(free_registers); } + +int RegisterAllocator::count_reg_dependencies(const ooo_model_instr& instr) const +{ + return static_cast(std::count_if(std::begin(instr.source_registers), std::end(instr.source_registers), [this](auto reg) { return !isValid(reg); })); +} + +void RegisterAllocator::save_frontend_RAT() { + std::copy(std::begin(frontend_RAT), std::end(frontend_RAT), std::begin(snapshot_frontend_RAT)); + inWrongPath = true; +} + +void RegisterAllocator::restore_frontend_RAT() +{ + inWrongPath = false; + std::copy(std::begin(snapshot_frontend_RAT), std::end(snapshot_frontend_RAT), std::begin(frontend_RAT)); + for (auto physreg : wp_issued_registers) { + free_register(physreg); + } + wp_issued_registers.clear(); +} + +void RegisterAllocator::print_deadlock() +{ + fmt::print("Frontend Register Allocation Table Backend Register Allocation Table\n"); + for (size_t i = 0; i < frontend_RAT.size(); ++i) { + fmt::print("Arch reg: {:3} Phys reg: {:3} Arch reg: {:3} Phys reg: {:3}\n", i, frontend_RAT[i], i, backend_RAT[i]); + } + + if (count_free_registers() == 0) { + fmt::print("\n**WARNING!! WARNING!!** THE PHYSICAL REGISTER FILE IS COMPLETELY OCCUPIED.\n"); + fmt::print("It is extremely likely your register file size is too small.\n"); + } + + fmt::print("\nPhysical Register File\n"); + for (size_t i = 0; i < physical_register_file.size(); ++i) { + fmt::print("Phys reg: {:3}\t Arch reg: {:3}\t Producer: {}\t Valid: {}\t Busy: {}\n", static_cast(i), + static_cast(physical_register_file.at(i).arch_reg_index), physical_register_file.at(i).producing_instruction_id, + physical_register_file.at(i).valid, physical_register_file.at(i).busy); + } + fmt::print("\n"); +} diff --git a/test/config/compile-only/core-options.json b/test/config/compile-only/core-options.json index 62adf0d120..55e88753ae 100644 --- a/test/config/compile-only/core-options.json +++ b/test/config/compile-only/core-options.json @@ -1,7 +1,7 @@ { "branch_predictor": "perceptron", - "rob_size": 226, "lq_size": 90, "sq_size": 85, + "register_file_size": 128, "rob_size": 226, "lq_size": 90, "sq_size": 85, "fetch_width": 6, "decode_width": 6, "lq_width": 2, "sq_width": 1, "decode_latency": 3, "execute_latency": 2 diff --git a/vcpkg b/vcpkg index 4a600e9fea..44819aa2a6 160000 --- a/vcpkg +++ b/vcpkg @@ -1 +1 @@ -Subproject commit 4a600e9fea71bd7872080cbb716797e04d30e6d3 +Subproject commit 44819aa2a6c10e56065e2b0330e7d6c89d1d2574 From 9c44d0642b4b35d752f7a436509689db51860e09 Mon Sep 17 00:00:00 2001 From: Joshua Mashburn Date: Sat, 13 Jun 2026 18:43:17 -0500 Subject: [PATCH 2/3] Implemented WP enhancements for RegisterAllocator --- inc/register_allocator.h | 3 ++- src/ooo_cpu.cc | 11 +++++++++++ src/register_allocator.cc | 8 +++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/inc/register_allocator.h b/inc/register_allocator.h index 257a631a4d..74652e3710 100644 --- a/inc/register_allocator.h +++ b/inc/register_allocator.h @@ -25,7 +25,7 @@ class RegisterAllocator std::queue free_registers; std::vector wp_issued_registers; std::vector physical_register_file; - bool inWrongPath = false; + bool in_wp = false; public: RegisterAllocator(size_t num_physical_registers); @@ -40,6 +40,7 @@ class RegisterAllocator int count_reg_dependencies(const ooo_model_instr& instr) const; void save_frontend_RAT(); void restore_frontend_RAT(); + bool inWrongPath() const; void print_deadlock(); }; #endif diff --git a/src/ooo_cpu.cc b/src/ooo_cpu.cc index 7d969ab676..c8a089cac5 100644 --- a/src/ooo_cpu.cc +++ b/src/ooo_cpu.cc @@ -732,6 +732,12 @@ long O3_CPU::schedule_instruction() if (reg_allocator.count_free_registers() < (sources_to_allocate + rob_it->destination_registers.size())) { break; } + + if (!rob_it->is_wrong_path && reg_allocator.inWrongPath()) { + // We've hit the first non-WP instruction, restore the frontend register file + reg_allocator.restore_frontend_RAT(); + } + if (rob_it->scheduled == 0) { do_scheduling(*rob_it); ++progress; @@ -740,6 +746,11 @@ long O3_CPU::schedule_instruction() if (rob_it->executed == 0) --search_bw; + + if (rob_it->before_wrong_path) { + // checkpoint the frontend register file for recovery later + reg_allocator.save_frontend_RAT(); + } } if (progress == 0) { diff --git a/src/register_allocator.cc b/src/register_allocator.cc index b0e8a8f636..e7eb265279 100644 --- a/src/register_allocator.cc +++ b/src/register_allocator.cc @@ -21,7 +21,7 @@ PHYSICAL_REGISTER_ID RegisterAllocator::rename_dest_register(int16_t reg, uint64 free_registers.pop(); frontend_RAT[reg] = phys_reg; physical_register_file.at(phys_reg) = {(uint16_t)reg, producer_id, false, true}; // arch_reg_index, valid, busy - if(inWrongPath) { + if(in_wp) { wp_issued_registers.push_back(phys_reg); } return phys_reg; @@ -84,12 +84,12 @@ int RegisterAllocator::count_reg_dependencies(const ooo_model_instr& instr) cons void RegisterAllocator::save_frontend_RAT() { std::copy(std::begin(frontend_RAT), std::end(frontend_RAT), std::begin(snapshot_frontend_RAT)); - inWrongPath = true; + in_wp = true; } void RegisterAllocator::restore_frontend_RAT() { - inWrongPath = false; + in_wp = false; std::copy(std::begin(snapshot_frontend_RAT), std::end(snapshot_frontend_RAT), std::begin(frontend_RAT)); for (auto physreg : wp_issued_registers) { free_register(physreg); @@ -97,6 +97,8 @@ void RegisterAllocator::restore_frontend_RAT() wp_issued_registers.clear(); } +bool RegisterAllocator::inWrongPath() const { return in_wp; } + void RegisterAllocator::print_deadlock() { fmt::print("Frontend Register Allocation Table Backend Register Allocation Table\n"); From 7c494b31211d7483f4c45e8afc15044745534fbe Mon Sep 17 00:00:00 2001 From: Joshua Mashburn Date: Sat, 13 Jun 2026 20:08:02 -0500 Subject: [PATCH 3/3] Cleanup --- src/ooo_cpu.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/ooo_cpu.cc b/src/ooo_cpu.cc index c8a089cac5..60ebc2d990 100644 --- a/src/ooo_cpu.cc +++ b/src/ooo_cpu.cc @@ -1133,16 +1133,6 @@ void O3_CPU::do_complete_execution(ooo_model_instr& instr) fmt::print("FLUSH instr_id: {} is_wrong_path: {}\n", x.instr_id, x.is_wrong_path); } std::cout << std::flush; - // Remove dependences by wrong path instructions which - // are tracked by reg_producers - // for (auto dreg : x.destination_registers) { - // auto begin = std::begin(reg_producers.at(dreg)); - // auto end = std::end(reg_producers.at(dreg)); - // auto elem = std::find_if(begin, end, [wp_id = x.instr_id](ooo_model_instr* y) { return y->instr_id == wp_id; }); - // if (elem != end) { - // reg_producers.at(dreg).erase(elem); - // } - // } } else { auto first_wp_inst = find_if(std::begin(x.registers_instrs_depend_on_me), std::end(x.registers_instrs_depend_on_me), [id = id](auto& y) {