From c0c39cbbe5af2a501e2b4bf6cf9e1565d3533d57 Mon Sep 17 00:00:00 2001 From: jotabulacios Date: Tue, 26 May 2026 17:52:35 -0300 Subject: [PATCH] Add RISC-V compressed instruction support --- executor/programs/asm/test_compressed.s | 171 +++++++++ executor/src/elf.rs | 15 +- executor/src/flamegraph.rs | 2 +- executor/src/tests/decompress_tests.rs | 167 +++++++++ executor/src/tests/keccak_tests.rs | 4 +- executor/src/tests/mod.rs | 1 + executor/src/vm/execution.rs | 140 ++++--- executor/src/vm/instruction/decoding.rs | 102 ++++++ executor/src/vm/instruction/decompress.rs | 426 ++++++++++++++++++++++ executor/src/vm/instruction/execution.rs | 43 ++- executor/src/vm/instruction/mod.rs | 1 + executor/tests/asm.rs | 43 ++- executor/tests/flamegraph.rs | 9 +- prover/src/tables/cpu.rs | 38 +- prover/src/tables/decode.rs | 35 +- prover/src/tables/trace_builder.rs | 20 +- prover/src/tables/types.rs | 6 +- prover/src/test_utils.rs | 23 +- prover/src/tests/cpu_tests.rs | 16 +- prover/src/tests/decode_tests.rs | 94 ++++- prover/src/tests/prove_elfs_tests.rs | 17 + prover/src/tests/trace_builder_tests.rs | 12 +- 22 files changed, 1230 insertions(+), 155 deletions(-) create mode 100644 executor/programs/asm/test_compressed.s create mode 100644 executor/src/tests/decompress_tests.rs create mode 100644 executor/src/vm/instruction/decompress.rs diff --git a/executor/programs/asm/test_compressed.s b/executor/programs/asm/test_compressed.s new file mode 100644 index 000000000..a09f3c0b8 --- /dev/null +++ b/executor/programs/asm/test_compressed.s @@ -0,0 +1,171 @@ + .attribute 5, "rv64i2p1_m2p0_c2p0" + .option rvc + .globl main +# Self-checking RV64C test. Each check computes a difference that is zero when the +# compressed instruction behaved correctly, and ORs it into the error accumulator +# s0 (x8). The program exits with a0 = s0, so a0 == 0 iff every check passed. 
+# +# The CA-format register-register ops (c.and/c.or/c.xor/c.sub) only address x8..x15 +# (c.mv and c.add accept any register), so working values live in s0,s1,a0..a5. +main: + li s0, 0 # error accumulator + + # --- C.LI, C.ADDI, C.MV, C.SUB --- + c.li a0, 10 + c.addi a0, 5 # a0 = 15 + c.li a1, 15 + c.mv a2, a0 + c.sub a2, a1 # a2 = 0 + c.or s0, a2 + + # --- C.ADD --- + c.li a0, 7 + c.li a1, 3 + c.add a0, a1 # a0 = 10 + c.li a1, 10 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + # --- C.AND / C.OR / C.XOR --- + c.li a0, 12 + c.li a1, 10 + c.mv a2, a0 + c.and a2, a1 # 12 & 10 = 8 + c.li a3, 8 + c.mv a4, a2 + c.sub a4, a3 + c.or s0, a4 + + c.mv a2, a0 + c.or a2, a1 # 12 | 10 = 14 + c.li a3, 14 + c.mv a4, a2 + c.sub a4, a3 + c.or s0, a4 + + c.mv a2, a0 + c.xor a2, a1 # 12 ^ 10 = 6 + c.li a3, 6 + c.mv a4, a2 + c.sub a4, a3 + c.or s0, a4 + + # --- C.ANDI / C.SLLI / C.SRLI / C.SRAI --- + c.li a0, 15 + c.andi a0, 10 # 15 & 10 = 10 + c.li a1, 10 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + c.li a0, 1 + c.slli a0, 4 # 1 << 4 = 16 + c.li a1, 16 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + c.li a0, 28 + c.srli a0, 2 # 28 >> 2 = 7 + c.li a1, 7 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + li a0, -16 + c.srai a0, 2 # -16 >> 2 = -4 (arithmetic) + li a1, -4 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + # --- C.LUI --- + c.lui a0, 1 # a0 = 0x1000 + li a1, 0x1000 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + # --- C.SWSP / C.LWSP and C.SDSP / C.LDSP --- + li sp, 0x2000 + li a0, 1234 + c.swsp a0, 0(sp) + c.lwsp a1, 0(sp) # a1 = 1234 + c.mv a2, a1 + li a3, 1234 + c.sub a2, a3 + c.or s0, a2 + + li a0, 0x123456789 + c.sdsp a0, 8(sp) + c.ldsp a1, 8(sp) # full 64-bit round trip + c.mv a2, a1 + li a3, 0x123456789 + c.sub a2, a3 + c.or s0, a2 + + # --- C.SW / C.LW and C.SD / C.LD (base in x8..x15) --- + li a3, 0x3000 + c.li a0, 30 + c.sw a0, 0(a3) + c.lw a1, 0(a3) # a1 = 30 + c.mv a2, a1 + c.li a4, 30 + c.sub a2, a4 + c.or s0, a2 + + li a0, 0x5678abcd + c.sd a0, 8(a3) + c.ld a1, 8(a3) # 64-bit round trip + c.mv 
a2, a1 + li a4, 0x5678abcd + c.sub a2, a4 + c.or s0, a2 + + # --- C.ADDI4SPN / C.ADDI16SP --- + li sp, 0x4000 + c.addi4spn a0, sp, 8 # a0 = sp + 8 = 0x4008 + li a1, 0x4008 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + c.addi16sp sp, 16 # sp = 0x4010 + li a1, 0x4010 + c.mv a2, sp + c.sub a2, a1 + c.or s0, a2 + + # --- branches: C.BEQZ / C.BNEZ / C.J (with a straddling region) --- + c.li a0, 0 + c.beqz a0, beqz_ok # taken + c.li s1, 1 + c.or s0, s1 # only reached on failure +beqz_ok: + c.li a0, 5 + c.bnez a0, bnez_ok # taken + c.li s1, 1 + c.or s0, s1 +bnez_ok: + c.j after_j + c.li s1, 1 + c.or s0, s1 +after_j: + + # --- call/return: jal (4-byte) + C.JR (return) --- + c.li a0, 0 + jal ra, func # func sets a0 = 1 + c.li a1, 1 + c.mv a2, a0 + c.sub a2, a1 + c.or s0, a2 + + # --- exit: a0 = s0 (0 on success) --- + c.mv a0, s0 + li a7, 93 + ecall + +func: + c.li a0, 1 + c.jr ra diff --git a/executor/src/elf.rs b/executor/src/elf.rs index ed79fb983..6f5120b2d 100644 --- a/executor/src/elf.rs +++ b/executor/src/elf.rs @@ -216,7 +216,9 @@ impl ProgramHeader { pub struct Segment { /// Base virtual address for this segment pub base_addr: u64, - /// The 32-bit instruction words in this segment + /// The raw 4-byte little-endian words of this segment. For executable + /// segments these may encode a mix of 2-byte (compressed) and 4-byte + /// instructions when reinterpreted as a halfword stream. pub values: Vec, /// Whether this segment is executable (has PF_X flag) pub is_executable: bool, @@ -230,6 +232,11 @@ pub struct Elf { pub(crate) const WORD_SIZE: u64 = 4; +/// Minimum alignment required of the entry point and of every `PT_LOAD` segment +/// base address. The RV64C "C" extension allows instructions on 2-byte boundaries; the +/// base ISA only ever produces 4-byte-aligned addresses, so 2 is the safe floor. 
+const INSTRUCTION_ALIGN: u64 = 2; + #[derive(Debug, thiserror::Error)] pub enum ElfError { #[error("Not a 64-bit ELF")] @@ -274,7 +281,9 @@ impl Elf { return Err(ElfError::NotExecutable); } let entry_point: u64 = elf_program.ehdr.e_entry; - if !entry_point.is_multiple_of(WORD_SIZE) { + // Instructions only need 2-byte alignment with the "C" (compressed) + // extension; without it everything is 4-byte aligned anyway. + if !entry_point.is_multiple_of(INSTRUCTION_ALIGN) { return Err(ElfError::InvalidEntryPoint); } let phdrs = elf_program.phdrs; @@ -287,7 +296,7 @@ impl Elf { .iter() .filter(|program_header| program_header.p_type == PT_LOAD) { - if !program_header.p_vaddr.is_multiple_of(WORD_SIZE) { + if !program_header.p_vaddr.is_multiple_of(INSTRUCTION_ALIGN) { return Err(ElfError::UnalignedVAddr); } let mut values = Vec::new(); diff --git a/executor/src/flamegraph.rs b/executor/src/flamegraph.rs index f9b447d19..92f05cc0b 100644 --- a/executor/src/flamegraph.rs +++ b/executor/src/flamegraph.rs @@ -54,7 +54,7 @@ impl FlamegraphGenerator { // Update call stack based on instruction type let instruction = instructions .get(log.current_pc) - .copied() + .map(|decoded| decoded.instr) .ok_or(FlamegraphError::InstructionNotFound)?; self.update_stack(log, instruction); } diff --git a/executor/src/tests/decompress_tests.rs b/executor/src/tests/decompress_tests.rs new file mode 100644 index 000000000..7d006995b --- /dev/null +++ b/executor/src/tests/decompress_tests.rs @@ -0,0 +1,167 @@ +//! Tests for RV64C (compressed instruction) decompression. 
+ +use crate::vm::instruction::decoding::{ArithOp, Instruction, InstructionError, LoadStoreWidth}; +use crate::vm::instruction::decompress::{decompress, instr_len}; + +#[test] +fn instr_len_distinguishes_compressed_from_full() { + assert_eq!(instr_len(0x0000), 2); // low bits 00 + assert_eq!(instr_len(0x4515), 2); // low bits 01 (c.li) + assert_eq!(instr_len(0x8082), 2); // low bits 10 (c.jr ra) + assert_eq!(instr_len(0x0033), 4); // low bits 11 (a base instruction) +} + +#[test] +fn all_zero_halfword_is_illegal() { + assert!(matches!( + decompress(0x0000), + Err(InstructionError::IllegalCompressed(0x0000)) + )); +} + +#[test] +fn c_li_expands_to_addi_from_x0() { + // c.li x10, 5 + match decompress(0x4515).unwrap() { + Instruction::ArithImm { + dst, + src, + imm, + op: ArithOp::Add, + } => { + assert_eq!((dst, src, imm), (10, 0, 5)); + } + other => panic!("expected ArithImm, got {other:?}"), + } +} + +#[test] +fn c_li_sign_extends_negative_immediate() { + // c.li x10, -1 (6-bit immediate, all bits set) exercises the sign-extension path. 
+ let enc: u16 = (0b010 << 13) | (1 << 12) | (10 << 7) | (0b11111 << 2) | 0b01; + match decompress(enc).unwrap() { + Instruction::ArithImm { + dst, + src, + imm, + op: ArithOp::Add, + } => { + assert_eq!((dst, src, imm), (10, 0, -1)); + } + other => panic!("expected ArithImm, got {other:?}"), + } +} + +#[test] +fn c_addi4spn_expands_to_addi_from_sp() { + // c.addi4spn x8, x2, 4 + match decompress(0x0040).unwrap() { + Instruction::ArithImm { + dst, + src, + imm, + op: ArithOp::Add, + } => { + assert_eq!((dst, src, imm), (8, 2, 4)); + } + other => panic!("expected ArithImm, got {other:?}"), + } +} + +#[test] +fn c_mv_and_c_add() { + // c.mv x10, x11 -> add x10, x0, x11 + match decompress(0x852E).unwrap() { + Instruction::Arith { + dst, + src1, + src2, + op: ArithOp::Add, + } => assert_eq!((dst, src1, src2), (10, 0, 11)), + other => panic!("expected Arith (c.mv), got {other:?}"), + } + // c.add x10, x11 -> add x10, x10, x11 + match decompress(0x952E).unwrap() { + Instruction::Arith { + dst, + src1, + src2, + op: ArithOp::Add, + } => assert_eq!((dst, src1, src2), (10, 10, 11)), + other => panic!("expected Arith (c.add), got {other:?}"), + } +} + +#[test] +fn c_jr_and_c_jalr() { + // c.jr ra (0x8082) -> jalr x0, 0(x1) + match decompress(0x8082).unwrap() { + Instruction::JumpAndLinkRegister { base, dst, offset } => { + assert_eq!((base, dst, offset), (1, 0, 0)) + } + other => panic!("expected JALR (c.jr), got {other:?}"), + } + // c.jalr ra (0x9082) -> jalr x1, 0(x1) + match decompress(0x9082).unwrap() { + Instruction::JumpAndLinkRegister { base, dst, offset } => { + assert_eq!((base, dst, offset), (1, 1, 0)) + } + other => panic!("expected JALR (c.jalr), got {other:?}"), + } +} + +#[test] +fn c_ebreak_expands_to_ecall_ebreak() { + assert!(matches!(decompress(0x9002), Ok(Instruction::EcallEbreak))); +} + +#[test] +fn c_sub_expands_to_register_sub_not_imm() { + // c.sub x8, x9 -> sub x8, x8, x9 (must be Arith, never ArithImm). 
+ // funct3=100, funct2=11, rd'=x8 (field 0), sub-bits=00 (C.SUB), rs2'=x9 (field 1) + let enc: u16 = (0b100 << 13) | (0b11 << 10) | (1 << 2) | 0b01; + match decompress(enc).unwrap() { + Instruction::Arith { + dst, + src1, + src2, + op: ArithOp::Sub, + } => assert_eq!((dst, src1, src2), (8, 8, 9)), + other => panic!("expected Arith Sub, got {other:?}"), + } +} + +#[test] +fn c_lwsp_x0_is_reserved() { + // c.lwsp with rd == 0 is reserved. + let enc: u16 = (0b010 << 13) | 0b10; + assert!(matches!( + decompress(enc), + Err(InstructionError::ReservedCompressed(_)) + )); +} + +#[test] +fn c_lwsp_loads_from_sp() { + // c.lwsp x10, 0(x2) + let enc: u16 = (0b010 << 13) | (10 << 7) | 0b10; + match decompress(enc).unwrap() { + Instruction::Load { + dst, + offset, + base, + width: LoadStoreWidth::Word, + } => assert_eq!((dst, offset, base), (10, 0, 2)), + other => panic!("expected Load (c.lwsp), got {other:?}"), + } +} + +#[test] +fn float_compressed_is_excluded() { + // c.fld (Q0 funct3=001) must not decode to an integer instruction. 
+ let enc: u16 = 0b001 << 13; + assert!(matches!( + decompress(enc), + Err(InstructionError::IllegalCompressed(_)) + )); +} diff --git a/executor/src/tests/keccak_tests.rs b/executor/src/tests/keccak_tests.rs index 1c592f080..7c9635e3f 100644 --- a/executor/src/tests/keccak_tests.rs +++ b/executor/src/tests/keccak_tests.rs @@ -61,7 +61,7 @@ fn test_keccak_syscall_rejects_unaligned_state_addr() { registers.write(10, 0x1001).unwrap(); let err = Instruction::EcallEbreak - .run(&mut pc, &mut registers, &mut memory) + .run(&mut pc, &mut registers, &mut memory, 4) .unwrap_err(); assert!(matches!( err, @@ -79,7 +79,7 @@ fn test_keccak_syscall_rejects_overflowing_state_range() { registers.write(10, u64::MAX - 191).unwrap(); let err = Instruction::EcallEbreak - .run(&mut pc, &mut registers, &mut memory) + .run(&mut pc, &mut registers, &mut memory, 4) .unwrap_err(); assert!(matches!( err, diff --git a/executor/src/tests/mod.rs b/executor/src/tests/mod.rs index 448a05dee..cc9579c53 100644 --- a/executor/src/tests/mod.rs +++ b/executor/src/tests/mod.rs @@ -1,3 +1,4 @@ +pub mod decompress_tests; pub mod flamegraph_tests; pub mod keccak_tests; pub mod memory_tests; diff --git a/executor/src/vm/execution.rs b/executor/src/vm/execution.rs index 614aad649..d010197f5 100644 --- a/executor/src/vm/execution.rs +++ b/executor/src/vm/execution.rs @@ -4,7 +4,8 @@ use crate::{ elf::Elf, vm::{ instruction::{ - decoding::{Instruction, InstructionError}, + decoding::{DecodedInstruction, Instruction, InstructionError, decode_segment_words}, + decompress::{decompress, instr_len}, execution::ExecutionError, }, logs::Log, @@ -22,9 +23,9 @@ pub struct ReturnValues { pub struct ExecutionResult { pub return_values: ReturnValues, pub logs: Vec, - /// Predecoded instructions map (pc -> instruction) - /// Use this to look up instructions by their PC from the logs - pub instructions: U64HashMap, + /// Predecoded instructions map (pc -> decoded instruction + byte width). 
+ /// Use this to look up instructions by their PC from the logs. + pub instructions: U64HashMap, } /// Size of each log chunk - balances memory usage vs callback overhead @@ -64,17 +65,41 @@ impl Executor { self.logs.clear(); while self.pc != 0 && self.logs.len() < CHUNK_SIZE { - if !self.pc.is_multiple_of(4) { + // Instructions must be at least 2-byte aligned. With the RV64C "C" + // extension a compressed instruction can start on any 2-byte boundary + // (so `pc % 4 == 2` is legal); only an odd `pc` is truly misaligned. + if !self.pc.is_multiple_of(2) { return Err(ExecutorError::InstructionAddressMisaligned(self.pc)); } - let instruction = match self.instructions.get(self.pc) { - Some(&instr) => instr, + let decoded = match self.instructions.get(self.pc) { + Some(&decoded) => decoded, None => { - let next_instruction = self.memory.load_word(self.pc)?; - Instruction::parse(next_instruction)? + // Not predecoded (e.g. a jump outside the known segments): fetch + // a halfword, and only read the second halfword if it is a 4-byte + // instruction. Reading per-halfword avoids over-reading past the + // end of a region that ends in a compressed instruction. 
+ let lo = self.memory.load_half(self.pc)?; + if instr_len(lo) == 2 { + DecodedInstruction { + instr: decompress(lo)?, + len: 2, + } + } else { + let hi = self.memory.load_half(self.pc + 2)?; + let word = ((hi as u32) << 16) | (lo as u32); + DecodedInstruction { + instr: Instruction::parse(word)?, + len: 4, + } + } } }; - let log = instruction.run(&mut self.pc, &mut self.registers, &mut self.memory)?; + let log = decoded.instr.run( + &mut self.pc, + &mut self.registers, + &mut self.memory, + decoded.len, + )?; self.logs.push(log); } @@ -131,13 +156,12 @@ fn load_program(segments: &[crate::elf::Segment], memory: &mut Memory) -> Result pub struct InstructionSegment { base_addr: u64, - instructions: Vec, -} - -impl InstructionSegment { - fn end_addr(&self) -> u64 { - self.base_addr + (self.instructions.len() as u64 * 4) - } + /// Exclusive end address (`base_addr + byte length`). + end_addr: u64, + /// Decoded instructions indexed by 2-byte slot: slot `i` covers the halfword at + /// `base_addr + 2*i`. A slot is `Some` at an instruction start and `None` for + /// the second half of a 4-byte instruction (or a non-instruction tail). + entries: Vec>, } pub struct InstructionCache { @@ -145,39 +169,46 @@ pub struct InstructionCache { } impl InstructionCache { - /// Creates an InstructionCache from a hashmap of address -> instruction. + /// Creates an InstructionCache from a hashmap of address -> decoded instruction. /// Used for testing where we don't have real ELF segments. 
- pub fn from_map(map: &U64HashMap) -> Self { + pub fn from_map(map: &U64HashMap) -> Self { if map.is_empty() { return Self { segments: Vec::new(), }; } - let mut entries: Vec<_> = map.iter().collect(); - entries.sort_by_key(|(addr, _)| *addr); + let mut sorted: Vec<_> = map.iter().collect(); + sorted.sort_by_key(|(addr, _)| **addr); let mut segments = Vec::new(); - let mut current_base = *entries[0].0; - let mut current_instructions = vec![*entries[0].1]; - - for (addr, instruction) in entries.into_iter().skip(1) { - let expected_addr = current_base + (current_instructions.len() as u64 * 4); - if *addr == expected_addr { - current_instructions.push(*instruction); - } else { + let mut base_addr = *sorted[0].0; + let mut entries: Vec> = Vec::new(); + let mut next_addr = base_addr; + + for (&addr, &decoded) in sorted { + if addr != next_addr { + // Gap between instructions: close the current segment. segments.push(InstructionSegment { - base_addr: current_base, - instructions: current_instructions, + base_addr, + end_addr: next_addr, + entries: std::mem::take(&mut entries), }); - current_base = *addr; - current_instructions = vec![*instruction]; + base_addr = addr; + next_addr = addr; + } + entries.push(Some(decoded)); + // The second halfword slot of a 4-byte instruction holds no start. + if decoded.len == 4 { + entries.push(None); } + next_addr += decoded.len as u64; } segments.push(InstructionSegment { - base_addr: current_base, - instructions: current_instructions, + base_addr, + end_addr: next_addr, + entries, }); Self { segments } @@ -186,24 +217,26 @@ impl InstructionCache { pub fn new(segments: &[crate::elf::Segment]) -> Result { let mut result = Vec::new(); for seg in segments.iter().filter(|s| s.is_executable) { - let instructions = seg - .values - .iter() - .map(|v| Instruction::parse(*v)) - .collect::, _>>()?; + // Two 2-byte slots per 4-byte memory word. 
+ let num_slots = seg.values.len() * 2; + let mut entries: Vec> = vec![None; num_slots]; + for (byte_offset, decoded) in decode_segment_words(&seg.values)? { + entries[(byte_offset / 2) as usize] = Some(decoded); + } result.push(InstructionSegment { base_addr: seg.base_addr, - instructions, + end_addr: seg.base_addr + (num_slots as u64) * 2, + entries, }); } Ok(Self { segments: result }) } - pub fn get(&self, pc: u64) -> Option<&Instruction> { + pub fn get(&self, pc: u64) -> Option<&DecodedInstruction> { // Fast path: most programs have a single executable segment let segment = if self.segments.len() == 1 { let seg = &self.segments[0]; - if pc < seg.base_addr || pc >= seg.end_addr() { + if pc < seg.base_addr || pc >= seg.end_addr { return None; } seg @@ -214,7 +247,7 @@ impl InstructionCache { .binary_search_by(|seg| { if pc < seg.base_addr { Ordering::Greater - } else if pc >= seg.end_addr() { + } else if pc >= seg.end_addr { Ordering::Less } else { Ordering::Equal @@ -225,22 +258,27 @@ impl InstructionCache { }; let byte_offset = pc - segment.base_addr; - if !byte_offset.is_multiple_of(4) { + if !byte_offset.is_multiple_of(2) { return None; } - segment.instructions.get((byte_offset / 4) as usize) + segment.entries.get((byte_offset / 2) as usize)?.as_ref() } pub fn instruction_count(&self) -> usize { - self.segments.iter().map(|s| s.instructions.len()).sum() + self.segments + .iter() + .map(|s| s.entries.iter().filter(|e| e.is_some()).count()) + .sum() } - pub fn into_instruction_map(self) -> U64HashMap { + pub fn into_instruction_map(self) -> U64HashMap { let mut map = U64HashMap::default(); for segment in self.segments { - for (i, instruction) in segment.instructions.into_iter().enumerate() { - let addr = segment.base_addr + (i as u64 * 4); - map.insert(addr, instruction); + let base_addr = segment.base_addr; + for (i, slot) in segment.entries.into_iter().enumerate() { + if let Some(decoded) = slot { + map.insert(base_addr + (i as u64) * 2, decoded); + } } } map 
diff --git a/executor/src/vm/instruction/decoding.rs b/executor/src/vm/instruction/decoding.rs index 10998f8b7..f35eb808b 100644 --- a/executor/src/vm/instruction/decoding.rs +++ b/executor/src/vm/instruction/decoding.rs @@ -265,6 +265,104 @@ impl Instruction { } } +/// A decoded instruction together with the number of bytes it occupies in the +/// instruction stream (`2` for an RV64C compressed instruction, `4` otherwise). +/// +/// Compressed instructions are expanded to their equivalent base `Instruction` +/// at decode time, so the rest of the pipeline (execution, constraints) only ever +/// sees base instructions; `len` is what distinguishes them and drives `pc` advance +/// and the prover's `c_type` flag. +#[derive(Debug, Clone, Copy)] +pub struct DecodedInstruction { + pub instr: Instruction, + pub len: u8, +} + +/// Decode a single instruction from a little-endian instruction word. +/// +/// Only the low 16 bits are inspected to determine the length: if they do not end +/// in `0b11` the instruction is a 2-byte RV64C compressed instruction and is +/// expanded via [`decompress`](super::decompress::decompress); otherwise the full +/// 32 bits are parsed as a base instruction. Callers that only have 16 bits +/// available (e.g. at a segment boundary) must guarantee the high 16 bits are valid +/// whenever the low half indicates a 4-byte instruction. +pub fn decode_instruction(word: u32) -> Result { + let first_half = word as u16; + if super::decompress::instr_len(first_half) == 2 { + Ok(DecodedInstruction { + instr: super::decompress::decompress(first_half)?, + len: 2, + }) + } else { + Ok(DecodedInstruction { + instr: Instruction::parse(word)?, + len: 4, + }) + } +} + +/// Decode an executable segment, given as the little-endian 4-byte memory words it +/// was loaded from, into the sequence of `(byte_offset, instruction)` it contains. 
+/// +/// The words are reinterpreted as a halfword stream and walked by the actual +/// instruction width, so a segment may mix 2-byte (compressed) and 4-byte +/// instructions, and a 4-byte instruction may start at a 2-byte (non-4) offset. A +/// final dangling halfword (a 4-byte instruction whose second half lies past the +/// segment) is treated as a non-instruction tail and dropped. +/// +/// This is the single decode entry point shared by the executor's instruction +/// cache and the prover/verifier's DECODE generation, so the two cannot disagree +/// on instruction boundaries or `c_type`. +pub fn decode_segment_words( + words: &[u32], +) -> Result, InstructionError> { + let halfwords: Vec = words + .iter() + .flat_map(|w| [*w as u16, (*w >> 16) as u16]) + .collect(); + + let mut out = Vec::new(); + let mut i = 0usize; + while i < halfwords.len() { + let lo = halfwords[i]; + // A zero halfword is `c.unimp` / alignment padding (the ELF loader also + // zero-fills the high half of a final word when the segment's byte length + // is not a multiple of 4). It is never a real instruction start, so skip it + // instead of failing the whole segment decode. If such a slot is ever the + // target of a jump, the on-demand fetch path will surface the error. + if lo == 0 { + i += 1; + continue; + } + let byte_offset = (i as u64) * 2; + if super::decompress::instr_len(lo) == 2 { + out.push(( + byte_offset, + DecodedInstruction { + instr: super::decompress::decompress(lo)?, + len: 2, + }, + )); + i += 1; + } else { + // 4-byte instruction: needs the following halfword. 
+ let Some(&hi) = halfwords.get(i + 1) else { + break; // dangling trailing halfword, not an instruction + }; + let word = ((hi as u32) << 16) | (lo as u32); + out.push(( + byte_offset, + DecodedInstruction { + instr: Instruction::parse(word)?, + len: 4, + }, + )); + i += 2; + } + } + Ok(out) +} + fn parse_opcode(instruction: u32) -> Result { let opcode = instruction & OPCODE_MASK; Opcode::try_from(opcode) @@ -631,4 +729,8 @@ pub enum InstructionError { InvalidSystemInstruction(u32), #[error("Invalid W32 instruction: operation not supported")] InvalidW32Instruction, + #[error("Illegal compressed instruction encoding: {0:#06x}")] + IllegalCompressed(u16), + #[error("Reserved compressed instruction encoding: {0:#06x}")] + ReservedCompressed(u16), } diff --git a/executor/src/vm/instruction/decompress.rs b/executor/src/vm/instruction/decompress.rs new file mode 100644 index 000000000..56d2d943e --- /dev/null +++ b/executor/src/vm/instruction/decompress.rs @@ -0,0 +1,426 @@ +//! RV64C (compressed instruction) decompression. +//! +//! The "C" extension encodes common instructions in 16 bits. Every compressed +//! instruction is, by construction, an alias for exactly one 32-bit base +//! instruction. We therefore expand each 16-bit encoding to its equivalent base +//! [`Instruction`] at decode time: the rest of the pipeline (execution, +//! constraints, prover) only ever sees base instructions, and the 2-byte width is +//! tracked separately (see [`super::decoding::DecodedInstruction`]) to drive `pc` +//! advance and the prover's `c_type` flag. +//! +//! Scope: the integer RV64C subset. Floating-point compressed encodings +//! (`C.FLD/C.FSD/C.FLDSP/C.FSDSP`) and the RV32-only `C.JAL` are rejected, since +//! Lambda VM is `rv64im` (no F/D). Reserved/illegal encodings (including the +//! all-zero halfword) return an error, matching how 32-bit decode rejects unknown +//! encodings. +//! +//! Immediate bit-scrambling follows the "RVC instruction set listings" tables in +//! 
+//! the RISC-V unprivileged ISA spec; each scramble is annotated with the +//! `inst[..]` source bits -> `imm[..]` destination bits. + +use super::decoding::{ArithOp, Comparison, Instruction, InstructionError, LoadStoreWidth}; + +/// Length in bytes of the instruction starting with `first_half`. +/// +/// Per the RISC-V encoding, any halfword whose low two bits are not `0b11` is a +/// 2-byte compressed instruction; everything else is a 4-byte base instruction. +pub fn instr_len(first_half: u16) -> u8 { + if first_half & 0b11 == 0b11 { 4 } else { 2 } +} + +/// Sign-extend the low `bits` of `value` to a 32-bit signed integer. +fn sign_extend(value: u32, bits: u32) -> i32 { + let shift = 32 - bits; + ((value << shift) as i32) >> shift +} + +/// Expand a 16-bit RV64C instruction into its equivalent base [`Instruction`]. +/// +/// Returns [`InstructionError::IllegalCompressed`] for the all-zero/illegal +/// encodings and excluded floating-point encodings, and +/// [`InstructionError::ReservedCompressed`] for encodings the spec marks reserved. +pub fn decompress(half: u16) -> Result { + let h = half as u32; + let funct3 = (h >> 13) & 0b111; + match h & 0b11 { + 0b00 => decompress_q0(h, funct3, half), + 0b01 => decompress_q1(h, funct3, half), + 0b10 => decompress_q2(h, funct3, half), + // 0b11 is not a compressed instruction. + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Quadrant 0 (`op = 0b00`): stack-pointer-relative `ADDI4SPN` and register-based +/// loads/stores whose registers are restricted to `x8..x15`. 
+fn decompress_q0(h: u32, funct3: u32, half: u16) -> Result { + let rs1p = ((h >> 7) & 0x7) + 8; // inst[9:7] -> x8..x15 + let rdp_rs2p = ((h >> 2) & 0x7) + 8; // inst[4:2] -> x8..x15 + match funct3 { + // C.ADDI4SPN: addi rd', x2, nzuimm + 0b000 => { + let nzuimm = ((h >> 7) & 0x30) // inst[12:11] -> imm[5:4] + | ((h >> 1) & 0x3C0) // inst[10:7] -> imm[9:6] + | ((h >> 4) & 0x4) // inst[6] -> imm[2] + | ((h >> 2) & 0x8); // inst[5] -> imm[3] + // nzuimm == 0 is reserved; this also catches the all-zero illegal word. + if nzuimm == 0 { + return Err(InstructionError::IllegalCompressed(half)); + } + Ok(Instruction::ArithImm { + dst: rdp_rs2p, + src: 2, + imm: nzuimm as i32, + op: ArithOp::Add, + }) + } + // C.LW: lw rd', uimm(rs1') + 0b010 => Ok(Instruction::Load { + dst: rdp_rs2p, + offset: lw_uimm(h) as i32, + base: rs1p, + width: LoadStoreWidth::Word, + }), + // C.LD: ld rd', uimm(rs1') + 0b011 => Ok(Instruction::Load { + dst: rdp_rs2p, + offset: ld_uimm(h) as i32, + base: rs1p, + width: LoadStoreWidth::DoubleWord, + }), + // C.SW: sw rs2', uimm(rs1') + 0b110 => Ok(Instruction::Store { + src: rdp_rs2p, + offset: lw_uimm(h) as i32, + base: rs1p, + width: LoadStoreWidth::Word, + }), + // C.SD: sd rs2', uimm(rs1') + 0b111 => Ok(Instruction::Store { + src: rdp_rs2p, + offset: ld_uimm(h) as i32, + base: rs1p, + width: LoadStoreWidth::DoubleWord, + }), + // 0b100 is reserved; 0b001/0b101 are C.FLD/C.FSD (float, excluded). + 0b100 => Err(InstructionError::ReservedCompressed(half)), + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Quadrant 1 (`op = 0b01`): immediate ALU ops, jumps and branches. +fn decompress_q1(h: u32, funct3: u32, half: u16) -> Result { + match funct3 { + // C.NOP / C.ADDI: addi rd, rd, nzimm (rd == 0 is C.NOP / a HINT) + 0b000 => { + let rd = (h >> 7) & 0x1F; + Ok(Instruction::ArithImm { + dst: rd, + src: rd, + imm: imm6(h), + op: ArithOp::Add, + }) + } + // C.ADDIW: addiw rd, rd, imm (rd == 0 reserved). 
RV32 would be C.JAL (excluded). + 0b001 => { + let rd = (h >> 7) & 0x1F; + if rd == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + Ok(Instruction::ArithImmW { + dst: rd, + src: rd, + imm: imm6(h), + op: ArithOp::Add, + }) + } + // C.LI: addi rd, x0, imm (rd == 0 HINT) + 0b010 => { + let rd = (h >> 7) & 0x1F; + Ok(Instruction::ArithImm { + dst: rd, + src: 0, + imm: imm6(h), + op: ArithOp::Add, + }) + } + // C.LUI / C.ADDI16SP (rd == 2) + 0b011 => { + let rd = (h >> 7) & 0x1F; + if rd == 2 { + // C.ADDI16SP: addi x2, x2, nzimm + let field = ((h >> 3) & 0x200) // inst[12] -> imm[9] + | ((h >> 2) & 0x10) // inst[6] -> imm[4] + | ((h << 1) & 0x40) // inst[5] -> imm[6] + | ((h << 4) & 0x180) // inst[4:3] -> imm[8:7] + | ((h << 3) & 0x20); // inst[2] -> imm[5] + if field == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + Ok(Instruction::ArithImm { + dst: 2, + src: 2, + imm: sign_extend(field, 10), + op: ArithOp::Add, + }) + } else { + // C.LUI: lui rd, nzimm (rd == 0 HINT) + let nzimm6 = ((h >> 7) & 0x20) | ((h >> 2) & 0x1F); + if nzimm6 == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + // LUI places imm[17:12] sign-extended; the base LUI imm field holds + // the value already shifted into bits [31:12]. 
+ Ok(Instruction::LoadUpperImm { + dst: rd, + imm: (sign_extend(nzimm6, 6) << 12) as u32, + }) + } + } + // MISC-ALU + 0b100 => decompress_q1_alu(h, half), + // C.J: jal x0, offset + 0b101 => Ok(Instruction::JumpAndLink { + dst: 0, + offset: cj_offset(h), + }), + // C.BEQZ: beq rs1', x0, offset + 0b110 => Ok(Instruction::Branch { + src1: ((h >> 7) & 0x7) + 8, + src2: 0, + cond: Comparison::Equal, + offset: cb_offset(h), + }), + // C.BNEZ: bne rs1', x0, offset + 0b111 => Ok(Instruction::Branch { + src1: ((h >> 7) & 0x7) + 8, + src2: 0, + cond: Comparison::NotEqual, + offset: cb_offset(h), + }), + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Quadrant 1, `funct3 = 0b100`: register/immediate ALU on the `x8..x15` subset. +fn decompress_q1_alu(h: u32, half: u16) -> Result { + let rdp = ((h >> 7) & 0x7) + 8; // rs1'/rd' + match (h >> 10) & 0x3 { + // C.SRLI: srli rd', rd', shamt + 0b00 => Ok(Instruction::ArithImm { + dst: rdp, + src: rdp, + imm: shamt6(h) as i32, + op: ArithOp::ShiftRightLogical, + }), + // C.SRAI: srai rd', rd', shamt + 0b01 => Ok(Instruction::ArithImm { + dst: rdp, + src: rdp, + imm: shamt6(h) as i32, + op: ArithOp::ShiftRightArith, + }), + // C.ANDI: andi rd', rd', imm + 0b10 => Ok(Instruction::ArithImm { + dst: rdp, + src: rdp, + imm: imm6(h), + op: ArithOp::And, + }), + // Register-register ops, discriminated by inst[12] and inst[6:5]. + 0b11 => { + let rs2p = ((h >> 2) & 0x7) + 8; + let op = match ((h >> 12) & 1, (h >> 5) & 0x3) { + (0, 0b00) => return arith(rdp, rs2p, ArithOp::Sub), // C.SUB + (0, 0b01) => return arith(rdp, rs2p, ArithOp::Xor), // C.XOR + (0, 0b10) => return arith(rdp, rs2p, ArithOp::Or), // C.OR + (0, 0b11) => return arith(rdp, rs2p, ArithOp::And), // C.AND + (1, 0b00) => ArithOp::Sub, // C.SUBW + (1, 0b01) => ArithOp::Add, // C.ADDW + // (1, 0b10) and (1, 0b11) are reserved. 
+ _ => return Err(InstructionError::ReservedCompressed(half)), + }; + Ok(Instruction::ArithW { + dst: rdp, + src1: rdp, + src2: rs2p, + op, + }) + } + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Quadrant 2 (`op = 0b10`): `SLLI`, stack-pointer loads/stores and the +/// `JR/MV/EBREAK/JALR/ADD` family. +fn decompress_q2(h: u32, funct3: u32, half: u16) -> Result { + match funct3 { + // C.SLLI: slli rd, rd, shamt (rd == 0 HINT) + 0b000 => { + let rd = (h >> 7) & 0x1F; + Ok(Instruction::ArithImm { + dst: rd, + src: rd, + imm: shamt6(h) as i32, + op: ArithOp::ShiftLeftLogical, + }) + } + // C.LWSP: lw rd, uimm(x2) (rd == 0 reserved) + 0b010 => { + let rd = (h >> 7) & 0x1F; + if rd == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + let uimm = ((h >> 7) & 0x20) // inst[12] -> imm[5] + | ((h >> 2) & 0x1C) // inst[6:4] -> imm[4:2] + | ((h << 4) & 0xC0); // inst[3:2] -> imm[7:6] + Ok(Instruction::Load { + dst: rd, + offset: uimm as i32, + base: 2, + width: LoadStoreWidth::Word, + }) + } + // C.LDSP: ld rd, uimm(x2) (rd == 0 reserved) + 0b011 => { + let rd = (h >> 7) & 0x1F; + if rd == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + let uimm = ((h >> 7) & 0x20) // inst[12] -> imm[5] + | ((h >> 2) & 0x18) // inst[6:5] -> imm[4:3] + | ((h << 4) & 0x1C0); // inst[4:2] -> imm[8:6] + Ok(Instruction::Load { + dst: rd, + offset: uimm as i32, + base: 2, + width: LoadStoreWidth::DoubleWord, + }) + } + // C.JR / C.MV / C.EBREAK / C.JALR / C.ADD + 0b100 => decompress_q2_cr(h, half), + // C.SWSP: sw rs2, uimm(x2) + 0b110 => { + let uimm = ((h >> 7) & 0x3C) // inst[12:9] -> imm[5:2] + | ((h >> 1) & 0xC0); // inst[8:7] -> imm[7:6] + Ok(Instruction::Store { + src: (h >> 2) & 0x1F, + offset: uimm as i32, + base: 2, + width: LoadStoreWidth::Word, + }) + } + // C.SDSP: sd rs2, uimm(x2) + 0b111 => { + let uimm = ((h >> 7) & 0x38) // inst[12:10] -> imm[5:3] + | ((h >> 1) & 0x1C0); // inst[9:7] -> imm[8:6] + Ok(Instruction::Store { + 
src: (h >> 2) & 0x1F, + offset: uimm as i32, + base: 2, + width: LoadStoreWidth::DoubleWord, + }) + } + // 0b001/0b101 are C.FLDSP/C.FSDSP (float, excluded). + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Quadrant 2, `funct3 = 0b100`: the `CR`-format `JR/MV/EBREAK/JALR/ADD` group. +fn decompress_q2_cr(h: u32, half: u16) -> Result { + let rd_rs1 = (h >> 7) & 0x1F; + let rs2 = (h >> 2) & 0x1F; + match ((h >> 12) & 1, rs2) { + // C.JR: jalr x0, 0(rs1) (rs1 == 0 reserved) + (0, 0) => { + if rd_rs1 == 0 { + return Err(InstructionError::ReservedCompressed(half)); + } + Ok(Instruction::JumpAndLinkRegister { + base: rd_rs1, + dst: 0, + offset: 0, + }) + } + // C.MV: add rd, x0, rs2 (rd == 0 HINT) + (0, _) => Ok(Instruction::Arith { + dst: rd_rs1, + src1: 0, + src2: rs2, + op: ArithOp::Add, + }), + // C.EBREAK (rs1 == 0) or C.JALR: jalr x1, 0(rs1) + (1, 0) => { + if rd_rs1 == 0 { + Ok(Instruction::EcallEbreak) + } else { + Ok(Instruction::JumpAndLinkRegister { + base: rd_rs1, + dst: 1, + offset: 0, + }) + } + } + // C.ADD: add rd, rd, rs2 (rd == 0 HINT) + (1, _) => Ok(Instruction::Arith { + dst: rd_rs1, + src1: rd_rs1, + src2: rs2, + op: ArithOp::Add, + }), + _ => Err(InstructionError::IllegalCompressed(half)), + } +} + +/// Build a register-register `Arith` instruction (`dst = rs1' = rdp`). +fn arith(rdp: u32, rs2p: u32, op: ArithOp) -> Result { + Ok(Instruction::Arith { + dst: rdp, + src1: rdp, + src2: rs2p, + op, + }) +} + +/// 6-bit signed immediate (`CI`): inst[12] -> imm[5], inst[6:2] -> imm[4:0]. +fn imm6(h: u32) -> i32 { + sign_extend(((h >> 7) & 0x20) | ((h >> 2) & 0x1F), 6) +} + +/// 6-bit shift amount (RV64: full 6 bits, inst[12] -> shamt[5], inst[6:2] -> shamt[4:0]). +fn shamt6(h: u32) -> u32 { + ((h >> 7) & 0x20) | ((h >> 2) & 0x1F) +} + +/// `C.LW`/`C.SW` unsigned offset: inst[12:10] -> imm[5:3], inst[6] -> imm[2], inst[5] -> imm[6]. 
+fn lw_uimm(h: u32) -> u32 { + ((h >> 7) & 0x38) | ((h >> 4) & 0x4) | ((h << 1) & 0x40) +} + +/// `C.LD`/`C.SD` unsigned offset: inst[12:10] -> imm[5:3], inst[6:5] -> imm[7:6]. +fn ld_uimm(h: u32) -> u32 { + ((h >> 7) & 0x38) | ((h << 1) & 0xC0) +} + +/// `C.J` jump offset (`CJ`): imm[11|4|9:8|10|6|7|3:1|5] = inst[12|11|10:9|8|7|6|5:3|2]. +fn cj_offset(h: u32) -> i32 { + let imm = ((h >> 1) & 0x800) // inst[12] -> imm[11] + | ((h >> 7) & 0x10) // inst[11] -> imm[4] + | ((h >> 1) & 0x300) // inst[10:9] -> imm[9:8] + | ((h << 2) & 0x400) // inst[8] -> imm[10] + | ((h >> 1) & 0x40) // inst[7] -> imm[6] + | ((h << 1) & 0x80) // inst[6] -> imm[7] + | ((h >> 2) & 0xE) // inst[5:3] -> imm[3:1] + | ((h << 3) & 0x20); // inst[2] -> imm[5] + sign_extend(imm, 12) +} + +/// `C.BEQZ`/`C.BNEZ` branch offset (`CB`): imm[8|4:3|7:6|2:1|5] = inst[12|11:10|6:5|4:3|2]. +fn cb_offset(h: u32) -> i32 { + let imm = ((h >> 4) & 0x100) // inst[12] -> imm[8] + | ((h >> 7) & 0x18) // inst[11:10] -> imm[4:3] + | ((h << 1) & 0xC0) // inst[6:5] -> imm[7:6] + | ((h >> 2) & 0x6) // inst[4:3] -> imm[2:1] + | ((h << 3) & 0x20); // inst[2] -> imm[5] + sign_extend(imm, 9) +} diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index d9b0e1c8d..c278b1bff 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -5,8 +5,6 @@ use crate::vm::{ registers::Registers, }; -const REGULAR_PC_UPDATE: u64 = 4; - pub enum SyscallNumbers { // Placeholder discriminant. The actual syscall value is KECCAK_SYSCALL_NUMBER. KeccakPermute = 0, @@ -37,14 +35,18 @@ impl TryFrom for SyscallNumbers { } impl Instruction { - /// Runs the given instruction and returns its execution log + /// Runs the given instruction and returns its execution log. + /// + /// `instr_len` is the encoded width of this instruction in bytes (2 for an + /// RV64C compressed instruction, 4 otherwise) and drives the `pc` advance. 
pub fn run( self, pc: &mut u64, registers: &mut Registers, memory: &mut Memory, + instr_len: u8, ) -> Result { - let log = self.execute(*pc, registers, memory)?; + let log = self.execute(*pc, registers, memory, instr_len)?; *pc = log.next_pc; Ok(log) } @@ -55,6 +57,7 @@ impl Instruction { pc: u64, registers: &mut Registers, memory: &mut Memory, + instr_len: u8, ) -> Result { Ok(match self { Instruction::ArithImm { dst, src, imm, op } => { @@ -66,7 +69,7 @@ impl Instruction { registers.write(dst, res)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: op1 as u64, src2_val: 0, dst_val: res, @@ -87,7 +90,7 @@ impl Instruction { registers.write(dst, res)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: raw_src, src2_val: 0, dst_val: res, @@ -96,23 +99,23 @@ impl Instruction { Instruction::JumpAndLinkRegister { dst, base, offset } => { let base_value = registers.read(base)?; let new_pc = (((base_value as i64).wrapping_add(offset as i64)) & !1) as u64; - registers.write(dst, pc.wrapping_add(REGULAR_PC_UPDATE))?; + registers.write(dst, pc.wrapping_add(instr_len as u64))?; Log { current_pc: pc, next_pc: new_pc, src1_val: base_value, src2_val: 0, - dst_val: pc.wrapping_add(REGULAR_PC_UPDATE), + dst_val: pc.wrapping_add(instr_len as u64), } } Instruction::JumpAndLink { dst, offset } => { - registers.write(dst, pc.wrapping_add(REGULAR_PC_UPDATE))?; + registers.write(dst, pc.wrapping_add(instr_len as u64))?; Log { current_pc: pc, next_pc: (pc as i64).wrapping_add(offset as i64) as u64, src1_val: 0, src2_val: 0, - dst_val: pc.wrapping_add(REGULAR_PC_UPDATE), + dst_val: pc.wrapping_add(instr_len as u64), } } Instruction::Store { @@ -151,7 +154,7 @@ impl Instruction { }; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: base, src2_val: read_value, 
dst_val: 0, @@ -184,7 +187,7 @@ impl Instruction { registers.write(dst, value)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: base, src2_val: 0, dst_val: value, @@ -200,7 +203,7 @@ impl Instruction { let new_pc = if cond.apply(a, b) { (pc as i64).wrapping_add(offset as i64) as u64 } else { - pc.wrapping_add(REGULAR_PC_UPDATE) + pc.wrapping_add(instr_len as u64) }; Log { current_pc: pc, @@ -216,7 +219,7 @@ impl Instruction { registers.write(dst, value)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: 0, src2_val: 0, dst_val: value, @@ -228,7 +231,7 @@ impl Instruction { registers.write(dst, value)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: 0, src2_val: 0, dst_val: value, @@ -246,7 +249,7 @@ impl Instruction { registers.write(dst, res)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: a, src2_val: b, dst_val: res, @@ -270,7 +273,7 @@ impl Instruction { registers.write(dst, res)?; Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: raw_src1, src2_val: raw_src2, dst_val: res, @@ -285,7 +288,7 @@ impl Instruction { // Todo: CSR are currently no-ops Log { current_pc: pc, - next_pc: pc.wrapping_add(REGULAR_PC_UPDATE), + next_pc: pc.wrapping_add(instr_len as u64), src1_val: 0, src2_val: 0, dst_val: 0, @@ -372,7 +375,7 @@ impl Instruction { } Log { current_pc: pc, - next_pc: pc + REGULAR_PC_UPDATE, + next_pc: pc.wrapping_add(instr_len as u64), src1_val: syscall_number_raw, src2_val, dst_val, @@ -382,7 +385,7 @@ impl Instruction { // FENCE is a memory barrier - in single-threaded, in-order execution it's a no-op Log { current_pc: pc, - next_pc: pc + REGULAR_PC_UPDATE, + next_pc: 
pc.wrapping_add(instr_len as u64), src1_val: 0, src2_val: 0, dst_val: 0, diff --git a/executor/src/vm/instruction/mod.rs b/executor/src/vm/instruction/mod.rs index fba21cf72..0d178f02d 100644 --- a/executor/src/vm/instruction/mod.rs +++ b/executor/src/vm/instruction/mod.rs @@ -1,2 +1,3 @@ pub mod decoding; +pub mod decompress; pub mod execution; diff --git a/executor/tests/asm.rs b/executor/tests/asm.rs index e9c9c08dd..56a39052f 100644 --- a/executor/tests/asm.rs +++ b/executor/tests/asm.rs @@ -37,6 +37,34 @@ fn test_private_input_memory_mapped() { assert_eq!(result.return_values.memory_values, input[4..12].to_vec()); } +/// RV64C: a self-checking program built almost entirely from compressed (2-byte) +/// instructions. `a0` (the exit code) is 0 only if every compressed-instruction +/// check produced the expected result. We also assert the ELF really contains +/// 2-byte instructions, so the test cannot silently pass on a non-compressed build. +#[test] +fn test_compressed() { + let elf_data = std::fs::read("./program_artifacts/asm/test_compressed.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + let executor = Executor::new(&program, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + + assert_eq!( + result.return_values.register_values.0, 0, + "compressed self-check failed (a0 = {})", + result.return_values.register_values.0 + ); + + let num_compressed = result + .instructions + .values() + .filter(|decoded| decoded.len == 2) + .count(); + assert!( + num_compressed > 0, + "expected 2-byte (compressed) instructions in test_compressed.elf" + ); +} + #[test] fn test_basic_program() { run_program("./program_artifacts/asm/basic_program.elf"); @@ -472,21 +500,28 @@ fn test_misalign_sd() { run_program("./program_artifacts/asm/misalign_sd.elf"); } +/// With the RV64C "C" extension a 2-byte-aligned PC is a valid instruction address, +/// so jumping to pc=2 (which is not 4-aligned) must NOT raise 
+/// `InstructionAddressMisaligned` — only an odd PC is misaligned, and that is +/// unreachable via jalr/jal/branch (their targets are always even). `misaligned_pc.s` +/// jumps to pc=2 where there is no instruction, so the executor fails to decode +/// rather than trapping on alignment. This guards against the fetch alignment check +/// regressing back to 4-byte (which would break compressed instructions). #[test] -fn test_misaligned_pc_traps() { +fn test_2byte_aligned_pc_not_misaligned() { let elf_data = std::fs::read("./program_artifacts/asm/misaligned_pc.elf").unwrap(); let program = Elf::load(&elf_data).unwrap(); let mut executor = Executor::new(&program, vec![]).expect("Failed to create executor"); let err = loop { match executor.resume() { Ok(Some(_)) => continue, - Ok(None) => panic!("expected misaligned PC trap, program halted normally"), + Ok(None) => panic!("expected a decode error jumping to pc=2, program halted normally"), Err(e) => break e, } }; assert!( - matches!(err, ExecutorError::InstructionAddressMisaligned(2)), - "expected InstructionAddressMisaligned(2), got {:?}", + !matches!(err, ExecutorError::InstructionAddressMisaligned(_)), + "pc=2 is 2-byte aligned and must not trap as misaligned with the C extension; got {:?}", err ); } diff --git a/executor/tests/flamegraph.rs b/executor/tests/flamegraph.rs index d064bdb7d..3490b167d 100644 --- a/executor/tests/flamegraph.rs +++ b/executor/tests/flamegraph.rs @@ -2,7 +2,9 @@ use executor::{ elf::{FunctionSymbol, SymbolTable}, flamegraph::FlamegraphGenerator, vm::{ - execution::InstructionCache, instruction::decoding::Instruction, logs::Log, + execution::InstructionCache, + instruction::decoding::{DecodedInstruction, Instruction}, + logs::Log, memory::U64HashMap, }, }; @@ -23,7 +25,10 @@ fn make_symbol_table(symbols: Vec<(&str, u64, u64)>) -> SymbolTable { /// Helper to create an instruction cache fn make_instructions(instructions: Vec<(u64, Instruction)>) -> InstructionCache { - let map: U64HashMap = 
instructions.into_iter().collect(); + let map: U64HashMap = instructions + .into_iter() + .map(|(addr, instr)| (addr, DecodedInstruction { instr, len: 4 })) + .collect(); InstructionCache::from_map(&map) } diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index 5f1a759b1..5b132c502 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -56,7 +56,10 @@ use super::dvrm::DvrmOperation; use super::types::{BusId, DecodeEntry, FE, GoldilocksExtension, GoldilocksField}; use crate::Error; use executor::vm::{ - instruction::{decoding::Instruction, execution::SyscallNumbers}, + instruction::{ + decoding::{DecodedInstruction, Instruction}, + execution::SyscallNumbers, + }, logs::Log, memory::U64HashMap, }; @@ -698,8 +701,13 @@ impl CpuOperation { /// /// This creates the DecodeEntry internally. Use `from_log` with a pre-built /// DecodeEntry when possible to avoid redundant decoding. - pub fn from_log_and_instruction(log: &Log, timestamp: u64, instruction: Instruction) -> Self { - let decode = DecodeEntry::from_instruction(log.current_pc, instruction); + pub fn from_log_and_instruction( + log: &Log, + timestamp: u64, + instruction: Instruction, + c_type: bool, + ) -> Self { + let decode = DecodeEntry::from_instruction(log.current_pc, instruction, c_type); Self::from_log(log, timestamp, decode) } @@ -757,11 +765,13 @@ impl CpuOperation { } // ECALL: Per spec constraint CO69, next_pc = pc + instr_size for all instructions, - // including ECALL. The CPU transition constraint enforces next_pc = pc + 4 on every - // row, so the trace must satisfy this even though the executor sets next_pc=0 to - // signal halt. The HALT table separately proves program termination via the ECALL bus. + // including ECALL. The CPU transition constraint enforces next_pc = pc + (4 - 2*c_type) + // on every row, so the trace must satisfy this even though the executor sets next_pc=0 + // to signal halt. 
The HALT table separately proves program termination via the ECALL bus. + // (RV64C has no compressed ECALL, so c_type is false in practice, but we match the + // constraint exactly rather than relying on that.) if self.decode.op_ecall { - self.next_pc = self.decode.pc + 4; + self.next_pc = self.decode.pc + (4 - 2 * self.decode.c_type as u64); } } } @@ -918,17 +928,18 @@ pub fn generate_cpu_trace( /// Panics if logs.len() is not a power of 2 >= 4. pub fn generate_cpu_trace_from_logs( logs: &[Log], - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> Result, Error> { let mut operations = Vec::with_capacity(logs.len()); for (i, log) in logs.iter().enumerate() { - let instruction = *instructions + let decoded = *instructions .get(&log.current_pc) .ok_or(Error::MissingInstruction(log.current_pc))?; operations.push(CpuOperation::from_log_and_instruction( log, (i as u64) * 4 + 4, - instruction, + decoded.instr, + decoded.len == 2, )); } Ok(generate_cpu_trace(&operations)) @@ -947,17 +958,18 @@ pub fn collect_bitwise_ops(operations: &[CpuOperation]) -> Vec, + instructions: &U64HashMap, ) -> Result, Error> { let mut operations = Vec::with_capacity(logs.len()); for (i, log) in logs.iter().enumerate() { - let instruction = *instructions + let decoded = *instructions .get(&log.current_pc) .ok_or(Error::MissingInstruction(log.current_pc))?; operations.push(CpuOperation::from_log_and_instruction( log, (i as u64) * 4 + 4, - instruction, + decoded.instr, + decoded.len == 2, )); } Ok(collect_bitwise_ops(&operations)) diff --git a/prover/src/tables/decode.rs b/prover/src/tables/decode.rs index 4805ffc42..dd2eb174d 100644 --- a/prover/src/tables/decode.rs +++ b/prover/src/tables/decode.rs @@ -36,7 +36,9 @@ //! 
- **Receiver**: DECODE bus - receives lookups from CPU table use executor::elf::Elf; -use executor::vm::instruction::decoding::{Instruction, InstructionError}; +use executor::vm::instruction::decoding::{ + DecodedInstruction, InstructionError, decode_segment_words, +}; use executor::vm::memory::U64HashMap; use math::fft::bit_reversing::in_place_bit_reverse_permute; use math::polynomial::Polynomial; @@ -104,16 +106,16 @@ pub type PcToRow = HashMap; /// Empty rows use pc=7 with EBREAK=1, which makes them unprovable /// since CPU asserts EBREAK=0. pub fn generate_decode_trace( - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> (TraceTable, PcToRow) { // Build entries and PC-to-row mapping let mut pc_to_row = HashMap::with_capacity(instructions.len()); let entries: Vec<_> = instructions .iter() .enumerate() - .map(|(row_idx, (&pc, &instr))| { + .map(|(row_idx, (&pc, &decoded))| { pc_to_row.insert(pc, row_idx); - DecodeEntry::from_instruction(pc, instr) + DecodeEntry::from_instruction(pc, decoded.instr, decoded.len == 2) }) .collect(); @@ -260,7 +262,7 @@ pub fn bus_interactions() -> Vec { /// ## Returns /// The Merkle root commitment over the LDE of precomputed columns. pub fn compute_precomputed_commitment( - instructions: &U64HashMap, + instructions: &U64HashMap, options: &ProofOptions, ) -> Commitment { // Step 1: Generate trace (MU=0, we only need precomputed columns) @@ -319,12 +321,16 @@ pub fn compute_precomputed_commitment( /// /// This is the minimal computation needed for verifier to compute /// the DECODE commitment from the program. 
-pub fn instructions_from_elf(elf: &Elf) -> Result, InstructionError> { +pub fn instructions_from_elf( + elf: &Elf, +) -> Result, InstructionError> { let mut map = U64HashMap::default(); for seg in elf.data.iter().filter(|s| s.is_executable) { - for (i, &word) in seg.values.iter().enumerate() { - let pc = seg.base_addr + (i as u64 * 4); - map.insert(pc, Instruction::parse(word)?); + // Walk the segment as a variable-length instruction stream (the executor's + // `InstructionCache` uses the same `decode_segment_words`, so the prover, + // verifier and executor cannot disagree on instruction boundaries or c_type). + for (byte_offset, decoded) in decode_segment_words(&seg.values)? { + map.insert(seg.base_addr + byte_offset, decoded); } } Ok(map) @@ -373,11 +379,14 @@ pub fn tables_from_elf(elf: &Elf) -> Result { // Process all ELF segments for DECODE (only executable segments) for segment in &elf.data { if segment.is_executable { - for (i, &word) in segment.values.iter().enumerate() { - let addr = segment.base_addr + (i as u64 * 4); - let instruction = Instruction::parse(word)?; + for (byte_offset, decoded) in decode_segment_words(&segment.values)? 
{ + let addr = segment.base_addr + byte_offset; pc_to_row.insert(addr, decode_entries.len()); - decode_entries.push(DecodeEntry::from_instruction(addr, instruction)); + decode_entries.push(DecodeEntry::from_instruction( + addr, + decoded.instr, + decoded.len == 2, + )); } } } diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index 76535484b..32fd7c217 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -30,7 +30,7 @@ use std::collections::HashMap; use std::collections::HashSet; use executor::elf::Elf; -use executor::vm::instruction::decoding::Instruction; +use executor::vm::instruction::decoding::DecodedInstruction; use executor::vm::logs::Log; use executor::vm::memory::U64HashMap; #[cfg(feature = "disk-spill")] @@ -318,7 +318,7 @@ fn pack_register_value(value: u64) -> [u64; 8] { /// Returns a vector of CpuOperation, one per log entry. fn collect_cpu_ops( logs: &[Log], - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> Result, Error> { let mut cpu_ops = Vec::with_capacity(logs.len()); @@ -329,12 +329,13 @@ fn collect_cpu_ops( // matching the REGISTER table's initial PC token at timestamp 1 (per spec/memory.typ). 
for (i, log) in logs.iter().enumerate() { let timestamp = (i as u64) * 4 + 4; - let instruction = instructions + let decoded = instructions .get(&log.current_pc) .copied() .ok_or(Error::MissingInstruction(log.current_pc))?; - let op = CpuOperation::from_log_and_instruction(log, timestamp, instruction); + let op = + CpuOperation::from_log_and_instruction(log, timestamp, decoded.instr, decoded.len == 2); cpu_ops.push(op); } Ok(cpu_ops) @@ -2588,11 +2589,12 @@ pub fn count_table_lengths( for (i, log) in logs.iter().enumerate() { let timestamp = (i as u64) * 4 + 4; - let instruction = instructions + let decoded = instructions .get(&log.current_pc) .copied() .ok_or(Error::MissingInstruction(log.current_pc))?; - let cpu_op = CpuOperation::from_log_and_instruction(log, timestamp, instruction); + let cpu_op = + CpuOperation::from_log_and_instruction(log, timestamp, decoded.instr, decoded.len == 2); cpu_count += 1; // Memory ops from load/store @@ -3116,7 +3118,7 @@ impl Traces { /// Note: This creates empty PAGE tables since no ELF is provided. 
pub fn from_logs( logs: &[Log], - instructions: U64HashMap, + instructions: U64HashMap, max_rows: &super::MaxRowsConfig, ) -> Result { // Phase 1: Logs → CPU operations @@ -3184,7 +3186,7 @@ impl Traces { #[cfg(test)] pub fn from_logs_trimmed( logs: &[Log], - instructions: U64HashMap, + instructions: U64HashMap, max_rows: &super::MaxRowsConfig, ) -> Result { // Generate full traces (including full 2^20 bitwise table with multiplicities) @@ -3202,7 +3204,7 @@ impl Traces { #[cfg(test)] pub fn from_logs_minimal( logs: &[Log], - instructions: U64HashMap, + instructions: U64HashMap, max_rows: &super::MaxRowsConfig, ) -> Result { Self::from_logs_trimmed(logs, instructions, max_rows) diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index ceefbbc60..77cefda97 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -480,9 +480,13 @@ impl DecodeEntry { /// Creates a DecodeEntry from a PC and Instruction. /// /// Extracts all decode-time information: pc, registers, flags, immediate. - pub fn from_instruction(pc: u64, instruction: Instruction) -> Self { + /// `c_type` is `true` when the instruction was encoded as a 2-byte RV64C + /// compressed instruction, which the CPU `next_pc` constraint reads as + /// `instr_size = 4 - 2*c_type`. 
+ pub fn from_instruction(pc: u64, instruction: Instruction, c_type: bool) -> Self { let mut entry = Self { pc, + c_type, ..Default::default() }; diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 1b608034c..d1abe4ea7 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -15,7 +15,7 @@ use std::path::PathBuf; use crypto::fiat_shamir::is_transcript::IsStarkTranscript; use executor::elf::Elf; use executor::vm::execution::Executor; -use executor::vm::instruction::decoding::Instruction; +use executor::vm::instruction::decoding::{DecodedInstruction, Instruction}; use executor::vm::logs::Log; use executor::vm::memory::U64HashMap; use math::field::element::FieldElement; @@ -131,7 +131,7 @@ pub fn asm_elf_bytes(name: &str) -> Vec { /// Helper to run an ELF from the program_artifacts directory. /// /// Returns the ELF, execution logs, and instruction map. -pub fn run_asm_elf(name: &str) -> (Elf, Vec, U64HashMap) { +pub fn run_asm_elf(name: &str) -> (Elf, Vec, U64HashMap) { let elf_data = asm_elf_bytes(name); let elf = Elf::load(&elf_data).expect("Failed to load ELF"); let executor = Executor::new(&elf, vec![]).expect("Failed to create executor"); @@ -146,13 +146,18 @@ pub fn run_asm_elf(name: &str) -> (Elf, Vec, U64HashMap) { /// Collect bitwise lookups from executor logs for minimal table generation. pub fn collect_bitwise_ops_from_logs( logs: &[Log], - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> Vec { logs.iter() .enumerate() .flat_map(|(i, log)| { - let instruction = *instructions.get(&log.current_pc).unwrap(); - let op = CpuOperation::from_log_and_instruction(log, (i as u64) * 4, instruction); + let decoded = *instructions.get(&log.current_pc).unwrap(); + let op = CpuOperation::from_log_and_instruction( + log, + (i as u64) * 4, + decoded.instr, + decoded.len == 2, + ); op.collect_bitwise_ops() }) .collect() @@ -164,14 +169,14 @@ pub fn collect_bitwise_ops_from_logs( /// with the arg1, arg2, and signed values. 
pub fn collect_lt_lookups_from_logs( logs: &[Log], - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> Vec { use executor::vm::instruction::decoding::{ArithOp, Comparison}; let mut lookups = Vec::new(); for log in logs { - let instruction = *instructions.get(&log.current_pc).unwrap(); + let instruction = instructions.get(&log.current_pc).unwrap().instr; let is_slt = matches!( &instruction, @@ -263,14 +268,14 @@ pub fn collect_lt_lookups_from_logs( /// Creates LoadOperation objects for each Load instruction in the logs. pub fn collect_load_ops_from_logs( logs: &[Log], - instructions: &U64HashMap, + instructions: &U64HashMap, ) -> Vec { use executor::vm::instruction::decoding::LoadStoreWidth; let mut load_ops = Vec::new(); for log in logs { - let instruction = *instructions.get(&log.current_pc).unwrap(); + let instruction = instructions.get(&log.current_pc).unwrap().instr; if let Instruction::Load { width, .. } = instruction { let base_address = log.src1_val.wrapping_add(match instruction { diff --git a/prover/src/tests/cpu_tests.rs b/prover/src/tests/cpu_tests.rs index f05d1005c..d48336a84 100644 --- a/prover/src/tests/cpu_tests.rs +++ b/prover/src/tests/cpu_tests.rs @@ -11,7 +11,11 @@ use crate::tables::types::{DecodeEntry, FE}; use executor::{ elf::Elf, - vm::{execution::Executor, instruction::decoding::Instruction, memory::U64HashMap}, + vm::{ + execution::Executor, + instruction::decoding::{DecodedInstruction, Instruction}, + memory::U64HashMap, + }, }; /// Helper to create 4 operations from a template (required for power-of-2 trace). 
@@ -362,7 +366,7 @@ fn test_column_arrays() { // ============================================================================= /// Helper to run an ELF and return the logs and instructions -fn run_elf(path: &str) -> (Vec, U64HashMap) { +fn run_elf(path: &str) -> (Vec, U64HashMap) { let elf_data = std::fs::read(path).expect("Failed to read ELF"); let program = Elf::load(&elf_data).expect("Failed to load ELF"); let executor = Executor::new(&program, vec![]).expect("Failed to create executor"); @@ -371,7 +375,7 @@ fn run_elf(path: &str) -> (Vec, U64HashMap } /// Helper to run an ELF from the program_artifacts directory -fn run_asm_elf(name: &str) -> (Vec, U64HashMap) { +fn run_asm_elf(name: &str) -> (Vec, U64HashMap) { run_elf(&format!( "{}/executor/program_artifacts/asm/{}.elf", env!("CARGO_MANIFEST_DIR").replace("/prover", ""), @@ -411,7 +415,7 @@ fn test_cpu_operation_from_log_arith() { dst_val: 300, }; - let op = CpuOperation::from_log_and_instruction(&log, 0, instruction); + let op = CpuOperation::from_log_and_instruction(&log, 0, instruction, false); assert_eq!(op.decode.pc, 0x1000); assert_eq!(op.next_pc, 0x1004); @@ -445,7 +449,7 @@ fn test_cpu_operation_from_log_branch() { dst_val: 0, }; - let op = CpuOperation::from_log_and_instruction(&log, 4, instruction); + let op = CpuOperation::from_log_and_instruction(&log, 4, instruction, false); assert_eq!(op.timestamp, 4); assert_eq!(op.decode.pc, 0x2000); @@ -477,7 +481,7 @@ fn test_cpu_operation_from_log_word_instr() { dst_val: 0xFFFF_FFFF_8000_0001, // Result sign-extended }; - let op = CpuOperation::from_log_and_instruction(&log, 8, instruction); + let op = CpuOperation::from_log_and_instruction(&log, 8, instruction, false); assert!(op.decode.word_instr); assert!(op.decode.op_add); diff --git a/prover/src/tests/decode_tests.rs b/prover/src/tests/decode_tests.rs index 84ae8ff3a..0ad5b1b03 100644 --- a/prover/src/tests/decode_tests.rs +++ b/prover/src/tests/decode_tests.rs @@ -1,7 +1,7 @@ //! 
Tests for the DECODE table. use executor::elf::{Elf, Segment}; -use executor::vm::instruction::decoding::{ArithOp, Instruction}; +use executor::vm::instruction::decoding::{ArithOp, DecodedInstruction, Instruction}; use executor::vm::memory::U64HashMap; use math::field::element::FieldElement; @@ -18,6 +18,14 @@ use crate::test_utils::multi_prove_ram; use crate::test_utils::run_asm_elf; use crate::{prove, verify_with_options}; +/// Wrap an `Instruction`-valued map as a `DecodedInstruction` map for tests that +/// build programs by hand. All entries are treated as 4-byte (non-compressed). +fn as_decoded(map: &U64HashMap) -> U64HashMap { + map.iter() + .map(|(&pc, &instr)| (pc, DecodedInstruction { instr, len: 4 })) + .collect() +} + // ========================================================================= // Packed decode tests // ========================================================================= @@ -346,7 +354,7 @@ fn test_from_instruction_arith() { op: ArithOp::Add, }; - let entry = DecodeEntry::from_instruction(0x1000, instr); + let entry = DecodeEntry::from_instruction(0x1000, instr, false); assert_eq!(entry.pc, 0x1000); assert_eq!(entry.rd, 10); @@ -368,7 +376,7 @@ fn test_from_instruction_arith_imm() { op: ArithOp::Add, }; - let entry = DecodeEntry::from_instruction(0x1000, instr); + let entry = DecodeEntry::from_instruction(0x1000, instr, false); assert_eq!(entry.pc, 0x1000); assert_eq!(entry.rd, 10); @@ -407,7 +415,7 @@ fn test_trace_generation_basic() { }, ); - let (trace, _pc_to_row) = generate_decode_trace(&instructions); + let (trace, _pc_to_row) = generate_decode_trace(&as_decoded(&instructions)); // 2 instructions + 1 CPU padding entry = 3, padded to power of 2 = 4 assert_eq!(trace.main_table.height, 4); @@ -427,7 +435,7 @@ fn test_trace_multiplicities() { }, ); - let (mut trace, pc_to_row) = generate_decode_trace(&instructions); + let (mut trace, pc_to_row) = generate_decode_trace(&as_decoded(&instructions)); // PC 0x1000 executed 5 times let 
lookups = vec![0x1000, 0x1000, 0x1000, 0x1000, 0x1000]; @@ -470,7 +478,7 @@ fn test_trace_multiple_instructions_different_multiplicities() { }, ); - let (mut trace, pc_to_row) = generate_decode_trace(&instructions); + let (mut trace, pc_to_row) = generate_decode_trace(&as_decoded(&instructions)); // 0x1000 executed 3 times, 0x1004 executed 7 times let lookups = vec![ @@ -529,7 +537,7 @@ fn test_trace_padding_to_power_of_two() { }, ); - let (trace, _pc_to_row) = generate_decode_trace(&instructions); + let (trace, _pc_to_row) = generate_decode_trace(&as_decoded(&instructions)); // 3 instructions + 1 CPU padding entry = 4, already power of 2 assert_eq!( @@ -572,7 +580,7 @@ fn test_trace_dword_encoding() { }, ); - let (trace, _pc_to_row) = generate_decode_trace(&instructions); + let (trace, _pc_to_row) = generate_decode_trace(&as_decoded(&instructions)); // Find the row (could be row 0 or 1 due to HashMap ordering) let mut found = false; @@ -648,8 +656,8 @@ fn test_compute_precomputed_commitment_deterministic() { let options = ProofOptions::default_test_options(); - let commitment1 = compute_precomputed_commitment(&instructions, &options); - let commitment2 = compute_precomputed_commitment(&instructions, &options); + let commitment1 = compute_precomputed_commitment(&as_decoded(&instructions), &options); + let commitment2 = compute_precomputed_commitment(&as_decoded(&instructions), &options); assert_eq!( commitment1, commitment2, @@ -688,8 +696,8 @@ fn test_compute_precomputed_commitment_different_programs() { }, ); - let commitment_a = compute_precomputed_commitment(&program_a, &options); - let commitment_b = compute_precomputed_commitment(&program_b, &options); + let commitment_a = compute_precomputed_commitment(&as_decoded(&program_a), &options); + let commitment_b = compute_precomputed_commitment(&as_decoded(&program_b), &options); assert_ne!( commitment_a, commitment_b, @@ -728,8 +736,8 @@ fn test_compute_precomputed_commitment_different_pc() { }, ); - let 
commitment_a = compute_precomputed_commitment(&program_a, &options); - let commitment_b = compute_precomputed_commitment(&program_b, &options); + let commitment_a = compute_precomputed_commitment(&as_decoded(&program_a), &options); + let commitment_b = compute_precomputed_commitment(&as_decoded(&program_b), &options); assert_ne!( commitment_a, commitment_b, @@ -766,8 +774,10 @@ fn test_instructions_from_elf_matches_executor() { .unwrap_or_else(|| panic!("Verifier missing instruction at PC {:#x}", pc)); // Compare by converting to DecodeEntry - this is what the DECODE table uses - let executor_entry = DecodeEntry::from_instruction(*pc, *executor_instr); - let verifier_entry = DecodeEntry::from_instruction(*pc, *verifier_instr); + let executor_entry = + DecodeEntry::from_instruction(*pc, executor_instr.instr, executor_instr.len == 2); + let verifier_entry = + DecodeEntry::from_instruction(*pc, verifier_instr.instr, verifier_instr.len == 2); assert_eq!( executor_entry.packed_decode(), @@ -790,6 +800,52 @@ fn test_instructions_from_elf_matches_executor() { ); } +/// RV64C: instructions_from_elf marks 2-byte instructions with c_type, and the +/// executor and verifier decoders agree on the compressed program. +#[test] +fn test_instructions_from_elf_compressed_c_type() { + let (_elf, _logs, executor_instructions) = run_asm_elf("test_compressed"); + + let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let elf_path = manifest_dir + .parent() + .unwrap() + .join("executor/program_artifacts/asm/test_compressed.elf"); + let elf_bytes = std::fs::read(&elf_path).expect("Failed to read ELF file"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let verifier_instructions = + instructions_from_elf(&elf).expect("Failed to extract instructions"); + + // The program really contains compressed instructions. 
+ let compressed = verifier_instructions + .values() + .filter(|d| d.len == 2) + .count(); + assert!(compressed > 0, "expected compressed instructions in ELF"); + + // Every executed instruction's DECODE entry carries the right c_type, and the + // executor and verifier decoders agree exactly on instruction width. + for (pc, executor_instr) in executor_instructions.iter() { + let verifier_instr = verifier_instructions + .get(pc) + .unwrap_or_else(|| panic!("Verifier missing instruction at PC {:#x}", pc)); + assert_eq!( + executor_instr.len, verifier_instr.len, + "instruction width mismatch at PC {:#x}", + pc + ); + + let c_type = executor_instr.len == 2; + let entry = DecodeEntry::from_instruction(*pc, executor_instr.instr, c_type); + let c_type_bit_set = entry.packed_decode() & (1u64 << bits::C_TYPE) != 0; + assert_eq!( + c_type_bit_set, c_type, + "c_type bit in packed_decode wrong at PC {:#x}", + pc + ); + } +} + /// Test instructions_from_elf with a more complex program. #[test] fn test_instructions_from_elf_matches_executor_complex() { @@ -813,8 +869,10 @@ fn test_instructions_from_elf_matches_executor_complex() { .unwrap_or_else(|| panic!("Verifier missing instruction at PC {:#x}", pc)); // Compare via DecodeEntry - let executor_entry = DecodeEntry::from_instruction(*pc, *executor_instr); - let verifier_entry = DecodeEntry::from_instruction(*pc, *verifier_instr); + let executor_entry = + DecodeEntry::from_instruction(*pc, executor_instr.instr, executor_instr.len == 2); + let verifier_entry = + DecodeEntry::from_instruction(*pc, verifier_instr.instr, verifier_instr.len == 2); assert_eq!( executor_entry.packed_decode(), diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 5b9ec0f8a..028942000 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -593,6 +593,23 @@ fn test_prove_elfs_misalign_lh() { ); } +/// RV64C: prove + verify a program built from compressed (2-byte) 
instructions. +/// This exercises the full production decode path on both sides — the prover +/// re-decodes the ELF in `from_elf_and_logs` and the verifier recomputes the DECODE +/// commitment in `commitment_from_elf`, both via the compressed-aware walk. The CPU +/// `next_pc` constraint enforces `pc + (4 - 2*c_type)`, so a wrong `c_type` would make +/// the proof unverifiable. +#[test] +fn test_prove_elfs_compressed() { + let (elf, logs, _instructions) = run_asm_elf("test_compressed"); + let mut traces = + Traces::from_elf_and_logs_minimal(&elf, &logs, &Default::default(), &[]).unwrap(); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "test_compressed prove/verify failed" + ); +} + #[test] fn test_prove_elfs_misalign_lhu() { let (elf, logs, _instructions) = run_asm_elf("misalign_lhu"); diff --git a/prover/src/tests/trace_builder_tests.rs b/prover/src/tests/trace_builder_tests.rs index 199ce71db..a3b5498aa 100644 --- a/prover/src/tests/trace_builder_tests.rs +++ b/prover/src/tests/trace_builder_tests.rs @@ -6,7 +6,7 @@ use crate::tables::lt; use crate::tables::memw_register; use crate::tables::trace_builder::Traces; use crate::tables::types::FE; -use executor::vm::instruction::decoding::{ArithOp, Comparison, Instruction}; +use executor::vm::instruction::decoding::{ArithOp, Comparison, DecodedInstruction, Instruction}; use executor::vm::logs::Log; use executor::vm::memory::U64HashMap; @@ -41,10 +41,16 @@ fn make_and_log(pc: u64, rs1_val: u64, rs2_val: u64, result: u64) -> Log { } /// Build instructions map for test logs -fn make_instructions(logs: &[Log], instrs: &[Instruction]) -> U64HashMap<Instruction> { +fn make_instructions(logs: &[Log], instrs: &[Instruction]) -> U64HashMap<DecodedInstruction> { let mut map = U64HashMap::default(); for (log, instr) in logs.iter().zip(instrs.iter()) { - map.insert(log.current_pc, *instr); + map.insert( + log.current_pc, + DecodedInstruction { + instr: *instr, + len: 4, + }, + ); } map }