From caa053c0772166f6e23daef9efea32445fac6236 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Fri, 10 Apr 2026 17:28:03 +0200 Subject: [PATCH 1/9] spec: SHA256 accelerator (#372) * spec: First sha256 accelerator draft * Types checked * Update typst description * HWSLC -> HWSL * Apply suggestions from code review Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> Co-authored-by: Robin Jadoul * preliminary changes after review * fix out_e carry check * rotxor with 2 fewer columns * Correct count of bytes for out range checks * Explicit tables for SHA256_K * whoops * cosmetic + explanation * Apply suggestions from code review Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * Update spec/sha256.typ * Replace base_addr by entry in addr * review comments * structure * Apply suggestions from code review Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * Update spec/sha256.typ Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * spec/sha256: rebase fixes --------- Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> Co-authored-by: Erik Takke --- spec/book.typ | 1 + spec/sha256.typ | 204 +++++++++++++++++++++++++ spec/src/config.toml | 16 ++ spec/src/rotxor.toml | 186 ++++++++++++++++++++++ spec/src/sha256.toml | 270 ++++++++++++++++++++++++++++++++ spec/src/sha256consts.toml | 28 ++++ spec/src/sha256msgsched.toml | 163 ++++++++++++++++++++ spec/src/sha256round.toml | 288 +++++++++++++++++++++++++++++++++++ spec/src/signatures.toml | 24 +++ 9 files changed, 1180 insertions(+) create mode 100644 spec/sha256.typ create mode 100644 spec/src/rotxor.toml create mode 100644 spec/src/sha256.toml create mode 100644 spec/src/sha256consts.toml create mode 100644 spec/src/sha256msgsched.toml create mode 100644 spec/src/sha256round.toml diff --git a/spec/book.typ b/spec/book.typ index bcc5fec19..3de02e363 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -42,6 +42,7 @@ ("about_ecalls.typ", [About `ECALL`], ), ("halt.typ", [`HALT` chip], ), ("commit.typ", [`COMMIT` chip], ), + ("sha256.typ", [`SHA256` accelerator], ), )) ) ) diff --git a/spec/sha256.typ b/spec/sha256.typ new file mode 100644 index 000000000..d72d5fc7a --- /dev/null +++ b/spec/sha256.typ @@ -0,0 +1,204 @@ +#import "/book.typ": book-page, aside, rj +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + render_chip_variable_table, + total_nr_variables, + total_nr_instantiated_columns, + render_constraint_table, + render_chip_assumptions, + render_chip_padding_table, +) + +#let config = load_config() + +#show: book-page("sha256.typ") + +#let sha256chip = load_chip("src/sha256.toml", config) +#let sha256msgschedchip = load_chip("src/sha256msgsched.toml", config) +#let sha256roundchip = load_chip("src/sha256round.toml", config) +#let rotxorchip = load_chip("src/rotxor.toml", config) +#let sha256 = raw(sha256chip.name) +#let sha256msgsched = raw(sha256msgschedchip.name) +#let sha256round = raw(sha256roundchip.name) +#let rotxor = raw(rotxorchip.name) + +The following chips constitute an accelerator for the SHA256 compression function; +other aspects of SHA256 hashing (such as repeated compression invocation, +input padding and state initialization) fall outside the scope of this accelerator. + +The base #sha256 chip provides the `ECALL` interface, interacts with memory and then delegates to the #sha256msgsched and #sha256round chips +to perform the message schedule and the compression rounds, respectively. +The `SHA256_M` interaction signature is used to represent the output of the message schedule. +The `SHA256_K` interaction signature is used to represent the `k` constants. +It could either be instantiated with a (short) precomputed table, or through hardcoded LogUp contributions in this chip. +For this exposition, we choose the former option, and present a table further below. +Additionally, we introduce a #rotxor chip to perform the common action of computing the XOR of three rotations (or shifts) of a word. + +Most of the structure and variable naming follows the pseudocode of the wikipedia page#footnote(link("https://web.archive.org/web/20260320010021/https://en.wikipedia.org/wiki/SHA-2#Pseudocode")). + += #sha256 chip + +== Columns +#let nr_variables = total_nr_variables(sha256chip) +#let nr_columns = total_nr_instantiated_columns(sha256chip, config) + +The #sha256 chip leverages #nr_variables variables, spanning #nr_columns columns: +#render_chip_variable_table(sha256chip, config) + +== Constraints + +The first responsibility of the chip is to read the current state and message chunk from memory, +passed as arguments through pointers. +Since the memory ranges could overlap, we read the chunk first (in @sha256:c:read_chunk, at timestamp `timestamp`), before reading and writing the state (in @sha256:c:read_state, at timestamp `timestamp + 1`). +The addresses containing the state and the current chunk are passed in as arguments `A0 = x10` and `A1 = x11`, respectively. +Note that following the SHA256 spec, this state and the chunks are read and written as big-endian. +#render_constraint_table(sha256chip, config, groups: "memory") + +Then we prepare the message schedule, by emitting the input chunk with multiplicities +corresponding to the number of times it will be read during a compression evaluation. +The #sha256msgsched chip itself is implicitly invoked by itself and #sha256round, setting the `amount` +column appropriately for the number of times the `w` value is required. +#render_constraint_table(sha256chip, config, groups: "sched") + +And finally, we provide the boundaries for the #sha256round chip and the +final addition of the compression to the old state. +Observe that we embed the addition into the upper 32 bits of a double word, +in order to satisfy and use the `ADD` chip. +#render_constraint_table(sha256chip, config, groups: "compress") + +In this VM, we assign syscall number -1 to the #sha256 accelerator. +The chip therefore contributes the following interaction to the lookup-argument: +#render_constraint_table(sha256chip, config, groups: "lookup") + +== Padding + +#render_chip_padding_table(sha256chip, config) + += #sha256msgsched chip + +== Columns + +#let nr_variables = total_nr_variables(sha256msgschedchip) +#let nr_columns = total_nr_instantiated_columns(sha256msgschedchip, config) + +The #sha256msgsched chip leverages #nr_variables variables, spanning #nr_columns columns: +#render_chip_variable_table(sha256msgschedchip, config) + +== Assumptions + +#render_chip_assumptions(sha256msgschedchip, config) + +== Constraints + +First, we gather the dependencies from earlier in the message schedule. + +#render_constraint_table(sha256msgschedchip, config, groups: "lookback") + +Then, we calculate the result. +It suffices to check that the carry of adding four range-checked words +into a range-checked word is not too big, following the logic from @add. +In this case, using the `IS_BYTE` constraint allows us to add multiple words together +at the same time, without needing to store and range-check intermediate results. +#render_constraint_table(sha256msgschedchip, config, groups: "calc") + +Finally, we contribute to the LogUp. +#render_constraint_table(sha256msgschedchip, config, groups: "output") + += #sha256round chip + +== Columns + +#let nr_variables = total_nr_variables(sha256roundchip) +#let nr_columns = total_nr_instantiated_columns(sha256roundchip, config) + +The #sha256round chip leverages #nr_variables variables, spanning #nr_columns columns: +#render_chip_variable_table(sha256roundchip, config) + +== Assumptions + +#render_chip_assumptions(sha256roundchip, config) + +== Constraints + +First, we compute the necessary intermediate values. +#let bitand = math.class("binary", math.amp) +To compute `maj`, observe that $ (a bitand b) xor (a bitand c) xor (b bitand c) = (a bitand b) xor (c bitand (a xor b)), $ +by distribution. +Additionally, since for this form, $(a bitand b)$ and $(a xor b)$ are disjoint, so are $(a bitand b)$ and $(c bitand (a xor b))$, +and hence we can replace that top-level XOR with a field addition to compute $(a bitand b) + (c bitand (a xor b))$, +needing fewer intermediate columns. +Similarly, `ch` can be written as $(e bitand f) + ((2^32 - 1 - e) bitand g)$. +#render_constraint_table(sha256roundchip, config, groups: "value") + +Then we constrain the addition for the new state, constraining additions with the same `IS_BYTE` trick as before. +#render_constraint_table(sha256roundchip, config, groups: "addition") + +Finally, we chain the rounds together through the interactions. +#render_constraint_table(sha256roundchip, config, groups: "output") + +== Padding + +#render_chip_padding_table(sha256roundchip, config) + += #rotxor chip + + +This chip takes as input `a`, `r0`, `r1`, `r2` (4-bit values) and a bit `last_rot` to compute +$ + cases( + (a >>> (16 + r_0)) xor (a >>> (16 + r_0 - r_1)) xor (a >>> r_2) quad "if" #`last_rot`, + (a >>> (16 + r_0)) xor (a >>> (16 + r_0 - r_1)) xor (a >> r_2) quad "if" #`!last_rot` + ), +$ +where we let $>>>$ denote right rotation and $>>$ logical shift right. +We choose this representation so that all shift amounts required fit into 4 bits, +making the usage of `HWSL` more straightforward and avoid extra columns to represent more bits. + +== Columns + +#let nr_variables = total_nr_variables(rotxorchip) +#let nr_columns = total_nr_instantiated_columns(rotxorchip, config) +The #rotxor chip leverages #nr_variables variables, spanning #nr_columns columns: + +#render_chip_variable_table(rotxorchip, config) + +== Assumptions + +Range checking for all elements is inherited from the bitwise lookups. +We can safely assume that no `r_i` will be zero, and avoid extra work due to right rotation needing `16 - shift` as arguments to the `HWSL` interactions. +#render_chip_assumptions(rotxorchip, config) + +== Constraints + +We first compute all rotations (or shifts) of `a`. +`a1` is computed as a left rotation of `a0`, in order to not need +additional columns to represent the full right-rotation amounts. +#render_constraint_table(rotxorchip, config, groups: "shift") + +Then the bitwise XOR of the results. +#render_constraint_table(rotxorchip, config, groups: "xor") + +And finally contribute to the lookup argument. +#render_constraint_table(rotxorchip, config, groups: "output") + +== Padding + +#render_chip_padding_table(rotxorchip, config) + += Constant lookup + +#let sha256_kchip = load_chip("src/sha256consts.toml", config) +#let sha256_k = raw(sha256_kchip.name) + +As mentioned, we provide the round constants through a short precomputed lookup table: #sha256_k. + +#render_chip_variable_table(sha256_kchip, config) +#render_constraint_table(sha256_kchip, config) + += Notes/optimizations +- This could instead be designed following the #link("https://github.com/riscv/riscv-crypto")[RISC-V Crypto Scalar extension `Zknh`], + for wider compatibility, but this design is likely to be more efficient. + It is still possible, if desired, to expose #rotxor (or a selection of parameter instantiations thereof) + as implementation for these primitives. +- The message schedule could be exposed as its own ECALL instead, but the direct integration leads to better efficiency. +- Some of these chips could be made narrower, at the cost of introducing some extra lookups and extra tables to compute and store intermediate results. diff --git a/spec/src/config.toml b/spec/src/config.toml index d9dcaec37..9ced2ce0d 100644 --- a/spec/src/config.toml +++ b/spec/src/config.toml @@ -91,6 +91,22 @@ desc = """\ The `Word` is the *most* significant digit. """ +[[variables.types]] +label = "WordBL" +subtypes = ["Byte", "Byte", "Byte", "Byte"] +desc = """\ + Variable that can only assume values in the range $[0, 2^32)$. \\ + Represented as an array of four `Byte` variables.\ + """ + +[[variables.types]] +label = "WordHL" +subtypes = ["Half", "Half"] +desc = """\ + Variable that can only assume values in the range $[0, 2^32)$. \\ + Represented as an array of two `Half` variables.\ + """ + [[variables.types]] label = "QuadHL" subtypes = ["Half", "Half", "Half", "Half", "Half", "Half", "Half", "Half"] diff --git a/spec/src/rotxor.toml b/spec/src/rotxor.toml new file mode 100644 index 000000000..730e9bda5 --- /dev/null +++ b/spec/src/rotxor.toml @@ -0,0 +1,186 @@ +name = "ROTXOR" + +[[variables.input]] +name = "a" +type = "WordHL" +desc = "The input value" +pad = 0 + +[[variables.input]] +name = "r0" +type = "Byte" +desc = "The first amount of rotation, low nibble" +pad = 0 + +[[variables.input]] +name = "r1" +type = "Byte" +desc = "The second amount of rotation, low nibble" +pad = 0 + +[[variables.input]] +name = "r2" +type = "Byte" +desc = "The third amount of rotation, low nibble" +pad = 0 + +[[variables.input]] +name = "last_rot" +type = "Bit" +desc = "Whether the rotation by `r2` is a rotation (1) or just a shift (0)" +pad = 0 + +[[variables.output]] +name = "out" +type = "WordBL" +desc = "The output" +pad = 0 + +[[variables.auxiliary]] +name = "a0_left" +type = "WordHL" +desc = "`a << (16 - r0)`" +pad = 0 + +[[variables.auxiliary]] +name = "a0_right" +type = "WordHL" +desc = "`a >> r0`" +pad = 0 + +[[variables.auxiliary]] +name = "a1_left" +type = "WordHL" +desc = "`a0 << r1`" +pad = 0 + +[[variables.auxiliary]] +name = "a1_right" +type = "WordHL" +desc = "`a0 >> (16 - r1)`" +pad = 0 + +[[variables.auxiliary]] +name = "a2_left" +type = "WordHL" +desc = "`a << (16 - r2)`" +pad = 0 + +[[variables.auxiliary]] +name = "a2_right" +type = "WordHL" +desc = "`a >> r2`" +pad = 0 + +[[variables.auxiliary]] +name = "a0" +type = "WordBL" +desc = "`a >>> (16 + r0)`" +pad = 0 + +[[variables.auxiliary]] +name = "a1" +type = "WordBL" +desc = "`a >>> (16 + r0 - r1)` (which is `a0 <<< r1`)" +pad = 0 + +[[variables.auxiliary]] +name = "a2" +type = "WordBL" +desc = "`a >>> r2` or `a >> r2`" +pad = 0 + +[[variables.auxiliary]] +name = "a01" +type = "WordBL" +desc = "$a_0 xor a_1$" +pad = 0 + +[[variables.multiplicity]] +name = "μ" +type = "BaseField" +desc = "" +pad = 0 + + +[[assumptions]] +desc = "$#`r0`, #`r1`, #`r2` in [1, 15]$" + + +[[constraint_groups]] +name = "shift" + +[[constraints.shift]] +kind = "interaction" +tag = "HWSL" +input = [["idx", "a", "i"], ["-", 16, "r0"]] +output = ["arr", ["idx", "a0_left", "i"], ["idx", "a0_right", "i"]] +iter = ["i", 0, 1] +multiplicity = "μ" + +[[constraints.shift]] +kind = "interaction" +tag = "HWSL" +input = [["idx", ["cast", "a0", "WordHL"], "i"], "r1"] +output = ["arr", ["idx", "a1_left", "i"], ["idx", "a1_right", "i"]] +iter = ["i", 0, 1] +multiplicity = "μ" + +[[constraints.shift]] +kind = "interaction" +tag = "HWSL" +input = [["idx", "a", "i"], ["-", 16, "r2"]] +output = ["arr", ["idx", "a2_left", "i"], ["idx", "a2_right", "i"]] +iter = ["i", 0, 1] +multiplicity = "μ" + +[[constraints.shift]] +kind = "arith" +constraint = "$#`a0[i]` = #`a0_left[i]` + #`a0_right[1 - i]`$" +poly = ["-", ["idx", ["cast", "a0", "WordHL"], "i"], ["idx", "a0_left", "i"], ["idx", "a0_right", ["-", 1, "i"]]] +iter = ["i", 0, 1] + +[[constraints.shift]] +kind = "arith" +constraint = "$#`a1[i]` = #`a1_left[i]` + #`a1_right[1 - i]`$" +poly = ["-", ["idx", ["cast", "a1", "WordHL"], "i"], ["idx", "a1_left", "i"], ["idx", "a1_right", ["-", 1, "i"]]] +iter = ["i", 0, 1] + +[[constraints.shift]] +kind = "arith" +constraint = "$#`a2[0]` = #`a2_left[1]` + #`a2_right[0]`$" +poly = ["-", ["idx", ["cast", "a2", "WordHL"], 0], ["idx", "a2_left", 1], ["idx", "a2_right", 0]] + +[[constraints.shift]] +kind = "arith" +constraint = "$#`a2[1]` = #`last_rot` dot #`a2_left[0]` + #`a2_right[1]`$" +poly = ["-", ["idx", ["cast", "a2", "WordHL"], 0], ["*", "last_rot", ["idx", "a2_left", 0]], ["idx", "a2_right", 1]] + +[[constraint_groups]] +name = "xor" + +[[constraints.xor]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", "a0", "i"], ["idx", "a1", "i"]] +output = ["idx", "a01", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.xor]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", "a01", "i"], ["idx", "a2", "i"]] +output = ["idx", "out", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraint_groups]] +name = "output" + +[[constraints.output]] +kind = "interaction" +tag = "ROTXOR" +input = [["cast", "a", "Word"], "r0", "r1", "r2", "last_rot"] +output = ["cast", "out", "Word"] +multiplicity = ["-", "μ"] diff --git a/spec/src/sha256.toml b/spec/src/sha256.toml new file mode 100644 index 000000000..4cd4de9ba --- /dev/null +++ b/spec/src/sha256.toml @@ -0,0 +1,270 @@ +name = "SHA256" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "Timestamp at which the ECALL is invoked. Used as unique identifier for this invocation." +pad = 0 + +[[variables.input]] +name = "h" +type = ["Byte", 32] +desc = "The state of the hash function." +pad = 0 + +[[variables.input]] +name = "h_addr" +type = ["DWordHL", 4] +desc = "The addresses of the doublewords of `h`" +pad = ["arr", 0, 8, 16, 24] + +[[variables.input]] +name = "m" +type = ["Byte", 64] +desc = "The input chunk." +pad = 0 + +[[variables.input]] +name = "m_addr" +type = ["DWordHL", 8] +desc = "The addresses of the doublewords of `m`" +pad = ["arr", 0, 8, 16, 24, 32, 40, 48, 56] + +[[variables.output]] +name = "out" +type = ["Byte", 32] +desc = "The new state." +pad = 0 + +[[variables.auxiliary]] +name = "last_round_out" +type = ["Word", 8] +desc = "The output from the last compression round" +pad = 0 + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + + +[[constraint_groups]] +name = "memory" + +[[constraints.memory]] +kind = "interaction" +tag = "MEMW" +input = [1, ["cast", ["*", 2, 11], "DWordWL"], ["arr", ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 1], 0, 0, 0, 0, 0, 0], "timestamp", 1, 0, 0] +output = ["arr", ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], 0, 0, 0, 0, 0, 0] +multiplicity = "μ" + +[[constraints.memory]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", ["idx", "m_addr", "i"], "j"]] +multiplicity = "μ" +iters = [["i", 0, 7], ["j", 0, 3]] + +[[constraints.memory]] +kind = "template" +tag = "ADD" +input = [["cast", ["idx", "m_addr", 0], "DWordWL"], ["cast", ["*", 8, "i"], "DWordWL"]] +output = ["cast", ["idx", "m_addr", "i"], "DWordWL"] +iter = ["i", 1, 7] + +[[constraints.memory]] +kind = "interaction" +tag = "MEMW" +input = [0, ["cast", ["idx", "m_addr", "i"], "DWordWL"], ["arr", + ["idx", "m", ["+", ["*", 8, "i"], 3]], + ["idx", "m", ["+", ["*", 8, "i"], 2]], + ["idx", "m", ["+", ["*", 8, "i"], 1]], + ["idx", "m", ["+", ["*", 8, "i"], 0]], + ["idx", "m", ["+", ["*", 8, "i"], 7]], + ["idx", "m", ["+", ["*", 8, "i"], 6]], + ["idx", "m", ["+", ["*", 8, "i"], 5]], + ["idx", "m", ["+", ["*", 8, "i"], 4]]], + "timestamp", 0, 0, 1] +output = ["arr", + ["idx", "m", ["+", ["*", 8, "i"], 3]], + ["idx", "m", ["+", ["*", 8, "i"], 2]], + ["idx", "m", ["+", ["*", 8, "i"], 1]], + ["idx", "m", ["+", ["*", 8, "i"], 0]], + ["idx", "m", ["+", ["*", 8, "i"], 7]], + ["idx", "m", ["+", ["*", 8, "i"], 6]], + ["idx", "m", ["+", ["*", 8, "i"], 5]], + ["idx", "m", ["+", ["*", 8, "i"], 4]]] +multiplicity = "μ" +iter = ["i", 0, 7] +ref = "sha256:c:read_chunk" + +[[constraints.memory]] +kind = "interaction" +tag = "MEMW" +input = [1, ["cast", ["*", 2, 10], "DWordWL"], ["arr", ["idx", ["cast", ["idx", "h_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "h_addr", 0], "DWordWL"], 1], 0, 0, 0, 0, 0, 0], "timestamp", 1, 0, 0] +output = ["arr", ["idx", ["cast", ["idx", "h_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "h_addr", 0], "DWordWL"], 1], 0, 0, 0, 0, 0, 0] +multiplicity = "μ" + +[[constraints.memory]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", ["idx", "h_addr", "i"], "j"]] +multiplicity = "μ" +iters = [["i", 0, 3], ["j", 0, 3]] + +[[constraints.memory]] +kind = "template" +tag = "ADD" +input = [["cast", ["idx", "h_addr", 0], "DWordWL"], ["*", 8, ["cast", "i", "DWordWL"]]] +output = ["cast", ["idx", "h_addr", "i"], "DWordWL"] +iter = ["i", 1, 3] + +[[constraints.memory]] +kind = "interaction" +tag = "MEMW" +input = [ + 0, + ["cast", ["idx", "h_addr", "i"], "DWordWL"], + ["arr", + ["idx", "out", ["+", ["*", 8, "i"], 3]], + ["idx", "out", ["+", ["*", 8, "i"], 2]], + ["idx", "out", ["+", ["*", 8, "i"], 1]], + ["idx", "out", ["+", ["*", 8, "i"], 0]], + ["idx", "out", ["+", ["*", 8, "i"], 7]], + ["idx", "out", ["+", ["*", 8, "i"], 6]], + ["idx", "out", ["+", ["*", 8, "i"], 5]], + ["idx", "out", ["+", ["*", 8, "i"], 4]]], + ["+", "timestamp", ["cast", 1, "DWordWL"]], + 0, 0, 1 +] +output = ["arr", + ["idx", "h", ["+", ["*", 8, "i"], 3]], + ["idx", "h", ["+", ["*", 8, "i"], 2]], + ["idx", "h", ["+", ["*", 8, "i"], 1]], + ["idx", "h", ["+", ["*", 8, "i"], 0]], + ["idx", "h", ["+", ["*", 8, "i"], 7]], + ["idx", "h", ["+", ["*", 8, "i"], 6]], + ["idx", "h", ["+", ["*", 8, "i"], 5]], + ["idx", "h", ["+", ["*", 8, "i"], 4]], +] +iter = ["i", 0, 3] +multiplicity = "μ" +ref = "sha256:c:read_state" + +[[constraint_groups]] +name = "sched" + +[[constraints.sched]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "i"] +output = ["+", + ["*", ["^", 2, 0], ["idx", "m", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "m", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "m", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "m", ["+", ["*", 4, "i"], 0]]], +] +multiplicity = ["*", -2, "μ"] +iter = ["i", 0, 0] + +[[constraints.sched]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "i"] +output = ["+", + ["*", ["^", 2, 0], ["idx", "m", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "m", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "m", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "m", ["+", ["*", 4, "i"], 0]]], +] +multiplicity = ["*", -3, "μ"] +iter = ["i", 1, 8] + +[[constraints.sched]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "i"] +output = ["+", + ["*", ["^", 2, 0], ["idx", "m", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "m", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "m", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "m", ["+", ["*", 4, "i"], 0]]], +] +multiplicity = ["*", -4, "μ"] +iter = ["i", 9, 13] + +[[constraints.sched]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "i"] +output = ["+", + ["*", ["^", 2, 0], ["idx", "m", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "m", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "m", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "m", ["+", ["*", 4, "i"], 0]]], +] +multiplicity = ["*", -5, "μ"] +iter = ["i", 14, 15] + +[[constraint_groups]] +name = "compress" + +[[constraints.compress]] +kind = "interaction" +tag = "SHA256ROUND" +input = ["timestamp", ["arr", + ["+", ["*", ["^", 2, 0], ["idx", "h", 3]], ["*", ["^", 2, 8], ["idx", "h", 2]], ["*", ["^", 2, 16], ["idx", "h", 1]], ["*", ["^", 2, 24], ["idx", "h", 0]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 7]], ["*", ["^", 2, 8], ["idx", "h", 6]], ["*", ["^", 2, 16], ["idx", "h", 5]], ["*", ["^", 2, 24], ["idx", "h", 4]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 11]], ["*", ["^", 2, 8], ["idx", "h", 10]], ["*", ["^", 2, 16], ["idx", "h", 9]], ["*", ["^", 2, 24], ["idx", "h", 8]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 15]], ["*", ["^", 2, 8], ["idx", "h", 14]], ["*", ["^", 2, 16], ["idx", "h", 13]], ["*", ["^", 2, 24], ["idx", "h", 12]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 19]], ["*", ["^", 2, 8], ["idx", "h", 18]], ["*", ["^", 2, 16], ["idx", "h", 17]], ["*", ["^", 2, 24], ["idx", "h", 16]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 23]], ["*", ["^", 2, 8], ["idx", "h", 22]], ["*", ["^", 2, 16], ["idx", "h", 21]], ["*", ["^", 2, 24], ["idx", "h", 20]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 27]], ["*", ["^", 2, 8], ["idx", "h", 26]], ["*", ["^", 2, 16], ["idx", "h", 25]], ["*", ["^", 2, 24], ["idx", "h", 24]]], + ["+", ["*", ["^", 2, 0], ["idx", "h", 31]], ["*", ["^", 2, 8], ["idx", "h", 30]], ["*", ["^", 2, 16], ["idx", "h", 29]], ["*", ["^", 2, 24], ["idx", "h", 28]]], +], 0] +multiplicity = "μ" + +[[constraints.compress]] +kind = "interaction" +tag = "SHA256ROUND" +input = ["timestamp", "last_round_out", 64] +multiplicity = ["-", "μ"] + +[[constraints.compress]] +kind = "interaction" +tag = "IS_BYTE" +input = [["idx", "out", "i"]] +multiplicity = "μ" +iter = ["i", 0, 31] + +[[constraints.compress]] +kind = "template" +tag = "ADD" +input = [["arr", 0, ["idx", "last_round_out", "i"]], ["arr", 0, ["+", + ["*", ["^", 2, 0], ["idx", "h", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "h", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "h", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "h", ["+", ["*", 4, "i"], 0]]], +]]] +output = ["arr", 0, ["+", + ["*", ["^", 2, 0], ["idx", "out", ["+", ["*", 4, "i"], 3]]], + ["*", ["^", 2, 8], ["idx", "out", ["+", ["*", 4, "i"], 2]]], + ["*", ["^", 2, 16], ["idx", "out", ["+", ["*", 4, "i"], 1]]], + ["*", ["^", 2, 24], ["idx", "out", ["+", ["*", 4, "i"], 0]]], +]] +iter = ["i", 0, 7] + +[[constraint_groups]] +name = "lookup" + +[[constraints.lookup]] +kind = "template" +tag = "IS_BIT" +input = ["μ"] + +[[constraints.lookup]] +kind = "interaction" +tag = "ECALL" +input = ["timestamp", ["arr", ["-", ["^", 2, 32], 1], ["-", ["^", 2, 32], 1]]] +multiplicity = ["-", "μ"] diff --git a/spec/src/sha256consts.toml b/spec/src/sha256consts.toml new file mode 100644 index 000000000..17fe6fb0f --- /dev/null +++ b/spec/src/sha256consts.toml @@ -0,0 +1,28 @@ +name = "SHA256_K" + +[[variables.input]] +name = "index" +type = "BaseField" +desc = "" +precomputed = true + +[[variables.input]] +name = "K" +type = "Word" +desc = "" +precomputed = true + +[[variables.multiplicity]] +name = "μ" +type = "BaseField" +desc = "" + +[[constraint_groups]] +name = "contributions" + +[[constraints.contributions]] +kind = "interaction" +tag = "SHA256_K" +input = ["index"] +output = "K" +multiplicity = ["-", "μ"] diff --git a/spec/src/sha256msgsched.toml b/spec/src/sha256msgsched.toml new file mode 100644 index 000000000..79664a797 --- /dev/null +++ b/spec/src/sha256msgsched.toml @@ -0,0 +1,163 @@ +name = "SHA256MSGSCHED" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "The timestamp/identifier for this execution of the message schedule" +pad = 0 + +[[variables.input]] +name = "index" +type = "BaseField" +desc = "The index of the output word" +pad = 0 + +[[variables.input]] +name = "amount" +type = "BaseField" +desc = "The multiplicity with which to output the resulting word" +pad = 0 + +[[variables.output]] +name = "out" +type = "WordHL" +desc = "The output, `w[index]`" + +[[variables.auxiliary]] +name = "back2" +type = "Word" +desc = "`w[index - 2]`" +pad = 0 + +[[variables.auxiliary]] +name = "back7" +type = "Word" +desc = "`w[index - 7]`" +pad = 0 + +[[variables.auxiliary]] +name = "back15" +type = "Word" +desc = "`w[index - 15]`" +pad = 0 + +[[variables.auxiliary]] +name = "back16" +type = "Word" +desc = "`w[index - 16]`" +pad = 0 + +[[variables.auxiliary]] +name = "s0" +type = "Word" +desc = "$#`back15` >>> 7 xor #`back15` >>> 18 xor #`back15` >> 3$" +pad = 0 + +[[variables.auxiliary]] +name = "s1" +type = "Word" +desc = "$#`back2` >>> 17 xor #`back2` >>> 19 xor #`back2` >> 10$" +pad = 0 + +[[variables.virtual]] +name = "carry" +type = "Byte" +desc = "The carry of computing `out`" +def = ["*", ["^", 2, -32], ["-", ["+", "back16", "s0", "back7", "s1"], ["cast", "out", "Word"]]] + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + +[[assumptions]] +desc = "#`IS_WORD[SHA256_M[timestamp, i]]` for $0 <= i < #`index`$" + + +[[constraint_groups]] +name = "lookback" + +[[constraints.lookback]] +kind = "interaction" +tag = "IS_BYTE" +input = [["-", "index", 16]] +multiplicity = "μ" + +[[constraints.lookback]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", ["-", "index", 2]] +output = "back2" +multiplicity = "μ" + +[[constraints.lookback]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", ["-", "index", 7]] +output = "back7" +multiplicity = "μ" + +[[constraints.lookback]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", ["-", "index", 15]] +output = "back15" +multiplicity = "μ" + +[[constraints.lookback]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", ["-", "index", 16]] +output = "back16" +multiplicity = "μ" + +[[constraint_groups]] +name = "calc" + +[[constraints.calc]] +kind = "interaction" +tag = "ROTXOR" +input = ["back15", 2, 11, 3, 0] +output = "s0" +multiplicity = "μ" + +[[constraints.calc]] +kind = "interaction" +tag = "ROTXOR" +input = ["back2", 3, 2, 10, 0] +output = "s1" +multiplicity = "μ" + +[[constraints.calc]] +kind = "interaction" +tag = "IS_BYTE" +input = ["carry"] +multiplicity = "μ" + +[[constraints.calc]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "out", "i"]] +iter = ["i", 0, 1] +multiplicity = "μ" + +[[constraint_groups]] +name = "output" + +[[constraints.output]] +kind = "template" +tag = "IS_BIT" +input = ["μ"] + +[[constraints.output]] +kind = "arith" +constraint = "$#`μ` = 0 => #`amount` = 0$" +poly = ["*", ["not", "μ"], "amount"] + +[[constraints.output]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "index"] +output = ["cast", "out", "Word"] +multiplicity = ["-", "amount"] diff --git a/spec/src/sha256round.toml b/spec/src/sha256round.toml new file mode 100644 index 000000000..8ec93ea36 --- /dev/null +++ b/spec/src/sha256round.toml @@ -0,0 +1,288 @@ +name = "SHA256ROUND" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "The timestamp/identifier for this execution of the round function" +pad = 0 + +[[variables.input]] +name = "a" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "b" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "c" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "d" +type = "Word" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "e" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "f" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "g" +type = "WordBL" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "h" +type = "Word" +desc = "State element" +pad = 0 + +[[variables.input]] +name = "index" +type = "BaseField" +desc = "The round number/index" +pad = 0 + +[[variables.output]] +name = "out_a" +type = "WordHL" +desc = "$#`temp1` + #`temp2`$" +pad = 0 + +[[variables.output]] +name = "out_e" +type = "WordHL" +desc = "$#`d` + #`temp1`$" +pad = 0 + +[[variables.auxiliary]] +name = "a_and_b" +type = "WordBL" +desc = "$#`a` class(\"binary\", amp) #`b`$. Part of `maj`" +pad = 0 + +[[variables.auxiliary]] +name = "a_xor_b" +type = "WordBL" +desc = "$#`a` xor #`b`$. Part of `maj`" +pad = 0 + +[[variables.auxiliary]] +name = "c_and_a_xor_b" +type = "WordBL" +desc = "$#`c` class(\"binary\", amp) (#`a` xor #`b`)$. Part of `maj`" +pad = 0 + +[[variables.auxiliary]] +name = "e_and_f" +type = "WordBL" +desc = "$#`e` class(\"binary\", amp) #`f`$. Part of `ch`" +pad = 0 + +[[variables.auxiliary]] +name = "not_e_and_g" +type = "WordBL" +desc = "$(not #`e`) class(\"binary\", amp) #`g`$. Part of `ch`" +pad = 0 + +[[variables.auxiliary]] +name = "kval" +type = "Word" +desc = "`k[index]`" +pad = 0 + +[[variables.auxiliary]] +name = "S0" +type = "Word" +desc = "Transformation of `a`" +pad = 0 + +[[variables.auxiliary]] +name = "S1" +type = "Word" +desc = "Transformation of `e`" +pad = 0 + +[[variables.auxiliary]] +name = "wval" +type = "Word" +desc = "`w[index]`" +pad = 0 + +[[variables.virtual]] +name = "carry_a" +type = "Byte" +desc = "The carry from `out_a`" +def = ["*", ["^", 2, -32], ["-", ["+", "temp1", "temp2"], ["cast", "out_a", "Word"]]] + +[[variables.virtual]] +name = "carry_e" +type = "Byte" +desc = "The carry from `out_e`" +def = ["*", ["^", 2, -32], ["-", ["+", "d", "temp1"], ["cast", "out_e", "Word"]]] + +[[variables.virtual]] +name = "ch" +type = "Word" +desc = "ch value" +def = ["+", ["cast", "e_and_f", "Word"], ["cast", "not_e_and_g", "Word"]] + +[[variables.virtual]] +name = "maj" +type = "Word" +desc = "maj value" +def = ["+", ["cast", "a_and_b", "Word"], ["cast", "c_and_a_xor_b", "Word"]] + +[[variables.virtual]] +name = "temp1" +type = "BaseField" +desc = "`temp1` value" +def = ["+", "h", "S1", "ch", "kval", "wval"] + +[[variables.virtual]] +name = "temp2" +type = "BaseField" +desc = "`temp2` value" +def = ["+", "S0", "maj"] + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + +[[assumptions]] +desc = "All state values are valid words" + + +[[constraint_groups]] +name = "value" + +[[constraints.value]] +kind = "interaction" +tag = "AND_BYTE" +input = [["idx", "a", "i"], ["idx", "b", "i"]] +output = ["idx", "a_and_b", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.value]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", "a", "i"], ["idx", "b", "i"]] +output = ["idx", "a_xor_b", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.value]] +kind = "interaction" +tag = "AND_BYTE" +input = [["idx", "c", "i"], ["idx", "a_xor_b", "i"]] +output = ["idx", "c_and_a_xor_b", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.value]] +kind = "interaction" +tag = "AND_BYTE" +input = [["idx", "e", "i"], ["idx", "f", "i"]] +output = ["idx", "e_and_f", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.value]] +kind = "interaction" +tag = "AND_BYTE" +input = [["-", 255, ["idx", "e", "i"]], ["idx", "g", "i"]] +output = ["idx", "not_e_and_g", "i"] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.value]] +kind = "interaction" +tag = "SHA256_K" +input = ["index"] +output = "kval" +multiplicity = "μ" + +[[constraints.value]] +kind = "interaction" +tag = "SHA256_M" +input = ["timestamp", "index"] +output = "wval" +multiplicity = "μ" + +[[constraints.value]] +kind = "interaction" +tag = "ROTXOR" +input = [["cast", "a", "Word"], 6, 9, 2, 1] +output = "S0" +multiplicity = "μ" + +[[constraints.value]] +kind = "interaction" +tag = "ROTXOR" +input = [["cast", "e", "Word"], 9, 14, 6, 1] +output = "S1" +multiplicity = "μ" + +[[constraint_groups]] +name = "addition" + +[[constraints.addition]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "out_a", "i"]] +multiplicity = "μ" +iter = ["i", 0, 1] + +[[constraints.addition]] +kind = "interaction" +tag = "IS_BYTE" +input = ["carry_a"] +multiplicity = "μ" + +[[constraints.addition]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "out_e", "i"]] +multiplicity = "μ" +iter = ["i", 0, 1] + +[[constraints.addition]] +kind = "interaction" +tag = "IS_BYTE" +input = ["carry_e"] +multiplicity = "μ" + +[[constraint_groups]] +name = "output" + +[[constraints.output]] +kind = "interaction" +tag = "SHA256ROUND" +input = ["timestamp", ["arr", ["cast", "a", "Word"], ["cast", "b", "Word"], ["cast", "c", "Word"], "d", ["cast", "e", "Word"], ["cast", "f", "Word"], ["cast", "g", "Word"], "h"], "index"] +multiplicity = ["-", "μ"] + +[[constraints.output]] +kind = "interaction" +tag = "SHA256ROUND" +input = ["timestamp", ["arr", ["cast", "out_a", "Word"], ["cast", "a", "Word"], ["cast", "b", "Word"], ["cast", "c", "Word"], ["cast", "out_e", "Word"], ["cast", "e", "Word"], ["cast", "f", "Word"], ["cast", "g", "Word"]], ["+", "index", 1]] +multiplicity = "μ" diff --git a/spec/src/signatures.toml b/spec/src/signatures.toml index 17ecd3933..33f97cebf 100644 --- a/spec/src/signatures.toml +++ b/spec/src/signatures.toml @@ -187,3 +187,27 @@ output = ["Half", 2] tag = "memory" kind = "interaction" input = ["Bit", "DWordWL", "DWordWL", "BaseField"] + +# SHA256 things +[[signatures]] +tag = "SHA256_K" +kind = "interaction" +input = ["BaseField"] +output = "Word" + +[[signatures]] +tag = "SHA256_M" +kind = "interaction" +input = ["DWordWL", "BaseField"] +output = "Word" + +[[signatures]] +tag = "SHA256ROUND" +kind = "interaction" +input = ["DWordWL", ["Word", 8], "BaseField"] + +[[signatures]] +tag = "ROTXOR" +kind = "interaction" +input = ["Word", "Byte", "Byte", "Byte", "Bit"] +output = "Word" From d23f806fe61c0a5dec11e0269e413cf35d468165 Mon Sep 17 00:00:00 2001 From: Erik <159244975+erik-3milabs@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:07:33 +0200 Subject: [PATCH 2/9] spec: KECCAK accelerator (#474) * spec: math/code render mod expr * spec/type_check: add ModExpr * spec: add multi-dimensional array support * spec/KECCAK: introduce v0 * spec/keccak: define padding * spec: support multidimensional array in signatures * spec/keccak: add signatures * spec/keccak: update core chip * spec/keccak: update keccak_rnd description * spec/keccak: define round constant lookup * Apply suggestions from code review Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> * spec/keccak: clarify "optimizations" header * spec/keccak: list `state_ptr` simplification optimization * spec/keccak: fix C3 * spec/keccak: fix missing EOF * spec/keccak: list interaction counts * spec/keccak: list three-way XOR optimization idea * spec/tooling: fix mod_expr default * spec: add spaces round `%` rendering * spec: reuse `type_to_code` in `signatures.typ` * Apply suggestions from code review Co-authored-by: Robin Jadoul * spec/keccak: update three-way XOR optimization benefits * spec/ecall: reintroduce ecall-number overview * spec/keccak: ref to sections in FIPS202 on state endianness * spec/keccak: fix typo --------- Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Robin Jadoul --- spec/about_ecalls.typ | 10 ++ spec/book.typ | 1 + spec/chip.typ | 15 +- spec/expr.typ | 49 ++++-- spec/keccak.typ | 122 +++++++++++++++ spec/signatures.typ | 26 ++-- spec/src.typ | 18 +-- spec/src/keccak.toml | 102 +++++++++++++ spec/src/keccak_rc.toml | 28 ++++ spec/src/keccak_round.toml | 301 +++++++++++++++++++++++++++++++++++++ spec/src/signatures.toml | 13 ++ spec/tooling/chip.py | 26 ++++ 12 files changed, 665 insertions(+), 46 deletions(-) create mode 100644 spec/keccak.typ create mode 100644 spec/src/keccak.toml create mode 100644 spec/src/keccak_rc.toml create mode 100644 spec/src/keccak_round.toml diff --git a/spec/about_ecalls.typ b/spec/about_ecalls.typ index ef4203610..9b37d5f21 100644 --- a/spec/about_ecalls.typ +++ b/spec/about_ecalls.typ @@ -22,3 +22,13 @@ When `ECALL` is executed, it is assumed that: - the return value is written to `A0`, where `A0`-`A7` are symbolic names for the registers `x10`-`x17` #footnote([RISC-V - Register sets; en.wikipedia.org, #link("https://web.archive.org/web/20260209053447/https://en.wikipedia.org/wiki/RISC-V#Register_sets")[[src]]]). + += ECALL number overview + +We provide a list of supported ECALL numbers. +Negative numbers (represented as 2s complement 64-bit numbers), are used for our own custom accelerators/extensions. + +/ 64: `write` (@commit) +/ 93: `exit` (@halt) +/ -1: `SHA256` (@sha256) +/ -2: `KECCAK` (@keccak) \ No newline at end of file diff --git a/spec/book.typ b/spec/book.typ index 3de02e363..3b65642f9 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -43,6 +43,7 @@ ("halt.typ", [`HALT` chip], ), ("commit.typ", [`COMMIT` chip], ), ("sha256.typ", [`SHA256` accelerator], ), + ("keccak.typ", [`KECCAK` accelerator], ), )) ) ) diff --git a/spec/chip.typ b/spec/chip.typ index f3e0892f7..c6cce5073 100644 --- a/spec/chip.typ +++ b/spec/chip.typ @@ -14,12 +14,15 @@ .map(pair => pair.at(1)) .flatten() .map(var => { - let (label, factor) = if type(var.type) == array { - (var.type.at(0), var.type.at(1)) - } else { - (var.type, 1) + let (factor, var_type) = (1, var.type) + while type(var_type) == array { + assert(var_type.len() == 2, message: "invalid var (sub)type length: " + repr(var.type)) + assert(type(var_type.at(1)) == int, message: "invalid var (sub)type length: " + repr(var.type)) + factor *= var_type.at(1) + var_type = var_type.at(0) } - config.variables.types.filter(type => type.label == label).first().subtypes.len() * factor + + config.variables.types.filter(type => type.label == var_type).first().subtypes.len() * factor }) .sum() } @@ -290,7 +293,7 @@ } else if type(groups) == str { groups = (groups,) } - assert(groups.all(group => group in all_groups), message: "unknown group") + assert(groups.all(group => group in all_groups), message: "unknown group: " + repr(groups)) let selected_constraints = groups.map(g => ((g): chip.constraints.at(g))).join() // Find the group definition in the constraint_groups diff --git a/spec/expr.typ b/spec/expr.typ index 2de0d6ba3..20a55d753 100644 --- a/spec/expr.typ +++ b/spec/expr.typ @@ -1,20 +1,26 @@ // Types and array types // ::= str -// | [str, int] +// | [, int] // Check that a type expression is structurally valid, without validating against a set of known base types #let check_array_type(typ) = { - assert(type(typ.at(0)) == str, message: "Array types need to have a regular type as base") - assert(type(typ.at(1)) == int, message: "Array types need to have a constant dimension") + while type(typ) == array { + assert(typ.len() == 2, message: "Array types must specify two parameters") + assert(type(typ.at(1)) == int, message: "Array types need to have a constant dimension") + typ = typ.at(0) + } + assert(type(typ) == str, message: "Array types need to have a regular type as base") } // Render a type to code #let type_to_code(typ) = { - if type(typ) == array { - check_array_type(typ) - return raw(typ.at(0) + "[" + str(typ.at(1)) + "]") - } else if type(typ) == str { - return raw(typ) + let label = "" + while type(typ) == array { + label += "[" + str(typ.at(1)) + "]" + typ = typ.at(0) + } + if type(typ) == str { + return raw(typ + label) } else { assert(false, message: "Unknown format for type: " + repr(typ)) } @@ -54,12 +60,13 @@ "cast": 3, // cast "mul": 4, // * "div": 5, // / - "sum": 6, // Σ - "not": 7, // not - "sub": 8, // - - "add": 9, // + - "eq": 10, // = and := - "MAX": 11, // + "mod": 6, // mod + "sum": 7, // Σ + "not": 8, // not + "sub": 9, // - + "add": 10, // + + "eq": 11, // = and := + "MAX": 12, // ) // Mutual recursion through a trick from https://github.com/typst/typst/issues/744 @@ -97,6 +104,13 @@ "not": (pp, rec, e) => cwrap(rec(PREC.not, 1) + ` - ` + rec(PREC.not, e.at(1)), pp < PREC.not), "+": (pp, rec, e) => cwrap(e.slice(1).map(rec.with(PREC.add)).join(` + `), pp < PREC.add), "sum": (pp, rec, e) => assert(false, message: "sum is unsupported in code."), + "mod": (pp, rec, e) => { + assert(e.len() == 3 and type(e.at(2)) == int, message: "Invalid mod expr: " + repr(e)) + cwrap( + rec(PREC.mod, e.at(1)) + ` % ` + rec(PREC.mod, e.at(2)), + pp <= PREC.mod + ) + }, "*": (pp, rec, e) => { if e.len() == 3 and type(e.at(1)) == int and type(e.at(2)) == str and e.at(2).len() == 1 { // multiplication of a constant with one-letter variable. @@ -165,6 +179,13 @@ pp <= PREC.sub ) }, + "mod": (pp, rec, e) => { + assert(e.len() == 3 and type(e.at(2)) == int, message: "Invalid mod expr: " + repr(e)) + mwrap( + $#rec(PREC.mod, e.at(1)) mod #rec(PREC.mod, e.at(2))$, + pp <= PREC.mod + ) + }, "*": (pp, rec, e) => { if e.len() == 3 and type(e.at(1)) == int and type(e.at(2)) == str and e.at(2).len() == 1 { // multiplication of a constant with one-letter variable. diff --git a/spec/keccak.typ b/spec/keccak.typ new file mode 100644 index 000000000..e6e16f43f --- /dev/null +++ b/spec/keccak.typ @@ -0,0 +1,122 @@ +#import "/book.typ": book-page +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + compute_nr_interactions, + render_chip_assumptions, + render_chip_variable_table, + total_nr_variables, + total_nr_instantiated_columns, + render_constraint_table, + render_chip_padding_table, +) + +#let config = load_config() +#let chip = load_chip("src/keccak.toml", config) + +#show: book-page(chip.name) +#let keccak = raw(chip.name) + +The #keccak chip applies the keccak permutation $kappa$ to a given memory range; +other aspects of keccak hashing (such as repeated permutation invocation, +input padding and state initialization) fall outside the scope of this accelerator. + +This permutation $kappa: FF_2^1600 -> FF_2^1600$ operates on 1600 bits and is composed of 24 applications of round-permutation $Lambda: FF_2^1600 times NN -> FF_2^1600$, where the additional parameter is the round constant. +$Lambda$ is defined as the composition $iota compose chi compose pi compose rho compose theta$, where only $iota$ depends on the round constant. +#footnote("More details on the KECCAK permutation: FIPS 202, NIST, " + link("https://csrc.nist.gov/pubs/fips/202/final")) + +The keccak accelerator comprises two chips: a core chip that interacts with the memory --- loading the input and writing the output, and a round chip that applies the round permutation. + + += Core chip +== Columns +#let nr_variables = total_nr_variables(chip) +#let nr_columns = total_nr_instantiated_columns(chip, config) +#let nr_interactions = compute_nr_interactions(chip) + +The #keccak chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + +== Constraints +In this VM, we assign syscall number -2 to the #keccak accelerator. +The chip therefore contributes the following interaction to the lookup-argument: +#render_constraint_table(chip, config, groups: "output") + +The address containing the state to be permuted is passed in as argument `A0 = x10`. +The following constraints describe that this address is read into `addr` (@keccak:c:read_addr), from which `state_ptr` --- the collection of pointers to all lanes of the state --- is derived (@keccak:c:state_ptr). +The state is then read into `input_state`, while the `output_state` is written back to the indicated address (@keccak:c:load_store_state). +#render_constraint_table(chip, config, groups: "mem") + +Lastly, the input state is pushed to the Keccak-round function, while the output after 24 rounds is taken off the bus: +#render_constraint_table(chip, config, groups: "round") + +== Padding +The #keccak table can be padded to the next power of two with the following value assignments: +#render_chip_padding_table(chip, config) + += Round chip +#let round_chip = load_chip("src/keccak_round.toml", config) +#let keccak_rnd = raw(round_chip.name) + +== Columns +#let nr_variables = total_nr_variables(round_chip) +#let nr_columns = total_nr_instantiated_columns(round_chip, config) +#let nr_interactions = compute_nr_interactions(round_chip) + +The #keccak_rnd chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(round_chip, config) + +#strong("Note on " + raw("start") + ".") +`start` contains the state to which the permutation should be applied. +Its three-dimensional array mimics the specification's three-dimensional state +#footnote("FIPS 202, NIST, Section 3.1 (" + link("https://csrc.nist.gov/pubs/fips/202/final") + ")") +and orders the bits as prescribed. +#footnote("FIPS 202, NIST, Section B.1, Algorithm 10 (" + link("https://csrc.nist.gov/pubs/fips/202/final") + ")") + +#strong("Note on " + raw("rnc") + " and " + raw("rbc") + ".") +Rho rotates every lane by a rotation offset in $[0, 64)$. +These offsets are identical for every round. +#footnote("FIPS 202, NIST, page 13, Table 2 (" + link("https://csrc.nist.gov/pubs/fips/202/final") + ")") +We decompose each offset in three components: the lower nibble (4 bits) are represented by `rnc`, while the upper two bits are represented by as `Bit`s in `rbc`. +That is, $#`rho_offset[x][y]` = #`rnc[x][y]` + 16 dot #`rbc[x][y][0]` + 32 dot #`rbc[x][y][1]`$. + + +== Constraints + +The following constraints ensure that `theta` captures the state after applying the first subpermutation of the round-permutation: $theta$. +Note here that `Cxz_left` and `Cxz_right` do have to be range-checked; it cannot be assumed that this implicitly follows from @keccak:c:Dxz combined with `rotated_Cxz`'s definition. +#render_constraint_table(round_chip, config, groups: "theta") + +Next, we constrain that `rho` captures the state after applying subpermutation $rho$. +Note here as well that `rot_left` and `rot_right` do have to be range-checked; it cannot be assumed that this implicitly follows from later constraints. +#render_constraint_table(round_chip, config, groups: "rho") + +Observe that the lane-permutation performed by $pi$ is absorbed in `pi`'s definition. +The next permutation that is constrained in $chi$: +#render_constraint_table(round_chip, config, groups: "chi") + +Lastly, the round constants are added to one of the lanes in the state. +`iota` contains the updated lane. +In the definition of `out`, the output of `chi` and `iota` is combined to construct the output of the permutation. +#render_constraint_table(round_chip, config, groups: "iota") + +Lastly, the round chip contributes the following interactions to the lookup: +#render_constraint_table(round_chip, config, groups: "io") + +== Notes/potential optimizations +- one does not have to repeat `addr` in `state_ptr`; this saves 4 columns and 4 `IS_HALF` checks. +- step $rho$ does not need to be applied to `state[0][0]`; its has a zero-shift. This saves 16 columns and 4 `HWSL` interactions. +- $#`rc[2]` = #`rc[4]` = #`rc[5]` = #`rc[6]` = 0$. As such, those elements need not be stored in `rc`, and need not be XORed into the state in the $iota$-step. This saves 8 columns and 4 `XOR_BYTE` interactions. +- when executed in large volumnes, `KECCAK_RND` could benefit from having a three-way XOR lookup table. With this in place, the 80 interactions in @keccak:c:theta_cxz_start and @keccak:c:theta_cxz could be dropped. + Likewise, 80 columns could be removed from the chip (a \~5% savings). + += Round constant lookup +#let rc_chip = load_chip("src/keccak_rc.toml", config) +#let keccak_rc = raw(rc_chip.name) + +== Columns +#let nr_variables = total_nr_variables(rc_chip) +#let nr_columns = total_nr_instantiated_columns(rc_chip, config) + +We provide the round constants through a short precomputed lookup table: #keccak_rc. +#render_chip_variable_table(rc_chip, config) +#render_constraint_table(rc_chip, config) \ No newline at end of file diff --git a/spec/signatures.typ b/spec/signatures.typ index 12d84f757..2839a74c6 100644 --- a/spec/signatures.typ +++ b/spec/signatures.typ @@ -1,5 +1,6 @@ #import "/book.typ": book-page #import "/src.typ": load_signatures, load_config +#import "/expr.typ": type_to_code #show: book-page("signatures.typ") @@ -19,21 +20,11 @@ raw(cond) + ` => ` } else {``} - let input_str = sig.input.map(elt => { - if type(elt) == array { - raw(elt.at(0)) + `[` + raw(str(elt.at(1))) + `]` - } else { - raw(elt) - } - }).join(`, `) + let input_str = sig.input.map(type_to_code).join(`, `) let output = sig.at("output", default: none) let output_str = if output != none { - if type(output) == array { - raw(output.at(0)) + `[` + raw(str(output.at(1))) + `]` - } else { - raw(output) - } + `; ` + type_to_code(output) + `; ` } else {``} return [#cond_str#raw(sig.tag)#lb#output_str#input_str#rb] @@ -44,12 +35,13 @@ let vars = sig.input + if "output" in sig { (sig.output, )} else {()} return vars.map(v => { - let (label, factor) = if type(v) == array { - (v.at(0), v.at(1)) - } else { - (v, 1) + let factor = 1 + while type(v) == array { + factor *= v.at(1) + v = v.at(0) } - config.variables.types.filter(type => type.label == label).first().subtypes.len() * factor + let lbl = v + config.variables.types.filter(type => type.label == lbl).first().subtypes.len() * factor }) .sum() } diff --git a/spec/src.typ b/spec/src.typ index 6328c4665..d553629ff 100644 --- a/spec/src.typ +++ b/spec/src.typ @@ -40,10 +40,11 @@ // Verify that `var` is a valid variable. let verify_variable(var) = { - if type(var) == array { - assert(var.at(0) in var_labels, message: "Invalid var type: " + repr(var)) + while type(var) == array { assert(type(var.at(1)) == int, message: "Invalid var type: " + repr(var)) - } else if type(var) == str { + var = var.at(0) + } + if type(var) == str { assert(var in var_labels, message: "Invalid var type: " + repr(var)) } else { assert(false, message: "Invalid var type: " + repr(var)) @@ -104,14 +105,13 @@ let all_vars = chip.variables.values().flatten() let all_labels = config.variables.types.map(type => type.label); for var in all_vars { - let type_label = if type(var.type) == array { - var.type.at(0) - } else { - var.type + let type_label = var.type + while type(type_label) == array { + assert(type_label.len() == 2 and type(type_label.at(1)) == int, message: "invalid type: " + repr(var.type)) + type_label = type_label.at(0) } - // Check that all variable types are valid - assert(type_label in all_labels, message: "found invalid var type:" + repr(var.type)) + assert(type_label in all_labels, message: "found invalid var type: " + repr(var.type)) } } diff --git a/spec/src/keccak.toml b/spec/src/keccak.toml new file mode 100644 index 000000000..b8f2d91c2 --- /dev/null +++ b/spec/src/keccak.toml @@ -0,0 +1,102 @@ +name = "KECCAK" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "timestamp at which the permutation is performed" +pad = 0 + +[[variables.input]] +name = "addr" +type = "DWordBL" +desc = "memory address storing the first bit of the state" +pad = 0 + +[[variables.input]] +name = "input_state" +type = [[["Byte", 8], 5], 5] +desc = "state at the start of executing the permutation" +pad = 0 + +[[variables.output]] +name = "output_state" +type = [[["Byte", 8], 5], 5] +desc = "state after executing the permutation" +pad = 0 + +[[variables.auxiliary]] +name = "state_ptr" +type = [["DWordHL", 5], 5] +desc = "memory addresses storing the entire state" +pad = ["*", 8, ["arr", + ["arr", 0, 1, 2, 3, 4], + ["arr", 5, 6, 7, 8, 9], + ["arr", 10, 11, 12, 13, 14], + ["arr", 15, 16, 17, 18, 19], + ["arr", 20, 21, 22, 23, 24] +]] + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + +[[constraint_groups]] +name = "output" + +[[constraints.output]] +kind = "interaction" +tag = "ECALL" +input = ["timestamp", ["arr", ["-", ["^", 2, 32], 1], ["-", ["^", 2, 32], 2]]] +multiplicity = ["-", "μ"] + +[[constraint_groups]] +name = "mem" + +[[constraints.mem]] +kind = "interaction" +tag = "MEMW" +input = [1, ["cast", ["*", 2, 10], "DWordWL"], "addr", "timestamp", 1, 0, 0] +output = "addr" +multiplicity = "μ" +ref = "keccak:c:read_addr" + +[[constraints.mem]] +kind = "template" +tag = "ADD" +input = [["cast", "addr", "DWordWL"], ["cast", ["*", 8, ["+", ["*", 5, "y"], "x"]], "DWordWL"]] +output = ["cast", ["idx", ["idx", "state_ptr", "x"], "y"], "DWordWL"] +iters = [["x", 0, 4], ["y", 0, 4]] +ref = "keccak:c:state_ptr" + +[[constraints.mem]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", ["idx", ["idx", "state_ptr", "x"], "y"], "z"]] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 3]] +multiplicity = "μ" + +[[constraints.mem]] +kind = "interaction" +tag = "MEMW" +input = [0, ["cast", ["idx", ["idx", "state_ptr", "x"], "y"], "DWordWL"], ["idx", ["idx", "output_state", "x"], "y"], "timestamp", 0, 0, 1] +output = ["idx", ["idx", "input_state", "x"], "y"] +iters = [["x", 0, 4], ["y", 0, 4]] +multiplicity = "μ" +ref = "keccak:c:load_store_state" + +[[constraint_groups]] +name = "round" + +[[constraints.round]] +kind = "interaction" +tag = "KECCAK" +input = ["timestamp", 0, "input_state"] +multiplicity = "μ" + +[[constraints.round]] +kind = "interaction" +tag = "KECCAK" +input = ["timestamp", 24, "output_state"] +multiplicity = ["-", "μ"] diff --git a/spec/src/keccak_rc.toml b/spec/src/keccak_rc.toml new file mode 100644 index 000000000..7844dfbee --- /dev/null +++ b/spec/src/keccak_rc.toml @@ -0,0 +1,28 @@ +name = "KECCAK_RC" + +[[variables.input]] +name = "round" +type = "BaseField" +desc = "" +precomputed = true + +[[variables.input]] +name = "RC" +type = ["Byte", 8] +desc = "round constants for the given `round`" +precomputed = true + +[[variables.multiplicity]] +name = "μ" +type = "BaseField" +desc = "" + +[[constraint_groups]] +name = "contributions" + +[[constraints.contributions]] +kind = "interaction" +tag = "KECCAK_RC" +input = ["round"] +output = "RC" +multiplicity = ["-", "μ"] diff --git a/spec/src/keccak_round.toml b/spec/src/keccak_round.toml new file mode 100644 index 000000000..548cb5151 --- /dev/null +++ b/spec/src/keccak_round.toml @@ -0,0 +1,301 @@ +name = "KECCAK_RND" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "timestamp at which the permutation is performed" + + +[[variables.input]] +name = "round" +type = "BaseField" +desc = "index of the permutation round" + +[[variables.input]] +name = "start" +type = [[["Byte", 8], 5], 5] +desc = "state at the start of executing the permutation" + +[[variables.auxiliary]] +name = "Cxz" +type = [[["Byte", 8], 4], 5] +desc = "$xor_(i=0)^(y+2) #`start[x,i,z]`$" + +[[variables.auxiliary]] +name = "Cxz_left" +type = [["Byte", 8], 5] +desc = "the left-rotated component of `rotated_Cxz`" + +[[variables.auxiliary]] +name = "Cxz_right" +type = [["Byte", 8], 5] +desc = "the right-rotated component of `rotated_Cxz`" + +[[variables.auxiliary]] +name = "Dxz" +type = [["Byte", 8], 5] +desc = "$#`Cxz[`\\(#`x` - 1) mod 5#`,y,z]` xor #`rotated_Cxz[`\\(#`x` + 1) mod 5#`,y,z]`$" + +[[variables.auxiliary]] +name = "theta" +type = [[["Byte", 8], 5], 5] +desc = "$theta(#`start`)$, the state after applying $theta$." + +[[variables.auxiliary]] +name = "rot_left" +type = [[["Byte", 8], 5], 5] +desc = "the left-rotated component of $#`theta[x,y]` <<< #`rnc`$" + +[[variables.auxiliary]] +name = "rot_right" +type = [[["Byte", 8], 5], 5] +desc = "the right-rotated component of $#`theta[x,y]` <<< #`rnc`$" + +[[variables.auxiliary]] +name = "chi_ANDs" +type = [[["Byte", 8], 5], 5] +desc = "$(#`pi[`\\(x+1) mod 5#`,y,z]` xor 255) times.o #`pi[`\\(x + 2) mod 5#`,y,z]`$" + +[[variables.auxiliary]] +name = "chi" +type = [[["Byte", 8], 5], 5] +desc = "$(chi compose pi compose rho compose theta)(#`start`)$; the state after applying $chi$" + +[[variables.auxiliary]] +name = "rc" +type = ["Byte", 8] +desc = "round constants" + +[[variables.auxiliary]] +name = "iota" +type = ["Byte", 8] +desc = "state update following from step $iota$." + +[[variables.virtual]] +name = "rotated_Cxz" +type = [["Byte", 8], 5] +desc = "$#`Cxz[x,`3#`,z]` <<< 1$" +def = {iters=[["x", 0, 4], ["z", 0, 7]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], ["mod", ["-", "z", 1], 8]]]} + +[[variables.virtual]] +name = "out" +type = [[["Byte", 8], 5], 5] +desc = "state at the end of executing the permutation" +def = {polys=[ + {iters=[["x", 0], ["y", 0], ["z", 0, 7]], poly=["idx", "iota","z"]}, + {iters=[["x", 1, 4], ["y", 0], ["z", 0, 7]], poly=["idx",["idx",["idx","chi","x"],"y"],"z"]}, + {iters=[["x", 0], ["y", 1, 4], ["z", 0, 7]], poly=["idx",["idx",["idx","chi","x"],"y"],"z"]}, + {iters=[["x", 1, 4], ["y", 1, 4], ["z", 0, 7]], poly=["idx",["idx",["idx","chi","x"],"y"],"z"]} +]} + +[[variables.virtual]] +name = "rho" +type = [[["Byte", 8], 5], 5] +desc = "$(rho compose theta)(#`start`)$; the state state after applying $rho$" +def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ + "+", + ["*", + ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 0]], + ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1]], + ["+", + ["idx", ["idx", ["idx", "rot_left", "x"], "y"], "z"], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 1], 8]], + ] + ], + ["*", + ["idx", ["idx", ["idx", "rbc", "x"], "y"], 0], + ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1]], + ["+", + ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 2], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 3], 8]], + ] + ], + ["*", + ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 0]], + ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1], + ["+", + ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 4], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 5], 8]], + ] + ], + ["*", + ["idx", ["idx", ["idx", "rbc", "x"], "y"], 0], + ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1], + ["+", + ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 6], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 7], 8]], + ] + ], +]} + +[[variables.virtual]] +name = "pi" +type = [[["Byte", 8], 5], 5] +desc = "$(pi compose rho compose theta)(#`start`)$; the state after applying $pi$" +def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=["idx", ["idx", ["idx", "rho", ["mod", ["+", "x", ["*", 3, "y"]], 5]], "x"], "z"]} + +[[variables.constant]] +name = "rnc" +type = [["Byte", 5], 5] +desc = "lower nibble of `ρ` constants" + +[[variables.constant]] +name = "rbc" +type = [[["Bit", 2], 5], 5] +desc = "top two bits of `ρ` constants" + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" + + +# Assumptions + + +# Constraints + +[[constraint_groups]] +name = "io" + +[[constraints.io]] +kind = "interaction" +tag = "KECCAK" +input = ["timestamp", "round", "start"] +multiplicity = ["-", "μ"] + +[[constraints.io]] +kind = "interaction" +tag = "KECCAK" +input = ["timestamp", ["+", "round", 1], "out"] +multiplicity = "μ" + +[[constraints.io]] +kind = "interaction" +tag = "KECCAK_RC" +input = ["round"] +output = "rc" +multiplicity = ["-", "μ"] + +[[constraint_groups]] +name = "theta" + +[[constraints.theta]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "start", "x"], 0], "z"], ["idx", ["idx", ["idx", "start", "x"], 1], "z"]] +output = ["idx", ["idx", ["idx", "Cxz", "x"], 0], "z"] +iters = [["x", 0, 4], ["z", 0, 7]] +multiplicity = "μ" +ref = "keccak:c:theta_cxz_start" + +[[constraints.theta]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "Cxz", "x"], ["-", "y", 2]], "z"], ["idx", ["idx", ["idx", "start", "x"], "y"], "z"]] +output = ["idx", ["idx", ["idx", "Cxz", "x"], ["-", "y", 1]], "z"] +iters = [["x", 0, 4], ["y", 2, 4], ["z", 0, 7]] +multiplicity = "μ" +ref = "keccak:c:theta_cxz" + +[[constraints.theta]] +kind = "interaction" +tag = "HWSL" +input = [["idx", ["cast", ["idx", ["idx", "Cxz", "x"], 3], "DWordHL"], "z"], 1] +output = ["arr", ["idx", ["cast", ["idx", "Cxz_left", "x"], "DWordHL"], "z"], ["idx", ["cast", ["idx", "Cxz_right", "x"], "DWordHL"], "z"]] +iters = [["x", 0, 4], ["z", 0, 3]] +multiplicity = "μ" + +# Note: these IS_BYTE checks are necessary. +# Without them, it is possible to prove 0 <<< S evaluates to -1 by setting +# Cxz_left = [-1, 256, -1, 256, -1, 256, -1, 256] and +# Cxz_right = [ 1, -256, 1, -256, 1, -256, 1, -256] +[[constraints.theta]] +kind = "interaction" +tag = "IS_BYTE" +input = [["idx", ["idx", "Cxz_left", "x"], "z"]] +iters = [["x", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraints.theta]] +kind = "interaction" +tag = "IS_BYTE" +input = [["idx", ["idx", "Cxz_right", "x"], "z"]] +iters = [["x", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraints.theta]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "Cxz", ["mod", ["-", "x", 1], 5]], 3], "z"], ["idx", ["idx", "rotated_Cxz", ["mod", ["+", "x", 1], 5]], "z"]] +output = ["idx", ["idx", "Dxz", "x"], "z"] +iters = [["x", 0, 4], ["z", 0, 7]] +multiplicity = "μ" +ref = "keccak:c:Dxz" + +[[constraints.theta]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "start", "x"], "y"], "z"], ["idx", ["idx", "Dxz", "x"], "z"]] +output = ["idx", ["idx", ["idx", "theta", "x"], "y"], "z"] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraint_groups]] +name = "rho" + +[[constraints.rho]] +kind = "interaction" +tag = "HWSL" +input = [["idx", ["cast", ["idx", ["idx", "theta", "x"], "y"], "DWordHL"], "z"], ["idx", ["idx", "rnc", "x"], "y"]] +output = ["arr", ["idx", ["cast", ["idx", ["idx", "rot_left", "x"], "y"], "DWordHL"], "z"], ["idx", ["cast", ["idx", ["idx", "rot_right", "x"], "y"], "DWordHL"], "z"]] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 3]] +multiplicity = "μ" + +# Note: these IS_BYTE checks are necessary. +# Without them, it is possible to prove 0 <<< S evaluates to -1 by setting +# rot_left = [-1, 256, -1, 256, -1, 256, -1, 256] and +# rot_right = [ 1, -256, 1, -256, 1, -256, 1, -256] +[[constraints.rho]] +kind = "interaction" +tag = "IS_BYTE" +input = [["idx", ["idx", ["idx", "rot_left", "x"], "y"], "z"]] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraints.rho]] +kind = "interaction" +tag = "IS_BYTE" +input = [["idx", ["idx", ["idx", "rot_right", "x"], "y"], "z"]] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraint_groups]] +name = "chi" + +[[constraints.chi]] +kind = "interaction" +tag = "AND_BYTE" +input = [["-", 255, ["idx", ["idx", ["idx", "pi", ["mod", ["+", "x", 1], 5]], "y"], "z"]], ["idx",["idx",["idx", "pi", ["mod", ["+", "x", 2], 5]], "y"], "z"]] +output = ["idx", ["idx", ["idx", "chi_ANDs", "x"], "y"], "z"] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraints.chi]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "pi", "x"], "y"], "z"], ["idx",["idx",["idx", "chi_ANDs", "x"], "y"], "z"]] +output = ["idx", ["idx", ["idx", "chi", "x"], "y"], "z"] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +multiplicity = "μ" + +[[constraint_groups]] +name = "iota" + +[[constraints.iota]] +kind = "interaction" +tag = "XOR_BYTE" +input = [["idx", ["idx", ["idx", "chi", 0], 0], "z"], ["idx","rc","z"]] +output = ["idx", "iota", "z"] +iter = ["z", 0, 7] +multiplicity = "μ" diff --git a/spec/src/signatures.toml b/spec/src/signatures.toml index 33f97cebf..b04b3d561 100644 --- a/spec/src/signatures.toml +++ b/spec/src/signatures.toml @@ -211,3 +211,16 @@ tag = "ROTXOR" kind = "interaction" input = ["Word", "Byte", "Byte", "Byte", "Bit"] output = "Word" + +# Keccak communication between rounds +[[signatures]] +tag = "KECCAK" +kind = "interaction" +input = ["DWordWL", "BaseField", [[["Byte", 8], 5], 5]] + +# Keccak round constants +[[signatures]] +tag = "KECCAK_RC" +kind = "interaction" +input = ["BaseField"] +output = ["Byte", 8] diff --git a/spec/tooling/chip.py b/spec/tooling/chip.py index d597d2274..7f7ecca81 100644 --- a/spec/tooling/chip.py +++ b/spec/tooling/chip.py @@ -255,6 +255,30 @@ def typecheck(self, env: Environment) -> Type: return t +@dataclass +class ModExpr: + elt: Expr + modulus: Expr + + def typecheck(self, env: Environment) -> Type: + elt = self.elt.typecheck(env) + modulus = self.modulus.typecheck(env) + + if isinstance(modulus, list) or not modulus.is_const(): + reporter.error(f"Invalid non-constant modulus: {self.modulus!r}") + return Range.const(0) + modulus = modulus.get_const() + if modulus <= 0: + reporter.error(f"Invalid non-positive modulus: {self.modulus!r}") + return Range.const(0) + + if elt.is_const(): + elt = elt.get_const() + return Range.const(elt % modulus) + else: + return Range(0, modulus-1) + + @dataclass class PowExpr: base: Expr @@ -343,6 +367,8 @@ def build_expr(config: Optional["Config"], data: object) -> Expr: return SubExpr( build_expr(config, head), [build_expr(config, s) for s in subs] ) + case ["mod", elt, modulus]: + return ModExpr(build_expr(config, elt), build_expr(config, modulus)) case ["^", base, exp]: return PowExpr(build_expr(config, base), build_expr(config, exp)) case ["sum", ["=", str(var), start], stop, terms]: From 0f90155442dd27f64bf3a584b3c53becf894792d Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Fri, 24 Apr 2026 11:41:43 +0200 Subject: [PATCH 3/9] spec: Inline PC memory access into CPU (#501) * spec: Inline PC memory access into CPU * Apply suggestions from code review Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Robin Jadoul * Apply review suggestion Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * Remove `pc_double_read` constraints and clarify why in cpu.typ * Potential optimization -> subsubsection * Address review comments * Clarifying remark on register initialization --------- Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> --- spec/cpu.typ | 16 +++++++++++++--- spec/decode.typ | 14 +++++++------- spec/memory.typ | 2 ++ spec/src/cpu.toml | 36 +++++++++++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 13 deletions(-) diff --git a/spec/cpu.typ b/spec/cpu.typ index 2fbd60d59..2ee85befa 100644 --- a/spec/cpu.typ +++ b/spec/cpu.typ @@ -56,16 +56,26 @@ The ALU functionality is then obtained through judicious dispatching to the corr #render_constraint_table(chip, config, groups: "alu") -== Memory +== Memory The interactions with the memory, both for register loading and storing, as for `LOAD` and `STORE` instructions are handled. Note that since registers need no byte-addressing, we store them in the memory argument with `Word` limbs. -The timestamps are ensured to be disjoint for disjoint memory locations. +The `pc` register behaves very predictably with respect to its timestamps and when it is being read, +so for performance reasons, we inline its memory interactions directly into the #cpu chip. + +Potentially overlapping memory accesses are ensured to have disjoint timestamps. One consequence of that is that `next_pc` is written at `timestamp + 1` -to ensure the access is disjoint with the `pc` read into `rv1` as part of the `AUIPC` instruction. +to ensure the access is disjoint with the `pc` read into `rv1` as part of the `AUIPC` instruction (see @cpu:c:read_rv1 and @decode:decoding-overview). +Constraints regarding whether `pc_double_read` corresponds to an `AUIPC` instruction are not necessary, +as regardless of its value, the old timestamp is guaranteed smaller than the new timestamp, +and the integrity of the memory argument therefore ensures the correctness of this bit. #render_constraint_table(chip, config, groups: "mem") +=== Potential optimizations + +- `double_pc_read` could be integrated into decoding, so that `AUIPC` could set `read_register1 = 0` and no extra MEMW access for `rv1` is needed at this point. + == System The interactions with the wider system. diff --git a/spec/decode.typ b/spec/decode.typ index bb5d0d5a1..21cd26acb 100644 --- a/spec/decode.typ +++ b/spec/decode.typ @@ -39,7 +39,7 @@ Given that `CPU` asserts that `EBREAK = 0` (see @cpu:c:ebreak_traps), using this Note moreover that the `pc` is set to $7$. This value is the _smallest odd number_ (i.e., not reachable during regular execution) that is more than _$4$_ (i.e., the max `pc`-increment) greater than _$1$_ (i.e., the `pc`-value used in the #link()[additional instruction] referred to by `CPU`-padding lines). -= Decoding += Decoding For the purposes of explaining decoding, we decompress #decode's `packed_decode` variable into its constituent variables. Note that the below table is _not_ used in practice: it is solely used for the purposes of this explanation. @@ -64,12 +64,6 @@ For the purpose of brevity and readability, the table uses the following rules-o Further clarification is provided in the notes following the table. -== C-type instructions -The `RV64C` extension for compressed instructions specifies that \~50% of all instructions can be represented using a 16-bit instruction (rather than 32-bits), saving \~25% in code size. -This execution of assembly code is _not_ agnostic to an instruction's compression state; after executing a compressed instruction, the `pc` should be incremented by $2$ rather than $4$. -To indicate an instruction is provided in compressed form, the `c_type` flag is introduced. -*This flag should be set to $1$ whenever the decoded instruction is provided in compressed form and $0$ otherwise.* - /// Add a reference to one or more notes following this table. #let ref_note(..refs) = { super("[" + refs.pos().map(r => ref(r)).join(",") + "]") @@ -152,6 +146,12 @@ To indicate an instruction is provided in compressed form, the `c_type` flag is #decoding_table(decoding) +== C-type instructions +The `RV64C` extension for compressed instructions specifies that \~50% of all instructions can be represented using a 16-bit instruction (rather than 32-bits), saving \~25% in code size. +This execution of assembly code is _not_ agnostic to an instruction's compression state; after executing a compressed instruction, the `pc` should be incremented by $2$ rather than $4$. +To indicate an instruction is provided in compressed form, the `c_type` flag is introduced. +*This flag should be set to $1$ whenever the decoded instruction is provided in compressed form and $0$ otherwise.* + // Construct a note that can be referenced through `lbl` #let referenceable_note(lbl, note) = { show figure: (it) => align(left, [#it]) diff --git a/spec/memory.typ b/spec/memory.typ index 876884c85..6a204c1b7 100644 --- a/spec/memory.typ +++ b/spec/memory.typ @@ -136,6 +136,8 @@ The initialization will need to correspond to a fixed initial register state for as well as the memory loaded from the program binary, zero-initialization of memory elsewhere, and private input provided by the prover. The contribution of initialization with static data from the ELF executable and the initial register state to the sum can be handled directly by the verifier, ensuring correctness corresponding to the ELF binary being proven. +To enable the loading of the PC in @cpu:memory, register initialization happens at timestamp 1. +Register finalization is made possible for the verifier by having a known state from the HALT chip (@halt). This leaves only zero-initialization and prover input as prover-side concerns for initialization, alongside the finalization of the entire used memory. diff --git a/spec/src/cpu.toml b/spec/src/cpu.toml index a455b854f..b25138d4d 100644 --- a/spec/src/cpu.toml +++ b/spec/src/cpu.toml @@ -221,6 +221,18 @@ desc = "The value to (maybe) be written back to rvd" pad = 0 # Auxiliary +[[variables.auxiliary]] +name = "prev_pc_timestamp_borrow" +type = "Bit" +desc = "The borrow bit for computing the previous timestamp the PC was accessed" +pad = 0 + +[[variables.auxiliary]] +name = "pc_double_read" +type = "Bit" +desc = "Whether the PC is being read as a general purpose register (`rs1`) this cycle" +pad = 0 + [[variables.auxiliary]] name = "rv1" type = "DWordWHH" @@ -652,6 +664,7 @@ tag = "MEMW" input = [1, ["*", ["cast", 2, "DWordWL"], "rs1"], ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", ["cast", "rv1", "DWordWL"], 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 0, "DWordWL"]], 1, 0, 0] output = ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", ["cast", "rv1", "DWordWL"], 1], 0, 0, 0, 0, 0, 0] multiplicity = "read_register1" +ref = "cpu:c:read_rv1" [[constraints.mem]] kind = "arith" @@ -691,12 +704,29 @@ tag = "MEMW" input = [0, ["cast", "res", "DWordWL"], ["cast", "arg2", ["Byte", 8]], ["+", "timestamp", ["cast", 1, "DWordWL"]], "memory_2bytes", "memory_4bytes", "memory_8bytes"] multiplicity = "STORE" +[[constraints.mem]] +kind = "template" +tag = "IS_BIT" +input = ["pc_double_read"] + +[[constraints.mem]] +kind = "template" +tag = "IS_BIT" +input = ["prev_pc_timestamp_borrow"] + [[constraints.mem]] kind = "interaction" -tag = "MEMW" -input = [1, ["cast", ["*", 2, 255], "DWordWL"], ["arr", ["idx", "next_pc", 0], ["idx", "next_pc", 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], 1, 0, 0] -output = ["arr", ["idx", "pc", 0], ["idx", "pc", 1], 0, 0, 0, 0, 0, 0] +tag = "memory" +input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["arr", ["+", ["-", ["idx", "timestamp", 0], ["*", 3, ["not", "pc_double_read"]]], ["*", ["^", 2, 32], "prev_pc_timestamp_borrow"]], ["-", ["idx", "timestamp", 1], "prev_pc_timestamp_borrow"]], ["idx", "pc", "i"]] multiplicity = ["not", "pad"] +iter = ["i", 0, 1] + +[[constraints.mem]] +kind = "interaction" +tag = "memory" +input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], ["idx", "next_pc", "i"]] +multiplicity = ["-", ["not", "pad"]] +iter = ["i", 0, 1] [[constraint_groups]] From 6363f3e0c2594bc26611efdfd049f88df7d8bae2 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Tue, 28 Apr 2026 16:56:00 +0200 Subject: [PATCH 4/9] spec: shiroa fixes (#533) * Fix shiroa build - Strip raw blocks from chapter titles for `project` argument - Add an explicit description (based on chapter title) to chapters to avoid compilation issues when context appears early in the chapter - Export interaction counts from the pdf version to use in shiroa, since otherwise we run into convergence issues that are hard to debug * cd into script directory and harden against stale interaction counts * Update spec/book.typ Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * Fall back to single-threaded shiroa when insufficient memory --------- Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> --- spec/.gitignore | 1 + spec/book.typ | 13 +++++++++---- spec/build_shiroa.sh | 27 +++++++++++++++++++++++++++ spec/chip.typ | 11 ++++++++--- 4 files changed, 45 insertions(+), 7 deletions(-) create mode 100755 spec/build_shiroa.sh diff --git a/spec/.gitignore b/spec/.gitignore index 73218d5ba..b5ca9ae62 100644 --- a/spec/.gitignore +++ b/spec/.gitignore @@ -1,2 +1,3 @@ dist/* +interaction_count.json ebook.pdf diff --git a/spec/book.typ b/spec/book.typ index 3b65642f9..5bf498b49 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -107,6 +107,7 @@ // Invisibly include another chapter, so that its labels can be resolved #let xref-include(f) = { + show ref: none context { place(hide(box(width: auto, height: 0%, strip-all(include "/" + f)))) } @@ -171,13 +172,17 @@ } }) let cond() = _toplevel.final() == file - project.with(..args, title: context meta_sections.find(x => x.at(0) == _toplevel.final()).at(1), cond: cond)([ - #show ref: it => context if _toplevel.final() == file { - xref(it) - } + show ref: it => context if cond() { xref(it) } + let title = context { + // Strip raw, because shiroa already makes the title raw + show raw: it => it.text + meta_sections.find(x => x.at(0) == _toplevel.final()).at(1) + } + project.with(..args, title: title, description: plain-text(meta_sections.find(x => x.at(0) == file).at(1)), cond: cond)([ #context _xref-included.final().pairs().map(((key, value)) => context if value and cond() { xref-include(key) }).join() + #metadata(json("interaction_count.json").sum(default: (:))) #body ]) } diff --git a/spec/build_shiroa.sh b/spec/build_shiroa.sh new file mode 100755 index 000000000..55a3c8d24 --- /dev/null +++ b/spec/build_shiroa.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# cd into the script directory +cd "$(dirname "${BASH_SOURCE[0]}")" + +# Clean up potential old file +rm -f interaction_count.json + +# Always clean up after ourselves +trap 'rm -f interaction_count.json' EXIT + +# Query the ebook version for the proper counts +typst query ebook.typ '' --field value > interaction_count.json + +# Check if there's enough memory available for a parallel shiroa build +# 20GiB as comfortable baseline +available_kb=$(awk '/MemAvailable/ { print $2 }' /proc/meminfo) +required_kb=$((20 * 1024 * 1024)) +if [ "$available_kb" -lt "$required_kb" ]; then + echo "Falling back to single-thread" + export RAYON_NUM_THREADS=1 +fi + +# And build +shiroa build diff --git a/spec/chip.typ b/spec/chip.typ index c6cce5073..1c89dcc55 100644 --- a/spec/chip.typ +++ b/spec/chip.typ @@ -51,14 +51,19 @@ // store it as metadata under the `` label // with tag `chip.name`. This tag is overwritten by `name` when specified. #let set_nr_interactions(chip, name: none) = { + // Skip when building shiroa, since the web/chapter structure fails to converge properly + import "book.typ": is-shiroa + if is-shiroa { + return + } if name == none { - name = chip.name - } + name = chip.name + } let constraints = chip .constraints .values() - .flatten() + .sum(default: ()) // nr. of direct interactions let nr-direct-interactions = constraints From b6478d6d218a05b780e8142f7bb884f4d801a5a5 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Tue, 28 Apr 2026 17:01:58 +0200 Subject: [PATCH 5/9] spec: Enable heading numbering for section references in shiroa (#534) * spec: Enable heading numbering for section references in shiroa * Remove explicit heading numbering from logup chapter --- spec/book.typ | 7 ++++++- spec/logup.typ | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/book.typ b/spec/book.typ index 5bf498b49..57363ee23 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -145,7 +145,7 @@ } else { rf.supplement } - [#supplement#numbering(fig.numbering, ..counter.at(lbl))] + [#supplement #numbering(fig.numbering, ..counter.at(lbl))] } cross-link("/" + ch, reference: shiroa-label, link-content) } @@ -183,6 +183,11 @@ xref-include(key) }).join() #metadata(json("interaction_count.json").sum(default: (:))) + + #let chapter-index = meta_sections.position(x => x.at(0) == file) + 1 + #set heading(numbering: (..args) => [#chapter-index.#numbering("1.1", ..args)]) + #counter(heading).update(0) + #body ]) } diff --git a/spec/logup.typ b/spec/logup.typ index 7bb9a085d..038d67a8c 100644 --- a/spec/logup.typ +++ b/spec/logup.typ @@ -1,7 +1,6 @@ #import "/book.typ": book-page, aside, cdsg #show: book-page("logup") -#set heading(numbering: "1.") #show link: underline #show "constraint choice": link()[constraint choice] From 687253725716d22b3262e56e727087f0a2a6eeb6 Mon Sep 17 00:00:00 2001 From: Erik <159244975+erik-3milabs@users.noreply.github.com> Date: Tue, 28 Apr 2026 17:22:28 +0200 Subject: [PATCH 6/9] spec: `ARE_BYTES` (#532) * spec/ARE_BYTES: introduce ARE_BYTES signature * spec/ARE_BYTES: introduce ARE_BYTES lookup * spec/ARE_BYTES: introduce IS_BYTE template * spec/ARE_BYTES: switch IS_BYTE lookup to IS_BYTE template * spec/ARE_BYTES: drop IS_BYTE interaction * spec/ARE_BYTES: drop IS_BYTE lookup signature * spec/ARE_BYTES: turn multiplicity into cond * spec/ARE_BYTES: remove as potential optimization * spec/ARE_BYTES: update assumptions using IS_BYTE --- spec/bitwise.typ | 2 -- spec/book.typ | 1 + spec/is_byte.typ | 22 ++++++++++++++++++++++ spec/src/bitwise.toml | 11 ++++++++--- spec/src/branch.toml | 4 ++-- spec/src/cpu.toml | 18 ++++++------------ spec/src/is_byte.toml | 21 +++++++++++++++++++++ spec/src/keccak_round.toml | 18 +++++++++--------- spec/src/page.toml | 6 ++---- spec/src/sha256.toml | 4 ++-- spec/src/sha256msgsched.toml | 8 ++++---- spec/src/sha256round.toml | 8 ++++---- spec/src/shift.toml | 2 +- spec/src/signatures.toml | 13 ++++++++++--- 14 files changed, 92 insertions(+), 46 deletions(-) create mode 100644 spec/is_byte.typ create mode 100644 spec/src/is_byte.toml diff --git a/spec/bitwise.typ b/spec/bitwise.typ index 82f9e36f9..332bef3d9 100644 --- a/spec/bitwise.typ +++ b/spec/bitwise.typ @@ -37,8 +37,6 @@ This chip adds the following interactions to the lookup: = Notes/Optimizations The following ideas may prove to be optimizations for the #bitwise chip: -+ Extend `IS_BYTE[X]` to `ARE_BYTES[X, Y]`, such that two bytes are range checked at once. - When only a single check is required, one can still execute `IS_BYTE[X] := ARE_BYTES[X, 0]`. + Drop `MSB8` column, and instead define the `MSB8` lookup as `MSB8 := MSB16[256X]`. Note: currently, `MSB8` also implicity range checks the input `X` (the lookup fails if `X` is not a `Byte`). This optimization should only be executed when all chips leveraging `MSB8` do _not_ need this implicit range check. diff --git a/spec/book.typ b/spec/book.typ index 57363ee23..f4e95e493 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -18,6 +18,7 @@ )), ("TEMPLATES", ( ("is_bit.typ", [`IS_BIT` template], ), + ("is_byte.typ", [`IS_BYTE` template], ), ("sign.typ", [`SIGN` template], ), ("add.typ", [`ADD`/`SUB` template], ), ("neg.typ", [`NEG` template], ), diff --git a/spec/is_byte.typ b/spec/is_byte.typ new file mode 100644 index 000000000..09bf78e98 --- /dev/null +++ b/spec/is_byte.typ @@ -0,0 +1,22 @@ +#import "/book.typ": book-page +#import "/src.typ": load_config, load_chip +#import "/chip.typ": render_chip_variable_table, render_constraint_table, compute_nr_interactions, total_nr_variables, total_nr_instantiated_columns + +#let config = load_config() +#let chip = load_chip("src/is_byte.toml", config) + +#show: book-page(chip.name) +#let is_byte = raw(chip.name) + +#is_byte is a constraint template that is used to assert that a variable lies in the range $[0, 255]$ under the condition that `cond` is non-zero. Note: when `cond` is omitted, it defaults to $1$. + +When a chip leverages this template twice or more, implementors are encouraged to merge pairs of #is_byte interactions with identical conditions into `ARE_BYTES` interactions; the #is_byte template is included for convenience of notation, and to complete the specification of chips that use an odd number of #is_byte range checks. + += Variables +#let nr_interactions = compute_nr_interactions(chip) + +The #is_byte template leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + += Constraints +#render_constraint_table(chip, config) \ No newline at end of file diff --git a/spec/src/bitwise.toml b/spec/src/bitwise.toml index 67d73facd..30875a810 100644 --- a/spec/src/bitwise.toml +++ b/spec/src/bitwise.toml @@ -101,6 +101,11 @@ name = "μ_IS_BYTE" type = "BaseField" desc = "" +[[variables.multiplicity]] +name = "μ_ARE_BYTES" +type = "BaseField" +desc = "" + [[variables.multiplicity]] name = "μ_IS_HALF" type = "BaseField" @@ -164,9 +169,9 @@ multiplicity = ["-", "μ_ZERO"] [[constraints.contributions]] kind = "interaction" -tag = "IS_BYTE" -input = ["X"] -multiplicity = ["-", "μ_IS_BYTE"] +tag = "ARE_BYTES" +input = ["X", "Y"] +multiplicity = ["-", "μ_ARE_BYTES"] [[constraints.contributions]] kind = "interaction" diff --git a/spec/src/branch.toml b/spec/src/branch.toml index a98974678..49a7833a3 100644 --- a/spec/src/branch.toml +++ b/spec/src/branch.toml @@ -116,10 +116,10 @@ output = "next_pc_unmasked" cond = "JALR" [[constraints.all]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", "next_pc_low", 1]] -multiplicity = "μ" +cond = "μ" [[constraints.all]] kind = "interaction" diff --git a/spec/src/cpu.toml b/spec/src/cpu.toml index b25138d4d..a0bd3925c 100644 --- a/spec/src/cpu.toml +++ b/spec/src/cpu.toml @@ -523,43 +523,37 @@ input = ["EBREAK"] ref = "cpu:c:range_EBREAK" [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["rs1"] -multiplicity = 1 [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["rs2"] -multiplicity = 1 [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["rd"] -multiplicity = 1 [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", "arg1", "i"]] iter = ["i", 0, 7] -multiplicity = 1 [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", "arg2", "i"]] iter = ["i", 0, 7] -multiplicity = 1 [[constraints.range]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", "res", "i"]] iter = ["i", 0, 7] -multiplicity = 1 [[constraint_groups]] diff --git a/spec/src/is_byte.toml b/spec/src/is_byte.toml new file mode 100644 index 000000000..c9f8ee363 --- /dev/null +++ b/spec/src/is_byte.toml @@ -0,0 +1,21 @@ +name = "IS_BYTE" + +[[variables.condition]] +name = "cond" +type = "BaseField" +desc = "" + +[[variables.input]] +name = "X" +type = "BaseField" +desc = "Value for which to assert that it lies in the range $[0, 255]$." + +[[constraint_groups]] +name = "all" + +[[constraints.all]] +kind = "interaction" +tag = "ARE_BYTES" +input = [0, "X"] +multiplicity = "cond" +ref = "isbyte:c:isbyte" diff --git a/spec/src/keccak_round.toml b/spec/src/keccak_round.toml index 548cb5151..d41b7c472 100644 --- a/spec/src/keccak_round.toml +++ b/spec/src/keccak_round.toml @@ -211,18 +211,18 @@ multiplicity = "μ" # Cxz_left = [-1, 256, -1, 256, -1, 256, -1, 256] and # Cxz_right = [ 1, -256, 1, -256, 1, -256, 1, -256] [[constraints.theta]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", ["idx", "Cxz_left", "x"], "z"]] iters = [["x", 0, 4], ["z", 0, 7]] -multiplicity = "μ" +cond = "μ" [[constraints.theta]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", ["idx", "Cxz_right", "x"], "z"]] iters = [["x", 0, 4], ["z", 0, 7]] -multiplicity = "μ" +cond = "μ" [[constraints.theta]] kind = "interaction" @@ -257,18 +257,18 @@ multiplicity = "μ" # rot_left = [-1, 256, -1, 256, -1, 256, -1, 256] and # rot_right = [ 1, -256, 1, -256, 1, -256, 1, -256] [[constraints.rho]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", ["idx", ["idx", "rot_left", "x"], "y"], "z"]] -iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] -multiplicity = "μ" +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +cond = "μ" [[constraints.rho]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", ["idx", ["idx", "rot_right", "x"], "y"], "z"]] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] -multiplicity = "μ" +cond = "μ" [[constraint_groups]] name = "chi" diff --git a/spec/src/page.toml b/spec/src/page.toml index dff939558..b6247a7c7 100644 --- a/spec/src/page.toml +++ b/spec/src/page.toml @@ -40,16 +40,14 @@ def = ["+", "page", ["*", "offset", ["cast", 1, "DWordWL"]]] name = "all" [[constraints.all]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["init"] -multiplicity = 1 [[constraints.all]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["fini"] -multiplicity = 1 [[constraints.all]] kind = "interaction" diff --git a/spec/src/sha256.toml b/spec/src/sha256.toml index 4cd4de9ba..78d022515 100644 --- a/spec/src/sha256.toml +++ b/spec/src/sha256.toml @@ -232,11 +232,11 @@ input = ["timestamp", "last_round_out", 64] multiplicity = ["-", "μ"] [[constraints.compress]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["idx", "out", "i"]] -multiplicity = "μ" iter = ["i", 0, 31] +cond = "μ" [[constraints.compress]] kind = "template" diff --git a/spec/src/sha256msgsched.toml b/spec/src/sha256msgsched.toml index 79664a797..402f7459a 100644 --- a/spec/src/sha256msgsched.toml +++ b/spec/src/sha256msgsched.toml @@ -79,10 +79,10 @@ desc = "#`IS_WORD[SHA256_M[timestamp, i]]` for $0 <= i < #`index`$" name = "lookback" [[constraints.lookback]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = [["-", "index", 16]] -multiplicity = "μ" +cond = "μ" [[constraints.lookback]] kind = "interaction" @@ -130,10 +130,10 @@ output = "s1" multiplicity = "μ" [[constraints.calc]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["carry"] -multiplicity = "μ" +cond = "μ" [[constraints.calc]] kind = "interaction" diff --git a/spec/src/sha256round.toml b/spec/src/sha256round.toml index 8ec93ea36..2469b560c 100644 --- a/spec/src/sha256round.toml +++ b/spec/src/sha256round.toml @@ -254,10 +254,10 @@ multiplicity = "μ" iter = ["i", 0, 1] [[constraints.addition]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["carry_a"] -multiplicity = "μ" +cond = "μ" [[constraints.addition]] kind = "interaction" @@ -267,10 +267,10 @@ multiplicity = "μ" iter = ["i", 0, 1] [[constraints.addition]] -kind = "interaction" +kind = "template" tag = "IS_BYTE" input = ["carry_e"] -multiplicity = "μ" +cond = "μ" [[constraint_groups]] name = "output" diff --git a/spec/src/shift.toml b/spec/src/shift.toml index bbe22a5d9..18c03ecad 100644 --- a/spec/src/shift.toml +++ b/spec/src/shift.toml @@ -147,7 +147,7 @@ iter = ["i", 0, 3] ref = "shift:a:range_in" [[assumptions]] -desc = "`IS_BYTE[shift]`" +desc = "`IS_BYTE`" ref = "shift:a:range_shift" [[assumptions]] diff --git a/spec/src/signatures.toml b/spec/src/signatures.toml index b04b3d561..e93c87b05 100644 --- a/spec/src/signatures.toml +++ b/spec/src/signatures.toml @@ -5,6 +5,13 @@ kind = "template" input = ["BaseField"] cond = "BaseField" +# IS_BYTE +[[signatures]] +tag = "IS_BYTE" +kind = "template" +input = ["BaseField"] +cond = "BaseField" + # cond => ADD [[signatures]] tag = "ADD" @@ -157,11 +164,11 @@ kind = "interaction" input = ["B20"] output = "Bit" -# IS_BYTE[X] +# ARE_BYTES[X, Y] [[signatures]] -tag = "IS_BYTE" +tag = "ARE_BYTES" kind = "interaction" -input = ["Byte"] +input = ["Byte", "Byte"] # IS_HALF[X] [[signatures]] From 2e05a257a25d3cb21463142a7441c611699f2c43 Mon Sep 17 00:00:00 2001 From: Erik <159244975+erik-3milabs@users.noreply.github.com> Date: Tue, 5 May 2026 10:25:03 +0200 Subject: [PATCH 7/9] spec: Fix `KECCAK` (#554) * spec/keccak: fix cyclic-shift indexing mistakes * spec/ecall: fix negative ECALL numbering * spec/keccak: optimize Cxz_right from Byte to Bit * spec/KECCAK: add potential optimization rot_left and rot_right contain 96 constant zero-columns, which can be dropped. Additionally, those zeroes do not have to be byte-checked. * spec/KECCAK: fix typo * spec/KECCAK: list another potential optimization * spec/keccak: fix index division problems * spec/keccak: remove condition from IS_BIT --- spec/keccak.typ | 5 +++++ spec/src/keccak.toml | 2 +- spec/src/keccak_round.toml | 40 +++++++++++++++++++++++++------------- spec/src/sha256.toml | 2 +- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/spec/keccak.typ b/spec/keccak.typ index e6e16f43f..11d900c73 100644 --- a/spec/keccak.typ +++ b/spec/keccak.typ @@ -105,6 +105,11 @@ Lastly, the round chip contributes the following interactions to the lookup: == Notes/potential optimizations - one does not have to repeat `addr` in `state_ptr`; this saves 4 columns and 4 `IS_HALF` checks. - step $rho$ does not need to be applied to `state[0][0]`; its has a zero-shift. This saves 16 columns and 4 `HWSL` interactions. +- when the output of `HWSL` are `Byte`s mapped as `Half`s, we find that out of every four output bytes, at least one is zero. + Since `rnc` is constant, @keccak:c:rho_rotation makes those zero-bytes show up in `rot_left` and `rot_right` at constant locations. + This means 96 columns can be removed from the chip at no cost. + Likewise, 96 `IS_BYTE` interactions can be dropped from @keccak:c:range_rot_left and @keccak:c:range_rot_right. + - the shift-constants are equivalent to $1 mod 16$ for $(#`x`, #`y`) = (1, 0)$ and $-1 mod 16$ for $(2, 3)$. This means that for those lanes it suffices to constrain `rot_left`/`rot_right` as `Bit`s rather than `Byte`s, saving an additional 8 `IS_BYTE` interactions. - $#`rc[2]` = #`rc[4]` = #`rc[5]` = #`rc[6]` = 0$. As such, those elements need not be stored in `rc`, and need not be XORed into the state in the $iota$-step. This saves 8 columns and 4 `XOR_BYTE` interactions. - when executed in large volumnes, `KECCAK_RND` could benefit from having a three-way XOR lookup table. With this in place, the 80 interactions in @keccak:c:theta_cxz_start and @keccak:c:theta_cxz could be dropped. Likewise, 80 columns could be removed from the chip (a \~5% savings). diff --git a/spec/src/keccak.toml b/spec/src/keccak.toml index b8f2d91c2..1c3bade44 100644 --- a/spec/src/keccak.toml +++ b/spec/src/keccak.toml @@ -48,7 +48,7 @@ name = "output" [[constraints.output]] kind = "interaction" tag = "ECALL" -input = ["timestamp", ["arr", ["-", ["^", 2, 32], 1], ["-", ["^", 2, 32], 2]]] +input = ["timestamp", ["cast", ["-", ["^", 2, 64], 2], "DWordWL"]] multiplicity = ["-", "μ"] [[constraint_groups]] diff --git a/spec/src/keccak_round.toml b/spec/src/keccak_round.toml index d41b7c472..59daba923 100644 --- a/spec/src/keccak_round.toml +++ b/spec/src/keccak_round.toml @@ -28,8 +28,8 @@ desc = "the left-rotated component of `rotated_Cxz`" [[variables.auxiliary]] name = "Cxz_right" -type = [["Byte", 8], 5] -desc = "the right-rotated component of `rotated_Cxz`" +type = [["Bit", 4], 5] +desc = "the right-rotated component of `rotated_Cxz` (which is a single bit)" [[variables.auxiliary]] name = "Dxz" @@ -75,7 +75,16 @@ desc = "state update following from step $iota$." name = "rotated_Cxz" type = [["Byte", 8], 5] desc = "$#`Cxz[x,`3#`,z]` <<< 1$" -def = {iters=[["x", 0, 4], ["z", 0, 7]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], ["mod", ["-", "z", 1], 8]]]} +def = {polys=[ + {iters=[["x", 0, 4], ["z", 0]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], 3]]}, + {iters=[["x", 0, 4], ["z", 1]], poly=["idx", ["idx", "Cxz_left", "x"], "z"]}, + {iters=[["x", 0, 4], ["z", 2]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], 0]]}, + {iters=[["x", 0, 4], ["z", 3]], poly=["idx", ["idx", "Cxz_left", "x"], "z"]}, + {iters=[["x", 0, 4], ["z", 4]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], 1]]}, + {iters=[["x", 0, 4], ["z", 5]], poly=["idx", ["idx", "Cxz_left", "x"], "z"]}, + {iters=[["x", 0, 4], ["z", 6]], poly=["+", ["idx", ["idx", "Cxz_left", "x"], "z"], ["idx", ["idx", "Cxz_right", "x"], 2]]}, + {iters=[["x", 0, 4], ["z", 7]], poly=["idx", ["idx", "Cxz_left", "x"], "z"]}, +]} [[variables.virtual]] name = "out" @@ -91,7 +100,7 @@ def = {polys=[ [[variables.virtual]] name = "rho" type = [[["Byte", 8], 5], 5] -desc = "$(rho compose theta)(#`start`)$; the state state after applying $rho$" +desc = "$(rho compose theta)(#`start`)$; the state after applying $rho$" def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ "+", ["*", @@ -99,7 +108,7 @@ def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1]], ["+", ["idx", ["idx", ["idx", "rot_left", "x"], "y"], "z"], - ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 1], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 2], 8]], ] ], ["*", @@ -107,7 +116,7 @@ def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ ["not", ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1]], ["+", ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 2], 8]], - ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 3], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 4], 8]], ] ], ["*", @@ -115,7 +124,7 @@ def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1], ["+", ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 4], 8]], - ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 5], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 6], 8]], ] ], ["*", @@ -123,7 +132,7 @@ def = {iters=[["x", 0, 4], ["y", 0, 4], ["z", 0, 7]], poly=[ ["idx", ["idx", ["idx", "rbc", "x"], "y"], 1], ["+", ["idx", ["idx", ["idx", "rot_left", "x"], "y"], ["mod", ["-", "z", 6], 8]], - ["idx", ["idx", ["idx", "rot_right", "x"], "y"], ["mod", ["-", "z", 7], 8]], + ["idx", ["idx", ["idx", "rot_right", "x"], "y"], "z"], ] ], ]} @@ -202,7 +211,10 @@ ref = "keccak:c:theta_cxz" kind = "interaction" tag = "HWSL" input = [["idx", ["cast", ["idx", ["idx", "Cxz", "x"], 3], "DWordHL"], "z"], 1] -output = ["arr", ["idx", ["cast", ["idx", "Cxz_left", "x"], "DWordHL"], "z"], ["idx", ["cast", ["idx", "Cxz_right", "x"], "DWordHL"], "z"]] +output = ["arr", + ["idx", ["cast", ["idx", "Cxz_left", "x"], "DWordHL"], "z"], + ["cast", ["idx", ["idx", "Cxz_right", "x"], "z"], "Half"] +] iters = [["x", 0, 4], ["z", 0, 3]] multiplicity = "μ" @@ -219,10 +231,9 @@ cond = "μ" [[constraints.theta]] kind = "template" -tag = "IS_BYTE" +tag = "IS_BIT" input = [["idx", ["idx", "Cxz_right", "x"], "z"]] -iters = [["x", 0, 4], ["z", 0, 7]] -cond = "μ" +iters = [["x", 0, 4], ["z", 0, 3]] [[constraints.theta]] kind = "interaction" @@ -251,6 +262,7 @@ input = [["idx", ["cast", ["idx", ["idx", "theta", "x"], "y"], "DWordHL"], "z"], output = ["arr", ["idx", ["cast", ["idx", ["idx", "rot_left", "x"], "y"], "DWordHL"], "z"], ["idx", ["cast", ["idx", ["idx", "rot_right", "x"], "y"], "DWordHL"], "z"]] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 3]] multiplicity = "μ" +ref = "keccak:c:rho_rotation" # Note: these IS_BYTE checks are necessary. # Without them, it is possible to prove 0 <<< S evaluates to -1 by setting @@ -260,8 +272,9 @@ multiplicity = "μ" kind = "template" tag = "IS_BYTE" input = [["idx", ["idx", ["idx", "rot_left", "x"], "y"], "z"]] -iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] +iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] cond = "μ" +ref = "keccak:c:range_rot_left" [[constraints.rho]] kind = "template" @@ -269,6 +282,7 @@ tag = "IS_BYTE" input = [["idx", ["idx", ["idx", "rot_right", "x"], "y"], "z"]] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] cond = "μ" +ref = "keccak:c:range_rot_right" [[constraint_groups]] name = "chi" diff --git a/spec/src/sha256.toml b/spec/src/sha256.toml index 78d022515..59d710998 100644 --- a/spec/src/sha256.toml +++ b/spec/src/sha256.toml @@ -266,5 +266,5 @@ input = ["μ"] [[constraints.lookup]] kind = "interaction" tag = "ECALL" -input = ["timestamp", ["arr", ["-", ["^", 2, 32], 1], ["-", ["^", 2, 32], 1]]] +input = ["timestamp", ["cast", ["-", ["^", 2, 64], 1], "DWordWL"]] multiplicity = ["-", "μ"] From ec9f75627a61b510db48df7b396917fe73e20927 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Mon, 1 Jun 2026 10:48:43 +0200 Subject: [PATCH 8/9] spec: Fix typographical errors in SHA256 and ROTXOR constraints (#635) --- spec/src/rotxor.toml | 2 +- spec/src/sha256.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/src/rotxor.toml b/spec/src/rotxor.toml index 730e9bda5..f1ff904b0 100644 --- a/spec/src/rotxor.toml +++ b/spec/src/rotxor.toml @@ -154,7 +154,7 @@ poly = ["-", ["idx", ["cast", "a2", "WordHL"], 0], ["idx", "a2_left", 1], ["idx" [[constraints.shift]] kind = "arith" constraint = "$#`a2[1]` = #`last_rot` dot #`a2_left[0]` + #`a2_right[1]`$" -poly = ["-", ["idx", ["cast", "a2", "WordHL"], 0], ["*", "last_rot", ["idx", "a2_left", 0]], ["idx", "a2_right", 1]] +poly = ["-", ["idx", ["cast", "a2", "WordHL"], 1], ["*", "last_rot", ["idx", "a2_left", 0]], ["idx", "a2_right", 1]] [[constraint_groups]] name = "xor" diff --git a/spec/src/sha256.toml b/spec/src/sha256.toml index 59d710998..8ef356578 100644 --- a/spec/src/sha256.toml +++ b/spec/src/sha256.toml @@ -56,7 +56,7 @@ name = "memory" kind = "interaction" tag = "MEMW" input = [1, ["cast", ["*", 2, 11], "DWordWL"], ["arr", ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 1], 0, 0, 0, 0, 0, 0], "timestamp", 1, 0, 0] -output = ["arr", ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], 0, 0, 0, 0, 0, 0] +output = ["arr", ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 0], ["idx", ["cast", ["idx", "m_addr", 0], "DWordWL"], 1], 0, 0, 0, 0, 0, 0] multiplicity = "μ" [[constraints.memory]] From f006292e976920496e3b62feb975611e2b39243c Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Tue, 2 Jun 2026 16:05:34 +0200 Subject: [PATCH 9/9] spec: rework CPU for smaller footprint (#624) * spec: Draft CPU rework, still missing updates for all ALU chips, DECODE and CPU32 * Decoding for the new CPU * Bitwise using BYTE_ALU * Update old chips for the new CPU * Fix decode for MEMORY signed flag * Add input range check to SHIFTs typ * MEMORY signature HL -> WL * Clarify SHIFTW decoding * Further updates and new chips for the new CPU * Fix arg2 muxes * Link instruction length to CPU32 * Apply easy changes from code review Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * Address more review comments * Fix CPU32 register interactions * Fix CPUs PC write multiplicity * Fix HALT's interaction with the CPU padding * Add some optional extra arith constraints to enforce the "easy" assumptions * Combine assumptions constraints * Remove todo comments that got a tracking issue * word_instr gate for read_registerX out of caution * Fix JAL(R) decoding and ALU * Placate the type checker * Represent instruction length as half * Update spec/src/cpu.toml Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> * fixes --------- Co-authored-by: Erik <159244975+erik-3milabs@users.noreply.github.com> --- spec/bitwise.typ | 4 + spec/book.typ | 12 +- spec/branch.typ | 5 + spec/bytewise.typ | 38 ++ spec/cpu.typ | 76 ++-- spec/cpu32.typ | 61 +++ spec/decode.typ | 109 +++-- spec/dvrm.typ | 8 +- spec/eq.typ | 41 ++ spec/expr.typ | 8 + spec/halt.typ | 7 +- spec/lt.typ | 9 +- spec/memw.typ | 13 +- spec/mul.typ | 3 - spec/shift.typ | 8 +- spec/src/bitwise.toml | 12 +- spec/src/branch.toml | 11 +- spec/src/bytewise.toml | 49 +++ spec/src/config.toml | 9 + spec/src/cpu.toml | 676 +++++++++--------------------- spec/src/cpu32.toml | 416 ++++++++++++++++++ spec/src/decode.toml | 44 +- spec/src/decode_uncompressed.toml | 153 ++++--- spec/src/dvrm.toml | 55 ++- spec/src/eq.toml | 93 ++++ spec/src/halt.toml | 19 +- spec/src/keccak_round.toml | 28 +- spec/src/load.toml | 44 +- spec/src/lt.toml | 72 +++- spec/src/memw.toml | 46 +- spec/src/memw_aligned.toml | 28 +- spec/src/mul.toml | 44 +- spec/src/rotxor.toml | 8 +- spec/src/sha256round.toml | 20 +- spec/src/shift.toml | 71 ++-- spec/src/signatures.toml | 65 +-- spec/src/store.toml | 122 ++++++ spec/store.typ | 47 +++ spec/tooling/chip.py | 7 + 39 files changed, 1659 insertions(+), 882 deletions(-) create mode 100644 spec/bytewise.typ create mode 100644 spec/cpu32.typ create mode 100644 spec/eq.typ create mode 100644 spec/src/bytewise.toml create mode 100644 spec/src/cpu32.toml create mode 100644 spec/src/eq.toml create mode 100644 spec/src/store.toml create mode 100644 spec/store.typ diff --git a/spec/bitwise.typ b/spec/bitwise.typ index 332bef3d9..1babeefcc 100644 --- a/spec/bitwise.typ +++ b/spec/bitwise.typ @@ -26,11 +26,15 @@ and convenience functionalities over small domains. The #bitwise chip is comprised of #nr_variables variables that are expressed using #nr_columns columns. Of these, the _input_ and _output_ variables (#nr_precomputed in total) are precomputed. + #render_chip_variable_table(chip, config) *Note*: This table contains one row for every possible value of `(X, Y, Z)`. As such, it has length $2^8 dot 2^8 dot 2^4 = 2^(20)$. +We use the ALU operation descriptors from @decode to identify the operations in the `BYTE_ALU` interaction. +Since each of the three columns is only $2^16$ rows long, they can be combined in a single $2^20$ column (with room to spare). + = Lookup This chip adds the following interactions to the lookup: #render_constraint_table(chip, config) diff --git a/spec/book.typ b/spec/book.typ index f4e95e493..8bf8612af 100644 --- a/spec/book.typ +++ b/spec/book.typ @@ -23,21 +23,25 @@ ("add.typ", [`ADD`/`SUB` template], ), ("neg.typ", [`NEG` template], ), )), - ("MEMORY", ( - ("memw.typ", [`MEMW` chip], ), - )), ("CPU", ( ("decode.typ", [`DECODE` table], ), ("cpu.typ", [`CPU` chip], ), + ("cpu32.typ", [`CPU32` chip], ), )), ("ALU", ( ("shift.typ", [`SHIFT` chip], ), ("branch.typ", [`BRANCH` chip], ), ("lt.typ", [`LT` chip], ), + ("eq.typ", [`EQ` chip], ), ("mul.typ", [`MUL` chip], ), ("dvrm.typ", [`DVRM` chip], ), - ("load.typ", [`LOAD` chip], ), ("bitwise.typ", [`BITWISE` chips], ), + ("bytewise.typ", [`BYTEWISE` chip], ) + )), + ("MEMORY", ( + ("memw.typ", [`MEMW` chip], ), + ("load.typ", [`LOAD` chip], ), + ("store.typ", [`STORE` chip], ), )), ("ECALLS", ( ("about_ecalls.typ", [About `ECALL`], ), diff --git a/spec/branch.typ b/spec/branch.typ index 0743ae2f9..d6ab53a18 100644 --- a/spec/branch.typ +++ b/spec/branch.typ @@ -30,6 +30,11 @@ The #branch chip is comprised of #nr_variables variables that are expressed usin #render_chip_assumptions(chip, config) +Some of the assumptions can be checked with only arithmetic constraints, so we +provide these below. + +#render_constraint_table(chip, config, groups: "assumptions") + = Constraints We constrain `next_pc` to be $#`base_address` + #`offset`$, diff --git a/spec/bytewise.typ b/spec/bytewise.typ new file mode 100644 index 000000000..452e4fdbc --- /dev/null +++ b/spec/bytewise.typ @@ -0,0 +1,38 @@ +#import "/book.typ": book-page, rj +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + render_chip_assumptions, + render_chip_variable_table, + total_nr_variables, + total_nr_instantiated_columns, + compute_nr_interactions, + render_constraint_table, + render_chip_padding_table, +) + +#let config = load_config() +#let chip = load_chip("src/bytewise.toml", config) + +#show: book-page(chip.name) +#let bytewise = raw(chip.name) + +The #bytewise chip is an ALU chip that decomposes the input `DWordWL` values into bytes and +performs a `BITWISE` operation pairwise (AND, OR, XOR). +The `BITWISE` lookup inherently performs a range check, so no further constraints are necessary. + += Variables +#let nr_variables = total_nr_variables(chip) +#let nr_columns = total_nr_instantiated_columns(chip, config) +#let nr_interactions = compute_nr_interactions(chip) + +The #bytewise chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + += Constraints + +#render_constraint_table(chip, config) + += Padding + +The chip can be padded with the following values: +#render_chip_padding_table(chip, config) diff --git a/spec/cpu.typ b/spec/cpu.typ index 2ee85befa..cb11661f3 100644 --- a/spec/cpu.typ +++ b/spec/cpu.typ @@ -17,7 +17,7 @@ #let cpu = raw(chip.name) The #cpu chip coordinates memory accesses and dispatches to other chips for arithmetic and logical operations. -It bases its decisions on the entry of the `DECODE` table (@decode) corresponding the the current program counter (PC). +It bases its decisions on the entry of the `DECODE` table (@decode) corresponding the current program counter (PC). = Variables #let nr_variables = total_nr_variables(chip) @@ -30,36 +30,52 @@ The #cpu chip is comprised of #nr_variables variables that are expressed using # = Assumptions #render_chip_assumptions(chip, config) +Additionally, the following constraints can be used to provide defense-in-depth +validation of the assumptions. + +#render_constraint_table(chip, config, groups: "assumptions") + = Constraints + First, we perform a decoding lookup for the current PC. +Instructions having the `word_instr` flag set are not decoded here, as they are delegated to the `CPU32` chip. +In that case, we ensure that the current row of the CPU cannot have any other observable effects. #render_constraint_table(chip, config, groups: "decode") == Range checks We constrain all columns to have the appropriate ranges. -The flags and register indices looked up from the decoding need to be checked, -as they are communicated through the interaction in a packed form. +All values in `packed_decode` need to be checked to ensure +the packing is correct for the interaction. In contrast, we know ahead of time that decoding will ensure proper range checks for `pc` and `imm`. Similarly, since `next_pc` will propagate through the memory argument and be looked up -in the instruction decoding on the next cycle, it is forced to be in the correct range.#rj[is this true, do we need this elsewhere for chip assumptions?] -For the auxiliary columns, we need to check the limbs of `arg1`, `arg2`, and `res`. +in the instruction decoding on the next cycle, it is forced to be in the correct range; +the final value for `next_pc` is similarly fixed by the memory finalization. +For the auxiliary columns, we need to check the limbs of `res`, since +`rv1` and `rv2` are enforced by the memory argument, and `rvd` is correct by the correctness of the dependent chips. The ranges of the other auxiliary columns are enforced through later constraints. -#rj[Make sure we argue for every column here] -#rj[is `rvd` still sufficiently constrained? (can also be done through the memory argument like `pc`?)] #render_constraint_table(chip, config, groups: "range") == ALU -The ALU functionality is then obtained through judicious dispatching to the corresponding chips. +The ALU functionality is then obtained through delegation to the `ALU` signature, backed by the various ALU chips, +or by using the appropriate template. +For the pure ALU path, `arg2` is computed as `rv2 + imm`, which relies on @cpu:a:arg2-multiplex to +be either `rv2` or `imm`, depending on the instruction. +The other contributions for `arg2` are specific to the (mutually exclusive, @cpu:a:mem-branch-mutex) +`MEMORY` and `BRANCH` flags: +- For the `MEMORY` path, we want the output of the ALU to be $#`rv1` + #`imm`$, as that is the + address at which the memory access occurs. +- For the `BRANCH` path, we want the ALU output to reflect the branch condition (or just be inactive for JALR). #render_constraint_table(chip, config, groups: "alu") == Memory -The interactions with the memory, both for register loading and storing, as for `LOAD` and `STORE` instructions are handled. -Note that since registers need no byte-addressing, we store them in the memory argument with `Word` limbs. +Note that since registers need no byte-addressing, we store them in the memory argument with `Word` limbs, +simultaneously ensuring that register reads are properly range checked as long as all writes are. The `pc` register behaves very predictably with respect to its timestamps and when it is being read, so for performance reasons, we inline its memory interactions directly into the #cpu chip. @@ -70,32 +86,36 @@ Constraints regarding whether `pc_double_read` corresponds to an `AUIPC` instruc as regardless of its value, the old timestamp is guaranteed smaller than the new timestamp, and the integrity of the memory argument therefore ensures the correctness of this bit. -#render_constraint_table(chip, config, groups: "mem") - -=== Potential optimizations +The memory interaction itself is handled by the `MEMORY` signature, +which will read the `mem_flags` argument to perform either a `LOAD` or a `STORE`. +We refer to the previous section's description of `arg2` for how +the address is computed. -- `double_pc_read` could be integrated into decoding, so that `AUIPC` could set `read_register1 = 0` and no extra MEMW access for `rv1` is needed at this point. +The value to (potentially) be written back to `rd` is stored in `rvd`, +which can either come from the ALU --- in case of an ALU operation or a JALR branch --- +or from the MEMORY interaction. -== System - -The interactions with the wider system. +#render_constraint_table(chip, config, groups: "mem") -#render_constraint_table(chip, config, groups: "sys") +== Branching -== Input and output to the ALU +A branch is expressed by having the `BRANCH` flag set to 1. +Since `BRANCH` and `MEMORY` are mutually exclusive (@cpu:a:mem-branch-mutex), +we can repurpose the `mem_flags` field to indicate a JALR instruction. +When JALR is not set, we have a conditional branch that is decided upon by the result of the ALU instructions, +as set in the `res` variable. +As such, we can set `branch_cond` appropriately as multiplicity flag for the `BRANCH` chip. -We constrain `arg1`, `arg2` and `rvd` to correspond to the wanted values, -including the appropriate sign/zero extension, depending on `word_instr`. +#render_constraint_table(chip, config, groups: "branch") -#render_constraint_table(chip, config, groups: "ext") +== System -== Other constraints -For @cpu:c:is_equal, note that @cpu:c:sub sets `res` to be the difference between `arg1` and `arg2` whenever `BEQ` is $1$. -Given that this difference is $0$ when both are equal, @cpu:c:is_equal ensures `is_equal` is set to $1$ if and only if $#`arg1` = #`arg2`$ and `BEQ` is set. +The interactions with the wider system go through the `ECALL` interface. +Since we treat `EBREAK` instructions as unprovable traps, we avoid emitting `DECODE` rows +for these, and do not need any further handling in the CPU. -#render_constraint_table(chip, config, groups: "misc") +#render_constraint_table(chip, config, groups: "sys") -#rj[Document the choice to not have a multiplicity column here for padding] = Padding @@ -104,4 +124,4 @@ in the DECODE table, at the _odd_ address 1, only reachable through a HALT ecall #render_chip_padding_table(chip, config) -This approach minimizes the number of dependent lookups, increasing only multiplicities in the DECODE table and the IS_BYTE lookup. +This approach minimizes the number of dependent lookups, increasing only multiplicities in the `DECODE` table and the `IS_BYTE` and `IS_HALF` lookups. diff --git a/spec/cpu32.typ b/spec/cpu32.typ new file mode 100644 index 000000000..e5e8963bc --- /dev/null +++ b/spec/cpu32.typ @@ -0,0 +1,61 @@ +#import "/book.typ": book-page, rj +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + render_chip_assumptions, + render_chip_variable_table, + render_chip_padding_table, + render_constraint_table, + compute_nr_interactions, + total_nr_instantiated_columns, + total_nr_variables, +) + +#let config = load_config() +#let chip = load_chip("src/cpu32.toml", config) + +#show: book-page(chip.name) +#let cpu32 = raw(chip.name) + +The #cpu32 chip is used to delegate the 32-bit instructions of the RV64I instruction set +from the main CPU table (@cpu). +All 32-bit instructions are ALU-only instructions, so the BRANCH, MEMORY and ECALL paths need no elaboration. +The timestamp and PC have already been read by the CPU table at this point, and need no further checking; +the PC for the next instruction will also already be handled by CPU. + +The structure follows the regular ALU path, with some extra variables and constraints to contain the required sign extensions. + += Variables +#let nr_variables = total_nr_variables(chip) +#let nr_columns = total_nr_instantiated_columns(chip, config) +#let nr_interactions = compute_nr_interactions(chip) + +The #cpu32 chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + += Assumptions + +#render_chip_assumptions(chip, config) + +Some of the assumptions can be checked with only arithmetic constraints, so we +provide these below. + +#render_constraint_table(chip, config, groups: "assumptions") + += Constraints + +Most constraints correspond to those already present in the CPU, and we present them here first, +including some updates to the range checking corresponding to the differing types. + +#render_constraint_table(chip, config, groups: ("decode", "range", "alu", "mem", "logup")) + +Then, we have the constraints corresponding to the sign-extension and definition of `arg1`, `arg2` and `rd`. +This includes a step where we extract the `signed` bit from the `alu_flags`, as this determines +whether to sign extend the inputs or not. + +#render_constraint_table(chip, config, groups: "ext") + += Padding + +The table can be padded with the following values: +#render_chip_padding_table(chip, config) + diff --git a/spec/decode.typ b/spec/decode.typ index 21cd26acb..6defcdc84 100644 --- a/spec/decode.typ +++ b/spec/decode.typ @@ -8,6 +8,7 @@ render_constraint_table, render_chip_padding_table, ) +#import "/expr.typ": expr_to_math #let config = load_config() #let chip = load_chip("src/decode.toml", config) @@ -34,24 +35,44 @@ Empty rows with the following content can be added to achieve this: #render_chip_padding_table(chip, config) -Note that this row sets the `EBREAK` flag. -Given that `CPU` asserts that `EBREAK = 0` (see @cpu:c:ebreak_traps), using this "padding-instruction" would immediately make the CPU table unprovable. -Note moreover that the `pc` is set to $7$. -This value is the _smallest odd number_ (i.e., not reachable during regular execution) that is more than _$4$_ (i.e., the max `pc`-increment) greater than _$1$_ (i.e., the `pc`-value used in the #link()[additional instruction] referred to by `CPU`-padding lines). +This is simultaneously the row that is used for padding rows in the CPU, +if the multiplicity is nonzero, +so we need to ensure that this table has at least one row of padding. = Decoding For the purposes of explaining decoding, we decompress #decode's `packed_decode` variable into its constituent variables. Note that the below table is _not_ used in practice: it is solely used for the purposes of this explanation. +The construction of the `alu_flags` and `mem_flags` columns is given here through virtual columns. #let config = load_config() #let uncompressed_chip = load_chip("src/decode_uncompressed.toml", config) #render_chip_variable_table(uncompressed_chip, config) +First, we provide a mapping from an an ALU operation "descriptor" to the numerical value as used for the `alu_op` column. +This is the table used to find the value for the #expr_to_math(("opsel", "OPERATION")) notation when performing `ALU` or `BYTE_ALU` interactions. + +#figure( + table(columns: (auto, auto), + stroke: 0pt, + inset: (right: .5em), + align: (left, left), table.header[*Descriptor*][*value*], table.hline(stroke: 1.5pt))[ + *AND*][0][ + *OR*][1][ + *XOR*][2][ + *EQ*][3][ + *LT*][4][ + *SHIFT*][5][ + *SHIFTW*][6][ + *MUL*][7][ + *DIVREM*][8] +) + We will illustrate how each instruction should be expressed in this (uncompressed) decoding table. The columns of the accompanying table represent the following: - *`operation`*: the assembly operation being encoded. -- *`op-flag`*: which of the "`ALU` selector flags" operation flags to set. Each operation sets exactly one. +- *`alu`*: Set to the descriptor of the ALU operation to be used for `alu_op`. + If listed as `ADD` or `SUB`, the corresponding flag should be set, otherwise set `ALU = 1` when this column is not empty. - *`w_instr`*, *`signed`*: whether to set the `word_instr` and `signed` flags, respectively. - *other*: the other flags that should be set or variables that should be given specific values. @@ -59,8 +80,6 @@ For the purpose of brevity and readability, the table uses the following rules-o + `rd`, `rs1`, `rs2`, and `imm` are mapped to the values provided by the instruction; when a value is not specified by an instruction it defaults to $0$. + `read_register1`, `read_register2` and `write_register` are set to $1$ when respectively $#`rs1` != 0$, $#`rs2` != 0$, or $#`rd` != 0$. -+ Any flag that is not listed is set to $0$, with the exception of the `c_type` flag. - *The `c_type` flag is set independently of the below table*, as explained next. Further clarification is provided in the notes following the table. @@ -81,38 +100,38 @@ Further clarification is provided in the notes following the table. // Overlay a low-opacity fill color to distinguish the different rows better if calc.odd(y) and y <= lines.len() { color.rgb(0, 0, 100, 20) } else { color.rgb(255, 255, 255, 20) }, - table.header([*Operation*], [*op-flag*], [*`w_instr`*], [*`signed`*], [*other*], []), + table.header([*Operation*], [*alu*], [*`w_instr`*], [*`signed`*], [*other*], []), table.hline(stroke: 1.5pt), table.vline(x: 1, start: 1, end: lines.len() + 1, stroke: .5pt), ..lines.flatten(), table.hline(stroke: 1.5pt), - table.footer([*Operation*], [*op-flag*], [*`w_instr`*], [*`signed`*], [*other*]), + table.footer([*Operation*], [*alu*], [*`w_instr`*], [*`signed`*], [*other*]), )) } #let decoding = ( // OP-IMM ([`ADDI[W] rd, rs1, imm`], [`ADD`], [`[W]`], [], [], [#ref_note()]), - ([`SLTI[U] rd, rs1, imm`], [`SLT`], [], [#sym.not`[U]`], [], [#ref_note()]), + ([`SLTI[U] rd, rs1, imm`], [`LT`], [], [#sym.not`[U]`], [], [#ref_note()]), ([`ANDI rd, rs1, imm`], [`AND`], [], [], [], []), ([`ORI rd, rs1, imm`], [`OR`], [], [], [], []), ([`XORI rd, rs1, imm`], [`XOR`], [], [], [], []), - ([`SLLI[W] rd, rs1, imm`], [`SHIFT`], [`[W]`], [], [], []), - ([`SRLI[W] rd, rs1, imm`], [`SHIFT`], [`[W]`], [], [`mp_selector`], [#ref_note()]), - ([`SRAI[W] rd, rs1, imm`], [`SHIFT`], [`[W]`], [1], [`mp_selector`], [#ref_note()]), + ([`SLLI[W] rd, rs1, imm`], [`SHIFT[W]`], [`[W]`], [], [], [#ref_note()]), + ([`SRLI[W] rd, rs1, imm`], [`SHIFT[W]`], [`[W]`], [], [`invert`], [#ref_note()]), + ([`SRAI[W] rd, rs1, imm`], [`SHIFT[W]`], [`[W]`], [1], [`invert`], [#ref_note()]), // OP ([`ADD[W] rd, rs1, rs2`], [`ADD`], [`[W]`], [], [], [#ref_note()]), ([`SUB[W] rd, rs1, rs2`], [`SUB`], [`[W]`], [], [], [#ref_note()]), - ([`SLT[U] rd, rs1, rs2`], [`SLT`], [], [#sym.not`[U]`], [], [#ref_note()]), + ([`SLT[U] rd, rs1, rs2`], [`LT`], [], [#sym.not`[U]`], [], [#ref_note()]), ([`AND rd, rs1, rs2`], [`AND`], [], [], [], []), ([`OR rd, rs1, rs2`], [`OR`], [], [], [], []), ([`XOR rd, rs1, rs2`], [`XOR`], [], [], [], []), - ([`SLL[W] rd, rs1, rs2`], [`SHIFT`], [`[W]`], [], [], [#ref_note()]), - ([`SRL[W] rd, rs1, rs2`], [`SHIFT`], [`[W]`], [], [`mp_selector`], [#ref_note()]), - ([`SRA[W] rd, rs1, rs2`], [`SHIFT`], [`[W]`], [1], [`mp_selector`], [#ref_note()]), + ([`SLL[W] rd, rs1, rs2`], [`SHIFT[W]`], [`[W]`], [], [], [#ref_note()]), + ([`SRL[W] rd, rs1, rs2`], [`SHIFT[W]`], [`[W]`], [], [`invert`], [#ref_note()]), + ([`SRA[W] rd, rs1, rs2`], [`SHIFT[W]`], [`[W]`], [1], [`invert`], [#ref_note()]), // OP - M - ([`MUL[W] rd, rs1, rs2`], [`MUL`], [`[W]`], [1], [`mp_selector`], [#ref_note()]), - ([`MULH rd, rs1, rs2`], [`MUL`], [], [1], [`mp_selector`, `muldiv_selector`], []), + ([`MUL[W] rd, rs1, rs2`], [`MUL`], [`[W]`], [1], [`signed2`], [#ref_note()]), + ([`MULH rd, rs1, rs2`], [`MUL`], [], [1], [`signed2`, `muldiv_selector`], []), ([`MULHU rd, rs1, rs2`], [`MUL`], [], [], [`muldiv_selector`], []), ([`MULHSU rd, rs1, rs2`], [`MUL`], [], [1], [`muldiv_selector`], []), ([`DIV[U][W] rd, rs1, rs2`], [`DIVREM`], [`[W]`], [#sym.not`[U]`], [], [#ref_note(, )]), @@ -120,37 +139,42 @@ Further clarification is provided in the notes following the table. // LUI/AUIPC ([`LUI rd, imm`], [`ADD`], [], [], [], [#ref_note()]), ([`AUIPC rd, imm`], [`ADD`], [], [], [`rs1 := x255`], [#ref_note()]), - ([`JAL rd, imm`], [`JALR`], [], [], [`rs1 := x255`], [#ref_note()]), + ([`JAL rd, imm`], [], [], [], [`BRANCH`, `JALR`, `rs1 := x255`], [#ref_note()]), // Branching - ([`JALR rd, rs1, imm`], [`JALR`], [], [], [], []), - ([`BEQ rs1, rs2, imm`], [`BEQ`], [], [], [], []), - ([`BNE rs1, rs2, imm`], [`BEQ`], [], [], [`mp_selector`], []), - ([`BLT[U] rs1, rs2, imm`], [`BLT`], [], [#sym.not`[U]`], [], [#ref_note()]), - ([`BGE[U] rs1, rs2, imm`], [`BLT`], [], [#sym.not`[U]`], [`mp_selector`], [#ref_note()]), + ([`JALR rd, rs1, imm`], [], [], [], [`BRANCH`, `JALR`], []), + ([`BEQ rs1, rs2, imm`], [`EQ`], [], [], [`BRANCH`], []), + ([`BNE rs1, rs2, imm`], [`EQ`], [], [], [`BRANCH`, `invert`], []), + ([`BLT[U] rs1, rs2, imm`], [`LT`], [], [#sym.not`[U]`], [`BRANCH`], [#ref_note()]), + ([`BGE[U] rs1, rs2, imm`], [`LT`], [], [#sym.not`[U]`], [`BRANCH`, `invert`], [#ref_note()]), // LOAD - ([`LD rd, rs1, imm`], [`LOAD`], [], [], [`mem_8B`], []), - ([`LW[U] rd, rs1, imm`], [`LOAD`], [], [#sym.not`[U]`], [`mem_4B`], [#ref_note()]), - ([`LH[U] rd, rs1, imm`], [`LOAD`], [], [#sym.not`[U]`], [`mem_2B`], [#ref_note()]), - ([`LB[U] rd, rs1, imm`], [`LOAD`], [], [#sym.not`[U]`], [], [#ref_note()]), + ([`LD rd, rs1, imm`], [`ADD`], [], [], [`MEMORY`, `mem_8B`], []), + ([`LW[U] rd, rs1, imm`], [`ADD`], [], [], [`MEMORY`, `mem_signed := `#sym.not`[U]`, `mem_4B`], [#ref_note()]), + ([`LH[U] rd, rs1, imm`], [`ADD`], [], [], [`MEMORY`, `mem_signed := `#sym.not`[U]`, `mem_2B`], [#ref_note()]), + ([`LB[U] rd, rs1, imm`], [`ADD`], [], [], [`MEMORY`, `mem_signed := `#sym.not`[U]`], [#ref_note()]), // STORE - ([`SD rs1, rs2, imm`], [`STORE`], [], [], [`mem_8B`], []), - ([`SW rs1, rs2, imm`], [`STORE`], [], [], [`mem_4B`], []), - ([`SH rs1, rs2, imm`], [`STORE`], [], [], [`mem_2B`], []), - ([`SB rs1, rs2, imm`], [`STORE`], [], [], [], []), + ([`SD rs1, rs2, imm`], [`ADD`], [], [], [`MEMORY`, `memory_op`, `mem_8B`], []), + ([`SW rs1, rs2, imm`], [`ADD`], [], [], [`MEMORY`, `memory_op`, `mem_4B`], []), + ([`SH rs1, rs2, imm`], [`ADD`], [], [], [`MEMORY`, `memory_op`, `mem_2B`], []), + ([`SB rs1, rs2, imm`], [`ADD`], [], [], [`MEMORY`, `memory_op`], []), // ECALL/EBREAK - ([`ECALL`], [`ECALL`], [], [], [$#`rs1` := #`x17`$], [#ref_note()]), - ([`EBREAK`], [`EBREAK`], [], [], [], []), + ([`ECALL`], [], [], [], [`ECALL`, $#`rs1` := #`x17`$], [#ref_note()]), // FENCE ([`FENCE`], [`ADD`], [], [], [], [#ref_note()]), ) #decoding_table(decoding) +Note that the above table has no entry for the `EBREAK` instruction. +We treat `EBREAK` as an unprovable trap, and its absence from the table enables +this by having no valid decoding available for when the instruction is encountered. + == C-type instructions The `RV64C` extension for compressed instructions specifies that \~50% of all instructions can be represented using a 16-bit instruction (rather than 32-bits), saving \~25% in code size. This execution of assembly code is _not_ agnostic to an instruction's compression state; after executing a compressed instruction, the `pc` should be incremented by $2$ rather than $4$. -To indicate an instruction is provided in compressed form, the `c_type` flag is introduced. -*This flag should be set to $1$ whenever the decoded instruction is provided in compressed form and $0$ otherwise.* +As such, we provide the `half_instruction_length` column that *must take on the value $1$ for compressed instructions and $2$ for regular instructions*. +It is represented as half the number of bytes in the instruction to make misaligned instructions lengths unrepresentable. +Additionally, having the variable opens the door for future optimizations involving "fused" instructions, where common sequences +of instructions are merged into a single decoded version and need only a single CPU row to prove. // Construct a note that can be referenced through `lbl` #let referenceable_note(lbl, note) = { @@ -164,7 +188,7 @@ We note the following about the above decoding table: enum.item( referenceable_note( "note_word_instr", - [`word_instr`: `[W]` indicates that $#`word_instr` = 1$ for the `W`-variant of the operation, and $0$ for the non-`W`-variant.] + [`word_instr`: `[W]` indicates that $#`word_instr` = 1$ for the `W`-variant of the operation, and $0$ for the non-`W`-variant. Similarly, `SHIFT[W]` indicates the `SHIFTW` operation for the `W`-variant, and `SHIFT` otherwise.] ) ), enum.item( @@ -192,7 +216,7 @@ We note the following about the above decoding table: enum.item( referenceable_note( "note-jal", - [`JAL`: this operation stores $#`pc` + 4$ in `rd` and adds two times the sign-extended 20-bit immediate to the `pc`. + [`JAL`: this operation stores $#`pc` + #`2 * half_instruction_length`$ in `rd` and adds two times the sign-extended 20-bit immediate to the `pc`. Note that this can be represented using `JALR rd, x255, imm`. As such, *we expect the decoding to take care of writing the immediate in bit range $[1:21]$ of `imm` and extending it to 64 bits; the least significant bit should always be 0.*] ) @@ -212,10 +236,3 @@ We note the following about the above decoding table: ) ) ) - -== One more instruction -In addition to decoding all instructions provided in the ELF and adding a corresponding entry to the #decode table, one must include an entry that has $#`pc` = 1$ and every other variable set to $0$. -Note that this will never conflict with any entry in the ELF, since it has an odd `pc` value. - -This entry is used to pad the `CPU` table. -More details on this matter are provided in the `CPU` chip. diff --git a/spec/dvrm.typ b/spec/dvrm.typ index 1118aa10a..c2d1c6450 100644 --- a/spec/dvrm.typ +++ b/spec/dvrm.typ @@ -27,10 +27,12 @@ The #dvrm chip provides division and remainder functionality, both signed and un The #dvrm chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): #render_chip_variable_table(chip, config) -= Assumptions -#render_chip_assumptions(chip, config) = Constraints + +First, we range-check all inputs. +#render_constraint_table(chip, config, groups: "range") + From the ISA, we gather five requirements for the `DIV[U][W]` and `REM[U][W]` instructions: #enum(numbering: "R1.", enum.item([ @@ -106,7 +108,7 @@ Rewriting R1, we find the constraint $not#`overflow` => #`n` - #`r` = #`qd`$. #footnote([Recall that @dvrm:c:sign_q allows to assert this equality even when `overflow`.]) Since `n`, `d`, `q` and `r` are all 64-bit integers, we must assert this equality $mod 2^128$, rather than $mod 2^64$. To this end, we introduce `extended_n_sub_r` and leverage the `MUL` chip to verify that it is equal to $#`qd` mod 2^128$ using constraints @dvrm:c:mul_lower and @dvrm:c:mul_upper; -@dvrm:c:q_range is included to uphold assumption @mul:a:rhs. +@dvrm:c:q_range is included to uphold assumption @mul:c:rhs. #render_constraint_table(chip, config, groups:("equality", )) diff --git a/spec/eq.typ b/spec/eq.typ new file mode 100644 index 000000000..379df796d --- /dev/null +++ b/spec/eq.typ @@ -0,0 +1,41 @@ +#import "/book.typ": book-page, rj +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + render_chip_assumptions, + render_chip_variable_table, + total_nr_variables, + total_nr_instantiated_columns, + compute_nr_interactions, + render_constraint_table, + render_chip_padding_table, +) + +#let config = load_config() +#let chip = load_chip("src/eq.toml", config) + +#show: book-page(chip.name) +#let eq = raw(chip.name) + +The #eq chip is an ALU chip that compares two values and outputs a bit indicating whether they are equal or not. +It optionally inverts the result if the `invert` flag is set. + += Variables +#let nr_variables = total_nr_variables(chip) +#let nr_columns = total_nr_instantiated_columns(chip, config) +#let nr_interactions = compute_nr_interactions(chip) + +The #eq chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + += Assumptions + +#render_chip_assumptions(chip, config) + += Constraints + +#render_constraint_table(chip, config) + += Padding + +The chip can be padded with the following values: +#render_chip_padding_table(chip, config) diff --git a/spec/expr.typ b/spec/expr.typ index 20a55d753..5a275f47e 100644 --- a/spec/expr.typ +++ b/spec/expr.typ @@ -99,6 +99,10 @@ // Typeset an expression as code #let expr_to_code = make_expr_formatter( ( + "opsel": (pp, rec, e) => { + assert(type(e.at(1)) == type(""), message: "opsel expects a string") + `⧼` + raw(e.at(1)) + `⧽` + }, "arr": (pp, rec, e) => `[` + e.slice(1).map(rec.with(PREC.MAX)).join(`, `) + `]`, "idx": (pp, rec, e) => rec(PREC.MIN, e.at(1)) + `[` + rec(PREC.MAX, e.at(2)) + `]`, "not": (pp, rec, e) => cwrap(rec(PREC.not, 1) + ` - ` + rec(PREC.not, e.at(1)), pp < PREC.not), @@ -165,6 +169,10 @@ // Typeset an expression as math #let expr_to_math = make_expr_formatter( ( + "opsel": (pp, rec, e) => { + assert(type(e.at(1)) == type(""), message: "opsel expects a string") + $lr(chevron.l.curly#raw(e.at(1))chevron.r.curly)$ + }, "arr": (pp, rec, e) => $[#e.slice(1).map(rec.with(PREC.MAX)).join($, $)]$, "idx": (pp, rec, e) => { let (val, idxs) = flat_idxs(e) diff --git a/spec/halt.typ b/spec/halt.typ index 691154f61..50a39ac40 100644 --- a/spec/halt.typ +++ b/spec/halt.typ @@ -32,9 +32,12 @@ It is assumed the input is range checked: The #halt chip: + makes sure register `x10` (containing the exit code) equals $0$ (@halt:c:read_zero_exit_code), + writes $0$ to all other registers (@halt:c:zeroize_registers_lo/@halt:c:zeroize_registers_hi), and -+ sets `pc` equal to $1$ (@halt:c:pc). -Note that the writes performed by all these interactions are accompanied by the timestamp $2^64-1$; the maximum timestamp. ++ sets `pc` equal to $1$ (@halt:c:consume_pc, @halt:c:emit_pc). +Note that the writes performed by all these interactions --- except for the `pc` --- are accompanied by the timestamp $2^64-1$; the maximum timestamp. This prevents any other operation involving memory from being executed hereafter. +The `pc` is consumed and re-emitted at the same timestamp to enable padding rows for the CPU. +This means that the verifier will have to know the final timestamp at which a CPU padding `pc` was written +to be able to balance the final LogUp. #render_constraint_table(chip, config, groups: "all") #aside("Note on register clean up", diff --git a/spec/lt.typ b/spec/lt.typ index 2e0ac8be4..6dd60b236 100644 --- a/spec/lt.typ +++ b/spec/lt.typ @@ -17,6 +17,7 @@ #let lt = raw(chip.name) The #lt chip constrains an indicator bit for the less-than relation, signed or unsigned. +If the `invert` flag is set, it inverts the result. = Variables #let nr_variables = total_nr_variables(chip) @@ -31,7 +32,7 @@ We assume the inputs `lhs`, `rhs` and `signed` are partially range checked. #render_chip_assumptions(chip, config) = Constraints -We first constrain that all variables correspond to their definition. +We first constrain that all inputs are range checked and all variables correspond to their definition. For the defining constraint of `lt`, @lt:c:lt, observe that it is a choice between two options, depending on the input flag `signed`. In the case of unsigned comparison, we simply need `unsigned_lt`, indicating @@ -74,7 +75,7 @@ However, the left hand side of this is at least $3 dot 2^31$, as $(A, C) = (1, 1 and the right hand side is at most $(2^31 - 1) + (2^32 - 1) + 1 = 3 dot 2^31 - 1$. Therefore, we can use $Q$ to constrain `lt` when `signed = 1`. -#render_constraint_table(chip, config, groups: "defs") +#render_constraint_table(chip, config, groups: ("range", "defs")) And then we constrain the subtraction, taking care of the remaining range checking not yet covered by the assumptions or the `MSB16` lookup. @@ -90,3 +91,7 @@ The chip contributes the following to the lookup argument. The table can be padded to the next power of two with the following value assignments: #render_chip_padding_table(chip, config) + += Potential optimizations + +- Split the chip into a signed and an unsigned chip, making the unsigned version cheaper. diff --git a/spec/memw.typ b/spec/memw.typ index 425508597..b963ea562 100644 --- a/spec/memw.typ +++ b/spec/memw.typ @@ -34,6 +34,11 @@ The #memw chip is comprised of #nr_variables variables that are expressed using #render_chip_assumptions(chip, config) +Some of the assumptions can be checked with only arithmetic constraints, so we +provide these below. + +#render_constraint_table(chip, config, groups: "assumptions") + Our assumptions do not explicitly cover any range checks for the `is_register` and `value` columns, as these are not necessary for the correctness of this chip in isolation. Still, these properties are necessary for the consistency of the system as a whole, and therefore @@ -93,6 +98,12 @@ Further logic remains essentially the same, so we briefly present the relevant t The #aligned chip only needs #nr_variables variables, expressed through #nr_columns columns; it leverages #nr_aligned_interactions interactions. #render_chip_variable_table(alignedchip, config) #render_chip_assumptions(alignedchip, config) + +Some of the assumptions can be checked with only arithmetic constraints, so we +provide these below. + +#render_constraint_table(alignedchip, config, groups: "assumptions") + #render_constraint_table(alignedchip, config) == Padding @@ -159,4 +170,4 @@ The table can be padded to the next power of two with the following value assign The following ideas may prove to be optimizations for the #memw/#aligned/#reg chip: - `MEMB` chip that does a one-byte write to remove old_timestamp from here (uncertain tradeoffs) - Adding `μ_sum`/`w2`/`w4`/`write8` multiplicities to the `IS_HALF` lookups may make some GKR things faster if there are known zeroes. -- For the register fast-path, one may upgrade the `IS_HALF` check to an `IS_B20` check for extended range at the cost of looking through a larger table. \ No newline at end of file +- For the register fast-path, one may upgrade the `IS_HALF` check to an `IS_B20` check for extended range at the cost of looking through a larger table. diff --git a/spec/mul.typ b/spec/mul.typ index 6e6de12e0..e5dc0de7c 100644 --- a/spec/mul.typ +++ b/spec/mul.typ @@ -32,9 +32,6 @@ The #mul chip is comprised of #nr_variables variables that are expressed using # $mat(delim: #none, top; bottom)$ } -= Assumptions -The following range checks are assumed to be performed/enforced outside of this chip: -#render_chip_assumptions(chip, config) = Constraints == Overview diff --git a/spec/shift.typ b/spec/shift.typ index c464d5d55..a1583e3e7 100644 --- a/spec/shift.typ +++ b/spec/shift.typ @@ -42,9 +42,6 @@ Here, `<<` and `>>` denote the _logical_ left and right shift operations, while The `SHIFT` chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): #render_chip_variable_table(chip, config) -= Assumptions -#render_chip_assumptions(chip, config) - = Explanation This chip has a rather complex design as a result of designing it to fit in as few columns possible. We briefly discuss the intricacies of the design, attempting to illustrate its correctness. @@ -109,7 +106,10 @@ Copies of this variable are used for any full limbs shifted in when $#`right` = Moreover, `X[4]` contains a copy of `extension` shifted over by the right number of bits, to allow the construction of $#`in >>> shift` mod 16$ as the appropriate intermediate. = Constraints -First, we constrain `bit_shift` based on whether we are left or right-shifting. +First, we range check our inputs appropriately. +#render_constraint_table(chip, config, groups: "input") + +Then, we constrain `bit_shift` based on whether we are left or right-shifting. @shift:c:zbs makes sure `zbs` is set to `1` if and only if `bit_shift = 0`. This flag is used to indicate the special case that $#`right` = 1$ and $#`shift` = 0 mod 16$. #render_constraint_table(chip, config, groups: "bit_shift") diff --git a/spec/src/bitwise.toml b/spec/src/bitwise.toml index 30875a810..24b54442a 100644 --- a/spec/src/bitwise.toml +++ b/spec/src/bitwise.toml @@ -127,22 +127,22 @@ name = "contributions" [[constraints.contributions]] kind = "interaction" -tag = "AND_BYTE" -input = ["X", "Y"] +tag = "BYTE_ALU" +input = [["opsel", "AND"], "X", "Y"] output = "AND" multiplicity = ["-", "μ_AND"] [[constraints.contributions]] kind = "interaction" -tag = "OR_BYTE" -input = ["X", "Y"] +tag = "BYTE_ALU" +input = [["opsel", "OR"], "X", "Y"] output = "OR" multiplicity = ["-", "μ_OR"] [[constraints.contributions]] kind = "interaction" -tag = "XOR_BYTE" -input = ["X", "Y"] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], "X", "Y"] output = "XOR" multiplicity = ["-", "μ_XOR"] diff --git a/spec/src/branch.toml b/spec/src/branch.toml index 49a7833a3..92b93d015 100644 --- a/spec/src/branch.toml +++ b/spec/src/branch.toml @@ -97,6 +97,13 @@ iter = ["i", 0, 1] desc = "`IS_BIT`" +[[constraint_groups]] +name = "assumptions" + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["JALR"] [[constraint_groups]] name = "all" @@ -123,8 +130,8 @@ cond = "μ" [[constraints.all]] kind = "interaction" -tag = "AND_BYTE" -input = ["unmasked_low_byte", 254] +tag = "BYTE_ALU" +input = [["opsel", "AND"], "unmasked_low_byte", 254] output = ["idx", "next_pc_low", 0] multiplicity = "μ" diff --git a/spec/src/bytewise.toml b/spec/src/bytewise.toml new file mode 100644 index 000000000..b315a92b3 --- /dev/null +++ b/spec/src/bytewise.toml @@ -0,0 +1,49 @@ +name = "BYTEWISE" + +[[variables.input]] +name = "a" +type = "DWordBL" +desc = "The first input" +pad = 0 + +[[variables.input]] +name = "b" +type = "DWordBL" +desc = "The second input" +pad = 0 + +[[variables.input]] +name = "op" +type = "Byte" +desc = "The operation to perform" +pad = 0 + +[[variables.output]] +name = "res" +type = "DWordBL" +desc = "The result" +pad = 0 + +[[variables.multiplicity]] +name = "μ" +type = "BaseField" +desc = "" +pad = 0 + +[[constraint_groups]] +name = "all" + +[[constraints.all]] +kind = "interaction" +tag = "BYTE_ALU" +input = ["op", ["idx", "a", "i"], ["idx", "b", "i"]] +output = ["idx", "res", "i"] +multiplicity = "μ" +iter = ["i", 0, 7] + +[[constraints.all]] +kind = "interaction" +tag = "ALU" +input = [["cast", "a", "DWordWL"], ["cast", "b", "DWordWL"], "op"] +output = ["cast", "res", "DWordWL"] +multiplicity = ["-", "μ"] diff --git a/spec/src/config.toml b/spec/src/config.toml index 9ced2ce0d..9eac1d8c8 100644 --- a/spec/src/config.toml +++ b/spec/src/config.toml @@ -91,6 +91,15 @@ desc = """\ The `Word` is the *most* significant digit. """ +[[variables.types]] +label = "DWordWHBB" +subtypes = ["Byte", "Byte", "Half", "Word"] +desc = """\ + Variable that can only assume values in the range $[0, 2^64)$. \\ + Represented as a `Word`, a `Half` and two `Byte` variables. \ + The `Word` is the *most* significant digit. + """ + [[variables.types]] label = "WordBL" subtypes = ["Byte", "Byte", "Byte", "Byte"] diff --git a/spec/src/cpu.toml b/spec/src/cpu.toml index a0bd3925c..85c3aaf70 100644 --- a/spec/src/cpu.toml +++ b/spec/src/cpu.toml @@ -7,7 +7,7 @@ name = "CPU" [[variables.input]] name = "timestamp" type = "Timestamp" -desc = "A preprocessed timestamp to coordinate the memory argument. Since we have at most 3 non-disjoint memory accesses (`(rs1, rs2, rd)`, `(rs1, pc, pc)`, `(LOAD)` or `(STORE)`) a maximum of 4 slots is enough." +desc = "A preprocessed timestamp to coordinate the memory argument. Since we have at most 3 non-disjoint memory accesses (`(rs1, rs2, rd)`, `(rs1, pc, pc)`, `MEMORY`) a maximum of 4 slots is enough." [[variables.input]] name = "pc" @@ -51,58 +51,18 @@ type = "Bit" desc = "Whether to write back to the destination register" pad = 0 -# TODO: can we compress this to a single value? (1: is it worth it, 2: does it work) -[[variables.input]] -name = "memory_2bytes" -type = "Bit" -desc = "Whether the memory access (read or write) touches exactly 2 bytes" -pad = 0 - -[[variables.input]] -name = "memory_4bytes" -type = "Bit" -desc = "Whether the memory access (read or write) touches exactly 4 bytes" -pad = 0 - -[[variables.input]] -name = "memory_8bytes" -type = "Bit" -desc = "Whether the memory access (read or write) touches exactly 8 bytes" -pad = 0 - -# TODO: Are there usecases where it's nicer to just have this as a length constant? -[[variables.input]] -name = "c_type_instruction" -type = "Bit" -desc = "Whether the instruction is of C type, i.e., whether it is 2 bytes long instead of 4" -pad = 0 - [[variables.input]] name = "imm" type = "DWordWL" desc = "The fully extended 64-bit version of the immediate" pad = 0 +# We encode C-type instructions with a length of 2, as this generality allows fusing common instruction combos [[variables.input]] -name = "signed" -type = "Bit" -desc = "Indicates whether we're dealing with a signed or unsigned instruction" -pad = 0 - -[[variables.input]] -name = "mp_selector" -type = "Bit" -desc = """Multi-purpose selector used by different ALU operations for different purposes. Currently, it is used - - by the `MUL` chip to select between `MUL`/`MULH` and `MULH[S]U`, and - - as flag for inverting the condition of conditional branches (see `branch_cond`) - - as direction (left or right) for `SHIFT`""" -pad = 0 - -[[variables.input]] -name = "muldiv_selector" -type = "Bit" -desc = "Selects which output of `MUL` (lo/hi) or `DIV` (quo/rem) is wanted" -pad = 0 +name = "half_instruction_length" +type = "Byte" +desc = "Half the number of bytes consumed by this instruction, commonly used to indicate whether the instruction is of C type, i.e., whether it is 2 bytes long (= 1) instead of 4 (= 2)" +pad = 2 [[variables.input]] name = "word_instr" @@ -111,108 +71,59 @@ desc = "Whether the instruction is a \\*W instruction, requiring the inputs and pad = 0 [[variables.input]] -name = "ADD" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "SUB" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "SLT" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "AND" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "OR" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "XOR" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "SHIFT" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - -[[variables.input]] -name = "JALR" +name = "ALU" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Whether to use the ALU for this instruction" pad = 0 [[variables.input]] -name = "BEQ" -type = "Bit" -desc = "One-hot ALU selector flag" +name = "alu_flags" +type = "Byte" +desc = "The ALU operation + flags (interpreting things as signed/unsigned, choosing the MUL/DVRM output, ...) to pass to the ALU" pad = 0 [[variables.input]] -name = "BLT" +name = "ADD" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Addition fast-path bypassing the ALU" pad = 0 [[variables.input]] -name = "LOAD" +name = "SUB" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Subtraction fast-path bypassing the ALU" pad = 0 [[variables.input]] -name = "STORE" +name = "MEMORY" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Whether this instruction touches memory (LOAD/STORE)" pad = 0 [[variables.input]] -name = "MUL" -type = "Bit" -desc = "One-hot ALU selector flag" +name = "mem_flags" +type = "Byte" +desc = "The flags to pass for MEMORY operations (LOAD vs STORE, number of bytes touched, signed)" pad = 0 [[variables.input]] -name = "DIVREM" +name = "BRANCH" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Whether this instruction is a conditional branch (BLT, BEQ)" pad = 0 [[variables.input]] name = "ECALL" type = "Bit" -desc = "One-hot ALU selector flag" +desc = "Whether this instruction is an ECALL" pad = 0 -[[variables.input]] -name = "EBREAK" -type = "Bit" -desc = "One-hot ALU selector flag" -pad = 0 - - # Output [[variables.output]] name = "next_pc" type = "DWordWL" desc = "The program counter for the next instruction" -pad = 5 +pad = 1 [[variables.output]] name = "rvd" @@ -235,65 +146,41 @@ pad = 0 [[variables.auxiliary]] name = "rv1" -type = "DWordWHH" +type = "DWordWL" desc = "The value of register `rs1`" pad = 0 [[variables.auxiliary]] name = "rv2" -type = "DWordWHH" +type = "DWordWL" desc = "The value of register `rs2`" pad = 0 -[[variables.auxiliary]] -name = "rv1_ext_bit" -type = "Bit" -desc = "The sign bit of `rv1` if seen as a 32-bit word, used for sign extension with `word_instr`" -pad = 0 - -[[variables.auxiliary]] -name = "arg1" -type = "DWordBL" -desc = "The extended version of `rv1`, depending on `word_instr`" -pad = 0 - -[[variables.auxiliary]] -name = "rv2_ext_bit" -type = "Bit" -desc = "The sign bit of `rv2` if seen as a 32-bit word, used for sign extension with `word_instr`" -pad = 0 - [[variables.auxiliary]] name = "arg2" -type = "DWordBL" +type = "DWordWL" desc = "A multiplexed version of `rv2` and `imm`, to be used as second argument to ALU calls" pad = 0 -[[variables.auxiliary]] -name = "res_ext_bit" -type = "Bit" -desc = "The sign bit of `res`, if seen as a 32-bit word, used for sign extension with `word_instr`" -pad = 0 - [[variables.auxiliary]] name = "res" -type = "DWordBL" +type = "DWordHL" desc = "The ALU result" pad = 0 -[[variables.auxiliary]] -name = "is_equal" -type = "Bit" -desc = "Whether `rv1` and `arg2` are equal" -pad = 0 - [[variables.auxiliary]] name = "branch_cond" type = "Bit" -desc = "Whether a branch is taken, i.e., the branch condition" +desc = "Whether a branch is taken: the branch condition evaluates to true, or we are doing an unconditional jump" pad = 0 # Virtual +[[variables.virtual]] +name = "JALR" +type = "Bit" +desc = "Read whether our BRANCH corresponds to a JAL(R) instruction from `mem_flags`, as `MEMORY` and `BRANCH` are mutually exclusive" +def = "mem_flags" + [[variables.virtual]] name = "packed_decode" type = "BaseField" @@ -302,50 +189,53 @@ def = ["+", ["*", ["^", 2, 0], "read_register1"], ["*", ["^", 2, 1], "read_register2"], ["*", ["^", 2, 2], "write_register"], - ["*", ["^", 2, 3], "memory_2bytes"], - ["*", ["^", 2, 4], "memory_4bytes"], - ["*", ["^", 2, 5], "memory_8bytes"], - ["*", ["^", 2, 6], "c_type_instruction"], - ["*", ["^", 2, 7], "signed"], - ["*", ["^", 2, 8], "mp_selector"], - ["*", ["^", 2, 9], "muldiv_selector"], - ["*", ["^", 2, 10], "word_instr"], - ["*", ["^", 2, 11], "ADD"], - ["*", ["^", 2, 12], "SUB"], - ["*", ["^", 2, 13], "SLT"], - ["*", ["^", 2, 14], "AND"], - ["*", ["^", 2, 15], "OR"], - ["*", ["^", 2, 16], "XOR"], - ["*", ["^", 2, 17], "SHIFT"], - ["*", ["^", 2, 18], "JALR"], - ["*", ["^", 2, 19], "BEQ"], - ["*", ["^", 2, 20], "BLT"], - ["*", ["^", 2, 21], "LOAD"], - ["*", ["^", 2, 22], "STORE"], - ["*", ["^", 2, 23], "MUL"], - ["*", ["^", 2, 24], "DIVREM"], - ["*", ["^", 2, 25], "ECALL"], - ["*", ["^", 2, 26], "EBREAK"], - ["*", ["^", 2, 27], "rs1"], - ["*", ["^", 2, 35], "rs2"], - ["*", ["^", 2, 43], "rd"], + ["*", ["^", 2, 3], "word_instr"], + ["*", ["^", 2, 4], "ALU"], + ["*", ["^", 2, 5], "ADD"], + ["*", ["^", 2, 6], "SUB"], + ["*", ["^", 2, 7], "MEMORY"], + ["*", ["^", 2, 8], "BRANCH"], + ["*", ["^", 2, 9], "ECALL"], + ["*", ["^", 2, 10], "rs1"], + ["*", ["^", 2, 18], "rs2"], + ["*", ["^", 2, 26], "rd"], + ["*", ["^", 2, 34], "half_instruction_length"], + ["*", ["^", 2, 42], "alu_flags"], + ["*", ["^", 2, 50], "mem_flags"], ] -[[variables.virtual]] -name = "pad" -type = "Bit" -desc = "When no flags are set, we must be in a padding row." -def = ["-", 1, "ADD", "SUB", "SLT", "AND", "OR", "XOR", "SHIFT", "JALR", "BEQ", "BLT", "LOAD", "STORE", "MUL", "DIVREM", "ECALL", "EBREAK"] - [[assumptions]] -desc = "At most one ALU selector flag is 1 by the decoding, and every other flag is 0." -ref = "cpu:a:one-hot" +desc = "`MEMORY` and `BRANCH` are mutually exclusive" +ref = "cpu:a:mem-branch-mutex" [[assumptions]] -desc = "When `STORE + LOAD + BEQ + BLT = 0`, either `rs2 = 0` or `imm = 0` should be enforced by the decoding. This is needed for `arg2`." +desc = "When `MEMORY + BRANCH = 0`, either `read_register2 = 0` or `imm = 0` should be enforced by the decoding. This is needed for `arg2`." ref = "cpu:a:arg2-multiplex" +[[assumptions]] +desc = "$#`!MEMORY` => #`IS_BIT`$" + +[[constraint_groups]] +name = "assumptions" + +[[constraints.assumptions]] +kind = "arith" +constraint = "$not (#`MEMORY` and #`BRANCH`)$" +poly = ["*", "MEMORY", "BRANCH"] + +[[constraints.assumptions]] +kind = "arith" +constraint = "$(1 - #`MEMORY` - #`BRANCH`) => (#`read_register2` = 0 or #`imm[i]` = 0)$" +poly = ["*", ["-", 1, "MEMORY", "BRANCH"], "read_register2", ["+", ["idx", "imm", 0], ["idx", "imm", 1]]] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["mem_flags"] +cond = ["not", "MEMORY"] + + [[constraint_groups]] name = "decode" @@ -353,7 +243,44 @@ name = "decode" kind = "interaction" tag = "DECODE" input = ["pc", "imm", "packed_decode"] -multiplicity = 1 +multiplicity = ["not", "word_instr"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`MEMORY = 0`$" +poly = ["*", "word_instr", "MEMORY"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`BRANCH = 0`$" +poly = ["*", "word_instr", "BRANCH"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`ECALL = 0`$" +poly = ["*", "word_instr", "ECALL"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`read_register1 = 0`$" +poly = ["*", "word_instr", "read_register1"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`read_register2 = 0`$" +poly = ["*", "word_instr", "read_register2"] + +[[constraints.decode]] +kind = "arith" +constraint = "$#`word_instr` => #`write_register = 0`$" +poly = ["*", "word_instr", "write_register"] + +[[constraints.decode]] +kind = "interaction" +tag = "CPU32" +input = ["timestamp", "pc"] +output = "half_instruction_length" +multiplicity = "word_instr" [[constraint_groups]] @@ -380,51 +307,27 @@ ref = "cpu:c:range_write_register" [[constraints.range]] kind = "template" -tag = "IS_BIT" -input = ["memory_2bytes"] -ref = "cpu:c:range_memory_2bytes" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["memory_4bytes"] -ref = "cpu:c:range_memory_4bytes" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["memory_8bytes"] -ref = "cpu:c:range_memory_8bytes" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["c_type_instruction"] +tag = "IS_BYTE" +input = ["half_instruction_length"] ref = "cpu:c:range_c_type_instruction" [[constraints.range]] kind = "template" tag = "IS_BIT" -input = ["signed"] -ref = "cpu:c:range_signed" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["mp_selector"] -ref = "cpu:c:range_mp_selector" +input = ["word_instr"] +ref = "cpu:c:range_word_instr" [[constraints.range]] kind = "template" tag = "IS_BIT" -input = ["muldiv_selector"] -ref = "cpu:c:range_muldiv_selector" +input = ["ALU"] +ref = "cpu:c:range_ALU" [[constraints.range]] kind = "template" -tag = "IS_BIT" -input = ["word_instr"] -ref = "cpu:c:range_word_instr" +tag = "IS_BYTE" +input = ["alu_flags"] +ref = "cpu:c:range_alu_flags" [[constraints.range]] kind = "template" @@ -441,74 +344,20 @@ ref = "cpu:c:range_SUB" [[constraints.range]] kind = "template" tag = "IS_BIT" -input = ["SLT"] -ref = "cpu:c:range_SLT" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["AND"] -ref = "cpu:c:range_AND" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["OR"] -ref = "cpu:c:range_OR" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["XOR"] -ref = "cpu:c:range_XOR" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["SHIFT"] -ref = "cpu:c:range_SHIFT" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["JALR"] -ref = "cpu:c:range_JALR" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["BEQ"] -ref = "cpu:c:range_BEQ" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["BLT"] -ref = "cpu:c:range_BLT" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["LOAD"] -ref = "cpu:c:range_LOAD" - -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["STORE"] -ref = "cpu:c:range_STORE" +input = ["MEMORY"] +ref = "cpu:c:range_MEMORY" [[constraints.range]] kind = "template" -tag = "IS_BIT" -input = ["MUL"] -ref = "cpu:c:range_MUL" +tag = "IS_BYTE" +input = ["mem_flags"] +ref = "cpu:c:range_mem_flags" [[constraints.range]] kind = "template" tag = "IS_BIT" -input = ["DIVREM"] -ref = "cpu:c:range_DIVREM" +input = ["BRANCH"] +ref = "cpu:c:range_BRANCH" [[constraints.range]] kind = "template" @@ -516,12 +365,6 @@ tag = "IS_BIT" input = ["ECALL"] ref = "cpu:c:range_ECALL" -[[constraints.range]] -kind = "template" -tag = "IS_BIT" -input = ["EBREAK"] -ref = "cpu:c:range_EBREAK" - [[constraints.range]] kind = "template" tag = "IS_BYTE" @@ -538,22 +381,11 @@ tag = "IS_BYTE" input = ["rd"] [[constraints.range]] -kind = "template" -tag = "IS_BYTE" -input = [["idx", "arg1", "i"]] -iter = ["i", 0, 7] - -[[constraints.range]] -kind = "template" -tag = "IS_BYTE" -input = [["idx", "arg2", "i"]] -iter = ["i", 0, 7] - -[[constraints.range]] -kind = "template" -tag = "IS_BYTE" +kind = "interaction" +tag = "IS_HALF" input = [["idx", "res", "i"]] -iter = ["i", 0, 7] +multiplicity = 1 +iter = ["i", 0, 3] [[constraint_groups]] @@ -561,91 +393,36 @@ name = "alu" prefix = "A" [[constraints.alu]] -kind = "template" -tag = "ADD" -cond = ["+", "ADD", "LOAD"] -input = [["cast", "arg1", "DWordWL"], ["cast", "arg2", "DWordWL"]] -output = ["cast", "res", "DWordWL"] +kind = "arith" +constraint = "$#`arg2` = #`MEMORY` dot #`imm` + #`BRANCH` dot #`rv2` + (1 - #`MEMORY` - #`BRANCH`) dot (#`rv2` + #`imm`)$" +poly = ["-", ["idx", "arg2", "i"], + ["*", "MEMORY", ["idx", "imm", "i"]], + ["*", "BRANCH", ["idx", "rv2", "i"]], + ["*", ["-", 1, "MEMORY", "BRANCH"], ["idx", ["+", "rv2", "imm"], "i"]] +] +iter = ["i", 0, 1] [[constraints.alu]] kind = "template" tag = "ADD" -cond = "STORE" -input = [["cast", "arg1", "DWordWL"], "imm"] +cond = "ADD" +input = ["rv1", "arg2"] output = ["cast", "res", "DWordWL"] [[constraints.alu]] kind = "template" tag = "SUB" -cond = ["+", "SUB", "BEQ"] -input = [["cast", "arg1", "DWordWL"], ["cast", "arg2", "DWordWL"]] +cond = "SUB" +input = ["rv1", "arg2"] output = ["cast", "res", "DWordWL"] ref = "cpu:c:sub" [[constraints.alu]] kind = "interaction" -tag = "LT" -input = [["cast", "arg1", "DWordWL"], ["cast", "arg2", "DWordWL"], "signed"] -output = ["idx", "res", 0] -multiplicity = ["+", "SLT", "BLT"] - -[[constraints.alu]] -kind = "arith" -constraint = "$#`SLT` + #`BLT` => #`res[i]` = 0$" -poly = ["*", ["+", "SLT", "BLT"], ["idx", "res", "i"]] -iter = ["i", 1, 7] - -[[constraints.alu]] -kind = "interaction" -tag = "AND_BYTE" -input = [["idx", "arg1", "i"], ["idx", "arg2", "i"]] -output = ["idx", "res", "i"] -multiplicity = "AND" -iter = ["i", 0, 7] - -[[constraints.alu]] -kind = "interaction" -tag = "OR_BYTE" -input = [["idx", "arg1", "i"], ["idx", "arg2", "i"]] -output = ["idx", "res", "i"] -multiplicity = "OR" -iter = ["i", 0, 7] - -[[constraints.alu]] -kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", "arg1", "i"], ["idx", "arg2", "i"]] -output = ["idx", "res", "i"] -multiplicity = "XOR" -iter = ["i", 0, 7] - -[[constraints.alu]] -kind = "interaction" -tag = "SHIFT" -input = [["cast", "arg1", "DWordHL"], ["idx", "arg2", 0], "mp_selector", "signed", "word_instr"] +tag = "ALU" +input = ["rv1", "arg2", "alu_flags"] output = ["cast", "res", "DWordWL"] -multiplicity = "SHIFT" - -[[constraints.alu]] -kind = "template" -tag = "ADD" -input = ["pc", ["*", ["+", ["*", 2, "c_type_instruction"], ["*", 4, ["not", "c_type_instruction"]]], ["cast", 1, "DWordWL"]]] -output = ["cast", "res", "DWordWL"] -cond = "JALR" - -[[constraints.alu]] -kind = "interaction" -tag = "MUL" -input = [["cast", "arg1", "DWordHL"], "signed", ["cast", "arg2", "DWordHL"], "mp_selector", "muldiv_selector"] -output = ["cast", "res", "DWordWL"] -multiplicity = "MUL" - -[[constraints.alu]] -kind = "interaction" -tag = "DVRM" -input = [["cast", "arg1", "DWordHL"], ["cast", "arg2", "DWordHL"], "signed", "muldiv_selector"] -output = ["cast", "res", "DWordWL"] -multiplicity = "DIVREM" +multiplicity = "ALU" [[constraint_groups]] @@ -655,8 +432,8 @@ prefix = "M" [[constraints.mem]] kind = "interaction" tag = "MEMW" -input = [1, ["*", ["cast", 2, "DWordWL"], "rs1"], ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", ["cast", "rv1", "DWordWL"], 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 0, "DWordWL"]], 1, 0, 0] -output = ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", ["cast", "rv1", "DWordWL"], 1], 0, 0, 0, 0, 0, 0] +input = [1, ["*", ["cast", 2, "DWordWL"], "rs1"], ["arr", ["idx", "rv1", 0], ["idx", "rv1", 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 0, "DWordWL"]], 1, 0, 0] +output = ["arr", ["idx", "rv1", 0], ["idx", "rv1", 1], 0, 0, 0, 0, 0, 0] multiplicity = "read_register1" ref = "cpu:c:read_rv1" @@ -664,20 +441,20 @@ ref = "cpu:c:read_rv1" kind = "arith" constraint = "$#`!read_register1` => #`rv1[i]` = 0$" poly = ["*", ["not", "read_register1"], ["idx", "rv1", "i"]] -iter = ["i", 0, 2] +iter = ["i", 0, 1] [[constraints.mem]] kind = "interaction" tag = "MEMW" -input = [1, ["*", ["cast", 2, "DWordWL"], "rs2"], ["arr", ["idx", ["cast", "rv2", "DWordWL"], 0], ["idx", ["cast", "rv2", "DWordWL"], 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], 1, 0, 0] -output = ["arr", ["idx", ["cast", "rv2", "DWordWL"], 0], ["idx", ["cast", "rv2", "DWordWL"], 1], 0, 0, 0, 0, 0, 0] +input = [1, ["*", ["cast", 2, "DWordWL"], "rs2"], ["arr", ["idx", "rv2", 0], ["idx", "rv2", 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], 1, 0, 0] +output = ["arr", ["idx", "rv2", 0], ["idx", "rv2", 1], 0, 0, 0, 0, 0, 0] multiplicity = "read_register2" [[constraints.mem]] kind = "arith" constraint = "$#`!read_register2` => #`rv2[i]` = 0$" poly = ["*", ["not", "read_register2"], ["idx", "rv2", "i"]] -iter = ["i", 0, 2] +iter = ["i", 0, 1] [[constraints.mem]] kind = "interaction" @@ -687,16 +464,16 @@ multiplicity = "write_register" [[constraints.mem]] kind = "interaction" -tag = "LOAD" -input = [["cast", "res", "DWordWL"], ["+", "timestamp", ["cast", 0, "DWordWL"]], "memory_2bytes", "memory_4bytes", "memory_8bytes", "signed"] +tag = "MEMOP" +input = ["timestamp", ["cast", "res", "DWordWL"], "rv2", "mem_flags"] output = "rvd" -multiplicity = "LOAD" +multiplicity = "MEMORY" [[constraints.mem]] -kind = "interaction" -tag = "MEMW" -input = [0, ["cast", "res", "DWordWL"], ["cast", "arg2", ["Byte", 8]], ["+", "timestamp", ["cast", 1, "DWordWL"]], "memory_2bytes", "memory_4bytes", "memory_8bytes"] -multiplicity = "STORE" +kind = "arith" +constraint = "$#`!MEMORY` and #`!BRANCH` => #`rvd` = #`res`$" +poly = ["*", ["-", 1, "MEMORY", "BRANCH"], ["-", ["idx", "rvd", "i"], ["idx", ["cast", "res", "DWordWL"], "i"]]] +iter = ["i", 0, 1] [[constraints.mem]] kind = "template" @@ -712,124 +489,59 @@ input = ["prev_pc_timestamp_borrow"] kind = "interaction" tag = "memory" input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["arr", ["+", ["-", ["idx", "timestamp", 0], ["*", 3, ["not", "pc_double_read"]]], ["*", ["^", 2, 32], "prev_pc_timestamp_borrow"]], ["-", ["idx", "timestamp", 1], "prev_pc_timestamp_borrow"]], ["idx", "pc", "i"]] -multiplicity = ["not", "pad"] iter = ["i", 0, 1] +multiplicity = 1 [[constraints.mem]] kind = "interaction" tag = "memory" input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], ["idx", "next_pc", "i"]] -multiplicity = ["-", ["not", "pad"]] iter = ["i", 0, 1] - +multiplicity = -1 [[constraint_groups]] -name = "sys" -prefix = "S" +name = "branch" +prefix = "B" -[[constraints.sys]] +[[constraints.branch]] kind = "arith" -constraint = "`!EBREAK`" -desc = "We treat `EBREAK` as an unprovable trap" -poly = ["not", "EBREAK"] -ref = "cpu:c:ebreak_traps" - -[[constraints.sys]] -kind = "interaction" -tag = "ECALL" -input = ["timestamp", ["cast", "rv1", "DWordWL"]] -multiplicity = "ECALL" - - -[[constraint_groups]] -name = "ext" -prefix = "E" - -[[constraints.ext]] -kind = "template" -tag = "SIGN" -input = [["idx", "rv1", 1], "word_instr"] -output = "rv1_ext_bit" - -[[constraints.ext]] -kind = "arith" -constraint = "$#`arg1[:4]` = #`rv1[:2]`$" -poly = ["-", ["idx", ["cast", "arg1", "DWordWL"], 0], ["idx", ["cast", "rv1", "DWordWL"], 0]] - -[[constraints.ext]] -kind = "arith" -constraint = "$#`arg1[4:]` = #`rv1[2]` dot (1 - #`word_instr`) + (2^(32) - 1) dot #`rv1_ext_bit` dot #`signed`$" -poly = ["-", ["idx", ["cast", "arg1", "DWordWL"], 1], ["*", ["not", "word_instr"], ["idx", "rv1", 2]], ["*", "signed", "rv1_ext_bit", ["-", ["^", 2, 32], 1]]] - -[[constraints.ext]] -kind = "template" -tag = "SIGN" -input = [["idx", "rv2", 1], "word_instr"] -output = "rv2_ext_bit" - -[[constraints.ext]] -kind = "arith" -constraint = "$#`arg2[:4]` = (1 - #`LOAD`) dot #`rv2[:2]` + (1 - #`BEQ` - #`BLT` - #`STORE`) dot #`imm[0]`$" -poly = ["-", ["idx", ["cast", "arg2", "DWordWL"], 0], ["*", ["not", "LOAD"], ["idx", ["cast", "rv2", "DWordWL"], 0]], ["*", ["-", 1, "BEQ", "BLT", "STORE"], ["idx", "imm", 0]]] - -[[constraints.ext]] -kind = "arith" -constraint = "$#`arg2[4:]` = (1 - #`LOAD`) dot ((1 - #`word_instr`) dot #`rv2[2]` + #`signed` dot #`rv2_ext_bit` dot (2^(32) - 1)) + (1 - #`BEQ` - #`BLT` - #`STORE`) dot #`imm[1]`$" -poly = ["-", ["idx", ["cast", "arg2", "DWordWL"], 1], ["*", ["not", "LOAD"], ["not", "word_instr"], ["idx", "rv2", 2]], ["*", ["not", "LOAD"], "signed", "rv2_ext_bit", ["-", ["^", 2, 32], 1]], ["*", ["-", 1, "BEQ", "BLT", "STORE"], ["idx", "imm", 1]]] - -[[constraints.ext]] -kind = "template" -tag = "SIGN" -input = [["idx", ["cast", "res", "DWordHL"], 1], "word_instr"] -output = "res_ext_bit" - -[[constraints.ext]] -kind = "arith" -constraint = "$#`!LOAD` => #`rvd[0]` = #`res[:4]`$" -poly = ["*", ["not", "LOAD"], ["-", ["idx", "rvd", 0], ["idx", ["cast", "res", "DWordWL"], 0]]] - -[[constraints.ext]] -kind = "arith" -constraint = "$#`!LOAD` => #`rvd[1]` = (1 - #`word_instr`) dot #`res[4:]` + #`res_ext_bit` dot (2^(32) - 1)$" -desc = "_Sign_ extend the output if it wasn't a `LOAD`. Only `LOAD` has both `write_register = 1` and `rvd ≠ res`. `LOAD` and `word_instr` are disjoint" -poly = ["*", ["not", "LOAD"], ["-", ["idx", "rvd", 1], ["*", ["not", "word_instr"], ["idx", ["cast", "res", "DWordWL"], 1]], ["*", "res_ext_bit", ["-", ["^", 2, 32], 1]]]] - - -[[constraint_groups]] -name = "misc" -prefix = "O" - -[[constraints.misc]] -kind = "interaction" -tag = "ZERO" -input = [["+", ["idx", "res", 0], ["idx", "res", 1], ["idx", "res", 2], ["idx", "res", 3], ["idx", "res", 4], ["idx", "res", 5], ["idx", "res", 6], ["idx", "res", 7]]] -output = "is_equal" -multiplicity = "BEQ" -ref = "cpu:c:is_equal" - -[[constraints.misc]] -kind = "arith" -constraint = "$#`branch_cond` = #`JALR` or (#`BLT` and (#`res` xor #`invert`)) or (#`BEQ` and (#`is_equal` xor #`invert`))$" -desc = "where `invert` is represented by `mp_selector`" -poly = ["+", - ["-", "branch_cond"], - "JALR", - ["*", ["idx", "res", 0], ["not", "mp_selector"], "BLT"], - ["*", ["-", 1, ["idx", "res", 0]], "mp_selector", "BLT"], - ["*", "is_equal", ["not", "mp_selector"], "BEQ"], - ["*", ["not", "is_equal"], "mp_selector", "BEQ"] +constraint = "$#`branch_cond` = #`BRANCH` and (#`JALR` or #`res`)$" +poly = ["-", + "branch_cond", + ["*", "BRANCH", "JALR"], + ["*", "BRANCH", ["-", 1, "JALR"], ["idx", "res", 0]] ] -[[constraints.misc]] +[[constraints.branch]] kind = "interaction" tag = "BRANCH" -input = ["pc", "imm", ["cast", "arg1", "DWordWL"], "JALR"] +input = ["pc", "imm", "rv1", "JALR"] output = "next_pc" multiplicity = "branch_cond" -[[constraints.misc]] +[[constraints.branch]] kind = "template" tag = "ADD" -input = ["pc", ["*", ["+", ["*", 2, "c_type_instruction"], ["*", 4, ["not", "c_type_instruction"]]], ["cast", 1, "DWordWL"]]] +input = ["pc", ["arr", ["*", 2, "half_instruction_length"], 0]] output = "next_pc" +cond = ["not", "branch_cond"] desc = "Increment `pc` to `next_pc` if we're not branching" + +[[constraints.branch]] +kind = "template" +tag = "ADD" +input = ["pc", ["arr", ["*", 2, "half_instruction_length"], 0]] +output = "rvd" +cond = "BRANCH" +desc = "Compute the next instruction address in `rvd` when BRANCH is active to enable JALR" + + +[[constraint_groups]] +name = "sys" +prefix = "S" + +[[constraints.sys]] +kind = "interaction" +tag = "ECALL" +input = ["timestamp", "rv1"] +multiplicity = "ECALL" diff --git a/spec/src/cpu32.toml b/spec/src/cpu32.toml new file mode 100644 index 000000000..f26ce0e8c --- /dev/null +++ b/spec/src/cpu32.toml @@ -0,0 +1,416 @@ +name = "CPU32" + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "The timestamp for the CPU row" +pad = 0 + +[[variables.input]] +name = "pc" +type = "DWordWL" +desc = "The PC at which the instruction occurs" +pad = 0 + +[[variables.output]] +name = "half_instruction_length" +type = "Byte" +desc = "The length of this instruction" +pad = 2 + +[[variables.auxiliary]] +name = "rs1" +type = "Byte" +desc = "Source register 1" +pad = 0 + +[[variables.auxiliary]] +name = "read_register1" +type = "Bit" +desc = "Whether to read from `rs1` or not" +pad = 0 + +# Note in case we inline register accesses here: We don't need the upper word +[[variables.auxiliary]] +name = "rv1" +type = "DWordWHH" +desc = "The value in register `rs1`" +pad = 0 + +[[variables.auxiliary]] +name = "rv1_sign" +type = "Bit" +desc = "The sign bit of the lower word of `rv1`" +pad = 0 + +[[variables.auxiliary]] +name = "arg1" +type = "DWordWL" +desc = "The sign-extended version of `rv1`" +pad = 0 + +[[variables.auxiliary]] +name = "rs2" +type = "Byte" +desc = "Source register 2" +pad = 0 + +[[variables.auxiliary]] +name = "read_register2" +type = "Bit" +desc = "Whether to read from `rs2`" +pad = 0 + +[[variables.auxiliary]] +name = "rv2" +type = "DWordWHH" +desc = "The value in register `rs2`" +pad = 0 + +[[variables.auxiliary]] +name = "rv2_sign" +type = "Bit" +desc = "The sign bit of the lower word of `rv2`" +pad = 0 + +[[variables.auxiliary]] +name = "imm" +type = "DWordWL" +desc = "The fully sign-extended immediate to use" +pad = 0 + +[[variables.auxiliary]] +name = "arg2" +type = "DWordWL" +desc = "Either the sign-extended version of `rv2` or all of `imm`" +pad = 0 + +[[variables.auxiliary]] +name = "res" +type = "DWordHL" +desc = "The ALU result" +pad = 0 + +[[variables.auxiliary]] +name = "res_sign" +type = "Bit" +desc = "The sign bit of the lower word of `res`" +pad = 0 + +[[variables.auxiliary]] +name = "rd" +type = "Byte" +desc = "Destination register" +pad = 0 + +[[variables.auxiliary]] +name = "write_register" +type = "Bit" +desc = "Whether to write back to `rd`" +pad = 0 + +[[variables.auxiliary]] +name = "rvd" +type = "DWordWL" +desc = "The value to write back to `rd`, the sign-extended version of `res`" +pad = 0 + +[[variables.auxiliary]] +name = "ALU" +type = "Bit" +desc = "Whether the full ALU is active" +pad = 0 + +[[variables.auxiliary]] +name = "alu_flags" +type = "Byte" +desc = "The ALU operation + flags" +pad = 0 + +[[variables.auxiliary]] +name = "ADD" +type = "Bit" +desc = "Whether the full ALU is active" +pad = 0 + +[[variables.auxiliary]] +name = "SUB" +type = "Bit" +desc = "Whether the full ALU is active" +pad = 0 + +[[variables.auxiliary]] +name = "signed" +type = "Bit" +desc = "Whether the instruction is signed or not. Extracted from `alu_flags`, used to determine the extension for the inputs" +pad = 0 + +[[variables.virtual]] +name = "packed_decode" +type = "BaseField" +desc = "The packed representation of all flags and information from the decode table" +def = ["+", + ["*", ["^", 2, 0], "read_register1"], + ["*", ["^", 2, 1], "read_register2"], + ["*", ["^", 2, 2], "write_register"], + ["*", ["^", 2, 3], 1], # word_instr + ["*", ["^", 2, 4], "ALU"], + ["*", ["^", 2, 5], "ADD"], + ["*", ["^", 2, 6], "SUB"], + ["*", ["^", 2, 10], "rs1"], + ["*", ["^", 2, 18], "rs2"], + ["*", ["^", 2, 26], "rd"], + ["*", ["^", 2, 34], "half_instruction_length"], + ["*", ["^", 2, 42], "alu_flags"], +] + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + +[[assumptions]] +desc = "`IS_WORD[timestamp[i]]`" +iter = ["i", 0, 1] + +[[assumptions]] +desc = "`IS_WORD[pc[i]]`" +iter = ["i", 0, 1] + +[[assumptions]] +desc = "`read_register2 = 0` or `imm = 0`, enforced by decoding." + +[[constraint_groups]] +name = "assumptions" + +[[constraints.assumptions]] +kind = "arith" +constraint = "$#`read_register2` = 0 or #`imm[i] = 0`$" +poly = ["*", "read_register2", ["+", ["idx", "imm", 0], ["idx", "imm", 1]]] + +[[constraint_groups]] +name = "decode" + +[[constraints.decode]] +kind = "interaction" +tag = "DECODE" +input = ["pc", "imm", "packed_decode"] +multiplicity = "μ" + +[[constraint_groups]] +name = "range" +prefix = "R" + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["μ"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["read_register1"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["read_register2"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["write_register"] + +[[constraints.range]] +kind = "template" +tag = "IS_BYTE" +input = ["half_instruction_length"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["ALU"] + +# Technically constraint by the BYTE_ALU in `ext`, but this is safer/cleaner +[[constraints.range]] +kind = "template" +tag = "IS_BYTE" +input = ["alu_flags"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["ADD"] + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["SUB"] + +[[constraints.range]] +kind = "template" +tag = "IS_BYTE" +input = ["rs1"] + +[[constraints.range]] +kind = "template" +tag = "IS_BYTE" +input = ["rs2"] + +[[constraints.range]] +kind = "template" +tag = "IS_BYTE" +input = ["rd"] + +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "rv1", "i"]] +multiplicity = "μ" +iter = ["i", 0, 1] + +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "rv2", "i"]] +multiplicity = "μ" +iter = ["i", 0, 1] + +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "res", "i"]] +multiplicity = "μ" +iter = ["i", 0, 3] + + +[[constraint_groups]] +name = "alu" +prefix = "A" + +[[constraints.alu]] +kind = "template" +tag = "ADD" +cond = "ADD" +input = ["arg1", "arg2"] +output = ["cast", "res", "DWordWL"] + +[[constraints.alu]] +kind = "template" +tag = "SUB" +cond = "SUB" +input = ["arg1", "arg2"] +output = ["cast", "res", "DWordWL"] + +[[constraints.alu]] +kind = "interaction" +tag = "ALU" +input = ["arg1", "arg2", "alu_flags"] +output = ["cast", "res", "DWordWL"] +multiplicity = "ALU" + +[[constraint_groups]] +name = "mem" +prefix = "M" + + +[[constraints.mem]] +kind = "interaction" +tag = "MEMW" +input = [1, ["*", ["cast", 2, "DWordWL"], "rs1"], ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", "rv1", 2], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 0, "DWordWL"]], 1, 0, 0] +output = ["arr", ["idx", ["cast", "rv1", "DWordWL"], 0], ["idx", "rv1", 2], 0, 0, 0, 0, 0, 0] +multiplicity = "read_register1" + +[[constraints.mem]] +kind = "arith" +constraint = "$#`!read_register1` => #`rv1[i]` = 0$" +poly = ["*", ["not", "read_register1"], ["idx", "rv1", "i"]] +iter = ["i", 0, 2] + +[[constraints.mem]] +kind = "interaction" +tag = "MEMW" +input = [1, ["*", ["cast", 2, "DWordWL"], "rs2"], ["arr", ["idx", ["cast", "rv2", "DWordWL"], 0], ["idx", "rv2", 2], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 1, "DWordWL"]], 1, 0, 0] +output = ["arr", ["idx", ["cast", "rv2", "DWordWL"], 0], ["idx", "rv2", 2], 0, 0, 0, 0, 0, 0] +multiplicity = "read_register2" + +[[constraints.mem]] +kind = "arith" +constraint = "$#`!read_register2` => #`rv2[i]` = 0$" +poly = ["*", ["not", "read_register2"], ["idx", "rv2", "i"]] +iter = ["i", 0, 2] + +[[constraints.mem]] +kind = "interaction" +tag = "MEMW" +input = [1, ["*", ["cast", 2, "DWordWL"], "rd"], ["arr", ["idx", "rvd", 0], ["idx", "rvd", 1], 0, 0, 0, 0, 0, 0], ["+", "timestamp", ["cast", 2, "DWordWL"]], 1, 0, 0] +multiplicity = "write_register" + +[[constraint_groups]] +name = "logup" + +[[constraints.logup]] +kind = "interaction" +tag = "CPU32" +input = ["timestamp", "pc"] +output = "half_instruction_length" +multiplicity = ["-", "μ"] + +[[constraint_groups]] +name = "ext" + +[[constraints.ext]] +kind = "interaction" +tag = "BYTE_ALU" +input = [["opsel", "AND"], 32, "alu_flags"] +output = ["*", 32, "signed"] +multiplicity = "μ" + +[[constraints.ext]] +kind = "template" +tag = "SIGN" +input = [["idx", "rv1", 1], "signed"] +output = "rv1_sign" + +[[constraints.ext]] +kind = "arith" +constraint = "$#`arg1[0]` = #`rv1[:2]`$" +poly = ["-", ["idx", "arg1", 0], ["idx", ["cast", "rv1", "DWordWL"], 0]] + +[[constraints.ext]] +kind = "arith" +constraint = "$#`arg1[1]` = (2^(32) - 1) dot #`rv1_sign`$" +poly = ["-", ["idx", "arg1", 1], ["*", ["-", ["^", 2, 32], 1], "rv1_sign"]] + +[[constraints.ext]] +kind = "template" +tag = "SIGN" +input = [["idx", "rv2", 1], "signed"] +output = "rv2_sign" + +[[constraints.ext]] +kind = "arith" +constraint = "$#`arg2[0]` = #`rv2[:2]` + #`imm[0]`$" +poly = ["-", ["idx", "arg2", 0], ["idx", ["cast", "rv2", "DWordWL"], 0], ["idx", "imm", 0]] + +[[constraints.ext]] +kind = "arith" +constraint = "$#`arg2[1]` = (2^(32) - 1) dot #`rv2_sign` + #`imm[1]`$" +poly = ["-", ["idx", "arg2", 1], ["*", ["-", ["^", 2, 32], 1], "rv2_sign"], ["idx", "imm", 1]] + +[[constraints.ext]] +kind = "template" +tag = "SIGN" +input = [["idx", "res", 1], 1] +output = "res_sign" + +[[constraints.ext]] +kind = "arith" +constraint = "$#`rvd[0]` = #`res[:2]`$" +poly = ["-", ["idx", "rvd", 0], ["idx", ["cast", "res", "DWordWL"], 0]] + +[[constraints.ext]] +kind = "arith" +constraint = "$#`rvd[1]` = (2^(32) - 1) dot #`res_sign`$" +poly = ["-", ["idx", "rvd", 1], ["*", ["-", ["^", 2, 32], 1], "res_sign"]] diff --git a/spec/src/decode.toml b/spec/src/decode.toml index 367db1568..1e0c6dd8f 100644 --- a/spec/src/decode.toml +++ b/spec/src/decode.toml @@ -4,7 +4,7 @@ name = "DECODE" name = "pc" type = "DWordWL" desc = "value of the program counter this instruction is associated with." -pad = 7 +pad = 1 [[variables.output]] name = "packed_decode" @@ -15,36 +15,22 @@ A list of each variable and the bit(-range) in which it is located:\\ [0] `read_register1`, \\ [1] `read_register2`, \\ [2] `write_register`, \\ -[3] `memory_2bytes`, \\ -[4] `memory_4bytes`, \\ -[5] `memory_8bytes`, \\ -[6] `c_type`, \\ -[7] `signed`, \\ -[8] `mp_selector`, \\ -[9] `muldiv_selector`, \\ -[10] `word_instr`, \\ -[11] `ADD`, \\ -[12] `SUB`, \\ -[13] `SLT`, \\ -[14] `AND`, \\ -[15] `OR`, \\ -[16] `XOR`, \\ -[17] `SHIFT`, \\ -[18] `JALR`, \\ -[19] `BEQ`, \\ -[20] `BLT`, \\ -[21] `LOAD`, \\ -[22] `STORE`, \\ -[23] `MUL`, \\ -[24] `DIVREM`, \\ -[25] `ECALL`, \\ -[26] `EBREAK`; \\ -[27:35] `rs1`, \\ -[35:43] `rs2`, \\ -[43:51] `rd`, \\ +[3] `word_instr`, \\ +[4] `ALU`, \\ +[5] `ADD`, \\ +[6] `SUB`, \\ +[7] `MEMORY`, \\ +[8] `BRANCH`, \\ +[9] `ECALL`, \\ +[10:17] `rs1`, \\ +[18:25] `rs2`, \\ +[26:33] `rd`, \\ +[34:41] `half_instruction_length`, \\ +[42:49] `alu_flags`, \\ +[50:57] `mem_flags`, \\ the remaining bits are set to zero. """ -pad = ["^", 2, 26] +pad = 0 [[variables.output]] name = "imm" diff --git a/spec/src/decode_uncompressed.toml b/spec/src/decode_uncompressed.toml index 0f6c931c2..692ac4acd 100644 --- a/spec/src/decode_uncompressed.toml +++ b/spec/src/decode_uncompressed.toml @@ -33,27 +33,7 @@ desc = "whether to load the contents of address `rs2` (1) or `0` (0) into `rv2`. [[variables.output]] name = "write_register" type = "Bit" -desc = "whether the result should be written to `rd` ($=0$ for memory write and when $#`rd` = #`x0`$." - -[[variables.output]] -name = "mem_2B" -type = "Bit" -desc = "whether the memory access (read or write) touches exactly $2$ bytes." - -[[variables.output]] -name = "mem_4B" -type = "Bit" -desc = "whether the memory access (read or write) touches exactly $4$ bytes." - -[[variables.output]] -name = "mem_8B" -type = "Bit" -desc = "whether the memory access (read or write) touches exactly $8$ bytes." - -[[variables.output]] -name = "c_type" -type = "Bit" -desc = "Whether the instruction is of type `C`, i.e., whether it is $2$ bytes long instead of $4$." +desc = "whether the result should be written to `rd` ($=0$ for memory write and when $#`rd` = #`x0`)$." [[variables.output]] name = "imm" @@ -61,105 +41,124 @@ type = "DWordWL" desc = "the *fully extended (!)* 64-bit version of the immediate." [[variables.output]] -name = "signed" -type = "Bit" -desc = "selector used to indicate signed or unsigned input interpretation." - -[[variables.output]] -name = "mp_selector" -type = "Bit" -desc = """Multi-purpose selector used by the CPU to to configure several ALU operations in different ways. - See the `CPU` chip for more details.""" - -[[variables.output]] -name = "muldiv_selector" +name = "word_instr" type = "Bit" -desc = "selects which output of `MUL` (lo/hi) or `DVRM` (quo/rem) is wanted." +desc = "Whether the instruction is a `*W` instruction, requiring the inputs and outputs to be (sign) extended." [[variables.output]] -name = "word_instr" +name = "ALU" type = "Bit" -desc = "Whether the instruction is a `*W` instruction, requiring the inputs and outputs to be (sign) extended." +desc = "Enable the ALU" [[variables.output]] name = "ADD" type = "Bit" -desc = "ALU selector flag" +desc = "ALU does an ADD" [[variables.output]] name = "SUB" type = "Bit" -desc = "ALU selector flag" +desc = "ALU does a SUB" [[variables.output]] -name = "SLT" +name = "BRANCH" type = "Bit" -desc = "ALU selector flag" +desc = "The instruction is a branch" [[variables.output]] -name = "AND" +name = "MEMORY" type = "Bit" -desc = "ALU selector flag" +desc = "The instruction is a memory access" [[variables.output]] -name = "OR" +name = "ECALL" type = "Bit" -desc = "ALU selector flag" +desc = "Perform an ECALL" [[variables.output]] -name = "XOR" -type = "Bit" -desc = "ALU selector flag" +name = "half_instruction_length" +type = "Byte" +desc = "Half of how many bytes this instruction takes up in the program" -[[variables.output]] -name = "SHIFT" +[[variables.auxiliary]] +name = "alu_op" +type = "B4" +desc = "Operation selector value for the ALU" + +[[variables.auxiliary]] +name = "signed" type = "Bit" -desc = "ALU selector flag" +desc = "selector used to indicate signed or unsigned input interpretation." -[[variables.output]] -name = "JALR" +[[variables.auxiliary]] +name = "signed2" type = "Bit" -desc = "ALU selector flag" +desc = """A second signed bit, useful for MUL instructions""" -[[variables.output]] -name = "BEQ" +[[variables.auxiliary]] +name = "muldiv_selector" type = "Bit" -desc = "ALU selector flag" +desc = "selects which output of `MUL` (lo/hi) or `DVRM` (quo/rem) is wanted." -[[variables.output]] -name = "BLT" +[[variables.auxiliary]] +name = "invert" type = "Bit" -desc = "ALU selector flag" +desc = "Instructs the EQ or LT chip to invert its result, or inverts the direction of the SHIFT chip (right instead of left)" -[[variables.output]] -name = "LOAD" +[[variables.auxiliary]] +name = "memory_op" type = "Bit" -desc = "ALU selector flag" +desc = "Selects whether to LOAD (0) or STORE (1)" -[[variables.output]] -name = "STORE" +[[variables.auxiliary]] +name = "mem_2B" type = "Bit" -desc = "ALU selector flag" +desc = "whether the memory access (read or write) touches exactly $2$ bytes." -[[variables.output]] -name = "MUL" +[[variables.auxiliary]] +name = "mem_4B" type = "Bit" -desc = "ALU selector flag" +desc = "whether the memory access (read or write) touches exactly $4$ bytes." -[[variables.output]] -name = "DIVREM" +[[variables.auxiliary]] +name = "mem_8B" type = "Bit" -desc = "ALU selector flag" +desc = "whether the memory access (read or write) touches exactly $8$ bytes." -[[variables.output]] -name = "ECALL" +[[variables.auxiliary]] +name = "mem_signed" type = "Bit" -desc = "ALU selector flag" +desc = "Whether the memory operation is a signed one, this is distinct from `signed` to enable the `JALR` flag to alias `mem_flags`" -[[variables.output]] -name = "EBREAK" +[[variables.auxiliary]] +name = "JALR" type = "Bit" -desc = "ALU selector flag" +desc = "The branch is a JAL(R)" + +[[variables.virtual]] +name = "alu_flags" +type = "Byte" +desc = "The combined ALU flags" +def = ["+", + "alu_op", + ["*", 32, "signed"], + ["*", 64, ["+", "signed2", "invert"]], + ["*", 128, "muldiv_selector"] +] + +[[variables.virtual]] +name = "mem_flags" +type = "Byte" +desc = "The combined memory flags (or JALR when BRANCHing)" +def = ["+", + "JALR", + "memory_op", + ["*", 2, "mem_signed"], + ["*", 4, "mem_2B"], + ["*", 8, "mem_4B"], + ["*", 16, "mem_8B"] +] + [[variables.multiplicity]] name = "μ" diff --git a/spec/src/dvrm.toml b/spec/src/dvrm.toml index 52583907c..6ff9b994c 100644 --- a/spec/src/dvrm.toml +++ b/spec/src/dvrm.toml @@ -182,23 +182,32 @@ desc = "" pad = 0 -# Assumptions +# Constraints -[[assumptions]] -desc = "`IS_HALF[n[i]]`" -iter = ["i", 0, 3] -ref = "lt:a:range_n" +[[constraint_groups]] +name = "range" -[[assumptions]] -desc = "`IS_HALF[d[i]]`" +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "n", "i"]] iter = ["i", 0, 3] -ref = "lt:a:range_d" +multiplicity = "μ_sum" +ref = "lt:c:range_n" -[[assumptions]] -desc = "`IS_BIT`" -ref = "lt:a:range_signed" +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "d", "i"]] +iter = ["i", 0, 3] +multiplicity = "μ_sum" +ref = "lt:c:range_d" -# Constraints +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["signed"] +ref = "lt:c:range_signed" [[constraint_groups]] name = "sign_equality" @@ -214,9 +223,9 @@ name = "abs_diff" [[constraints.abs_diff]] kind = "interaction" -tag = "LT" -input = ["abs_r", "abs_d", 0] -output = ["not", "div_by_zero"] +tag = "ALU" +input = ["abs_r", "abs_d", ["opsel", "LT"]] +output = ["arr", ["not", "div_by_zero"], 0] multiplicity = "μ_sum" ref ="dvrm:c:abs_r_lt_abs_d" @@ -304,16 +313,16 @@ name = "equality" [[constraints.equality]] kind = "interaction" -tag = "MUL" -input = ["d", "signed", "q", "sign_q", 0] +tag = "ALU" +input = [["cast", "d", "DWordWL"], ["cast", "q", "DWordWL"], ["+", ["opsel", "MUL"], ["*", 32, "signed"], ["*", 64, "sign_q"]]] output = ["cast", "n_sub_r", "DWordWL"] multiplicity = "μ_sum" ref = "dvrm:c:mul_lower" [[constraints.equality]] kind = "interaction" -tag = "MUL" -input = ["d", "signed", "q", "sign_q", 1] +tag = "ALU" +input = [["cast", "d", "DWordWL"], ["cast", "q", "DWordWL"], ["+", ["opsel", "MUL"], ["*", 32, "signed"], ["*", 64, "sign_q"], 128]] output = ["cast", "extension_n_sub_r", "DWordWL"] multiplicity = "μ_sum" ref = "dvrm:c:mul_upper" @@ -375,14 +384,14 @@ desc = "Each row contributes the following to the LogUp sum" [[constraints.output]] kind = "interaction" -tag = "DVRM" -input = ["n", "d", "signed", 0] +tag = "ALU" +input = [["cast", "n", "DWordWL"], ["cast", "d", "DWordWL"], ["+", ["opsel", "DIVREM"], ["*", 32, "signed"]]] output = ["cast", "q", "DWordWL"] multiplicity = ["-", "μ_q"] [[constraints.output]] kind = "interaction" -tag = "DVRM" -input = ["n", "d", "signed", 1] +tag = "ALU" +input = [["cast", "n", "DWordWL"], ["cast", "d", "DWordWL"], ["+", ["opsel", "DIVREM"], ["*", 32, "signed"], 128]] output = ["cast", "r", "DWordWL"] multiplicity = ["-", "μ_r"] diff --git a/spec/src/eq.toml b/spec/src/eq.toml new file mode 100644 index 000000000..a06bb1b16 --- /dev/null +++ b/spec/src/eq.toml @@ -0,0 +1,93 @@ +name = "EQ" + +[[variables.input]] +name = "a" +type = "DWordWL" +desc = "The first input" +pad = 0 + +[[variables.input]] +name = "b" +type = "DWordWL" +desc = "The second input" +pad = 0 + +[[variables.input]] +name = "invert" +type = "Bit" +desc = "Whether to invert the result" +pad = 0 + +[[variables.output]] +name = "res" +type = "Bit" +desc = "The result" +pad = 0 + +[[variables.auxiliary]] +name = "diff" +type = "DWordHL" +desc = "The difference `a - b`" +pad = 0 + +[[variables.auxiliary]] +name = "eq" +type = "Bit" +desc = "The bit indicating `a == b`" +pad = 0 + +[[variables.multiplicity]] +name = "μ" +type = "BaseField" +desc = "" +pad = 0 + + +[[assumptions]] +desc = "`IS_WORD[a[i]]`" +iter = ["i", 0, 1] + +[[assumptions]] +desc = "`IS_WORD[b[i]]`" +iter = ["i", 0, 1] + + +[[constraint_groups]] +name = "all" + +[[constraints.all]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "diff", "i"]] +multiplicity = "μ" +iter = ["i", 0, 3] + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["invert"] + +[[constraints.all]] +kind = "template" +tag = "SUB" +input = ["a", "b"] +output = ["cast", "diff", "DWordWL"] + +[[constraints.all]] +kind = "interaction" +tag = "ZERO" +input = [["+", ["idx", "diff", 0], ["idx", "diff", 1], ["idx", "diff", 2], ["idx", "diff", 3]]] +output = "eq" +multiplicity = "μ" + +[[constraints.all]] +kind = "arith" +constraint = "$#`res` = #`eq` xor #`invert`$" +poly = ["-", ["+", "res", ["*", 2, "eq", "invert"]], "eq", "invert"] + +[[constraints.all]] +kind = "interaction" +tag = "ALU" +input = ["a", "b", ["+", ["opsel", "EQ"], ["*", 64, "invert"]]] +output = ["arr", "res", 0] +multiplicity = ["-", "μ"] diff --git a/spec/src/halt.toml b/spec/src/halt.toml index 9fee04877..6adc1efdb 100644 --- a/spec/src/halt.toml +++ b/spec/src/halt.toml @@ -5,6 +5,10 @@ name = "timestamp" type = "DWordWL" desc = "timestamp at which to halt the program" +[[variables.auxiliary]] +name = "pc" +type = "DWordWL" +desc = "The `next_pc` value the CPU wrote during the instruction HALT was invoked" [[assumptions]] desc = "`IS_WORD[timestamp[i]]`" @@ -40,10 +44,19 @@ ref = "halt:c:zeroize_registers_hi" [[constraints.all]] kind = "interaction" -tag = "MEMW" -input = [1, ["cast", ["*", 2, 255], "DWordWL"], ["arr", 1, 0, 0, 0, 0, 0, 0, 0], ["cast", ["-", ["^", 2, 64], 1], "DWordWL"], 1, 0, 0] +tag = "memory" +input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["arr", ["+", ["idx", "timestamp", 0], 1], ["idx", "timestamp", 1]], ["idx", "pc", "i"]] multiplicity = 1 -ref = "halt:c:pc" +iter = ["i", 0, 1] +ref = "halt:c:consume_pc" + +[[constraints.all]] +kind = "interaction" +tag = "memory" +input = [1, ["arr", ["+", ["*", 2, 255], "i"], 0], ["arr", ["+", ["idx", "timestamp", 0], 1], ["idx", "timestamp", 1]], ["idx", ["arr", 1, 0], "i"]] +multiplicity = -1 +iter = ["i", 0, 1] +ref = "halt:c:emit_pc" [[constraint_groups]] name = "lookup" diff --git a/spec/src/keccak_round.toml b/spec/src/keccak_round.toml index 59daba923..6ee05e29a 100644 --- a/spec/src/keccak_round.toml +++ b/spec/src/keccak_round.toml @@ -191,8 +191,8 @@ name = "theta" [[constraints.theta]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "start", "x"], 0], "z"], ["idx", ["idx", ["idx", "start", "x"], 1], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "start", "x"], 0], "z"], ["idx", ["idx", ["idx", "start", "x"], 1], "z"]] output = ["idx", ["idx", ["idx", "Cxz", "x"], 0], "z"] iters = [["x", 0, 4], ["z", 0, 7]] multiplicity = "μ" @@ -200,8 +200,8 @@ ref = "keccak:c:theta_cxz_start" [[constraints.theta]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "Cxz", "x"], ["-", "y", 2]], "z"], ["idx", ["idx", ["idx", "start", "x"], "y"], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "Cxz", "x"], ["-", "y", 2]], "z"], ["idx", ["idx", ["idx", "start", "x"], "y"], "z"]] output = ["idx", ["idx", ["idx", "Cxz", "x"], ["-", "y", 1]], "z"] iters = [["x", 0, 4], ["y", 2, 4], ["z", 0, 7]] multiplicity = "μ" @@ -237,8 +237,8 @@ iters = [["x", 0, 4], ["z", 0, 3]] [[constraints.theta]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "Cxz", ["mod", ["-", "x", 1], 5]], 3], "z"], ["idx", ["idx", "rotated_Cxz", ["mod", ["+", "x", 1], 5]], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "Cxz", ["mod", ["-", "x", 1], 5]], 3], "z"], ["idx", ["idx", "rotated_Cxz", ["mod", ["+", "x", 1], 5]], "z"]] output = ["idx", ["idx", "Dxz", "x"], "z"] iters = [["x", 0, 4], ["z", 0, 7]] multiplicity = "μ" @@ -246,8 +246,8 @@ ref = "keccak:c:Dxz" [[constraints.theta]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "start", "x"], "y"], "z"], ["idx", ["idx", "Dxz", "x"], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "start", "x"], "y"], "z"], ["idx", ["idx", "Dxz", "x"], "z"]] output = ["idx", ["idx", ["idx", "theta", "x"], "y"], "z"] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] multiplicity = "μ" @@ -289,16 +289,16 @@ name = "chi" [[constraints.chi]] kind = "interaction" -tag = "AND_BYTE" -input = [["-", 255, ["idx", ["idx", ["idx", "pi", ["mod", ["+", "x", 1], 5]], "y"], "z"]], ["idx",["idx",["idx", "pi", ["mod", ["+", "x", 2], 5]], "y"], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["-", 255, ["idx", ["idx", ["idx", "pi", ["mod", ["+", "x", 1], 5]], "y"], "z"]], ["idx",["idx",["idx", "pi", ["mod", ["+", "x", 2], 5]], "y"], "z"]] output = ["idx", ["idx", ["idx", "chi_ANDs", "x"], "y"], "z"] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] multiplicity = "μ" [[constraints.chi]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "pi", "x"], "y"], "z"], ["idx",["idx",["idx", "chi_ANDs", "x"], "y"], "z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "pi", "x"], "y"], "z"], ["idx",["idx",["idx", "chi_ANDs", "x"], "y"], "z"]] output = ["idx", ["idx", ["idx", "chi", "x"], "y"], "z"] iters = [["x", 0, 4], ["y", 0, 4], ["z", 0, 7]] multiplicity = "μ" @@ -308,8 +308,8 @@ name = "iota" [[constraints.iota]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", ["idx", ["idx", "chi", 0], 0], "z"], ["idx","rc","z"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", ["idx", ["idx", "chi", 0], 0], "z"], ["idx","rc","z"]] output = ["idx", "iota", "z"] iter = ["z", 0, 7] multiplicity = "μ" diff --git a/spec/src/load.toml b/spec/src/load.toml index f8a974c9a..e6cf56f00 100644 --- a/spec/src/load.toml +++ b/spec/src/load.toml @@ -5,7 +5,7 @@ name = "LOAD" [[variables.input]] name = "base_address" type = "DWordWL" -desc = "The base address to read/write from/to, gets offset by $[0, 7]$, depending on how big the access is" +desc = "The base address to read from, gets offset by $[0, 7]$, depending on how big the access is" pad = 0 [[variables.input]] @@ -76,27 +76,37 @@ desc = "`IS_WORD[base_address[i]]`" iter = ["i", 0, 1] [[assumptions]] -desc = "`IS_BIT`" +desc = "`IS_WORD[timestamp[i]]`" +iter = ["i", 0, 1] -[[assumptions]] -desc = "`IS_BIT`" -[[assumptions]] -desc = "`IS_BIT`" +[[constraint_groups]] +name = "all" -[[assumptions]] -desc = "`IS_BIT`" +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["signed"] -[[assumptions]] -desc = "`IS_BIT`" +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["read2"] -[[assumptions]] -desc = "`IS_WORD[timestamp[i]]`" -iter = ["i", 0, 1] +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["read4"] +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["read8"] -[[constraint_groups]] -name = "all" +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = [["+", "read2", "read4", "read8"]] [[constraints.all]] kind = "arith" @@ -154,7 +164,7 @@ name = "output" [[constraints.output]] kind = "interaction" -tag = "LOAD" -input = ["base_address", "timestamp", "read2", "read4", "read8", "signed"] +tag = "MEMOP" +input = ["timestamp", "base_address", ["cast", 0, "DWordWL"], ["+", ["*", 2, "signed"], ["*", 4, "read2"], ["*", 8, "read4"], ["*", 16, "read8"]]] output = ["cast", "res", "DWordWL"] multiplicity = ["-", "μ"] diff --git a/spec/src/lt.toml b/spec/src/lt.toml index 70d25c919..57661333d 100644 --- a/spec/src/lt.toml +++ b/spec/src/lt.toml @@ -21,12 +21,18 @@ type = "Bit" desc = "whether to interpret `lhs` and `rhs` as signed integers (1) or not (0)" pad = 0 +[[variables.input]] +name = "invert" +type = "Bit" +desc = "Whether to invert the result" +pad = 0 + # Output [[variables.output]] -name = "lt" +name = "res" type = "Bit" -desc = "Whether $#`lhs` < #`rhs`$, taking `signed` into account" +desc = "The result" pad = 0 @@ -50,6 +56,12 @@ type = "Bit" desc = "The most significant bit of `rhs`" pad = 0 +[[variables.auxiliary]] +name = "lt" +type = "Bit" +desc = "Whether $#`lhs` < #`rhs`$, taking `signed` into account" +pad = 0 + # Virtual [[variables.virtual]] @@ -85,10 +97,35 @@ ref = "lt:a:range_lhs" desc = "`IS_WORD[rhs[0]]`" ref = "lt:a:range_rhs" -[[assumptions]] -desc = "`IS_BIT`" -ref = "lt:a:range_signed" +[[constraint_groups]] +name = "range" +desc = "Range-check the inputs" + +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "lhs", 1]] +multiplicity = "μ" +ref = "lt:c:range_lhs" + +[[constraints.range]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "rhs", 1]] +multiplicity = "μ" +ref = "lt:c:range_rhs" + +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["signed"] +ref = "lt:c:range_signed" +[[constraints.range]] +kind = "template" +tag = "IS_BIT" +input = ["invert"] +ref = "lt:c:range_invert" [[constraint_groups]] name = "defs" @@ -117,6 +154,11 @@ desc = "Where $A = #`lhs_msb`$, $B = #`rhs_msb`$ and $C = #`carry[1]`$" poly = ["-", "lt", ["*", "signed", ["+", ["*", "lhs_msb", ["not", "rhs_msb"]], ["*", "lhs_msb", ["idx", "carry", 1]], ["*", ["not", "rhs_msb"], ["idx", "carry", 1]]]], ["*", ["-", 1, "signed"], "unsigned_lt"]] ref = "lt:c:lt" +[[constraints.defs]] +kind = "arith" +constraint = "$#`res` = #`lt` xor #`invert`$" +poly = ["-", ["+", "res", ["*", 2, "lt", "invert"]], "lt", "invert"] + [[constraint_groups]] name = "sub" @@ -128,20 +170,6 @@ tag = "IS_BIT" input = [["idx", "carry", "i"]] iter = ["i", 0, 1] -[[constraints.defs]] -kind = "interaction" -tag = "IS_HALF" -input = [["idx", "lhs", 1]] -multiplicity = "μ" -ref = "lt:c:range_lhs" - -[[constraints.defs]] -kind = "interaction" -tag = "IS_HALF" -input = [["idx", "rhs", 1]] -multiplicity = "μ" -ref = "lt:c:range_rhs" - [[constraints.sub]] kind = "interaction" tag = "IS_HALF" @@ -157,7 +185,7 @@ desc = "Each row contributes the following to the LogUp sum" [[constraints.output]] kind = "interaction" -tag = "LT" -input = [["cast", "lhs", "DWordWL"], ["cast", "rhs", "DWordWL"], "signed"] -output = "lt" +tag = "ALU" +input = [["cast", "lhs", "DWordWL"], ["cast", "rhs", "DWordWL"], ["+", ["opsel", "LT"], ["*", 32, "signed"], ["*", 64, "invert"]]] +output = ["arr", "res", 0] multiplicity = ["-", "μ"] diff --git a/spec/src/memw.toml b/spec/src/memw.toml index 1cc0dd3c2..c04fe5d34 100644 --- a/spec/src/memw.toml +++ b/spec/src/memw.toml @@ -130,6 +130,28 @@ desc = "`IS_BIT`" desc = "`IS_WORD[timestamp[i]]`" iter = ["i", 0, 1] +[[constraint_groups]] +name = "assumptions" + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write2"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write4"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write8"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = [["+", "write2", "write4", "write8"]] [[constraint_groups]] name = "consistency" @@ -162,31 +184,31 @@ iter = ["i", 0, 6] [[constraints.consistency]] kind = "interaction" -tag = "LT" -input = [["idx", "old_timestamp", 0], "timestamp", 0] -output = 1 +tag = "ALU" +input = [["idx", "old_timestamp", 0], "timestamp", ["opsel", "LT"]] +output = ["arr", 1, 0] multiplicity = "μ_sum" [[constraints.consistency]] kind = "interaction" -tag = "LT" -input = [["idx", "old_timestamp", 1], "timestamp", 0] -output = 1 +tag = "ALU" +input = [["idx", "old_timestamp", 1], "timestamp", ["opsel", "LT"]] +output = ["arr", 1, 0] multiplicity = "w2" [[constraints.consistency]] kind = "interaction" -tag = "LT" -input = [["idx", "old_timestamp", "i"], "timestamp", 0] -output = 1 +tag = "ALU" +input = [["idx", "old_timestamp", "i"], "timestamp", ["opsel", "LT"]] +output = ["arr", 1, 0] iter = ["i", 2, 3] multiplicity = "w4" [[constraints.consistency]] kind = "interaction" -tag = "LT" -input = [["idx", "old_timestamp", "i"], "timestamp", 0] -output = 1 +tag = "ALU" +input = [["idx", "old_timestamp", "i"], "timestamp", ["opsel", "LT"]] +output = ["arr", 1, 0] iter = ["i", 4, 7] multiplicity = "write8" diff --git a/spec/src/memw_aligned.toml b/spec/src/memw_aligned.toml index 93a636aba..0e0e20d5d 100644 --- a/spec/src/memw_aligned.toml +++ b/spec/src/memw_aligned.toml @@ -118,6 +118,28 @@ desc = "`IS_BIT`" desc = "`IS_WORD[timestamp[i]]`" iter = ["i", 0, 1] +[[constraint_groups]] +name = "assumptions" + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write2"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write4"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = ["write8"] + +[[constraints.assumptions]] +kind = "template" +tag = "IS_BIT" +input = [["+", "write2", "write4", "write8"]] [[constraint_groups]] name = "consistency" @@ -150,9 +172,9 @@ poly = ["*", "w2", ["not", "μ_sum"]] [[constraints.consistency]] kind = "interaction" -tag = "LT" -input = ["old_timestamp", "timestamp", 0] -output = 1 +tag = "ALU" +input = ["old_timestamp", "timestamp", ["opsel", "LT"]] +output = ["arr", 1, 0] multiplicity = "μ_sum" [[constraint_groups]] diff --git a/spec/src/mul.toml b/spec/src/mul.toml index a798c682d..e1837020a 100644 --- a/spec/src/mul.toml +++ b/spec/src/mul.toml @@ -121,22 +121,36 @@ type = "BaseField" desc = "" pad = 0 -# Assumptions - -[[assumptions]] -desc = "`IS_HALF[lhs[i]]`" -iter = ["i", 0, 3] - -[[assumptions]] -desc = "`IS_HALF[rhs[i]]`" -iter = ["i", 0, 3] -ref = "mul:a:rhs" - # Constraints [[constraint_groups]] name = "def" +[[constraints.def]] +kind = "template" +tag = "IS_BIT" +input = ["lhs_signed"] + +[[constraints.def]] +kind = "template" +tag = "IS_BIT" +input = ["rhs_signed"] + +[[constraints.def]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "lhs", "i"]] +multiplicity = "μ_sum" +iter = ["i", 0, 3] + +[[constraints.def]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "rhs", "i"]] +multiplicity = "μ_sum" +iter = ["i", 0, 3] +ref = "mul:c:rhs" + [[constraints.def]] kind = "template" tag = "SIGN" @@ -191,16 +205,16 @@ name = "lookup" [[constraints.lookup]] kind = "interaction" -tag = "MUL" -input = ["lhs", "lhs_signed", "rhs", "rhs_signed", 0] +tag = "ALU" +input = [["cast", "lhs", "DWordWL"], ["cast", "rhs", "DWordWL"], ["+", ["opsel", "MUL"], ["*", 32, "lhs_signed"], ["*", 64, "rhs_signed"]]] output = ["cast", "lo", "DWordWL"] multiplicity = ["-", "μ_lo"] ref = "mul:c:lookup_lo" [[constraints.lookup]] kind = "interaction" -tag = "MUL" -input = ["lhs", "lhs_signed", "rhs", "rhs_signed", 1] +tag = "ALU" +input = [["cast", "lhs", "DWordWL"], ["cast", "rhs", "DWordWL"], ["+", ["opsel", "MUL"], ["*", 32, "lhs_signed"], ["*", 64, "rhs_signed"], 128]] output = ["cast", "hi", "DWordWL"] multiplicity = ["-", "μ_hi"] ref = "mul:c:lookup_hi" diff --git a/spec/src/rotxor.toml b/spec/src/rotxor.toml index f1ff904b0..c3c5ce343 100644 --- a/spec/src/rotxor.toml +++ b/spec/src/rotxor.toml @@ -161,16 +161,16 @@ name = "xor" [[constraints.xor]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", "a0", "i"], ["idx", "a1", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", "a0", "i"], ["idx", "a1", "i"]] output = ["idx", "a01", "i"] multiplicity = "μ" iter = ["i", 0, 3] [[constraints.xor]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", "a01", "i"], ["idx", "a2", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", "a01", "i"], ["idx", "a2", "i"]] output = ["idx", "out", "i"] multiplicity = "μ" iter = ["i", 0, 3] diff --git a/spec/src/sha256round.toml b/spec/src/sha256round.toml index 2469b560c..45da4d452 100644 --- a/spec/src/sha256round.toml +++ b/spec/src/sha256round.toml @@ -177,40 +177,40 @@ name = "value" [[constraints.value]] kind = "interaction" -tag = "AND_BYTE" -input = [["idx", "a", "i"], ["idx", "b", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["idx", "a", "i"], ["idx", "b", "i"]] output = ["idx", "a_and_b", "i"] multiplicity = "μ" iter = ["i", 0, 3] [[constraints.value]] kind = "interaction" -tag = "XOR_BYTE" -input = [["idx", "a", "i"], ["idx", "b", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "XOR"], ["idx", "a", "i"], ["idx", "b", "i"]] output = ["idx", "a_xor_b", "i"] multiplicity = "μ" iter = ["i", 0, 3] [[constraints.value]] kind = "interaction" -tag = "AND_BYTE" -input = [["idx", "c", "i"], ["idx", "a_xor_b", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["idx", "c", "i"], ["idx", "a_xor_b", "i"]] output = ["idx", "c_and_a_xor_b", "i"] multiplicity = "μ" iter = ["i", 0, 3] [[constraints.value]] kind = "interaction" -tag = "AND_BYTE" -input = [["idx", "e", "i"], ["idx", "f", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["idx", "e", "i"], ["idx", "f", "i"]] output = ["idx", "e_and_f", "i"] multiplicity = "μ" iter = ["i", 0, 3] [[constraints.value]] kind = "interaction" -tag = "AND_BYTE" -input = [["-", 255, ["idx", "e", "i"]], ["idx", "g", "i"]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["-", 255, ["idx", "e", "i"]], ["idx", "g", "i"]] output = ["idx", "not_e_and_g", "i"] multiplicity = "μ" iter = ["i", 0, 3] diff --git a/spec/src/shift.toml b/spec/src/shift.toml index 18c03ecad..a7f3a5f43 100644 --- a/spec/src/shift.toml +++ b/spec/src/shift.toml @@ -10,7 +10,7 @@ pad = 0 [[variables.input]] name = "shift" -type = "Byte" +type = "DWordWHBB" desc = "Number of bits to shift `in` by." pad = 0 @@ -137,32 +137,49 @@ type = "Bit" desc = "" pad = 0 +# Constraints +[[constraint_groups]] +name = "input" -# Assumptions - -[[assumptions]] -desc = "`IS_HALF[in[i]]`" +[[constraints.input]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "in", "i"]] iter = ["i", 0, 3] -ref = "shift:a:range_in" +multiplicity = "μ" +ref = "shift:c:range_in" -[[assumptions]] -desc = "`IS_BYTE`" -ref = "shift:a:range_shift" +[[constraints.input]] +kind = "interaction" +tag = "IS_HALF" +input = [["idx", "shift", 2]] +multiplicity = "μ" -[[assumptions]] -desc = "`IS_BIT`" -ref = "shift:a:direction" +[[constraints.input]] +kind = "template" +tag = "IS_BYTE" +input = [["idx", "shift", "i"]] +iter = ["i", 0, 1] +ref = "shift:c:range_shift" -[[assumptions]] -desc = "`IS_BIT`" -ref = "shift:a:signed" +[[constraints.input]] +kind = "template" +tag = "IS_BIT" +input = ["direction"] +ref = "shift:c:direction" -[[assumptions]] -desc = "`IS_BIT`" -ref = "shift:a:word_instr" +[[constraints.input]] +kind = "template" +tag = "IS_BIT" +input = ["signed"] +ref = "shift:c:signed" -# Constraints +[[constraints.input]] +kind = "template" +tag = "IS_BIT" +input = ["word_instr"] +ref = "shift:c:word_instr" [[constraint_groups]] name = "left_flag" @@ -192,16 +209,16 @@ name = "bit_shift" [[constraints.bit_shift]] kind = "interaction" -tag = "AND_BYTE" -input = ["shift", 0x0F] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["idx", "shift", 0], 0x0F] output = "bit_shift" ref = "shift:c:bit_shift_if_left" multiplicity = "left" [[constraints.bit_shift]] kind = "interaction" -tag = "AND_BYTE" -input = [["-", ["^", 2, 8], ["*", 16, "zbs"], "shift"], 0x0F] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["-", ["^", 2, 8], ["*", 16, "zbs"], ["idx", "shift", 0]], 0x0F] output = "bit_shift" ref = "shift:c:bit_shift_if_right" multiplicity = "right" @@ -268,8 +285,8 @@ ref = "shift:c:limb_shift_is_bit" [[constraints.limb_shifting]] kind = "interaction" -tag = "AND_BYTE" -input = ["shift", ["-", 0x30, ["*", 0x20, "word_instr"]]] +tag = "BYTE_ALU" +input = [["opsel", "AND"], ["idx", "shift", 0], ["-", 0x30, ["*", 0x20, "word_instr"]]] output = ["+", ["-", 1, ["idx", "limb_shift", 0]], ["*", 15, ["idx", "limb_shift", 1]], ["*", 31, ["idx", "limb_shift", 2]], ["*", 47, ["idx", "limb_shift", 3]]] ref = "shift:c:limb_shift_lookup" multiplicity = "μ" @@ -289,8 +306,8 @@ name = "lookups" [[constraints.lookups]] kind = "interaction" -tag = "SHIFT" -input = ["in", "shift", "direction", "signed", "word_instr"] +tag = "ALU" +input = [["cast", "in", "DWordWL"], ["cast", "shift", "DWordWL"], ["+", ["opsel", "SHIFT"], "word_instr", ["*", 32, "signed"], ["*", 64, "direction"]]] output = "out" multiplicity = ["-", "μ"] ref = "shift:c:lookup" diff --git a/spec/src/signatures.toml b/spec/src/signatures.toml index e93c87b05..6ea23bb9e 100644 --- a/spec/src/signatures.toml +++ b/spec/src/signatures.toml @@ -49,11 +49,25 @@ tag = "DECODE" kind = "interaction" input = ["DWordWL", "DWordWL", "BaseField"] -# SHIFT[out; in, shift, direction, signed, word_instr] +# CPU32[half_instruction_length; timestamp, pc] [[signatures]] -tag = "SHIFT" +tag = "CPU32" kind = "interaction" -input = ["DWordHL", "Byte", "Bit", "Bit", "Bit"] +input = ["DWordWL", "DWordWL"] +output = "Byte" + +# ALU[out; in1, in2, flags] +[[signatures]] +tag = "ALU" +kind = "interaction" +input = ["DWordWL", "DWordWL", "Byte"] +output = "DWordWL" + +# MEMOP[out; timestamp, address, value, flags] +[[signatures]] +tag = "MEMOP" +kind = "interaction" +input = ["DWordWL", "DWordWL", "DWordWL", "Byte"] output = "DWordWL" # BRANCH[next_pc; pc, offset, register, JALR] @@ -76,32 +90,11 @@ tag = "MEMW" kind = "interaction" input = ["Bit", "DWordWL", ["BaseField", 8], "DWordWL", "Bit", "Bit", "Bit"] -# LT[lt; lhs, rhs, signed] -[[signatures]] -tag = "LT" -kind = "interaction" -input = ["DWordWL", "DWordWL", "Bit"] -output = "Bit" - -# MUL[lo/hi; lhs, lhs_signed, rhs, rhs_signed, 0/1] -[[signatures]] -tag = "MUL" -kind = "interaction" -input = ["DWordHL", "Bit", "DWordHL", "Bit", "Bit"] -output = "DWordWL" - -# DVRM[q/r; n, d, signed, 0/1] -[[signatures]] -tag = "DVRM" -kind = "interaction" -input = ["DWordHL", "DWordHL", "Bit", "Bit"] -output = "DWordWL" - -# LOAD[res; base_address, timestamp, read2, read4, read8, signed] +# LOAD[res; base_address, timestamp, flags] [[signatures]] tag = "LOAD" kind = "interaction" -input = ["DWordWL", "DWordWL", "Bit", "Bit", "Bit", "Bit"] +input = ["DWordWL", "DWordWL", "Byte"] output = "DWordWL" # ECALL[timestamp, syscallnr] @@ -122,25 +115,11 @@ tag = "COMMIT" kind = "interaction" input = ["BaseField", "Byte"] -# AND_BYTE[res; X, Y] -[[signatures]] -tag = "AND_BYTE" -kind = "interaction" -input = ["Byte", "Byte"] -output = "Byte" - -# OR_BYTE[res; X, Y] -[[signatures]] -tag = "OR_BYTE" -kind = "interaction" -input = ["Byte", "Byte"] -output = "Byte" - -# XOR_BYTE[res; X, Y] +# BYTE_ALU[res; selector, X, Y] [[signatures]] -tag = "XOR_BYTE" +tag = "BYTE_ALU" kind = "interaction" -input = ["Byte", "Byte"] +input = ["Byte", "Byte", "Byte"] output = "Byte" # MSB8[msb; X] diff --git a/spec/src/store.toml b/spec/src/store.toml new file mode 100644 index 000000000..2d97dd00b --- /dev/null +++ b/spec/src/store.toml @@ -0,0 +1,122 @@ +name = "STORE" + +# Input + +[[variables.input]] +name = "base_address" +type = "DWordWL" +desc = "The base address to write to, gets offset by $[0, 7]$, depending on how big the access is" +pad = 0 + +[[variables.input]] +name = "timestamp" +type = "DWordWL" +desc = "The timestamp at which this memory access is said to occur" +pad = 0 + +[[variables.input]] +name = "write2" +type = "Bit" +desc = "Whether to write exactly 2 bytes" +pad = 0 + +[[variables.input]] +name = "write4" +type = "Bit" +desc = "Whether to write exactly 4 bytes" +pad = 0 + +[[variables.input]] +name = "write8" +type = "Bit" +desc = "Whether to write exactly 8 bytes" +pad = 0 + +[[variables.input]] +name = "value" +type = "DWordBL" +desc = "The value to store" +pad = 0 + +# Virtual + +[[variables.virtual]] +name = "write1" +type = "Bit" +desc = "Whether to write exactly 1 byte" +def = ["-", "μ", "write2", "write4", "write8"] + +# Multiplicity + +[[variables.multiplicity]] +name = "μ" +type = "Bit" +desc = "" +pad = 0 + + +[[assumptions]] +desc = "`IS_WORD[base_address[i]]`" +iter = ["i", 0, 1] + +[[assumptions]] +desc = "`IS_WORD[timestamp[i]]`" +iter = ["i", 0, 1] + + +[[constraint_groups]] +name = "all" + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["μ"] + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["write2"] + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["write4"] + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = ["write8"] + +[[constraints.all]] +kind = "template" +tag = "IS_BIT" +input = [["+", "write2", "write4", "write8"]] + +[[constraints.all]] +kind = "arith" +constraint = "$#`write2` + #`write4` + #`write8` => #`μ` = 1$" +poly = ["*", ["+", "write2", "write4", "write8"], ["not", "μ"]] + +[[constraints.all]] +kind = "template" +tag = "IS_BYTE" +input = [["idx", "value", "i"]] +cond = "μ" +iter = ["i", 0, 7] + +[[constraints.all]] +kind = "interaction" +tag = "MEMW" +input = [0, "base_address", "value", "timestamp", "write2", "write4", "write8"] +multiplicity = "μ" + + +[[constraint_groups]] +name = "output" + +[[constraints.output]] +kind = "interaction" +tag = "MEMOP" +input = ["timestamp", "base_address", ["cast", "value", "DWordWL"], ["+", 1, ["*", 4, "write2"], ["*", 8, "write4"], ["*", 16, "write8"]]] +output = ["cast", 0, "DWordWL"] +multiplicity = ["-", "μ"] diff --git a/spec/store.typ b/spec/store.typ new file mode 100644 index 000000000..5b9872b58 --- /dev/null +++ b/spec/store.typ @@ -0,0 +1,47 @@ +#import "/book.typ": book-page, rj +#import "/src.typ": load_config, load_chip +#import "/chip.typ": ( + render_chip_assumptions, + render_chip_variable_table, + render_chip_padding_table, + render_constraint_table, + compute_nr_interactions, + total_nr_instantiated_columns, + total_nr_variables, +) + +#let config = load_config() +#let chip = load_chip("src/store.toml", config) + +#show: book-page(chip.name) +#let store = raw(chip.name) + +The #store chip provides functionality to store a value to memory. +It decomposes a `DWord` into bytes and delegates low-level memory handling to the `MEMW` chip (@memw). + += Variables +#let nr_variables = total_nr_variables(chip) +#let nr_columns = total_nr_instantiated_columns(chip, config) +#let nr_interactions = compute_nr_interactions(chip) + +The #store chip is comprised of #nr_variables variables that are expressed using #nr_columns columns and leverages #nr_interactions interaction(s): +#render_chip_variable_table(chip, config) + += Assumptions +#render_chip_assumptions(chip, config) + += Constraints +The chip delegates the actual memory interaction to the `MEMW` chip, +and ensures the values are proper bytes. + +#render_constraint_table(chip, config, groups: "all") + +The chip contributes the following to the lookup argument. + +#render_constraint_table(chip, config, groups: "output") + += Padding + +The table can be padded to the next power of two with the following value assignments: + +#render_chip_padding_table(chip, config) diff --git a/spec/tooling/chip.py b/spec/tooling/chip.py index 7f7ecca81..44cbbed83 100644 --- a/spec/tooling/chip.py +++ b/spec/tooling/chip.py @@ -92,6 +92,8 @@ def constant_fits(cst: int, target: Type) -> bool: | DummyExpr ) +OPSEL = ["AND", "OR", "XOR", "EQ", "LT", "SHIFT", "SHIFTW", "MUL", "DIVREM"] + @dataclass class Environment: @@ -351,6 +353,11 @@ def build_expr(config: Optional["Config"], data: object) -> Expr: x.isidentifier(), f"Invalid identifier name for variable {x!r}" ) return VarExpr(x) + case ["opsel", str(x)]: + if x not in OPSEL: + reporter.error(f"Unknown operation selector: {x!r}") + return LitExpr(0) + return LitExpr(OPSEL.index(x)) case ["arr", *elems]: return ArrExpr([build_expr(config, e) for e in elems]) case ["idx", x, y]: