diff --git a/Cargo.lock b/Cargo.lock index b43f7308e..b4c950a82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3686,6 +3686,7 @@ dependencies = [ "async-trait", "bincode 1.3.3", "e3-config", + "e3-crypto", "e3-data", "e3-events", "e3-fhe-params", diff --git a/crates/ciphernode-builder/src/ciphernode_builder.rs b/crates/ciphernode-builder/src/ciphernode_builder.rs index 9a3042437..7e5a2a51b 100644 --- a/crates/ciphernode-builder/src/ciphernode_builder.rs +++ b/crates/ciphernode-builder/src/ciphernode_builder.rs @@ -32,6 +32,7 @@ use e3_net::{ create_channel_bridge, setup_libp2p_keypair, setup_net, setup_net_interface, NetRepositoryFactory, }; +use e3_request::E3CipherExtension; use e3_request::E3LifecycleCoordinator; use e3_request::E3Router; use e3_slashing::{AccusationManagerExtension, CommitmentConsistencyCheckerExtension}; @@ -692,9 +693,13 @@ impl CiphernodeBuilder { ) -> Result { let mut e3_builder = E3Router::builder(bus, store.clone()); + // ── Per-E3 forward-secrecy cipher (must be first so later extensions can consume it) ── + info!("Setting up E3CipherExtension (forward secrecy)"); + e3_builder = e3_builder.with(E3CipherExtension::create(&self.cipher)); + // ── Threshold keyshare + ZK actors ── if let Some(KeyshareKind::Threshold) = self.keyshare { - let _ = self.ensure_multithread(bus); + let _ = self.ensure_multithread(bus, &store); let backend = self .zk_backend .as_ref() @@ -716,7 +721,7 @@ impl CiphernodeBuilder { e3_builder = e3_builder.with(FheExtension::create(bus, &self.rng)); info!("Setting up PublicKeyAggregationExtension"); - let _ = self.ensure_multithread(bus); + let _ = self.ensure_multithread(bus, &store); e3_builder = e3_builder.with(PublicKeyAggregatorExtension::create(bus)); if self.keyshare.is_none() { @@ -733,7 +738,7 @@ impl CiphernodeBuilder { // ── Threshold plaintext aggregation ── if self.threshold_plaintext_agg { info!("Setting up ThresholdPlaintextAggregatorExtension"); - let _ = self.ensure_multithread(bus); + let _ = self.ensure_multithread(bus, &store); e3_builder = e3_builder.with(ThresholdPlaintextAggregatorExtension::create( bus, sortition, )); @@ -796,7 +801,11 @@ impl CiphernodeBuilder { } } - fn ensure_multithread(&mut self, bus: &BusHandle) -> Addr { + fn ensure_multithread( + &mut self, + bus: &BusHandle, + store: &e3_data::DataStore, + ) -> Addr { if let Some(cached) = self.multithread_cache.clone() { return cached; } @@ -816,6 +825,7 @@ impl CiphernodeBuilder { bus, self.rng.clone(), self.cipher.clone(), + store.clone(), task_pool, self.multithread_report.clone(), backend, @@ -825,6 +835,7 @@ impl CiphernodeBuilder { bus, self.rng.clone(), self.cipher.clone(), + store.clone(), task_pool, self.multithread_report.clone(), ) diff --git a/crates/crypto/src/cipher.rs b/crates/crypto/src/cipher.rs index 30568021a..a09781238 100644 --- a/crates/crypto/src/cipher.rs +++ b/crates/crypto/src/cipher.rs @@ -120,6 +120,30 @@ impl Cipher { Ok(Self { key }) } + /// Create a `Cipher` from raw 32-byte key material (no KDF). Used for per-E3 ephemeral + /// keys that are already uniformly random (generated by `Cipher::generate()`). + pub fn from_key_bytes(key: impl Into>) -> Result { + let key = Zeroizing::new(key.into()); + anyhow::ensure!( + key.len() == ARGON2_OUTPUT_LEN, + "key must be exactly 32 bytes" + ); + Ok(Self { key }) + } + + /// Generate a fresh random 32-byte `Cipher`. Used to create per-E3 ephemeral keys. + pub fn generate() -> Result { + let mut raw = vec![0u8; ARGON2_OUTPUT_LEN]; + rand::rng().fill_bytes(&mut raw); + Self::from_key_bytes(raw) + } + + /// Export the raw key bytes so they can be encrypted and persisted. + /// The returned bytes are wrapped in `Zeroizing` and must be handled with care. + pub fn key_bytes(&self) -> &Zeroizing> { + &self.key + } + pub async fn from_password(value: &str) -> Result { Self::new(InMemPasswordManager::from_str(value)).await } diff --git a/crates/events/src/enclave_event/e3_failed.rs b/crates/events/src/enclave_event/e3_failed.rs index ebce4b531..2e7ea8062 100644 --- a/crates/events/src/enclave_event/e3_failed.rs +++ b/crates/events/src/enclave_event/e3_failed.rs @@ -47,6 +47,21 @@ pub struct E3Failed { pub reason: FailureReason, } +impl FailureReason { + /// Returns true when the failure was caused purely by a deadline expiring rather + /// than by a node acting maliciously. Timeout failures have no associated + /// accusation/slashing lifecycle, so their E3 context can be torn down immediately. + pub fn is_timeout(&self) -> bool { + matches!( + self, + Self::CommitteeFormationTimeout + | Self::DKGTimeout + | Self::ComputeTimeout + | Self::DecryptionTimeout + ) + } +} + impl Display for E3Failed { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( diff --git a/crates/events/src/store_keys.rs b/crates/events/src/store_keys.rs index 8ab5789cd..dac26abbf 100644 --- a/crates/events/src/store_keys.rs +++ b/crates/events/src/store_keys.rs @@ -49,6 +49,10 @@ impl StoreKeys { String::from("//sortition") } + pub fn e3_key(e3_id: &E3id) -> String { + format!("//e3_keys/{e3_id}") + } + pub fn eth_private_key() -> String { String::from("//eth_private_key") } diff --git a/crates/keyshare/src/actors/threshold_keyshare.rs b/crates/keyshare/src/actors/threshold_keyshare.rs index c41bcdcbc..29650f318 100644 --- a/crates/keyshare/src/actors/threshold_keyshare.rs +++ b/crates/keyshare/src/actors/threshold_keyshare.rs @@ -108,6 +108,8 @@ impl AllThresholdSharesCollected { pub struct ThresholdKeyshareParams { pub bus: BusHandle, + /// Per-E3 forward-secrecy cipher. `SensitiveBytes` sent to the shared `Multithread` compute + /// actor are decrypted there with the same per-E3 cipher, which it resolves by `e3_id`. pub cipher: Arc, pub state: Persistable, pub share_enc_preset: BfvPreset, @@ -873,7 +875,8 @@ impl ThresholdKeyshare { bail!("Invalid state - expected GeneratingThresholdShare with all data"); }; - // Decrypt our shares from local storage + // Decrypt our shares from local storage. sk_sss and esi_sss were produced by the + // Multithread compute actor under this round's per-E3 cipher. let decrypted_sk_sss: SharedSecret = sk_sss.decrypt(&self.cipher)?; let decrypted_esi_sss: Vec = esi_sss .into_iter() @@ -2047,7 +2050,7 @@ impl Handler for ThresholdKeyshare { self.bus.publish_without_context(E3Failed { e3_id: msg.e3_id, failed_at_stage: E3Stage::CommitteeFinalized, - reason: FailureReason::InsufficientCommitteeMembers, + reason: FailureReason::DKGTimeout, })?; // Stop this actor since we can't proceed without all encryption keys @@ -2081,7 +2084,7 @@ impl Handler for ThresholdKeyshare { self.bus.publish_without_context(E3Failed { e3_id: msg.e3_id, failed_at_stage: E3Stage::CommitteeFinalized, - reason: FailureReason::InsufficientCommitteeMembers, + reason: FailureReason::DKGTimeout, })?; ctx.stop(); @@ -2129,7 +2132,7 @@ impl Handler for ThresholdKeyshare { self.bus.publish_without_context(E3Failed { e3_id: msg.e3_id.clone(), failed_at_stage: E3Stage::CommitteeFinalized, - reason: FailureReason::InsufficientCommitteeMembers, + reason: FailureReason::DecryptionTimeout, })?; ctx.stop(); @@ -2217,9 +2220,10 @@ mod tests { E3id, )> { let (bus, history) = test_bus(); + let cipher = Arc::new(Cipher::from_password("test-password").await?); let actor = ThresholdKeyshare::new(ThresholdKeyshareParams { bus, - cipher: Arc::new(Cipher::from_password("test-password").await?), + cipher, state: test_state(), share_enc_preset: DEFAULT_BFV_PRESET, }) @@ -2269,7 +2273,7 @@ mod tests { EnclaveEventData::E3Failed(data) if data.e3_id == failure.e3_id && data.failed_at_stage == E3Stage::CommitteeFinalized - && data.reason == FailureReason::InsufficientCommitteeMembers + && data.reason == FailureReason::DKGTimeout )); Ok(()) @@ -2300,7 +2304,7 @@ mod tests { EnclaveEventData::E3Failed(data) if data.e3_id == failure.e3_id && data.failed_at_stage == E3Stage::CommitteeFinalized - && data.reason == FailureReason::InsufficientCommitteeMembers + && data.reason == FailureReason::DKGTimeout )); Ok(()) @@ -2323,9 +2327,61 @@ mod tests { EnclaveEventData::E3Failed(data) if data.e3_id == failure.e3_id && data.failed_at_stage == E3Stage::CommitteeFinalized - && data.reason == FailureReason::InsufficientCommitteeMembers + && data.reason == FailureReason::DecryptionTimeout )); Ok(()) } + + // ── cipher boundary tests ──────────────────────────────────────────────── + // + // Forward-secrecy contract: the keyshare actor encrypts ALL SensitiveBytes — both + // at-rest shares and data sent to the shared Multithread compute actor — with this + // round's per-E3 cipher. Multithread resolves the same per-E3 cipher by `e3_id`, so a + // single key must round-trip and any other key must fail. (Multithread holding a + // different key is exactly the cipher-mismatch class of bug these tests guard against.) + + #[test] + fn per_e3_cipher_round_trips_compute_bound_shares() { + use e3_trbfv::shares::{Encrypted, SharedSecret}; + use ndarray::Array2; + + let per_e3 = Arc::new(Cipher::from_key_bytes(vec![0xAAu8; 32]).unwrap()); + let other = Arc::new(Cipher::from_key_bytes(vec![0xBBu8; 32]).unwrap()); + + // sk_sss / esi_sss are encrypted by the actor with the per-E3 cipher and decrypted + // by Multithread with the per-E3 cipher it resolves for the same e3_id. + let secret = SharedSecret::new(vec![Array2::zeros((2, 4))]); + let encrypted = Encrypted::new(secret, &per_e3).unwrap(); + + assert!( + encrypted.clone().decrypt(&per_e3).is_ok(), + "the round's per-E3 cipher must decrypt compute-bound shares" + ); + assert!( + encrypted.decrypt(&other).is_err(), + "a cipher for a different round must not decrypt these shares" + ); + } + + #[test] + fn per_e3_cipher_round_trips_own_shares() { + // own_sk_share_raw / own_esi_shares_raw are encrypted by the actor with the per-E3 + // cipher (and later forwarded to Multithread C4 under the same key). + use e3_crypto::SensitiveBytes; + + let per_e3 = Arc::new(Cipher::from_key_bytes(vec![0xAAu8; 32]).unwrap()); + let other = Arc::new(Cipher::from_key_bytes(vec![0xBBu8; 32]).unwrap()); + + let own_share = SensitiveBytes::new(b"own share data".to_vec(), &per_e3).unwrap(); + + assert!( + own_share.clone().access(&per_e3).is_ok(), + "per-E3 cipher must decrypt own share data" + ); + assert!( + own_share.access(&other).is_err(), + "a different cipher must not decrypt own share data" + ); + } } diff --git a/crates/keyshare/src/domain/share_generation.rs b/crates/keyshare/src/domain/share_generation.rs index d8d3cb59d..b9aa8e9da 100644 --- a/crates/keyshare/src/domain/share_generation.rs +++ b/crates/keyshare/src/domain/share_generation.rs @@ -61,6 +61,8 @@ pub(crate) struct SharesGeneratedPlan { /// requests for this party's freshly generated DKG share material. #[allow(clippy::too_many_arguments)] pub(crate) fn build_shares_generated_plan( + // Per-E3 forward-secrecy cipher. All `SensitiveBytes` here are either stored at rest or sent + // to the Multithread compute actor, which resolves the same per-E3 cipher by `e3_id`. cipher: &Cipher, share_enc_preset: BfvPreset, party_id: u64, @@ -117,7 +119,7 @@ pub(crate) fn build_shares_generated_plan( ) })?; - // Serialize for C2a/C2b proof requests (encrypted at rest) + // Serialize for C2a/C2b proof requests (encrypted at rest, decrypted by Multithread). let sk_sss_raw = SensitiveBytes::new( bincode::serialize(&decrypted_sk_sss) .map_err(|e| anyhow!("Failed to serialize sk_sss: {}", e))?, diff --git a/crates/keyshare/src/ext.rs b/crates/keyshare/src/ext.rs index 00f18dfd7..1042ed0c0 100644 --- a/crates/keyshare/src/ext.rs +++ b/crates/keyshare/src/ext.rs @@ -14,14 +14,17 @@ use async_trait::async_trait; use e3_crypto::Cipher; use e3_data::{AutoPersist, RepositoriesFactory}; use e3_events::{prelude::*, BusHandle, EType, EnclaveEvent, EnclaveEventData}; -use e3_request::{E3Context, E3ContextSnapshot, E3Extension, META_KEY}; +use e3_request::{E3Context, E3ContextSnapshot, E3Extension, E3_CIPHER_KEY, META_KEY}; use crate::KeyshareState; use std::sync::Arc; pub struct ThresholdKeyshareExtension { bus: BusHandle, - cipher: Arc, + /// Fallback cipher used when no per-E3 cipher is present in the context. + /// Normally `E3CipherExtension` is registered first and provides a per-E3 + /// cipher; this field exists for backward compatibility and testing. + master_cipher: Arc, address: String, } @@ -29,10 +32,16 @@ impl ThresholdKeyshareExtension { pub fn create(bus: &BusHandle, cipher: &Arc, address: &str) -> Box { Box::new(Self { bus: bus.clone(), - cipher: cipher.to_owned(), + master_cipher: cipher.to_owned(), address: address.to_owned(), }) } + + /// Return the per-E3 cipher if available, otherwise fall back to the master cipher. + fn resolve_cipher<'a>(&'a self, ctx: &'a E3Context) -> &'a Arc { + ctx.get_dependency(E3_CIPHER_KEY) + .unwrap_or(&self.master_cipher) + } } const ERROR_KEYSHARE_META_MISSING: &str = @@ -57,6 +66,7 @@ impl E3Extension for ThresholdKeyshareExtension { .err(EType::KeyGeneration, anyhow!(ERROR_KEYSHARE_META_MISSING)); return; }; + let cipher = self.resolve_cipher(ctx).clone(); let repo = ctx.repositories().threshold_keyshare(&e3_id); let container = repo.send(Some(ThresholdKeyshareState::new( e3_id.clone(), @@ -75,7 +85,7 @@ impl E3Extension for ThresholdKeyshareExtension { Some( ThresholdKeyshare::new(ThresholdKeyshareParams { bus: self.bus.clone(), - cipher: self.cipher.clone(), + cipher, state: container, share_enc_preset: meta .params_preset @@ -114,10 +124,12 @@ impl E3Extension for ThresholdKeyshareExtension { .dkg_counterpart() .unwrap_or(meta.params_preset); + let cipher = self.resolve_cipher(ctx).clone(); + // Construct from snapshot let value = ThresholdKeyshare::new(ThresholdKeyshareParams { bus: self.bus.clone(), - cipher: self.cipher.clone(), + cipher, state, share_enc_preset, }) diff --git a/crates/multithread/src/multithread.rs b/crates/multithread/src/multithread.rs index e6b8ac867..0ab4ca387 100644 --- a/crates/multithread/src/multithread.rs +++ b/crates/multithread/src/multithread.rs @@ -6,7 +6,9 @@ #![allow(clippy::result_large_err)] +use std::collections::HashMap; use std::sync::Arc; +use std::sync::Mutex; use std::thread; use std::time::Duration; use std::time::Instant; @@ -19,8 +21,11 @@ use actix::prelude::*; use actix::{Actor, Handler}; use anyhow::Result; use e3_crypto::Cipher; +use e3_data::DataStore; use e3_events::run_once; use e3_events::trap_fut; +use e3_events::E3id; +use e3_events::StoreKeys; use e3_events::EType; use e3_events::EffectsEnabled; @@ -87,11 +92,73 @@ use num_bigint::BigInt; use rand::Rng; use tracing::{error, info}; +/// In-memory cache of per-E3 forward-secrecy ciphers, keyed by `E3id`. +type E3CipherCache = Arc>>>; + +/// Resolve the cipher to use for decrypting a compute request's `SensitiveBytes`. +/// +/// `SensitiveBytes` produced by the keyshare actor are encrypted with the round's per-E3 +/// forward-secrecy key. That key is persisted in the KV store **encrypted under the master +/// cipher** by `E3CipherExtension`. Here we load it (caching the result), decrypt it under the +/// master cipher and return the per-E3 `Cipher`. +/// +/// `Ok(None)` from the store (no key stored) is the only case that falls back to the master +/// cipher — that covers rounds with no per-E3 key (e.g. requests predating the forward-secrecy +/// scheme). Once a key blob *exists*, a read or unwrap failure means this round is no longer +/// recoverable (corrupt entry, or the wrong master key after a restart), so we fail fast rather +/// than silently continuing under the wrong cipher assumption. +async fn resolve_e3_cipher( + store: &DataStore, + master: &Arc, + cache: &E3CipherCache, + e3_id: &E3id, +) -> Result> { + if let Some(found) = cache.lock().unwrap().get(e3_id).cloned() { + return Ok(found); + } + + // `base` (absolute location), matching `E3CipherExtension`'s writer, so resolution is + // independent of whatever scope this store handle currently carries. + let scoped = store.base(StoreKeys::e3_key(e3_id)); + let resolved = match scoped.read::>().await { + Ok(Some(encrypted_key)) => match master + .decrypt_data(&encrypted_key) + .and_then(|raw| Cipher::from_key_bytes(raw)) + { + Ok(cipher) => Arc::new(cipher), + Err(e) => { + error!(e3_id = %e3_id, "failed to decrypt per-E3 cipher key: {e}"); + anyhow::bail!("failed to unwrap per-E3 cipher for {e3_id}"); + } + }, + Ok(None) => { + // No per-E3 key stored for this round — use the master cipher. + return Ok(master.clone()); + } + Err(e) => { + error!(e3_id = %e3_id, "failed to read per-E3 cipher key: {e}"); + anyhow::bail!("failed to load per-E3 cipher for {e3_id}"); + } + }; + + cache + .lock() + .unwrap() + .insert(e3_id.clone(), resolved.clone()); + Ok(resolved) +} + /// Multithread actor pub struct Multithread { bus: BusHandle, rng: SharedRng, - cipher: Arc, + /// Node master cipher. Used to unwrap per-E3 keys from the store and as the fallback + /// cipher when a round has no per-E3 key. + master_cipher: Arc, + /// KV store handle used to load per-E3 forward-secrecy keys. + store: DataStore, + /// Cache of resolved per-E3 ciphers, evicted on round completion/failure. + e3_cipher_cache: E3CipherCache, task_pool: TaskPool, report: Option>, zk_prover: Option>, @@ -102,13 +169,16 @@ impl Multithread { bus: BusHandle, rng: SharedRng, cipher: Arc, + store: DataStore, task_pool: TaskPool, report: Option>, ) -> Self { Self { bus, rng, - cipher, + master_cipher: cipher, + store, + e3_cipher_cache: Arc::new(Mutex::new(HashMap::new())), task_pool, report, zk_prover: None, @@ -134,10 +204,25 @@ impl Multithread { bus: &BusHandle, rng: SharedRng, cipher: Arc, + store: DataStore, task_pool: TaskPool, report: Option>, ) -> Addr { - let addr = Self::new(bus.clone(), rng.clone(), cipher.clone(), task_pool, report).start(); + let addr = Self::new( + bus.clone(), + rng.clone(), + cipher.clone(), + store, + task_pool, + report, + ) + .start(); + + // Evict cached per-E3 ciphers once a round reaches a terminal state. + bus.subscribe_all( + &[EventType::E3Failed, EventType::E3RequestComplete], + addr.clone().into(), + ); // Gate ComputeRequest behind EffectsEnabled — proof generation should // not trigger during historical event replay. @@ -162,13 +247,21 @@ impl Multithread { bus: &BusHandle, rng: SharedRng, cipher: Arc, + store: DataStore, task_pool: TaskPool, report: Option>, zk_backend: &ZkBackend, ) -> Addr { let zk_prover = Arc::new(ZkProver::new(zk_backend)); - let actor = Self::new(bus.clone(), rng.clone(), cipher.clone(), task_pool, report) - .with_zk_prover(zk_prover); + let actor = Self::new( + bus.clone(), + rng.clone(), + cipher.clone(), + store, + task_pool, + report, + ) + .with_zk_prover(zk_prover); let addr = actor.start(); bus.subscribe_all( &[ @@ -212,8 +305,17 @@ impl Handler for Multithread { type Result = (); fn handle(&mut self, msg: EnclaveEvent, ctx: &mut Self::Context) -> Self::Result { let (data, ec) = msg.into_components(); - if let EnclaveEventData::ComputeRequest(data) = data { - ctx.notify(TypedEvent::new(data, ec)) + match data { + EnclaveEventData::ComputeRequest(data) => ctx.notify(TypedEvent::new(data, ec)), + // Drop the cached per-E3 cipher once the round is terminal so the key does not + // linger in memory after it has been purged from the store (forward secrecy). + EnclaveEventData::E3RequestComplete(data) => { + self.e3_cipher_cache.lock().unwrap().remove(&data.e3_id); + } + EnclaveEventData::E3Failed(data) => { + self.e3_cipher_cache.lock().unwrap().remove(&data.e3_id); + } + _ => {} } } } @@ -221,7 +323,9 @@ impl Handler for Multithread { impl Handler> for Multithread { type Result = ResponseFuture<()>; fn handle(&mut self, msg: TypedEvent, _: &mut Self::Context) -> Self::Result { - let cipher = self.cipher.clone(); + let master_cipher = self.master_cipher.clone(); + let store = self.store.clone(); + let cache = self.e3_cipher_cache.clone(); let rng = self.rng.clone(); let bus = self.bus.clone(); let pool = self.task_pool.clone(); @@ -230,15 +334,47 @@ impl Handler> for Multithread { trap_fut( EType::Computation, &self.bus.clone(), - handle_compute_request_event(msg, bus, cipher, rng, pool, report, zk_prover), + handle_compute_request_event( + msg, + bus, + master_cipher, + store, + cache, + rng, + pool, + report, + zk_prover, + ), ) } } +/// Build the variant-matched [`ComputeRequestErrorKind`] for a failed request, tagging it with +/// `msg`. Shared by the cipher-resolution and task-pool failure paths. +fn compute_error_kind(request: &ComputeRequestKind, msg: String) -> ComputeRequestErrorKind { + match request { + ComputeRequestKind::Zk(_) => { + ComputeRequestErrorKind::Zk(ZkEventError::ProofGenerationFailed(msg)) + } + ComputeRequestKind::TrBFV(trbfv_req) => ComputeRequestErrorKind::TrBFV(match trbfv_req { + TrBFVRequest::GenPkShareAndSkSss(_) => TrBFVError::GenPkShareAndSkSss(msg), + TrBFVRequest::GenEsiSss(_) => TrBFVError::GenEsiSss(msg), + TrBFVRequest::CalculateDecryptionKey(_) => TrBFVError::CalculateDecryptionKey(msg), + TrBFVRequest::CalculateDecryptionShare(_) => TrBFVError::CalculateDecryptionShare(msg), + TrBFVRequest::CalculateThresholdDecryption(_) => { + TrBFVError::CalculateThresholdDecryption(msg) + } + }), + } +} + +#[allow(clippy::too_many_arguments)] async fn handle_compute_request_event( msg: TypedEvent, bus: BusHandle, - cipher: Arc, + master_cipher: Arc, + store: DataStore, + cache: E3CipherCache, rng: SharedRng, pool: TaskPool, report: Option>, @@ -249,6 +385,25 @@ async fn handle_compute_request_event( let (msg, ctx) = msg.into_components(); let request_snapshot = msg.clone(); + // Resolve the per-E3 forward-secrecy cipher for this round; all `SensitiveBytes` in the + // request were encrypted with it by the producing keyshare actor. An unrecoverable resolve + // failure aborts the request rather than proceeding under the wrong cipher assumption. + let cipher = match resolve_e3_cipher(&store, &master_cipher, &cache, &msg.e3_id).await { + Ok(cipher) => cipher, + Err(resolve_err) => { + error!( + "Could not resolve per-E3 cipher for compute request '{}': {resolve_err}", + msg_string + ); + let error_kind = compute_error_kind( + &request_snapshot.request, + format!("Cipher error: {resolve_err}"), + ); + bus.publish(ComputeRequestError::new(error_kind, request_snapshot), ctx)?; + return Ok(()); + } + }; + let report_for_worker = report.clone(); let pool_result = pool .spawn(job_name, TaskTimeouts::default(), move || { @@ -263,27 +418,8 @@ async fn handle_compute_request_event( "Task pool error for compute request '{}': {pool_err}", msg_string ); - let error_kind = match &request_snapshot.request { - ComputeRequestKind::Zk(_) => ComputeRequestErrorKind::Zk( - ZkEventError::ProofGenerationFailed(format!("Pool error: {pool_err}")), - ), - ComputeRequestKind::TrBFV(ref trbfv_req) => { - let msg = format!("Pool error: {pool_err}"); - ComputeRequestErrorKind::TrBFV(match trbfv_req { - TrBFVRequest::GenPkShareAndSkSss(_) => TrBFVError::GenPkShareAndSkSss(msg), - TrBFVRequest::GenEsiSss(_) => TrBFVError::GenEsiSss(msg), - TrBFVRequest::CalculateDecryptionKey(_) => { - TrBFVError::CalculateDecryptionKey(msg) - } - TrBFVRequest::CalculateDecryptionShare(_) => { - TrBFVError::CalculateDecryptionShare(msg) - } - TrBFVRequest::CalculateThresholdDecryption(_) => { - TrBFVError::CalculateThresholdDecryption(msg) - } - }) - } - }; + let error_kind = + compute_error_kind(&request_snapshot.request, format!("Pool error: {pool_err}")); bus.publish(ComputeRequestError::new(error_kind, request_snapshot), ctx)?; return Ok(()); } diff --git a/crates/request/Cargo.toml b/crates/request/Cargo.toml index 654e97cf0..defb7773a 100644 --- a/crates/request/Cargo.toml +++ b/crates/request/Cargo.toml @@ -8,6 +8,7 @@ repository = "https://github.com/gnosisguild/enclave/crates/request" [dependencies] actix = { workspace = true } +e3-crypto = { workspace = true } e3-events = { workspace = true } e3-fhe-params = { workspace = true } e3-data = { workspace = true} diff --git a/crates/request/src/domain/routing.rs b/crates/request/src/domain/routing.rs index cd262858b..9cbde7b58 100644 --- a/crates/request/src/domain/routing.rs +++ b/crates/request/src/domain/routing.rs @@ -66,14 +66,21 @@ impl RequestRouter { // If this e3 round has already been completed then this event is unexpected. if completed.contains(&e3_id) { - // Plaintext Aggregated Triggers E3RequestComplete which tears down the per-E3 context - // and mark it as completed, but the E3StageChanged(Complete) that arrives from the EVM - // after local teardown is expected and should be ignored rather than treated as an error. - if matches!( - msg.get_data(), + // On-chain confirmation events that lag behind local teardown are expected and + // should be silently ignored rather than treated as an error. + let is_late_terminal = match msg.get_data() { + // E3StageChanged(Complete) always lags local PlaintextAggregated completion. EnclaveEventData::E3StageChanged(data) - if matches!(data.new_stage, E3Stage::Complete) - ) { + if matches!(data.new_stage, E3Stage::Complete | E3Stage::Failed) => + { + true + } + // E3Failed from on-chain markE3Failed may arrive after a local timeout already + // cleaned up the context. + EnclaveEventData::E3Failed(data) if data.reason.is_timeout() => true, + _ => false, + }; + if is_late_terminal { return RoutingDecision::Ignore; } return RoutingDecision::AlreadyCompleted(e3_id); @@ -88,8 +95,12 @@ impl RequestRouter { { PostForward::PublishComplete } - // NOTE: E3Stage::Failed does NOT trigger E3RequestComplete. Failed rounds need the - // accusation/slashing lifecycle to complete before the context is torn down. + // Timeout failures have no accusation/slashing lifecycle, so the context can be + // torn down immediately. Misbehaviour failures (DKGInvalidShares, etc.) still need + // the accusation/slashing lifecycle to complete before teardown. + EnclaveEventData::E3Failed(data) if data.reason.is_timeout() => { + PostForward::PublishComplete + } EnclaveEventData::E3RequestComplete(_) => PostForward::Teardown, _ => PostForward::None, }; @@ -105,8 +116,8 @@ impl RequestRouter { mod tests { use super::*; use e3_events::{ - E3RequestComplete, E3Stage, E3StageChanged, EnclaveEvent, PlaintextAggregated, Sequenced, - Shutdown, + E3Failed, E3RequestComplete, E3Stage, E3StageChanged, EnclaveEvent, FailureReason, + PlaintextAggregated, Sequenced, Shutdown, }; fn e3id() -> E3id { @@ -190,9 +201,9 @@ mod tests { } #[test] - fn stage_changed_to_failed_still_errors_when_completed() { - // E3StageChanged(Failed) after completion IS unexpected and should still error, - // because the failed path goes through accusation/slashing, not simple completion. + fn stage_changed_to_failed_ignored_when_completed() { + // E3StageChanged(Failed) from the EVM can arrive after a local timeout already cleaned up + // the context. Treat it as a silent no-op, the same way we handle E3StageChanged(Complete). let id = e3id(); let mut completed = HashSet::new(); completed.insert(id.clone()); @@ -203,7 +214,7 @@ mod tests { }); assert_eq!( RequestRouter::route(&msg, &completed), - RoutingDecision::AlreadyCompleted(id) + RoutingDecision::Ignore ); } @@ -285,4 +296,109 @@ mod tests { } ); } + + // --- timeout-triggered E3Failed tests --- + + fn e3_failed(id: E3id, reason: FailureReason) -> EnclaveEvent { + from_data(E3Failed { + e3_id: id, + failed_at_stage: E3Stage::CommitteeFinalized, + reason, + }) + } + + #[test] + fn e3_failed_dkg_timeout_publishes_complete() { + let id = e3id(); + let msg = e3_failed(id.clone(), FailureReason::DKGTimeout); + assert_eq!( + RequestRouter::route(&msg, &HashSet::new()), + RoutingDecision::Process { + e3_id: id, + post_forward: PostForward::PublishComplete, + } + ); + } + + #[test] + fn e3_failed_committee_formation_timeout_publishes_complete() { + let id = e3id(); + let msg = e3_failed(id.clone(), FailureReason::CommitteeFormationTimeout); + assert_eq!( + RequestRouter::route(&msg, &HashSet::new()), + RoutingDecision::Process { + e3_id: id, + post_forward: PostForward::PublishComplete, + } + ); + } + + #[test] + fn e3_failed_compute_timeout_publishes_complete() { + let id = e3id(); + let msg = e3_failed(id.clone(), FailureReason::ComputeTimeout); + assert_eq!( + RequestRouter::route(&msg, &HashSet::new()), + RoutingDecision::Process { + e3_id: id, + post_forward: PostForward::PublishComplete, + } + ); + } + + #[test] + fn e3_failed_decryption_timeout_publishes_complete() { + let id = e3id(); + let msg = e3_failed(id.clone(), FailureReason::DecryptionTimeout); + assert_eq!( + RequestRouter::route(&msg, &HashSet::new()), + RoutingDecision::Process { + e3_id: id, + post_forward: PostForward::PublishComplete, + } + ); + } + + #[test] + fn e3_failed_invalid_shares_does_not_complete() { + // Slashable failures must NOT trigger E3RequestComplete — the accusation/slashing + // lifecycle must be allowed to finish first. + let id = e3id(); + let msg = e3_failed(id.clone(), FailureReason::DKGInvalidShares); + assert_eq!( + RequestRouter::route(&msg, &HashSet::new()), + RoutingDecision::Process { + e3_id: id, + post_forward: PostForward::None, + } + ); + } + + #[test] + fn e3_failed_timeout_ignored_when_already_completed() { + let id = e3id(); + let mut completed = HashSet::new(); + completed.insert(id.clone()); + let msg = e3_failed(id.clone(), FailureReason::DKGTimeout); + assert_eq!( + RequestRouter::route(&msg, &completed), + RoutingDecision::Ignore + ); + } + + #[test] + fn stage_changed_to_failed_ignored_when_already_completed() { + let id = e3id(); + let mut completed = HashSet::new(); + completed.insert(id.clone()); + let msg = from_data(E3StageChanged { + e3_id: id.clone(), + previous_stage: E3Stage::CommitteeFinalized, + new_stage: E3Stage::Failed, + }); + assert_eq!( + RequestRouter::route(&msg, &completed), + RoutingDecision::Ignore + ); + } } diff --git a/crates/request/src/e3_cipher.rs b/crates/request/src/e3_cipher.rs new file mode 100644 index 000000000..9503b3926 --- /dev/null +++ b/crates/request/src/e3_cipher.rs @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. + +//! Per-E3 forward-secrecy cipher management. +//! +//! Each E3 round gets its own randomly-generated AES-256-GCM key ("E3 key"). The E3 key is +//! persisted in the KV store **encrypted under the node's master cipher**, so a restart can +//! re-derive the per-E3 cipher without user interaction. +//! +//! When the E3 round reaches a terminal state (`E3RequestComplete`) the E3 key is deleted from +//! the store. From that point on, `SensitiveBytes` values encrypted with the per-E3 cipher are +//! permanently irrecoverable, even if the master passphrase is later leaked +//! (forward-secrecy). + +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use e3_crypto::Cipher; +use e3_data::{Repositories, RepositoriesFactory, Repository}; +use e3_events::{ + E3Failed, E3Stage, E3id, EnclaveEvent, EnclaveEventData, Event, EventConstructorWithTimestamp, + EventSource, FailureReason, StoreKeys, Unsequenced, +}; +use std::sync::Arc; +use tracing::{debug, info}; + +use crate::{E3Context, E3ContextSnapshot, E3Extension, TypedKey}; + +/// Context dependency key under which the per-E3 `Cipher` is stored. +pub const E3_CIPHER_KEY: TypedKey> = TypedKey::new("e3_cipher"); + +// ── Repository helpers ──────────────────────────────────────────────────────── + +pub trait E3CipherRepositoryFactory { + fn e3_cipher(&self, e3_id: &E3id) -> Repository>; +} + +impl E3CipherRepositoryFactory for Repositories { + fn e3_cipher(&self, e3_id: &E3id) -> Repository> { + // `base` (absolute), not `scope` (relative): the per-E3 key must live at a fixed, + // node-global location so that the shared `Multithread` actor — which holds the root + // store, not this per-E3 context scope — resolves the exact same key. The e3_id is + // already in the key, so it stays unique per round. + Repository::new(self.store.base(StoreKeys::e3_key(e3_id))) + } +} + +// ── Extension ──────────────────────────────────────────────────────────────── + +/// An [`E3Extension`] that injects a per-E3 forward-secrecy cipher into the context. +/// +/// Register this extension **before** any extension that consumes `E3_CIPHER_KEY` (e.g. +/// `ThresholdKeyshareExtension`) so that the cipher is present by the time it is needed. +pub struct E3CipherExtension { + master_cipher: Arc, +} + +impl E3CipherExtension { + pub fn create(master_cipher: &Arc) -> Box { + Box::new(Self { + master_cipher: master_cipher.clone(), + }) + } + + /// Generate a new E3 key, persist it (encrypted under `master`) and return a `Cipher` for it. + fn create_and_store(&self, repo: &Repository>, e3_id: &E3id) -> Result> { + let e3_cipher = Cipher::generate()?; + // Encrypt the raw key under the master cipher before storing. + let mut key_copy: Vec = e3_cipher.key_bytes().as_slice().to_vec(); + let encrypted = self.master_cipher.encrypt_data(&mut key_copy)?; + repo.write(&encrypted); + debug!(e3_id = %e3_id, "generated and stored new E3 cipher key"); + Ok(Arc::new(e3_cipher)) + } + + /// Load an existing E3 key from the store and return the corresponding `Cipher`. + async fn load(&self, repo: &Repository>, e3_id: &E3id) -> Result>> { + let Some(encrypted_key) = repo.read().await? else { + return Ok(None); + }; + let raw = self.master_cipher.decrypt_data(&encrypted_key)?; + let cipher = Cipher::from_key_bytes(raw)?; + debug!(e3_id = %e3_id, "loaded existing E3 cipher key"); + Ok(Some(Arc::new(cipher))) + } + + /// Delete the E3 key from the store — called when the round completes. + fn purge(repo: &Repository>, e3_id: &E3id) { + repo.clear(); + info!(e3_id = %e3_id, "purged E3 cipher key (forward secrecy)"); + } +} + +#[async_trait] +impl E3Extension for E3CipherExtension { + fn on_event(&self, ctx: &mut E3Context, evt: &EnclaveEvent) { + match evt.get_data() { + // Create the E3 cipher the moment this round is first seen. + EnclaveEventData::E3Requested(data) => { + if ctx.get_dependency(E3_CIPHER_KEY).is_some() { + return; + } + let repo = ctx.repositories().e3_cipher(&data.e3_id); + match self.create_and_store(&repo, &data.e3_id) { + Ok(cipher) => ctx.set_dependency(E3_CIPHER_KEY, cipher), + Err(e) => { + tracing::error!( + e3_id = %data.e3_id, + "failed to create E3 cipher: {e}; aborting round to preserve forward secrecy" + ); + let fail_evt = EnclaveEvent::::new_with_timestamp( + EnclaveEventData::from(E3Failed { + e3_id: data.e3_id.clone(), + failed_at_stage: E3Stage::Requested, + reason: FailureReason::None, + }), + None, + 0, + None, + EventSource::Local, + ) + .into_sequenced(0); + ctx.forward_message_now(&fail_evt); + } + } + } + // Purge the E3 key on completion. + EnclaveEventData::E3RequestComplete(data) => { + let repo = ctx.repositories().e3_cipher(&data.e3_id); + Self::purge(&repo, &data.e3_id); + } + _ => {} + } + } + + async fn hydrate(&self, ctx: &mut E3Context, snapshot: &E3ContextSnapshot) -> Result<()> { + if !snapshot.contains("e3_cipher") { + return Ok(()); + } + let repo = ctx.repositories().e3_cipher(&snapshot.e3_id); + match self.load(&repo, &snapshot.e3_id).await? { + Some(cipher) => ctx.set_dependency(E3_CIPHER_KEY, cipher), + None => { + return Err(anyhow!( + "E3 cipher key for {} not found in store during hydration; \ + the round may have completed before the node restarted", + snapshot.e3_id + )); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use actix::Actor; + use e3_data::{DataStore, InMemStore, Repositories, Repository}; + use e3_events::{E3id, EnclaveEvent}; + + fn master() -> Arc { + // Synchronous construction via from_key_bytes so tests don't need async for the master. + Arc::new(Cipher::from_key_bytes(vec![0xABu8; 32]).unwrap()) + } + + fn test_repos() -> Repositories { + let store = InMemStore::new(false).start(); + DataStore::from_in_mem(&store).into() + } + + fn e3id() -> E3id { + E3id::new("1", 1) + } + + fn repo_for(repos: &Repositories, id: &E3id) -> Repository> { + repos.e3_cipher(id) + } + + // ── create_and_store / load round-trip ─────────────────────────────────── + + #[actix::test] + async fn create_store_load_round_trips() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let repo = repo_for(&repos, &id); + + let cipher = ext.create_and_store(&repo, &id).unwrap(); + + // Load should recover the same key material. + let loaded = ext.load(&repo, &id).await.unwrap().unwrap(); + assert_eq!( + cipher.key_bytes().as_slice(), + loaded.key_bytes().as_slice(), + "loaded key must match the one that was stored" + ); + } + + // Regression: the per-E3 key must resolve to the SAME absolute location whether accessed + // through a per-E3 context-scoped store (as `E3CipherExtension` does) or through the root + // store (as the shared `Multithread` actor does). A relative `scope` here would write under + // the context scope and the root reader would miss it — surfacing as "Could not decrypt data" + // during key generation. + #[actix::test] + async fn e3_cipher_key_resolves_at_same_location_across_scopes() { + use e3_data::RepositoriesFactory; + + let ext = E3CipherExtension::create(&master()); + let root = test_repos(); + let id = e3id(); + + // Writer side: a deeply scoped store, mimicking the router's per-E3 context scope. + let scoped_repos: Repositories = root + .store + .scope(StoreKeys::router()) + .scope(StoreKeys::context(&id)) + .repositories(); + let stored = ext + .create_and_store(&scoped_repos.e3_cipher(&id), &id) + .unwrap(); + + // Reader side: the root store (what Multithread holds) must find the same key. + let loaded = ext + .load(&root.e3_cipher(&id), &id) + .await + .unwrap() + .expect("root reader must resolve the key written through the scoped store"); + assert_eq!( + stored.key_bytes().as_slice(), + loaded.key_bytes().as_slice(), + "key must be identical regardless of the store scope it is accessed through" + ); + } + + #[actix::test] + async fn load_returns_none_when_no_key_stored() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let repo = repo_for(&repos, &id); + + let result = ext.load(&repo, &id).await.unwrap(); + assert!(result.is_none()); + } + + // ── purge ──────────────────────────────────────────────────────────────── + + #[actix::test] + async fn purge_removes_key_from_store() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let repo = repo_for(&repos, &id); + + ext.create_and_store(&repo, &id).unwrap(); + assert!(ext.load(&repo, &id).await.unwrap().is_some()); + + E3CipherExtension::purge(&repo, &id); + assert!( + ext.load(&repo, &id).await.unwrap().is_none(), + "key must be absent after purge" + ); + } + + // ── wrong master cipher ────────────────────────────────────────────────── + + #[actix::test] + async fn load_fails_with_wrong_master() { + let ext_a = E3CipherExtension::create(&master()); + let wrong_master = Arc::new(Cipher::from_key_bytes(vec![0x01u8; 32]).unwrap()); + let ext_b = E3CipherExtension::create(&wrong_master); + + let repos = test_repos(); + let id = e3id(); + let repo = repo_for(&repos, &id); + + ext_a.create_and_store(&repo, &id).unwrap(); + // Decryption with a different master must fail. + assert!(ext_b.load(&repo, &id).await.is_err()); + } + + // ── on_event ───────────────────────────────────────────────────────────── + + fn make_context(repos: Repositories, id: E3id) -> E3Context { + use crate::HetrogenousMap; + use e3_data::Repository; + + E3Context { + e3_id: id, + repository: Repository::new(repos.store.clone()), + recipients: std::collections::HashMap::new(), + dependencies: HetrogenousMap::new(), + } + } + + fn e3_requested_event(id: E3id) -> EnclaveEvent { + use e3_events::{E3Requested, Sequenced}; + EnclaveEvent::::test_event("e3_requested") + .data(E3Requested { + e3_id: id, + ..E3Requested::default() + }) + .seq(1) + .build() + } + + fn e3_complete_event(id: E3id) -> EnclaveEvent { + use e3_events::{E3RequestComplete, Sequenced}; + EnclaveEvent::::test_event("e3_complete") + .data(E3RequestComplete { e3_id: id }) + .seq(2) + .build() + } + + #[actix::test] + async fn on_event_e3_requested_sets_cipher_in_context() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let mut ctx = make_context(repos, id.clone()); + + let evt = e3_requested_event(id.clone()); + ext.on_event(&mut ctx, &evt); + + assert!( + ctx.get_dependency(E3_CIPHER_KEY).is_some(), + "E3_CIPHER_KEY must be set in context after E3Requested" + ); + } + + #[actix::test] + async fn on_event_e3_requested_is_idempotent() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let mut ctx = make_context(repos, id.clone()); + + let evt = e3_requested_event(id.clone()); + ext.on_event(&mut ctx, &evt); + let key_first = ctx + .get_dependency(E3_CIPHER_KEY) + .unwrap() + .key_bytes() + .clone(); + + // Second call must not overwrite the key. + ext.on_event(&mut ctx, &evt); + let key_second = ctx + .get_dependency(E3_CIPHER_KEY) + .unwrap() + .key_bytes() + .clone(); + + assert_eq!( + key_first.as_slice(), + key_second.as_slice(), + "repeated E3Requested must not regenerate the key" + ); + } + + #[actix::test] + async fn on_event_e3_request_complete_purges_key_from_store() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let mut ctx = make_context(repos.clone(), id.clone()); + + // Seed the context with a key. + let evt_req = e3_requested_event(id.clone()); + ext.on_event(&mut ctx, &evt_req); + + // Confirm it's in the store. + let repo = repo_for(&repos, &id); + assert!(ext.load(&repo, &id).await.unwrap().is_some()); + + // Complete the round. + let evt_done = e3_complete_event(id.clone()); + ext.on_event(&mut ctx, &evt_done); + + // Key must be gone. + assert!( + ext.load(&repo, &id).await.unwrap().is_none(), + "E3 key must be purged after E3RequestComplete" + ); + } + + // ── hydrate ────────────────────────────────────────────────────────────── + + #[actix::test] + async fn hydrate_restores_cipher_from_store() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + + // Pre-store a key as if a previous `on_event(E3Requested)` ran. + let repo = repo_for(&repos, &id); + let original = ext.create_and_store(&repo, &id).unwrap(); + + let mut ctx = make_context(repos, id.clone()); + let snapshot = E3ContextSnapshot { + e3_id: id.clone(), + recipients: vec![], + dependencies: vec!["e3_cipher".to_string()], + }; + + ext.hydrate(&mut ctx, &snapshot).await.unwrap(); + + let restored = ctx.get_dependency(E3_CIPHER_KEY).unwrap(); + assert_eq!( + original.key_bytes().as_slice(), + restored.key_bytes().as_slice(), + "hydrated cipher must match the stored key" + ); + } + + #[actix::test] + async fn hydrate_skips_when_snapshot_has_no_e3_cipher() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let mut ctx = make_context(repos, id.clone()); + + let snapshot = E3ContextSnapshot { + e3_id: id.clone(), + recipients: vec![], + dependencies: vec![], // no "e3_cipher" + }; + + ext.hydrate(&mut ctx, &snapshot).await.unwrap(); + assert!(ctx.get_dependency(E3_CIPHER_KEY).is_none()); + } + + #[actix::test] + async fn hydrate_errors_when_key_missing_from_store() { + let ext = E3CipherExtension::create(&master()); + let repos = test_repos(); + let id = e3id(); + let mut ctx = make_context(repos, id.clone()); + + // Snapshot claims the cipher was present, but nothing is in the store. + let snapshot = E3ContextSnapshot { + e3_id: id.clone(), + recipients: vec![], + dependencies: vec!["e3_cipher".to_string()], + }; + + let result = ext.hydrate(&mut ctx, &snapshot).await; + assert!( + result.is_err(), + "hydrate must return Err when key is missing from store" + ); + } +} diff --git a/crates/request/src/lib.rs b/crates/request/src/lib.rs index 5add119db..f31cdad4e 100644 --- a/crates/request/src/lib.rs +++ b/crates/request/src/lib.rs @@ -7,6 +7,7 @@ mod actors; mod context; mod domain; +mod e3_cipher; mod hetrogenous_map; mod meta; mod repo; @@ -14,6 +15,7 @@ mod repo; pub use actors::*; pub use context::*; pub use domain::*; +pub use e3_cipher::*; pub use hetrogenous_map::*; pub use meta::*; pub use repo::*;