From b4f7dd9c7b48b81aaef63a150a1ab2077f44a1af Mon Sep 17 00:00:00 2001 From: Luke Wagner Date: Wed, 22 Apr 2026 13:08:18 -0500 Subject: [PATCH 1/2] CABI: redefine reentrance invariants and rules; fix cancellation rules --- design/mvp/CanonicalABI.md | 613 +++++++++++++----------- design/mvp/Concurrency.md | 203 ++++++-- design/mvp/Explainer.md | 44 +- design/mvp/Linking.md | 164 +++++++ design/mvp/canonical-abi/definitions.py | 187 ++++---- design/mvp/canonical-abi/run_tests.py | 376 +++++++-------- 6 files changed, 935 insertions(+), 652 deletions(-) diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md index 81a8a32c..441d9e69 100644 --- a/design/mvp/CanonicalABI.md +++ b/design/mvp/CanonicalABI.md @@ -123,10 +123,11 @@ class ComponentInstance: parent: Optional[ComponentInstance] handles: Table[ResourceHandle | Waitable | WaitableSet | ErrorContext] threads: Table[Thread] + may_enter: bool may_leave: bool backpressure: int num_waiting_to_enter: int - exclusive: Optional[Task] + exclusive_thread: Optional[Thread] def __init__(self, store, parent = None): assert(parent is None or parent.store is store) @@ -134,10 +135,11 @@ class ComponentInstance: self.parent = parent self.handles = Table() self.threads = Table() + self.may_enter = True self.may_leave = True self.backpressure = 0 self.num_waiting_to_enter = 0 - self.exclusive = None + self.exclusive_thread = None ``` Components are always instantiated in the context of a *store* (analogous to the Core WebAssembly [`store`]) which is saved immutably in the instance's `store` @@ -155,127 +157,118 @@ by the host, `None`, in the `parent` field. Thus, the set of component instances in a store forms a forest rooted by the component instances that were instantiated directly by the host. 
-Based on this, the "reflexive ancestors" of a component instance (i.e., itself -and all parent component instances up to the root component instance) can be -enumerated and tested via these two helper functions: +The `ComponentInstance.may_enter_from`, `enter_from` and `leave_to` methods +defined here are used to guard and record execution entering and exiting a +component instance. These methods are used by the `Store` methods and +`Task.request_cancellation`, defined below, to ensure [Component Invariant] #2. ```python - def reflexive_ancestors(self) -> set[ComponentInstance]: - s = set() - inst = self - while inst is not None: - s.add(inst) - inst = inst.parent - return s + def may_enter_from(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + if not inst.may_enter: + return False + return True - def is_reflexive_ancestor_of(self, other): - while other is not None: - if self is other: - return True - other = other.parent - return False -``` + def enter_from(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + assert(inst.may_enter) + inst.may_enter = False -How the host instantiates and invokes root components is up to the host and not -specified by the Component Model. Exports of previously-instantiated root -components *may* be supplied as the imports of subsequently-instantiated root -components. Due to the ordered nature of instantiation, root components cannot -directly import each others' exports in a cyclic manner. However, the host *may* -attempt to perform cyclic component-to-host-to-component calls using host -powers. - -Because a child component is fully encapsulated by its parent component (with -all child imports specified by the parent's `instantiate` expression and access -to all child exports controlled by the parent through its private instance index -space), the host does not have direct control over how a child component is -instantiated or invoked. 
However, if a child's ancestors transitively forward -the root component's host-supplied imports to the child, direct child-to-host -calls are possible. Symmetrically, if a child's ancestors transitively -re-export the child's exports from the root component, direct host-to-child -calls are possible. - -Recursive component calls are technically possible using either host powers (as -mentioned above) or via a parent component lowering a child component's export -to a `funcref` and then recursively calling this `funcref` from a lifted parent -function passed as an import to the child. However, for the time being, both -cases are prevented via trap for several reasons: -* automatic [backpressure] would otherwise deadlock in unpredictable and - surprising ways; -* by default, most code does not expect [recursive reentrance] and will break - in subtle and potentially security sensitive ways if allowed; -* to properly handle recursive reentrance, an extra ABI parameter is required - to link recursive calls and this requires opting in via some - [TBD](Concurrency.md#TODO) function effect type or canonical ABI option. - -To detect and prevent recursive calls, the runtime tracks the dynamic call -stack via a linked list of `Supertask` nodes. The `inst` field of `Supertask` -either points to the `ComponentInstance` of the calling component or, if -`None`, indicates that the caller is the host. -```python -class Supertask: - inst: Optional[ComponentInstance] - supertask: Optional[Supertask] -``` - -The `call_might_be_recursive` predicate is used by `Store.lift` to -conservatively detect recursive reentrance and subsequently trap. 
-```python -def call_might_be_recursive(caller: Supertask, callee_inst: ComponentInstance): - if caller.inst is None: - while caller is not None: - if caller.inst and caller.inst.reflexive_ancestors() & callee_inst.reflexive_ancestors(): - return True - caller = caller.supertask - return False - else: - return (caller.inst.is_reflexive_ancestor_of(callee_inst) or - callee_inst.is_reflexive_ancestor_of(caller.inst)) -``` -The first case (where `caller.inst` is `None`) covers host-to-component calls. -By testing whether any of the callers' reflexive anecestor sets intersect the -callee's ancestor set, the following case is considered recursive: -``` - +-------+ - | A |<-. - | +---+ | | -host-->| B |-->host - | +---+ | - +-------+ -``` -Here, when attempting to recursively call back into `A`, `caller` points to the -following stack: -``` -|inst=None| --supertask--> |inst=B| --supertask--> |inst=None| --supertask--> None -``` -while `A` does not appear as the `inst` of any `Supertask` on this stack, -`B.reflexive_ancestors()` is `{ B, A }`, so the loop correctly determines that -`A` is being reentered. This ensures that child components are kept an -encapsulated detail of the parent. - -The second case (where `caller.inst` is not `None`) covers component-to- -component calls by conservatively rejecting any call from a component to its -anecestor or descendant (thereby preventing any possible recursion via ancestor -`funcref`). 
Thus, the following sibling-to-sibling component call is allowed: -``` - +----------------+ - | P | - | +----+ +----+ | -host-->| C1 |->| C2 | | - | +----+ +----+ | - +----------------+ -``` -while the following child-to-parent and parent-to-child calls are disallowed: -``` - +----------+ +----------+ - | +---+ | | +---+ | -host-->| C |->P | host->| P->| C | | - | +---+ | | +---+ | - +----------+ +----------+ -``` -This conservative approximation allows `call_might_be_recursive` to be computed -ahead-of-time when compiling a fused component-to-component adapter (where both -caller and callee intances and their relationship are statically known). In the -future this check will be relaxed and more sophisticated optimizations can be -used to statically eliminate the check in common cases. + def leave_to(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + assert(not inst.may_enter) + inst.may_enter = True + + def entering_set(self, caller: Optional[ComponentInstance]) -> set[ComponentInstance]: + if caller: + return self.self_and_ancestors() - caller.self_and_ancestors() + else: + return self.self_and_ancestors() + + def self_and_ancestors(self) -> set[ComponentInstance]: + s = { self } + ancestor = self.parent + while ancestor is not None: + s.add(ancestor) + ancestor = ancestor.parent + return s +``` +In `may_enter_from`, `enter_from` and `leave_to`, the `caller` parameter is +either the caller's `ComponentInstance` in a component-to-component call or +`None` for a host-to-component call. This `caller` is used to avoid trapping in +the case of a parent component [donut wrapping] a child component and being +reentered by a child component import call which by definition does not violate +[Component Invariant] #2. 
+ +To distinguish and allow donut-wrapping-reentrance, we say that entering a +component instance C also implicitly enters all of C's transitive parents +("ancestors") but when calling from one component into another, any component +instance *already* entered by the caller (including itself) is *subtracted* from +the set of component instances being entered by the callee because execution is +not "entering" but rather "staying inside" those instances held in common. + +For example, given a parent component instance `P` which contains core module +instances `M1` and `M2` and child component instances `C1` and `C2`, +`may_enter_from` allows every call in this callstack to succeed: +``` + +-------------------------------------------------+ + | P | + | +-----------+ +----+ +----+ +-----------+ | +host-->| M1 (in P) |-->| C1 |-->| C2 |-->| M2 (in P) | | + | +-----------+ +----+ +----+ +-----------+ | + +-------------------------------------------------+ +``` +In particular, when the host first calls into `P` (via `lift`ed `M1`), +`P.entering_set(None)` is `{ P }`, so `P.may_enter` is tested and then set to +`False`. When `P` calls into `C1`, `C1.entering_set(P)` is `{ C1 }` (since +`P.self_and_ancestors() = { P }` is subtracted from `C1.self_and_ancestors() = +{ C1, P }`) and thus `C1.may_enter` is tested and set to `False`. When `C1` +calls `C2`, `C2.entering_set(C1)` is `{ C2 }`, so `C2.may_enter` is also set to +`False`. And then finally when `C2` calls back into `P` (via `lift`ed `M2`), +`P.entering_set(C2)` is empty (because `C2.self_and_ancestors() = { C2, P }` is +subtracted from `P.self_and_ancestors() = { P }`) and thus there is no +`trap_if(not P.may_enter)` (which would have otherwise failed). 
+ +If now `P` tries to call from `M2` back into `C1` (using the power of +`call_indirect`), there *would* be a trap, since `C1.entering_set(P)` is +`{ C1 }` and `C1.may_enter` is already `False`: +``` + +-----------------------------------------------------------+ + | P | + | +-----------+ +----+ +----+ +-----------+ +----+ | +host-->| M1 (in P) |-->| C1 |-->| C2 |-->| M2 (in P) |-X->| C1 | | + | +-----------+ +----+ +----+ +-----------+ +----+ | + +-----------------------------------------------------------+ +``` + +Alternatively, let's say `P` also contains a third child `C3` whose exports are +re-exported by `P` so that they can be called directly by the host. Then if `M2` +calls back out into the host and the host tries to call `C3` directly, it also +traps since `C3.entering_set(None)` is `{ C3, P }` and `P.may_enter` is already +set to `False`: +``` + +-------------------------------------------------+ +--------+ + | P | | P | + | +-----------+ +----+ +----+ +-----------+ | | +----+ | +host-->| M1 (in P) |-->| C1 |-->| C2 |-->| M2 (in P) |-->host-X->| C3 | | + | +-----------+ +----+ +----+ +-----------+ | | +----+ | + +-------------------------------------------------+ +--------+ +``` + +From an optimizing compiler's perspective, the `set[ComponentInstance]` returned +by `entering_set` is known *statically* when compiling a component-to-component +trampoline and thus the compiler can fully unroll the `for` loops in +`may_enter_from`, `enter_from` and `leave_to` into fixed sequences of branches +and stores with fixed memory locations for the `may_enter` flags. Furthermore, +because component-to-component reentrance is only possible via [donut wrapping] +and donut wrapping is only possible when a parent component contains a `canon +lower` definition, whenever the compiler sees a component with no `canon lower` +definitions, it can mark the `may_enter` flags of all its direct children as +*optimized-out* and then completely ignore them. 
Since donut wrapping is rare, +this means that, in practice, only root component instances' `may_enter` flags +will be tested and only for host-to-component or component-to-component calls +between different root components (linked by the host). Thus, the overall cost +of reentrance should be very low, in exchange for allowing the producer +toolchain to not have to safely handle reentrance at every single import call. The other fields of `ComponentInstance` are described below as they are used. @@ -740,16 +733,14 @@ spec-level function type, where the host can be the caller, the callee or even OnStart = Callable[[], list[any]] OnResolve = Callable[[Optional[list[any]]], None] OnCancel = Callable[[], None] -FuncInst = Callable[[OnStart, OnResolve, Supertask], OnCancel] +FuncInst = Callable[[OnStart, OnResolve, Optional[ComponentInstance]], OnCancel] ``` The three parameters of `FuncInst` are: * an `OnStart` callback that is called by the callee when it is ready to receive its arguments after waiting for any [backpressure] to subside; * an `OnResolve` callback that is called by the callee when it is ready to return its value or, if cancellation has been requested, `None`. -* a calling `Supertask` which is used to maintain the - [async callstack][Structured Concurrency] and enforce the - non-reentrance [component invariant]; +* the caller's `ComponentInstance`, or `None` if the caller is the host. Critically, if the callee [blocks] at the wasm level, the spec-level `FuncInst` returns immediately to the caller while continuing to execute the callee in a @@ -765,7 +756,7 @@ call creates a `Task` object to track the state of the call and ensure that the wasm guest code adheres to the above `FuncInst` calling convention (or else traps).
`Task` is introduced in chunks, starting with fields and initialization: ```python -class Task(Supertask): +class Task: class State(Enum): INITIAL = 1 STARTED = 2 @@ -778,38 +769,37 @@ class Task(Supertask): inst: ComponentInstance on_start: OnStart on_resolve: OnResolve - supertask: Supertask + caller: Optional[ComponentInstance] state: State num_borrows: int - waiting_to_enter: Optional[Thread] + implicit_thread: Optional[Thread] threads: list[Thread] - def __init__(self, ft, opts, inst, on_start, on_resolve, supertask): + def __init__(self, ft, opts, inst, on_start, on_resolve, caller): self.ft = ft self.opts = opts self.inst = inst self.on_start = on_start self.on_resolve = on_resolve - self.supertask = supertask + self.caller = caller self.state = Task.State.INITIAL self.num_borrows = 0 - self.waiting_to_enter = None + self.implicit_thread = None self.threads = [] ``` -The `Task.needs_exclusive` method returns whether an `async`-typed function's -ABI options indicate that the Core WebAssembly code requires serialized -execution (with the common reason being that there is a single, global linear -memory shadow stack). This serialized execution is implemented by -acquiring/releasing the component-instance-wide `exclusive` lock before/after -executing Core WebAssembly code executing on the task's *implicit thread* -(explicit threads created by `thread.new-indirect` ignore the `exclusive` lock). -Specifically, sync- and stackless-async-lifted (`async callback`) functions -require the `exclusive` lock and stackful-async-lifted (`async`) functions -ignore the `exclusive` lock (just like explicit threads). Note that -non-`async`-typed functions' implicit threads also ignore the `exclusive` lock -since they must complete synchronously without blocking and thus don't have to -worry about non-LIFO stack interleaving. 
+The `Task.needs_exclusive` predicate returns whether this task's implicit thread +(`Task.implicit_thread`) has *not* opted in to multiple concurrent linear memory +shadow stacks (via "stackful" lift) and thus, according to [Component Invariant] +#3, requires serialization with all the other implicit threads in the component +instance that have similarly not opted in. This question only applies to +`async`-typed functions, since synchronous functions can't block and thus can +always execute in a LIFO fashion using a single linear memory shadow stack. When +`needs_exclusive` is true, core wasm execution is gated on acquiring the +`ComponentInstance.exclusive_thread` lock. Due to cooperativity, the +`exclusive_thread` "lock" is simply a mutable field holding either `None`, when +unlocked, or, when locked, a reference to the `Task.implicit_thread` currently +holding the lock. ```python def needs_exclusive(self): assert(self.ft.async_) @@ -835,7 +825,7 @@ of backpressure: `backpressure.{inc,dec}` which modify the `ComponentInstance.backpressure` counter. 2. *Implicit backpressure* triggered when `Task.needs_exclusive()` is true and - the `exclusive` lock is already held. + the `ComponentInstance.exclusive_thread` lock is already held. 3. *Residual backpressure* triggered by explicit or implicit backpressure having been enabled then disabled, but there still being tasks waiting to enter that need to be given the chance to start without getting starved @@ -850,24 +840,22 @@ exports. 
```python def enter_implicit_thread(self): assert(self.state == Task.State.INITIAL) - thread = current_thread() + self.implicit_thread = current_thread() if self.ft.async_: def has_backpressure(): return (self.inst.backpressure > 0 or - (self.needs_exclusive() and self.inst.exclusive is not None)) + (self.needs_exclusive() and self.inst.exclusive_thread is not None)) if has_backpressure() or self.inst.num_waiting_to_enter > 0: self.inst.num_waiting_to_enter += 1 - self.waiting_to_enter = thread - cancelled = thread.wait_until(lambda: not has_backpressure(), cancellable = True) - self.waiting_to_enter = None + cancelled = self.implicit_thread.wait_until(lambda: not has_backpressure(), cancellable = True) self.inst.num_waiting_to_enter -= 1 if cancelled: self.cancel() return False if self.needs_exclusive(): - assert(self.inst.exclusive is None) - self.inst.exclusive = self - self.register_thread(thread) + assert(self.inst.exclusive_thread is None) + self.inst.exclusive_thread = self.implicit_thread + self.register_thread(self.implicit_thread) return True def register_thread(self, thread): @@ -882,9 +870,7 @@ implicit thread due to backpressure, the above definition allows the host to arbitrarily select which threads to resume in which order. Additionally, the above definition ensures the following properties: * While a callee is waiting to enter, if the caller requests cancellation, - the callee is immediately cancelled. The `Task.waiting_to_enter` field is - used by `Task.request_cancellation` below to know which thread to - `resume` with `Cancelled.TRUE`. + the callee is immediately cancelled. 
* When backpressure is disabled then reenabled, no new tasks start, even tasks that were blocked and then unblocked by the first occurrence of backpressure (i.e., disabling backpressure never unleashes an unstoppable @@ -896,16 +882,17 @@ the lists of threads running inside the current task and component instance by Symmetrically, the `Task.exit_implicit_thread` method is called before a task's implicit thread returns to reverse the effects of `Task.enter_implicit_thread`. -In particular, for a synchronous or `async callback` task, it clears `exclusive` -to allow the next such task to start. `Task.unregister_thread` (which is also -called by `thread.new-indirect`, below) traps if the task's last thread is -unregistered and the task has not yet returned a value to its caller. +In particular, if the `exclusive_thread` lock was acquired, it is released. +`Task.unregister_thread` (which is also called by `thread.new-indirect`, below) +traps if the task's last thread is unregistered and the task has not yet +returned a value to its caller. ```python def exit_implicit_thread(self): - self.unregister_thread(current_thread()) + assert(current_thread() is self.implicit_thread) + self.unregister_thread(self.implicit_thread) if self.ft.async_ and self.needs_exclusive(): - assert(self.inst.exclusive is self) - self.inst.exclusive = None + assert(self.inst.exclusive_thread is self.implicit_thread) + self.inst.exclusive_thread = None def unregister_thread(self, thread): assert(thread in self.threads and thread.task is self) @@ -919,36 +906,40 @@ unregistered and the task has not yet returned a value to its caller. The `Task.request_cancellation` method is called by the host or wasm caller to signal that they don't need the return value and that the callee should hurry up -and call the `OnResolve` callback. If a task is waiting to start in -`Task.enter_implicit_thread` due to backpressure, then it is immediately -cancelled without running any guest code. 
Otherwise, if *any* of a cancelled -task's threads are expecting cancellation (e.g., when an `async callback` export -returns to the event loop or when a `waitable-set.*` or `thread.*` built-in is -called with `cancellable` set), `request_cancellation` immediately resumes that -thread (picking one nondeterministically if there are multiple), giving the -thread the chance to handle cancellation promptly (allowing `subtask.cancel` to -complete eagerly without returning `BLOCKED`). +and call the `OnResolve` callback. If a task's implicit thread is waiting to +start (in `Task.enter_implicit_thread`) due to backpressure, then it is +immediately cancelled without running any guest code. Otherwise, if any of a +cancelled task's threads are expecting cancellation (e.g., when an `async +callback` export returns to the event loop or when `waitable-set.wait` or a +`thread.*` built-in is called with `cancellable` set), `request_cancellation` +considers resuming that thread (picking one nondeterministically if there are +multiple), giving the thread the chance to handle cancellation promptly so that +`subtask.cancel` completes without blocking. 
```python def request_cancellation(self): + assert(not self.caller or self.caller is current_instance()) if self.state == Task.State.INITIAL: self.state = Task.State.CANCEL_DELIVERED - self.waiting_to_enter.resume(Cancelled.TRUE) - return - assert(self.state == Task.State.STARTED) - if not self.needs_exclusive() or not self.inst.exclusive or self.inst.exclusive is self: + self.implicit_thread.resume(Cancelled.TRUE) + else: + assert(self.state == Task.State.STARTED) candidates = { t for t in self.threads if t.cancellable } - if candidates: + if self.needs_exclusive() and self.inst.exclusive_thread not in { None, self.implicit_thread }: + candidates.discard(self.implicit_thread) + if candidates and self.inst.may_enter_from(self.caller): self.state = Task.State.CANCEL_DELIVERED + self.inst.enter_from(self.caller) random.choice(list(candidates)).resume(Cancelled.TRUE) - return - self.state = Task.State.PENDING_CANCEL + self.inst.leave_to(self.caller) + else: + self.state = Task.State.PENDING_CANCEL ``` -As handled above, cancellation must avoid running two `needs_exclusive` tasks at -the same time in the corner case where the first task starts and blocks and then -the other task is cancelled. However, a single `needs_exclusive` task that -starts and blocks calling a built-in with `cancellable` set *can* be immediately -resumed. Thus, the `exclusive` lock tracks *which* task is exclusively running -to distinguish these cases. +As handled above, cancellation must additionally avoid resuming a `cancellable` +thread when doing so would violate [Component Invariant] #2 or #3. In +particular, invariant #2 requires not resuming any thread while the task's +containing component instance may not be reentered and invariant #3 requires not +resuming a `needs_exclusive` task's implicit thread while another task's +implicit thread is running exclusively. 
If cancellation cannot be immediately delivered by `Task.request_cancellation`, the request is remembered in `Task.state` and delivered at the next opportunity @@ -1008,86 +999,132 @@ call `task.cancel`. ## Embedding -A WebAssembly Component Model implementation will typically be *embedded* into -a *host* environment. An *embedder* implements the connection between such a -host environment and the WebAssembly semantics as defined by the rest of the -Python definitions below. A full Embedding interface would contain functions -for decoding, validating, instantiating and interrogating components, just like -the [Core WebAssembly Embedding]. However, for the purpose of defining the -runtime behavior of the Canonical ABI, the Embedding interface here just -includes functions for the embedder to: -1. construct a Component Model `Store`, analogous to [`store_init`]ing a Core - WebAssembly [`store`]; -2. `invoke` a Component Model `FuncInst`, analogous to [`func_invoke`]ing a - Core WebAssembly [`funcinst`]; -3. `lift` a Core WebAssembly function into a `FuncInst`, given the `canon lift` - immediates and containing component instance; -4. `lower` a `FuncInst` into a Core WebAssembly function, given the - `canon lower` immediates and containing component instance; and -5. allow a cooperative thread (created during a previous call to `invoke`) to - execute until blocking or exiting. - -In a proper Embedding API, `Store.lift` and `Store.lower` would be replaced by a -single, higher-level `Store.instantiate` function of type `Component -> -ComponentInstance`, analogous to Core WebAssembly's [`module_instantiate`], that -called `Store.lift` for each `canon lift` definition in the component being -instantiated and, similarly, `Store.lower` for each `canon lower` definition. -However, for the purpose of specifying the Canonical ABI, `Store.lift` and -`Store.lower` are sufficient to specify how execution enters and exits component -instances. 
+A WebAssembly Component Model implementation will typically be *embedded* into a +*host* environment. An *embedder* implements the connection between such a host +environment and the Component Model semantics defined here. A full Embedding +interface would contain functions for decoding, validating, instantiating and +interrogating components, just like the [Core WebAssembly Embedding]. However, +the Embedding interface here just covers the subset that is necessary to define +the behavior of the Canonical ABI. + +The Embedding interface is defined as the methods of the `Store` class, which is +the Component Model's version of a Core WebAssembly [`store`]. Defining `Store` +in chunks, the `Store` constructor is analogous to Core WebAssembly +[`store_init`] and defines the initial state of the `Store`: ```python class Store: waiting: list[Thread] + nesting_depth: int def __init__(self): self.waiting = [] - - def invoke(self, f: FuncInst, caller: Optional[Supertask], on_start, on_resolve) -> OnCancel: - host = Supertask() - host.inst = None - host.supertask = caller - return f(on_start, on_resolve, caller = host) - + self.nesting_depth = 0 +``` +The `waiting` field is populated by `Thread` methods, as defined above, and the +`nesting_depth` field is purely a specification device used by `Store` methods +below to define the valid host call interleavings. + +The `Store.invoke` method is analogous to Core WebAssembly's [`func_invoke`] and +takes a `FuncInst` (analogous to a Core WebAssembly [`funcinst`]) along with its +runtime `OnStart` and `OnResolve` arguments (which are described above alongside +their definitions). The `Store.nesting_depth` field tracks whether there are any +active `Store.invoke` calls for the benefit of `Store.tick`, defined below. 
+```python + def invoke(self, f: FuncInst, on_start: OnStart, on_resolve: OnResolve) -> OnCancel: + self.nesting_depth += 1 + on_cancel = f(on_start, on_resolve, caller = None) + self.nesting_depth -= 1 + return on_cancel +``` +The `FuncInst` passed to `Store.invoke` can be either a guest function (produced +by `Store.lift`, defined next) or (in the special case of component re-exports) +a host function. Symmetrically, `FuncInst`s can be called either from the host +(via `Store.invoke`) or core wasm code (via `Store.lower`). `Store.invoke` +passes a `None` `caller` to signal that the host is the caller. + +The `Store.lift` method is called for each `canon lift` definition in a +component to wrap a core wasm `CoreFuncInst` into a component-level `FuncInst`, +passing `canon lift`'s immediate arguments as well as the containing component +instance. Similarly, `Store.lower` is called for each `canon lower` definition +in a component to wrap a component-level `FuncInst` into a core wasm +`CoreFuncInst`. (In a complete Embedding API, `Store.lift` and `Store.lower` +would be replaced by a single, higher-level `Store.instantiate` method of type +`Component -> ComponentInstance`, analogous to Core WebAssembly's +[`module_instantiate`]. But for the Canonical ABI, just `lift` and `lower` are +sufficient to define the relevant ABI behavior.)
+```python CoreFuncInst = Callable[[list[CoreValType]], list[CoreValType]] def lift(self, f: CoreFuncInst, ft: FuncType, opts: CanonicalOptions, inst: ComponentInstance) -> FuncInst: - def func_inst(on_start: OnStart, on_resolve: OnResolve, caller: Supertask) -> OnCancel: - trap_if(call_might_be_recursive(caller, inst)) - return canon_lift(f, ft, opts, inst, on_start, on_resolve, caller) + def func_inst(on_start: OnStart, on_resolve: OnResolve, caller: Optional[ComponentInstance]) -> OnCancel: + assert(not caller or caller is current_instance()) + trap_if(not inst.may_enter_from(caller)) + inst.enter_from(caller) + on_cancel = canon_lift(f, ft, opts, inst, on_start, on_resolve, caller) + inst.leave_to(caller) + return on_cancel return func_inst def lower(self, f: FuncInst, ft: FuncType, opts: CanonicalOptions, inst: ComponentInstance) -> CoreFuncInst: def core_func_inst(args: list[CoreValType]) -> list[CoreValType]: - assert(current_instance() is inst) - return canon_lower(f, ft, opts, args) + assert(inst is current_instance()) + assert(all(not i.may_enter for i in inst.self_and_ancestors())) + results = canon_lower(f, ft, opts, args) + assert(all(not i.may_enter for i in inst.self_and_ancestors())) + return results return core_func_inst - +``` +Before entering a component via core wasm export call, the `FuncInst` wrapper +produced by `Store.lift` traps if entering the component would violate +[Component Invariant] #2, and then records that the instance was entered by +calling `ComponentInstance.enter_from`. The rest of the trampoline is defined by +`canon_lift` below. Importantly though, `canon_lift` will return immediately if +it [blocks], thereby calling `ComponentInstance.leave_to` and allowing +reentrance (via `Store.invoke` or `Store.tick`) without trapping. 
+ +Before temporarily leaving a component via core wasm import call, the +`CoreFuncInst` wrapper produced by `Store.lower` asserts that the `may_enter` +flags of the current component instance and all its ancestors are already +`False` (as set by `ComponentInstance.enter_from` in `Store.lift`). Thus, +by default, reentrance is disallowed. *However*, if the lowered `FuncInst` +callee [blocks] before returning a value and the `canon lower` definition didn't +specify the `async` ABI option (which opts in to the non-blocking async ABI), +`canon_lower` will *block* until the callee returns (via `Thread.wait_until`, +defined above) which will suspend the current thread and return from +`canon_lift` to `Store.lift` which then calls `ComponentInstance.leave_to` to +enable reentrance for as long as `Thread.wait_until` stays blocked. Thus, +in accordance with [Component Invariant] #2, synchronous (blocking) calls to +`async`-typed function imports *may* be reentered during `canon_lower`. + +Lastly, the `Store.tick` method does not have an analogue in Core WebAssembly +but is necessary to enable native concurrency support in the Component Model. +`Store.tick` allows the runtime to nondeterministically resume a thread that +previously [blocked] but is now ready. As defined above, `Thread.resume` will +execute the thread until it either returns or [blocks] again. Thus, each call to +`Store.tick` just allows a single thread to make a single quantum of cooperative +progress and the expectation is that the host heuristically interleaves calls to +`Store.invoke` with calls to `Store.tick` so that concurrent tasks can complete +while new tasks are being started. 
+```python def tick(self): - random.shuffle(self.waiting) - for thread in self.waiting: - if thread.ready(): - thread.resume() - return -``` -The `FuncInst` passed to `Store.invoke` is described above and can represent -either a guest function (produced by `Store.lift`) or (in the special case of a -component re-export of a host import) a host function. `Store.invoke` describes -how a `FuncInst` is invoked by the host, but `FuncInst`s can also be invoked by -guest code that calls the `CoreFuncInst` produced by `Store.lower`. - -When a `FuncInst` is produced by lifting core wasm guest code, it is guarded -by a call to `call_might_be_recursive`, which is described above. - -The `Store.tick` method does not have an analogue in Core WebAssembly and -enables [native concurrency support](Concurrency.md) in the Component Model. The -expectation is that the host will interleave calls to `invoke` with calls to -`tick`, repeatedly calling `tick` until there is no more work to do or the -store is destroyed. The nondeterministic `random.shuffle` indicates that the -embedder is allowed to use any algorithm (involving priorities, fairness, etc) -to choose which thread to schedule next (and hopefully an algorithm more -efficient than the simple polling loop written above). The `Thread.ready` and -`Thread.resume` methods along with how the `waiting` list is populated are all -defined [above](#threads) as part of the `Thread` class. 
+ assert(self.nesting_depth == 0) + assert(all(thread.task.inst.may_enter_from(None) for thread in self.waiting)) + self.nesting_depth += 1 + candidates = { thread for thread in self.waiting if thread.ready() } + if candidates: + thread = random.choice(list(candidates)) + thread.task.inst.enter_from(None) + thread.resume() + thread.task.inst.leave_to(None) + self.nesting_depth -= 1 +``` +As shown above, `Store.nesting_depth` is greater than zero while calling +`Store.invoke` and thus the first `assert` prohibits the host from calling +`Store.tick` during an active `Store.invoke`. This prohibition ensures that the +second `assert` holds, which is that all component instances in the store can be +(re)entered. If this were *not* the case, a random thread might be resumed while +one of its imports' component instances was on the stack and not reenterable, +leading to a spurious trap when it was called. ## Lifting and Lowering Context @@ -3539,7 +3576,7 @@ When instantiating a component instance, the runtime calls `Store.lift` (defined above) to capture the `$callee`, `$ft` and `$opts` immediates of `canon lift` along with the component instance being instantiated. These are then passed into `canon_lift` every time the generated `FuncInst` is called, along with the -runtime `OnStart`, `OnResolve` and `Supertask` arguments. +runtime `on_start`, `on_resolve` and `caller` arguments. Based on this, `canon_lift` is defined in chunks as follows. The whole call executes in a new *implicit thread* defined here by `thread_func`. The first @@ -3604,7 +3641,7 @@ In the `async` non-`callback` ("stackful async") case, there is a single call to the core wasm callee which must return empty core results. Waiting for async I/O happens by the callee synchronously calling built-ins like `waitable-set.wait`. 
Note that, since `Task.enter_implicit_thread` does *not* -acquire the `exclusive` lock for stackful async functions, calls to +acquire the `exclusive_thread` lock for stackful async functions, calls to `waitable-set.wait` made by a stackful async function do not prevent any other threads from starting or resuming in the same component instance. ```python @@ -3623,11 +3660,11 @@ function (specified as a `funcidx` immediate in `canon lift`) until the [packed] = call_and_trap_on_throw(callee, flat_args) code,si = unpack_callback_result(packed) while code != CallbackCode.EXIT: - assert(task.needs_exclusive() and inst.exclusive is task) - inst.exclusive = None + assert(task.needs_exclusive() and inst.exclusive_thread is task.implicit_thread) + inst.exclusive_thread = None match code: case CallbackCode.YIELD: - cancelled = thread.yield_until(lambda: not inst.exclusive, cancellable = True) + cancelled = thread.yield_until(lambda: not inst.exclusive_thread, cancellable = True) if cancelled: event = (EventCode.TASK_CANCELLED, 0, 0) else: @@ -3636,11 +3673,11 @@ function (specified as a `funcidx` immediate in `canon lift`) until the trap_if(not task.may_block()) wset = inst.handles.get(si) trap_if(not isinstance(wset, WaitableSet)) - event = wset.wait_for_event_and(lambda: not inst.exclusive, cancellable = True) + event = wset.wait_for_event_and(lambda: not inst.exclusive_thread, cancellable = True) case _: trap() - assert(inst.exclusive is None) - inst.exclusive = task + assert(inst.exclusive_thread is None) + inst.exclusive_thread = task.implicit_thread event_code, p1, p2 = event [packed] = call_and_trap_on_throw(opts.callback, [event_code, p1, p2]) code,si = unpack_callback_result(packed) @@ -3658,14 +3695,15 @@ If a `Task` is not allowed to block (because it was created for a non-`async`- typed function call and has not yet returned a value), `YIELD` is always a no-op and `WAIT` always traps. 
-The event loop also releases `ComponentInstance.exclusive` (which was acquired -by `Task.enter_implicit_thread`) before potentially suspending the thread to -allow other synchronous and `async callback` tasks to execute in the interim. -However, other synchronous and `async callback` tasks *cannot* execute while -running core wasm called from the event loop as this could break the -non-reentrancy assumptions of the core wasm code. Thus, `async callback` tasks -allow less concurrency than non-`callback` `async` tasks, which entirely ignore -`ComponentInstance.exclusive`. +The event loop releases `ComponentInstance.exclusive_thread` (which was acquired +by `Task.enter_implicit_thread`) before potentially blocking the thread to allow +other `needs_exclusive` tasks to execute in the interim. However, the +`exclusive_thread` lock is held throughout each core wasm invocation from the +event loop to maintain [Component Invariant] #3. Thus, `async callback`-lifted +tasks allow *more* concurrency than synchronously-lifted tasks (which only +release the `exclusive_thread` lock after they've returned) but *less* +concurrency than (stackful) non-`callback` `async`-lifted tasks, which entirely +ignore `exclusive_thread`. The end of `canon_lift` creates a new task/thread pair for the call and then calls `Thread.resume` on the new thread to synchronously transfer control flow @@ -3774,12 +3812,9 @@ owned handles are not dropped before `Subtask.deliver_resolve` is called (below) cx = LiftLowerContext(opts, thread.task.inst, subtask) ``` -The next chunk makes the call to `callee` (which has type `FuncInst`, as -defined in the [Embedding](#embedding) interface). The [current task] serves as -the `Supertask` argument and the `OnStart` and `OnResolve` callback arguments -are defined in terms of the `$opts` immediates of the `canon lower` definition -and the Canonical ABI's `lift_flat_values` and `lower_flat_values` (defined -above). 
+The next chunk makes the call to `callee` using the `opts` immediates of the +`canon lower` definition to configure `lift_flat_values` and `lower_flat_values` +(both defined above) and the current instance as the `caller`. ```python flat_ft = flatten_functype(opts, ft, 'lower') assert(types_match_values(flat_ft.params, flat_args)) @@ -3816,7 +3851,7 @@ above). nonlocal flat_results flat_results = lower_flat_values(cx, max_flat_results, result, ft.result_type(), flat_args) - subtask.on_cancel = callee(on_start, on_resolve, caller = thread.task) + subtask.on_cancel = callee(on_start, on_resolve, caller = thread.task.inst) assert(ft.async_ or subtask.state == Subtask.State.RETURNED) ``` The `Subtask.state` field is updated by the callbacks to keep track of the @@ -3940,16 +3975,12 @@ def canon_resource_drop(rt, i): trap_if(h.num_lends != 0) if h.own: assert(h.borrow_scope is None) - if inst is rt.impl: - if rt.dtor: - rt.dtor(h.rep) - else: - ft = FuncType([U32Type()], [], async_ = False) - dtor = rt.dtor or (lambda rep: []) - opts = CanonicalOptions(async_ = False) - callee = inst.store.lift(dtor, ft, opts, rt.impl) - caller = inst.store.lower(callee, ft, opts, inst) - caller([h.rep]) + opts = CanonicalOptions(async_ = False) + ft = FuncType([U32Type()], [], async_ = False) + dtor = rt.dtor or (lambda rep: []) + callee = inst.store.lift(dtor, ft, opts, rt.impl) + caller = inst.store.lower(callee, ft, opts, inst) + caller([h.rep]) else: h.borrow_scope.num_borrows -= 1 return [] @@ -3961,15 +3992,13 @@ rules to, for example, catch reentrance. Because the type, lifting and lowering are all non-`async`, the destructor may not block. However, the destructor may spawn a cooperative thread that does. 
-Since there are valid reasons to call `resource.drop` in the same component -instance that defined the resource, which would otherwise trap at the -reentrance guard of `Store.lift`, an exception is made when the resource type's -implementation-instance is the same as the current instance (which is -statically known for any given `canon resource.drop`). - -When a destructor isn't present, there is still a trap on recursive reentrance -since this is the caller's responsibility and the presence or absence of a -destructor is an encapsulated implementation detail of the resource type. +In particular, `Store.lift` traps (when `rt.impl.may_enter_from(inst)` returns +`False`) if the call to the destructor would reenter the destructor's instance +in a way that violates [Component Invariant] #2. In the special case where the +`current_instance` is the *same* as the destructor's instance, `may_enter_from` +will always return `True` (because the set of instances being freshly entered is +empty) and so, as one might expect, component instances can `resource.drop` the +owned handles of the resources they implement.
### `canon resource.rep` @@ -5241,7 +5270,7 @@ def canon_thread_available_parallelism(): [Type Definitions]: Explainer.md#type-definitions [`instance` definition]: Explainer.md#instance-definitions [Component Invariant]: Explainer.md#component-invariants -[Component Invariants]: Explainer.md#component-invariants +[Donut Wrapping]: Linking.md#higher-order-shared-nothing-linking-aka-donut-wrapping [JavaScript Embedding]: Explainer.md#JavaScript-embedding [ESM-integration]: Explainer.md#esm-integration [Adapter Functions]: FutureFeatures.md#custom-abis-via-adapter-functions diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md index 0e06e1b2..8c8ea7b4 100644 --- a/design/mvp/Concurrency.md +++ b/design/mvp/Concurrency.md @@ -3,9 +3,7 @@ This document contains a high-level summary of the native concurrency support added as part of [WASI Preview 3], providing background for understanding the definitions in the [WIT], [AST explainer], [binary format] and [Canonical ABI -explainer] documents that are gated by the 🔀 (async) and 🧵 (threading) -emojis. For an even higher-level introduction, see [these][wasmio-2024] -[presentations][wasmio-2025]. +explainer] documents that are gated by the 🔀 (async) and 🧵 (threading) emojis. * [Goals](#goals) * [Summary](#summary) @@ -24,6 +22,7 @@ emojis. For an even higher-level introduction, see [these][wasmio-2024] * [Borrows](#borrows) * [Cancellation](#cancellation) * [Nondeterminism](#nondeterminism) + * [Asynchronous Recursion](#asynchronous-recursion) * [Interaction with the start function](#interaction-with-the-start-function) * [Async ABI](#async-abi) * [Async Import ABI](#async-import-abi) @@ -56,8 +55,8 @@ concurrency-specific goals and use cases: * Allow polyfilling in browsers via JavaScript Promise Integration ([JSPI]) * Avoid partitioning interfaces and components into separate ecosystems based on degree of concurrency; don't give components a "[color]".
-* Maintain meaningful cross-language call stacks (for the benefit of debugging, - logging and tracing). +* Allow runtimes to maintain meaningful cross-language call stacks (for the + benefit of debugging, logging, tracing and profiling). * Consider backpressure and cancellation as part of the design. * Allow non-reentrant synchronous and event-loop-driven core wasm code that assumes a single global linear memory stack to not have to worry about @@ -120,13 +119,14 @@ language's style of concurrency, most `world`s (including `wasi:cli/command`, functions so that the contained Core WebAssembly code is free to block. Implementing a non-`async` function will primarily only arise when a component is *virtualizing* the non-`async` *imports* of a `world` (e.g., the getters and -setters of `wasi:http/types.headers`). In this virtualization scenario (once -functions are allowed to be [recursive](#TODO)), the Canonical ABI and/or Core -WebAssembly [stack-switching] proposal will allow a parent component to -implement a child's non-`async` imports in terms of the parent's `async` -imports in the same manner as [JSPI]. Thus, overall, `async` in WIT and the -Component Model does not behave like a "color" in the sense described by the -popular [What Color Is Your Function?] essay. +setters of `wasi:http/types.headers`). In this more exotic virtualization +scenario, a [future extension](#TODO) could allow a parent component that +imports `async` functions to implement its child's non-`async` imports in the +same manner as [JSPI] in the browser. + +Thus, overall, `async` in WIT and the Component Model does not behave like a +"color" in the sense described by the popular [What Color Is Your Function?] +essay. Each time a component export is called, the wasm runtime logically spawns a new [green thread] (as opposed to a [kernel thread]) to execute the export call @@ -190,18 +190,23 @@ immediately block. 
This backpressure mechanism provides the basis for how the sync and async ABIs interoperate: 1. If a component calls an import using the async ABI, and the import is - implemented by a component using the sync ABI, and the callee blocks, - execution is immediately transferred back to the caller (as required by the - async ABI) and the callee's component instance is marked "suspended". -2. If another async call attempts to start in a "suspended" component instance, - the Component Model automatically makes the call block, the same way as when - backpressure is active. + implemented by a component using the sync ABI, the callee first acquires + an "exclusive" lock on the component instance and then starts executing. If + the callee blocks, execution is immediately transferred back to the caller + (as required by the async ABI). +2. If another async call attempts to start in this same component instance, the + callee immediately blocks when acquiring the "exclusive" lock, waiting for the + previous call to return and release the lock. Note that because functions without `async` in their type are not allowed to -block, non-`async` functions do not check for backpressure or suspension; they -always run synchronously. Components exporting a mix of `async` and non-`async` +block, non-`async` functions do not attempt to acquire the "exclusive" lock; +they just barge in. Components exporting a mix of `async` and non-`async` functions (which again mostly only arises in the more advanced virtualization -scenarios) must thus take care to handle non-`async` reentrance gracefully. +scenarios) must therefore take care to handle the "barge-in" case gracefully. +Because this nested non-`async` call will complete synchronously without +blocking, this behavior does not break [Component Invariant] #3: a single +global shadow stack can still be (re)used in a LIFO manner, much like a +traditional signal handler. 
Lastly, WIT is extended with two new type constructors—`future` and `stream`—to allow new WIT interfaces to explicitly represent concurrency in @@ -314,17 +319,11 @@ of the new subtask created for the import call. Thus, one reason for associating every thread with a "containing task" is to ensure that there is always a well-defined async call stack. -A semantically-observable use of the async call stack is to distinguish between -hazardous **recursive reentrance**, in which a component instance is reentered -when one of its tasks is already on the callstack, from business-as-usual -**sibling reentrance**, in which a component instance is reentered for the -first time on a particular async call stack. Recursive reentrance currently -always traps, but will be allowed (and indicated to core wasm) in an opt-in -manner in the [future](#TODO). - -The async call stack is also useful for non-semantic purposes such as providing -backtraces when debugging, profiling and tracing. While particular languages -can and do maintain their own async call stacks in core wasm state, without the +The async call stack is not currently observable to running components, except +that it may nondeterministically appear as part of the callstack stored in +`error-context` 📝. Instead, the async call stack is meant to provide better +backtraces when debugging, profiling and tracing. While particular languages can +and do maintain their own async call stacks in core wasm state, without the Component Model's async call stack, linkage *between* different languages would be lost at component boundaries, leading to a loss of overall context in multi-component applications. @@ -489,7 +488,11 @@ all of which are described above or below in more detail: [`subtask.cancel`](#cancellation) built-in At each of these points, the [current thread](#current-thread-and-task) will be -suspended and execution will transfer to a caller's thread, if there is one. +suspended.
Execution transfers to a caller's thread if there is one, or +otherwise back to the runtime, which may invoke new component exports or +nondeterministically resume a cooperative thread that is ready to run. Thus, +each of these is a **cooperative yield point**. + Additionally, each of these potentially-blocking operations will trap if the [current task's function type](#current-thread-and-task) does not declare the `async` effect, since only `async`-typed functions are allowed to block. As an @@ -663,14 +666,15 @@ instead of a boolean flag, unrelated pieces of code can report backpressure for distinct limited resources without prior coordination. In addition to *explicit* backpressure set by wasm code, there is also an -*implicit* source of backpressure used to protect non-reentrant core wasm code. -In particular, when an export uses the sync ABI or the stackless async ABI, a -component-instance-wide lock is implicitly acquired every time core wasm is -executed. By returning to the event loop after every event (instead of once at -the end of the task), stackless async exports release the lock between every -event, allowing a higher degree of concurrency than synchronous exports. -Stackful async exports ignore the lock entirely and thus achieve the highest -degree of (cooperative) concurrency. +*implicit* source of backpressure to ensure [Component Invariant] #3 and protect +non-reentrant core wasm code. In particular, when an `async`-typed export is +lifted with the sync ABI or the stackless async ABI, a component-instance-wide +lock is implicitly acquired every time core wasm is executed. By returning to +the event loop after every event (instead of once at the end of the task), +stackless async exports release the lock between every event, allowing a higher +degree of concurrency than synchronous exports. Stackful async exports ignore +the lock entirely and thus achieve the highest degree of (cooperative) +concurrency.
Since non-`async` functions are not allowed to block (including due to backpressure) and also don't pile up like `async` functions, non-`async` @@ -851,6 +855,104 @@ Despite the above, the following scenarios do behave deterministically: (modulo any nondeterministic execution that determines the ordering in which the operations are performed). +### Asynchronous Recursion + +Even without concurrency support, it is possible to reenter a component instance +by recursively calling the component's export from a function called by the +component's import. For example, given a component importing `imp` and exporting +`exp`, using the [JS API], JS code could write: +```js +import source component from './component.wasm'; +var instance; +function imp() { + instance.exports.exp(); +} +instance = WebAssembly.instantiate(component, { imp }); +instance.exports.exp(); // exp ~~> imp ~~> exp +``` +To relieve generic bindings generators and component authors from having to +conservatively assume that *every* import call might reenter in this manner, +the Component Model has [Component Invariant] #2. This is enforced by the +[Canonical ABI](CanonicalABI.md#embedding) using strategically placed traps and +boolean flags on component instances. + +With Preview 3, a desirable outcome is that if our component imports `imp` and +exports `exp` as `async` functions, then the following JS code could run the +two `exp` calls concurrently just like if they were JS `async` functions: +```js +import source component from './component.wasm'; +async function imp() { + await ... some Web API I/O +} +instance = WebAssembly.instantiate(component, { imp }); +await Promise.all([ + instance.exports.exp(), + instance.exports.exp() +]); +``` +In particular, if `exp` transitively awaits `imp`, then when `imp` blocks (via +`await`), control flow returns to the top-level JS script with `instance` in a +reenterable state, so that `exp` can be concurrently invoked a second time. 
+ +However, this also means that if we slightly change our original recursive +example to use `async` and then `await` before attempting to reenter `instance`, +there is no trap. The first `await` in `imp` returns to top-level, leaving +`instance` in a reenterable state, so when `imp` is later resumed from the event +loop, it is allowed to reenter `exp`. +```js +import source component from './component.wasm'; +var instance; +async function imp() { + await Promise.resolve(); + await instance.exports.exp(); +} +instance = WebAssembly.instantiate(component, { imp }); +await instance.exports.exp(); // exp ~~> imp ~~> exp +``` +The hazard with this example is that if the outer call to `exp` internally grabs +and holds a lock while awaiting the call to `imp`, and if the recursive call to +`exp` waits to acquire the same lock, there will be a deadlock. In the preceding +`async` example, since there is no circular dependency between the two calls to +`exp`, the second call can simply wait for the first to release any lock it +holds. + +A concrete example of this hazard is the implicit per-component-instance lock +taken and released by [backpressure](#backpressure). E.g., if `component` lifts +`exp` synchronously (which triggers implicit backpressure while a call to `exp` +is running), the recursive call to `exp` will immediately deadlock. + +Unfortunately, it's not possible to reliably discriminate the two cases so that +the second example traps (as it did in the synchronous case) while the first +example succeeds. Given the Component Model's well-defined [async call +stack](#subtasks-and-supertasks), it might seem possible to tell the cases apart +by checking whether `instance` is already *on the call stack* when attempting to +enter `exp`. 
However, this doesn't work for two reasons: + +First, to properly detect asynchronous recursion, the host embedding would have +to maintain something analogous to the Component Model's async call stack, which +some hosts (including, currently, browsers) simply do not have a well-defined +way to do. + +Second, the async call stack is neither necessary nor sufficient to catch these +kinds of asynchronous recursive deadlocks. The async call stack tracks the +*causality* leading up to a call, which is useful for debugging, tracing, +profiling, etc., but the async call stack doesn't imply that every call on the +stack is blocking on the result of the next call in the chain (unlike with a +synchronous call stack, which does imply this). Moreover, the async call stack +can arbitrarily reset through indirect forms of asynchronous calls (e.g., host +APIs with callbacks like, in a browser, `setTimeout`), so the absence of +recursion on the async call stack does not guarantee the absence of a circular +asynchronous dependency. + +Thus, the Canonical ABI rules don't attempt to distinguish the different kinds +of asynchronous reentrance. It is thus the responsibility of component clients +to avoid async recursion. Fortunately, in component-to-component compositions, +this kind of recursion is only possible when doing advanced higher-order linking +(aka [donut wrapping]). And unlike [Component Invariant] #2, which directly +impacts bindings generators, async recursion only arises when there's +[blocking](#blocking) and so it's already necessary to support (non-recursive) +reentrance. 
+ ## Interaction with the start function @@ -1116,7 +1218,7 @@ with `...` to focus on the overall flow of function calls: ;; requires 🚟 for the stackful abi (canon lower $fetch async (memory $mem) (realloc $realloc) (core func $fetch')) (canon waitable-set.new (core func $new)) - (canon waitable-set.wait async (memory $mem) (core func $wait)) + (canon waitable-set.wait (memory $mem) (core func $wait)) (canon waitable.join (core func $join)) (canon task.return (result string) (memory $mem) (core func $task_return)) (core instance $main (instantiate $Main (with "" (instance @@ -1228,7 +1330,7 @@ core wasm code between events, not externally-visible behavior. (canon lower $fetch async (memory $mem) (realloc $realloc) (core func $fetch')) (canon waitable-set.new (core func $new)) (canon waitable.join (core func $join)) - (canon task.return (result string) async (memory $mem) (realloc $realloc) (core func $task_return)) + (canon task.return (result string) (memory $mem) (core func $task_return)) (core instance $main (instantiate $Main (with "" (instance (export "mem" (memory $mem)) (export "realloc" (func $realloc)) @@ -1373,21 +1475,20 @@ comes after: type to block during instantiation * add an `async` effect on `resource` type definitions allowing a resource type to block during its destructor -* `recursive` function type attribute: allow a function to opt in to - recursive [reentrance], extending the ABI to link the inner and - outer activations +* allow a parent component to perform [JSPI]-like suspension of the sync calls + of its child components, thereby allowing the parent to implement the child's + sync import calls in terms of the parent's `async` imports. 
* add a `strict-callback` option that adds extra trapping conditions to provide the semantic guarantees needed for engines to statically avoid fiber creation at component-to-component `async` call boundaries +* allow function closures to be passed as first-class values, supporting the + "callback" pattern in many pre-existing APIs, including Web APIs * allow pipelining multiple `stream.read`/`write` calls * allow chaining multiple async calls together ("promise pipelining") * integrate with `shared`: define how to lift and lower functions `async` *and* `shared` -[wasmio-2024]: https://www.youtube.com/watch?v=y3x4-nQeXxc -[wasmio-2025]: https://www.youtube.com/watch?v=mkkYNw8gTQg - [Color]: https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/ [What Color Is Your Function?]: https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/ [Weak Memory Model]: https://people.mpi-sws.org/~rossberg/papers/Watt,%20Rossberg,%20Pichon-Pharabod%20-%20Weakening%20WebAssembly%20[Extended].pdf @@ -1420,6 +1521,7 @@ comes after: [AST Explainer]: Explainer.md [Canonical Built-in]: Explainer.md#canonical-built-ins +[Component Invariant]: Explainer.md#component-invariants [`context.get`]: Explainer.md#-contextget [`context.set`]: Explainer.md#-contextset [`backpressure.inc`]: Explainer.md#-backpressureinc-and-backpressuredec @@ -1441,6 +1543,7 @@ comes after: [`{stream,future}.new`]: Explainer.md#-streamnew-and-futurenew [`{stream,future}.{read,write}`]: Explainer.md#-streamread-and-streamwrite [`stream.cancel-write`]: Explainer.md#-streamcancel-read-streamcancel-write-futurecancel-read-and-futurecancel-write +[Donut Wrapping]: Linking.md#higher-order-shared-nothing-linking-aka-donut-wrapping [Canonical ABI Explainer]: CanonicalABI.md [specified in terms of]: CanonicalABI.md#stack-switching @@ -1457,8 +1560,8 @@ comes after: [Binary Format]: Binary.md [WIT]: WIT.md [Blast Zone]: FutureFeatures.md#blast-zones -[Reentrance]: 
Explainer.md#component-invariants [`start`]: Explainer.md#start-definitions +[JS API]: Explainer.md#JS-API [Store]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-store [Deterministic Profile]: https://webassembly.github.io/spec/versions/core/WebAssembly-3.0-draft.pdf#subsubsection*.798 diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md index a79d089a..1398db7f 100644 --- a/design/mvp/Explainer.md +++ b/design/mvp/Explainer.md @@ -2877,14 +2877,17 @@ start being rejected some time after [WASI Preview 3] is released. ## Component Invariants -As a consequence of the shared-nothing design described above, all calls into -or out of a component instance necessarily transit through a component function -definition. Thus, component functions form a "membrane" around the collection -of core module instances contained by a component instance, allowing the -Component Model to establish invariants that increase optimizability and -composability in ways not otherwise possible in the shared-everything setting -of Core WebAssembly. The Component Model proposes establishing the following -two runtime invariants: +Component validation rules only allow a component to import and export +component-level functions, not Core WebAssembly functions. Because component- +level functions can only be produced or consumed by Canonical ABI [`lift` and +`lower` definitions](#canonical-definitions), which effectively define +[trampolines] into and out of Core WebAssembly code, the Component Model is able +to define and enforce invariants that component authors and producer toolchains +can depend on. This is analogous to the invariants provided by a traditional +Operating System to user-space code running inside a process. + +In particular, the Component Model maintains the following invariants: + 1. Components define a "lockdown" state that prevents continued execution after a trap.
This both prevents continued execution with corrupt state and also allows more-aggressive compiler optimizations (e.g., store reordering). @@ -2894,13 +2897,21 @@ two runtime invariants: implicitly checked at every execution step by component functions. Thus, after a trap, it's no longer possible to observe the internal state of a component instance. -2. The Component Model disallows reentrance by trapping if a callee's - component-instance is already on the stack when the call starts. - (For details, see [`call_might_be_recursive`](CanonicalABI.md#component-instances) - in the Canonical ABI explainer.) This default prevents obscure - composition-time bugs and also enables more-efficient non-reentrant - runtime glue code. This rule will be relaxed by an opt-in - function type attribute in the [future](Concurrency.md#todo). + +2. Components can only be reentered (via component export or thread resumption) + when they explicitly [block] or call a [donut wrapped] child component. Calls + to non-`async` functions do *not* count as "blocking" nor do non-blocking + (`async`-lowered) calls to `async` functions. Thus, bindings generators and + component authors do not need to always safely handle reentrance at all + import call sites. (In the [future](Concurrency.md#TODO), support for + first-class functions (as parameter and result values) would loosen this + restriction in an explicit opt-in manner.) + +3. To ease adoption, unless a component opts in (via "stackful" lift 🚟 or + cooperative threads 🧵), all core wasm execution inside a component instance + is locally serialized (via automatic backpressure applied at export calls) so + that producer toolchains can continue to use a single global linear memory + shadow stack that is pushed and popped in LIFO order.
## JavaScript Embedding @@ -3219,6 +3230,7 @@ For some use-case-focused, worked examples, see: [Universal Types]: https://en.wikipedia.org/wiki/System_F [Existential Types]: https://en.wikipedia.org/wiki/System_F [Unit]: https://en.wikipedia.org/wiki/Unit_type +[Trampolines]: https://en.wikipedia.org/wiki/Trampoline_(computing) [Generative]: https://www.researchgate.net/publication/2426300_A_Syntactic_Theory_of_Type_Generativity_and_Sharing [Avoidance Problem]: https://counterexamples.org/avoidance.html @@ -3241,6 +3253,7 @@ For some use-case-focused, worked examples, see: [Strongly-unique]: #name-uniqueness +[Donut Wrapped]: Linking.md#higher-order-shared-nothing-linking-aka-donut-wrapping [Adapter Functions]: FutureFeatures.md#custom-abis-via-adapter-functions [Canonical ABI explainer]: CanonicalABI.md [`canon_context_get`]: CanonicalABI.md#-canon-contextget @@ -3303,6 +3316,7 @@ For some use-case-focused, worked examples, see: [Resolved]: Concurrency.md#cancellation [Cancellation]: Concurrency.md#cancellation [Cancelled]: Concurrency.md#cancellation +[Block]: Concurrency.md#blocking [Component Model Documentation]: https://component-model.bytecodealliance.org [`wizer`]: https://github.com/bytecodealliance/wizer diff --git a/design/mvp/Linking.md b/design/mvp/Linking.md index e79f87b0..293dfc4c 100644 --- a/design/mvp/Linking.md +++ b/design/mvp/Linking.md @@ -99,6 +99,169 @@ content-hash of common modules or components by placing them in separate OCI the first layer of the OCI Wasm Artifact. +## Higher-order Shared-Nothing Linking (aka "donut wrapping") + +When using shared-nothing linking, the Component Model allows a traditional +"first-order" style of linking wherein one component's exports are supplied as +the imports of another component. This kind of linking captures the traditional +developer experience of package managers and package dependencies. + +In WAT, a "first-order" dependency from `B` to `A` looks like: +```wat +(component $A + ... 
+ (export "foo" (func $foo-internal) (func (result string))) +) +``` +```wat +(component $B + (import "A" (instance + (export "foo" (func $foo-internal (result string))) + )) + ... +) +``` + +`A` can be linked to `B` either directly by the host (e.g., in browsers, using +`WebAssembly.instantiate` or [ESM-integration]) or by another parent component. +For example, the following parent component `P` links `A` and `B` together: +```wat +(component $P + (import "A" (component $A (export "foo" (func (result string))))) + (import "B" (component $B (import "A" (instance (export "foo" (func (result string))))))) + (instance $a (instantiate $A)) + (instance $b (instantiate $B (with "A" (instance $a)))) +) +``` +Note that `P` is the "parent" of `A` and `B` because `P` `instantiate`s `A` and +`B`. Whether `P` physically contains the bytecode defining `A` and `B` (as +nested `(component ...)` definitions) or `import`s the `component` definitions, +as shown here, is an orthogonal *bundling* choice that does not affect runtime +behavior (as long as the bytecode is the same in the end). + +When `P` is instantiated, the resulting 3 component instances can be visualized +as nested boxes: +``` ++---------------+ +| P | +| +---+ +---+ | +| | A |-->| B | | +| +---+ +---+ | ++---------------+ +``` +Since `A` and `B` can themselves have child components, boxes can nest and form +a tree. And since `instantiate` can refer to any preceding definition in the +component, the linkage within a single box forms a Directed Acyclic Graph (DAG). + +With simpler "first-order" shared-nothing linking, the definitions of parent +components like `P` only contain component-level "linking" definitions +(like `import`, `export`, `alias`, `instance`) and not any Core WebAssembly +"implementation" definitions (like `canon lift` and `canon lower`). Thus `P` +disappears at runtime, with the compiler baking all of `P`'s linkage information +into the generated code and metadata. 
However, there is nothing to prevent +parent components from including *both* "linking" and "implementation" +definitions. + +For example, a parent component `Q` can link a child component `C` to its own +lifted and lowered core wasm modules `M1` and `M2` as follows: +```wat +(component $Q + (import "C" (component $C + (import "foo" (func (result string))) + (export "bar" (func (result string))) + )) + (core module $M1 + ... + (export "foo-impl" (func ...)) + ) + (core instance $m1 (instantiate $M1)) + (canon lift (core func $m1 "foo-impl") (func $foo-impl (result string))) + (instance $c (instantiate $C (with "foo" (func $foo-impl)))) + (canon lower (func $c "bar") (core func $bar)) + (core module $M2 + (import "c" "bar" (func ...)) + ... + ) + (core instance $m2 (instantiate $M2 (with "c" (instance (export "bar" (func $bar)))))) +) +``` +This new, more complex instance graph can be represented diagrammatically as: +``` ++----------------------------------------------------+ +| Q | +| +-----------+ +---+ +-----------+ | +| | M1 (in Q) |--lift-->| C |--lower-->| M2 (in Q) | | +| +-----------+ +---+ +-----------+ | ++----------------------------------------------------+ +``` +The informal term **donut wrapping** is used to describe this more advanced kind +of linking where `Q` is the "donut" with a `C`-shaped donut hole in the middle +and with `M1` and `M2` serving as the toroidal dough. (In general, parent +components can have many child instances, arbitrarily linked together and to the +internal `lift` and `lower` definitions of the parent, so perhaps a different +metaphor than "donut" would be appropriate.) + +Because parent components control all linkage of their children's imports and +exports, donut wrapping allows a parent component to run its own Core +WebAssembly code on all paths into and out of all child components, allowing the +parent to arbitrarily *virtualize* the execution environment of its child +components. 
This is analogous to how a traditional operating system kernel can +control how and when its user-space processes run and what happens when they +make syscalls. + +What is particularly powerful about donut wrapping is that, since `M1` and `M2` +are both inside the same component instance, they can be linked together +directly (without intervening `lift` and `lower` definitions) which allows them +to share arbitrary Core WebAssembly definitions (like functions, linear memory, +tables and globals). For example, extending the above definition of `$Q`, `$M1` +could export its `memory` and `funcref` `table` directly to `$M2`: +```wat +(component $Q + ... + (core module $M1 + ... + (memory $mem 0) + (table $ftbl 0 funcref) + (export "mem" (memory $mem)) + (export "ftbl" (table $ftbl)) + ) + (core instance $m1 (instantiate $M1)) + ... + (core module $M2 + (import "m1" "mem" (memory 0)) + (import "m1" "ftbl" (table 0 funcref)) + ... + ) + (core instance $m2 (instantiate $M2 (with "m1" (instance $m1)))) + ... +) +``` + +Once `M1` and `M2` share linear memory and table state, `M2` can import the +`canon lower`ed exports of the child component `C` and store them into `ftbl`, +so that `M1` can call `C`'s exports via `call_indirect`. This provides `Q` the +flexibility to put *all* its core wasm code in `M1` (using `M2` to only do +`funcref`-plumbing), which is convenient. But this also allows `M1` to attempt +to reenter `C` while `C` is calling an import of `M1`, which would violate +[Component Invariant] #2. To prevent this, the Canonical ABI must place runtime +guards in `lift` that trap if `M1` tries to recursively reenter `C`. + +Similarly, donut wrapping allows `Q` to both define resource types that are +imported by `C` and consume resource types that are defined by `C`. This allows +`Q` to create ownership cycles with `C` which may lead to resource leaks that +would normally be prevented in non-donut-wrapping cases by the acyclicity of +component instantiation. 
+ +In both of the above problematic cases, the parent is responsible for "closing +the loop" to create the cycle and thus any bugs arising from cycles are, by +default, bugs in the parent. This asymmetry reflects the fact that, when +donut-wrapping, the parent component is taking on part of the role of the "host" +with the child component being the "guest". This is an asymmetric relationship +that gives the host greater power over the guest (e.g., to virtualize the +guest's execution environment), but with this greater power comes greater +responsibility to avoid creating cycles with the guest. + + ## Fully-runtime dynamic linking While many use cases for dynamic linking are covered by what is described @@ -159,6 +322,7 @@ post-Preview 2 features of WIT and the Component Model.) [WIT]: WIT.md [`depname`]: Explainer.md#import-and-export-definitions [`hashname`]: Explainer.md#import-and-export-definitions +[Component Invariant]: Explainer.md#component-invariants [WebAssembly/tool-conventions]: https://github.com/WebAssembly/tool-conventions [WebAssembly Object File]: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py index 85cfd3cf..1915bd95 100644 --- a/design/mvp/canonical-abi/definitions.py +++ b/design/mvp/canonical-abi/definitions.py @@ -193,10 +193,11 @@ class ComponentInstance: parent: Optional[ComponentInstance] handles: Table[ResourceHandle | Waitable | WaitableSet | ErrorContext] threads: Table[Thread] + may_enter: bool may_leave: bool backpressure: int num_waiting_to_enter: int - exclusive: Optional[Task] + exclusive_thread: Optional[Thread] def __init__(self, store, parent = None): assert(parent is None or parent.store is store) @@ -204,40 +205,41 @@ def __init__(self, store, parent = None): self.parent = parent self.handles = Table() self.threads = Table() + self.may_enter = True self.may_leave = True self.backpressure = 0 
self.num_waiting_to_enter = 0 - self.exclusive = None - - def reflexive_ancestors(self) -> set[ComponentInstance]: - s = set() - inst = self - while inst is not None: - s.add(inst) - inst = inst.parent - return s + self.exclusive_thread = None - def is_reflexive_ancestor_of(self, other): - while other is not None: - if self is other: - return True - other = other.parent - return False + def may_enter_from(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + if not inst.may_enter: + return False + return True -class Supertask: - inst: Optional[ComponentInstance] - supertask: Optional[Supertask] + def enter_from(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + assert(inst.may_enter) + inst.may_enter = False -def call_might_be_recursive(caller: Supertask, callee_inst: ComponentInstance): - if caller.inst is None: - while caller is not None: - if caller.inst and caller.inst.reflexive_ancestors() & callee_inst.reflexive_ancestors(): - return True - caller = caller.supertask - return False - else: - return (caller.inst.is_reflexive_ancestor_of(callee_inst) or - callee_inst.is_reflexive_ancestor_of(caller.inst)) + def leave_to(self, caller: Optional[ComponentInstance]): + for inst in self.entering_set(caller): + assert(not inst.may_enter) + inst.may_enter = True + + def entering_set(self, caller: Optional[ComponentInstance]) -> set[ComponentInstance]: + if caller: + return self.self_and_ancestors() - caller.self_and_ancestors() + else: + return self.self_and_ancestors() + + def self_and_ancestors(self) -> set[ComponentInstance]: + s = { self } + ancestor = self.parent + while ancestor is not None: + s.add(ancestor) + ancestor = ancestor.parent + return s ## Concurrency @@ -429,9 +431,9 @@ def yield_to(self, cancellable, other: Thread) -> Cancelled: OnStart = Callable[[], list[any]] OnResolve = Callable[[Optional[list[any]]], None] OnCancel = Callable[[], None] -FuncInst = Callable[[OnStart, OnResolve, 
Supertask], OnCancel] +FuncInst = Callable[[OnStart, OnResolve, Optional[ComponentInstance]], OnCancel] -class Task(Supertask): +class Task: class State(Enum): INITIAL = 1 STARTED = 2 @@ -444,22 +446,22 @@ class State(Enum): inst: ComponentInstance on_start: OnStart on_resolve: OnResolve - supertask: Supertask + caller: Optional[ComponentInstance] state: State num_borrows: int - waiting_to_enter: Optional[Thread] + implicit_thread: Optional[Thread] threads: list[Thread] - def __init__(self, ft, opts, inst, on_start, on_resolve, supertask): + def __init__(self, ft, opts, inst, on_start, on_resolve, caller): self.ft = ft self.opts = opts self.inst = inst self.on_start = on_start self.on_resolve = on_resolve - self.supertask = supertask + self.caller = caller self.state = Task.State.INITIAL self.num_borrows = 0 - self.waiting_to_enter = None + self.implicit_thread = None self.threads = [] def needs_exclusive(self): @@ -471,24 +473,22 @@ def may_block(self): def enter_implicit_thread(self): assert(self.state == Task.State.INITIAL) - thread = current_thread() + self.implicit_thread = current_thread() if self.ft.async_: def has_backpressure(): return (self.inst.backpressure > 0 or - (self.needs_exclusive() and self.inst.exclusive is not None)) + (self.needs_exclusive() and self.inst.exclusive_thread is not None)) if has_backpressure() or self.inst.num_waiting_to_enter > 0: self.inst.num_waiting_to_enter += 1 - self.waiting_to_enter = thread - cancelled = thread.wait_until(lambda: not has_backpressure(), cancellable = True) - self.waiting_to_enter = None + cancelled = self.implicit_thread.wait_until(lambda: not has_backpressure(), cancellable = True) self.inst.num_waiting_to_enter -= 1 if cancelled: self.cancel() return False if self.needs_exclusive(): - assert(self.inst.exclusive is None) - self.inst.exclusive = self - self.register_thread(thread) + assert(self.inst.exclusive_thread is None) + self.inst.exclusive_thread = self.implicit_thread + 
self.register_thread(self.implicit_thread) return True def register_thread(self, thread): @@ -498,10 +498,11 @@ def register_thread(self, thread): thread.index = self.inst.threads.add(thread) def exit_implicit_thread(self): - self.unregister_thread(current_thread()) + assert(current_thread() is self.implicit_thread) + self.unregister_thread(self.implicit_thread) if self.ft.async_ and self.needs_exclusive(): - assert(self.inst.exclusive is self) - self.inst.exclusive = None + assert(self.inst.exclusive_thread is self.implicit_thread) + self.inst.exclusive_thread = None def unregister_thread(self, thread): assert(thread in self.threads and thread.task is self) @@ -513,18 +514,22 @@ def unregister_thread(self, thread): self.inst.threads.remove(thread.index) def request_cancellation(self): + assert(not self.caller or self.caller is current_instance()) if self.state == Task.State.INITIAL: self.state = Task.State.CANCEL_DELIVERED - self.waiting_to_enter.resume(Cancelled.TRUE) - return - assert(self.state == Task.State.STARTED) - if not self.needs_exclusive() or not self.inst.exclusive or self.inst.exclusive is self: + self.implicit_thread.resume(Cancelled.TRUE) + else: + assert(self.state == Task.State.STARTED) candidates = { t for t in self.threads if t.cancellable } - if candidates: + if self.needs_exclusive() and self.inst.exclusive_thread not in { None, self.implicit_thread }: + candidates.discard(self.implicit_thread) + if candidates and self.inst.may_enter_from(self.caller): self.state = Task.State.CANCEL_DELIVERED + self.inst.enter_from(self.caller) random.choice(list(candidates)).resume(Cancelled.TRUE) - return - self.state = Task.State.PENDING_CANCEL + self.inst.leave_to(self.caller) + else: + self.state = Task.State.PENDING_CANCEL def deliver_pending_cancel(self, cancellable) -> bool: if cancellable and self.state == Task.State.PENDING_CANCEL: @@ -554,36 +559,50 @@ def cancel(self): class Store: waiting: list[Thread] + nesting_depth: int def __init__(self): 
self.waiting = [] + self.nesting_depth = 0 - def invoke(self, f: FuncInst, caller: Optional[Supertask], on_start, on_resolve) -> OnCancel: - host = Supertask() - host.inst = None - host.supertask = caller - return f(on_start, on_resolve, caller = host) + def invoke(self, f: FuncInst, on_start: OnStart, on_resolve: OnResolve) -> OnCancel: + self.nesting_depth += 1 + on_cancel = f(on_start, on_resolve, caller = None) + self.nesting_depth -= 1 + return on_cancel CoreFuncInst = Callable[[list[CoreValType]], list[CoreValType]] def lift(self, f: CoreFuncInst, ft: FuncType, opts: CanonicalOptions, inst: ComponentInstance) -> FuncInst: - def func_inst(on_start: OnStart, on_resolve: OnResolve, caller: Supertask) -> OnCancel: - trap_if(call_might_be_recursive(caller, inst)) - return canon_lift(f, ft, opts, inst, on_start, on_resolve, caller) + def func_inst(on_start: OnStart, on_resolve: OnResolve, caller: Optional[ComponentInstance]) -> OnCancel: + assert(not caller or caller is current_instance()) + trap_if(not inst.may_enter_from(caller)) + inst.enter_from(caller) + on_cancel = canon_lift(f, ft, opts, inst, on_start, on_resolve, caller) + inst.leave_to(caller) + return on_cancel return func_inst def lower(self, f: FuncInst, ft: FuncType, opts: CanonicalOptions, inst: ComponentInstance) -> CoreFuncInst: def core_func_inst(args: list[CoreValType]) -> list[CoreValType]: - assert(current_instance() is inst) - return canon_lower(f, ft, opts, args) + assert(inst is current_instance()) + assert(all(not i.may_enter for i in inst.self_and_ancestors())) + results = canon_lower(f, ft, opts, args) + assert(all(not i.may_enter for i in inst.self_and_ancestors())) + return results return core_func_inst def tick(self): - random.shuffle(self.waiting) - for thread in self.waiting: - if thread.ready(): - thread.resume() - return + assert(self.nesting_depth == 0) + assert(all(thread.task.inst.may_enter_from(None) for thread in self.waiting)) + self.nesting_depth += 1 + candidates = { thread 
for thread in self.waiting if thread.ready() } + if candidates: + thread = random.choice(list(candidates)) + thread.task.inst.enter_from(None) + thread.resume() + thread.task.inst.leave_to(None) + self.nesting_depth -= 1 ## Lifting and Lowering Context @@ -2128,11 +2147,11 @@ def thread_func(): [packed] = call_and_trap_on_throw(callee, flat_args) code,si = unpack_callback_result(packed) while code != CallbackCode.EXIT: - assert(task.needs_exclusive() and inst.exclusive is task) - inst.exclusive = None + assert(task.needs_exclusive() and inst.exclusive_thread is task.implicit_thread) + inst.exclusive_thread = None match code: case CallbackCode.YIELD: - cancelled = thread.yield_until(lambda: not inst.exclusive, cancellable = True) + cancelled = thread.yield_until(lambda: not inst.exclusive_thread, cancellable = True) if cancelled: event = (EventCode.TASK_CANCELLED, 0, 0) else: @@ -2141,11 +2160,11 @@ def thread_func(): trap_if(not task.may_block()) wset = inst.handles.get(si) trap_if(not isinstance(wset, WaitableSet)) - event = wset.wait_for_event_and(lambda: not inst.exclusive, cancellable = True) + event = wset.wait_for_event_and(lambda: not inst.exclusive_thread, cancellable = True) case _: trap() - assert(inst.exclusive is None) - inst.exclusive = task + assert(inst.exclusive_thread is None) + inst.exclusive_thread = task.implicit_thread event_code, p1, p2 = event [packed] = call_and_trap_on_throw(opts.callback, [event_code, p1, p2]) code,si = unpack_callback_result(packed) @@ -2222,7 +2241,7 @@ def on_resolve(result): nonlocal flat_results flat_results = lower_flat_values(cx, max_flat_results, result, ft.result_type(), flat_args) - subtask.on_cancel = callee(on_start, on_resolve, caller = thread.task) + subtask.on_cancel = callee(on_start, on_resolve, caller = thread.task.inst) assert(ft.async_ or subtask.state == Subtask.State.RETURNED) if not opts.async_: @@ -2268,16 +2287,12 @@ def canon_resource_drop(rt, i): trap_if(h.num_lends != 0) if h.own: 
assert(h.borrow_scope is None) - if inst is rt.impl: - if rt.dtor: - rt.dtor(h.rep) - else: - ft = FuncType([U32Type()], [], async_ = False) - dtor = rt.dtor or (lambda rep: []) - opts = CanonicalOptions(async_ = False) - callee = inst.store.lift(dtor, ft, opts, rt.impl) - caller = inst.store.lower(callee, ft, opts, inst) - caller([h.rep]) + opts = CanonicalOptions(async_ = False) + ft = FuncType([U32Type()], [], async_ = False) + dtor = rt.dtor or (lambda rep: []) + callee = inst.store.lift(dtor, ft, opts, rt.impl) + caller = inst.store.lower(callee, ft, opts, inst) + caller([h.rep]) else: h.borrow_scope.num_borrows -= 1 return [] diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py index 10936698..6217f8b7 100644 --- a/design/mvp/canonical-abi/run_tests.py +++ b/design/mvp/canonical-abi/run_tests.py @@ -53,22 +53,21 @@ def mk_cx(memory = MemInst(bytearray(), 'i32'), encoding = 'utf8', realloc = Non def lift_and_run(opts, inst, ft, callee, on_start, on_resolve): func_inst = inst.store.lift(callee, ft, opts, inst) - _ = inst.store.invoke(func_inst, None, on_start, on_resolve) + _ = inst.store.invoke(func_inst, on_start, on_resolve) while inst.store.waiting: inst.store.tick() -def mk_task(on_start, on_resolve, caller, thread_func): - inst = ComponentInstance(caller.inst.store) - opts = mk_opts(async_ = True) - task = Task(FuncType([],[],async_=True), opts, inst, on_start, on_resolve, caller) - thread = Thread(task, thread_func) - thread.resume(Cancelled.FALSE) - return task.request_cancellation - -def mk_done_task(caller): - def empty(): - current_thread().task.state = Task.State.RESOLVED - return mk_task(lambda:[], lambda _:(), caller, empty) +def mk_host_func(store, host_func, ft): + def func_inst(on_start, on_resume, caller) -> OnCancel: + def thread_func(): + wait_until = lambda rf: host_thread.wait_until(rf, cancellable = True) + host_func(caller, on_start, on_resume, wait_until) + inst = ComponentInstance(store) + task = 
Task(ft, CanonicalOptions(), inst, on_start, on_resume, caller) + host_thread = Thread(task, thread_func) + host_thread.resume() + return partial(host_thread.resume, Cancelled.TRUE) + return func_inst def mk_str(s): return (s, 'utf8', len(s.encode('utf-8'))) @@ -481,18 +480,25 @@ def dtor(args): return [] store = Store() - rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports - inst = ComponentInstance(store) + root_inst = ComponentInstance(store) + rt = ResourceType(ComponentInstance(store, root_inst), dtor) # usable in imports and exports + inst = ComponentInstance(store, root_inst) rt2 = ResourceType(inst, dtor) # only usable in exports opts = mk_opts() - def host_import(on_start, on_resolve, caller): + host_ft = FuncType([ + BorrowType(rt), + BorrowType(rt) + ],[ + OwnType(rt) + ]) + def host_func(caller, on_start, on_return, wait_until): args = on_start() assert(len(args) == 2) assert(args[0] == 42) assert(args[1] == 44) - on_resolve([45]) - return mk_done_task(caller) + on_return([45]) + host_func_inst = mk_host_func(store, host_func, host_ft) def core_wasm(args): nonlocal dtor_value @@ -511,17 +517,7 @@ def core_wasm(args): assert((canon_resource_rep(rt, h2))[0] == 43) assert((canon_resource_rep(rt, h3))[0] == 44) - host_ft = FuncType([ - BorrowType(rt), - BorrowType(rt) - ],[ - OwnType(rt) - ]) - args = [ - h1, - h3 - ] - results = store.lower(host_import, host_ft, opts, inst)(args) + results = store.lower(host_func_inst, host_ft, opts, inst)([h1, h3]) assert(len(results) == 1) assert(results[0] == 4) h4 = results[0] @@ -586,7 +582,8 @@ def test_async_to_async(): producer_opts.async_ = True store = Store() - producer_inst = ComponentInstance(store) + root_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) eager_ft = FuncType([], [U8Type()], async_=True) def core_eager_producer(args): @@ -625,7 +622,7 @@ def core_blocking_producer(args): consumer_heap = Heap(20) consumer_opts = 
mk_opts(MemInst(consumer_heap.memory, 'i32')) consumer_opts.async_ = True - consumer_inst = ComponentInstance(store) + consumer_inst = ComponentInstance(store, root_inst) def consumer(args): [b] = args @@ -694,7 +691,8 @@ def on_resolve(results): def test_async_callback(): store = Store() - producer_inst = ComponentInstance(store) + root_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) producer_opts = mk_opts() producer_opts.async_ = True producer_ft = FuncType([], [], async_ = True) @@ -711,6 +709,7 @@ def core_producer_pre(fut, args): core_producer2 = partial(core_producer_pre, fut2) producer2 = store.lift(core_producer2, producer_ft, producer_opts, producer_inst) + consumer_inst = ComponentInstance(store, root_inst) consumer_ft = FuncType([],[U32Type()], async_ = True) consumer_inst = ComponentInstance(store) seti = 0 @@ -784,7 +783,8 @@ def on_resolve(results): def test_callback_interleaving(): store = Store() - producer_inst = ComponentInstance(store) + root_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) producer_ft = FuncType([U32Type(), FutureType(None),FutureType(None),FutureType(None)],[U32Type()], async_ = True) fut3s = [None,None] def core_producer(args): @@ -832,7 +832,7 @@ def core_sync_callee(args): sync_callee_opts = mk_opts() sync_callee = store.lift(core_sync_callee, sync_callee_ft, sync_callee_opts, producer_inst) - consumer_inst = ComponentInstance(store) + consumer_inst = ComponentInstance(store, root_inst) consumer_ft = FuncType([], [], async_ = True) consumer_mem = bytearray(24) consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True) @@ -955,10 +955,11 @@ def core_consumer(args): def test_sync_ignores_backpressure(): store = Store() + root_inst = ComponentInstance(store) sync_opts = mk_opts(async_ = False) async_opts = mk_opts(async_ = True) - callee_inst = ComponentInstance(store) + callee_inst = ComponentInstance(store, root_inst) async_ft = 
FuncType([U32Type(), FutureType(None)],[U32Type()], async_ = True) def core_callee1(args): @@ -974,7 +975,7 @@ def core_callee2(args): return [84 + i] sync_callee = store.lift(core_callee2, sync_ft, sync_opts, callee_inst) - caller_inst = ComponentInstance(store) + caller_inst = ComponentInstance(store, root_inst) caller_ft = FuncType([], [], async_ = True) caller_mem = bytearray(24) caller_opts = mk_opts(memory = MemInst(caller_mem, 'i32'), async_ = True) @@ -1012,8 +1013,9 @@ def core_caller(args): def test_async_to_sync(): store = Store() + root_inst = ComponentInstance(store) producer_opts = CanonicalOptions() - producer_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) producer_ft = FuncType([],[], async_ = True) fut = RacyBool(False) @@ -1039,7 +1041,7 @@ def producer2_core(args): consumer_heap = Heap(20) consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32')) consumer_opts.async_ = True - consumer_inst = ComponentInstance(store) + consumer_inst = ComponentInstance(store, root_inst) consumer_ft = FuncType([],[U8Type()], async_ = True) def consumer(args): assert(len(args) == 0) @@ -1097,9 +1099,10 @@ def on_resolve(results): def test_async_backpressure(): store = Store() + root_inst = ComponentInstance(store) producer_opts = CanonicalOptions() producer_opts.async_ = True - producer_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) producer_ft = FuncType([],[], async_ = True) fut = RacyBool(False) @@ -1126,7 +1129,7 @@ def producer2_core(args): consumer_heap = Heap(20) consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32'), async_ = True) - consumer_inst = ComponentInstance(store) + consumer_inst = ComponentInstance(store, root_inst) consumer_ft = FuncType([],[U8Type()], async_ = True) def consumer(args): assert(len(args) == 0) @@ -1180,33 +1183,34 @@ def on_resolve(results): def test_sync_using_wait(): store = Store() - hostcall_opts = mk_opts() - hostcall_opts.async_ = True - 
hostcall_inst = ComponentInstance(store) + root_inst = ComponentInstance(store) + producer_opts = mk_opts() + producer_opts.async_ = True + producer_inst = ComponentInstance(store, root_inst) ft = FuncType([], [], async_ = True) - def core_hostcall_pre(fut, args): + def core_producer_pre(fut, args): current_thread().wait_until(fut.is_set) - [] = canon_task_return([], hostcall_opts, []) + [] = canon_task_return([], producer_opts, []) return [] fut1 = RacyBool(False) - core_hostcall1 = partial(core_hostcall_pre, fut1) - hostcall1 = store.lift(core_hostcall1, ft, hostcall_opts, hostcall_inst) + core_producer1 = partial(core_producer_pre, fut1) + producer1 = store.lift(core_producer1, ft, producer_opts, producer_inst) fut2 = RacyBool(False) - core_hostcall2 = partial(core_hostcall_pre, fut2) - hostcall2 = store.lift(core_hostcall2, ft, hostcall_opts, hostcall_inst) + core_producer2 = partial(core_producer_pre, fut2) + producer2 = store.lift(core_producer2, ft, producer_opts, producer_inst) - lower_heap = Heap(20) - lower_opts = mk_opts(MemInst(lower_heap.memory, 'i32')) - lower_opts.async_ = True - inst = ComponentInstance(store) + consumer_heap = Heap(20) + consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32')) + consumer_opts.async_ = True + consumer_inst = ComponentInstance(store, root_inst) def core_func(args): - [ret] = store.lower(hostcall1, ft, lower_opts, inst)([]) + [ret] = store.lower(producer1, ft, consumer_opts, consumer_inst)([]) state,subi1 = unpack_result(ret) assert(subi1 == 1) assert(state == Subtask.State.STARTED) - [ret] = store.lower(hostcall2, ft, lower_opts, inst)([]) + [ret] = store.lower(producer2, ft, consumer_opts, consumer_inst)([]) state,subi2 = unpack_result(ret) assert(subi2 == 2) assert(state == Subtask.State.STARTED) @@ -1217,18 +1221,18 @@ def core_func(args): fut1.set() - retp = lower_heap.realloc(0,0,8,4) - [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), seti, retp) + retp = 
consumer_heap.realloc(0,0,8,4) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), seti, retp) assert(event == EventCode.SUBTASK) - assert(lower_heap.memory[retp] == subi1) - assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED) + assert(consumer_heap.memory[retp] == subi1) + assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED) fut2.set() - [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), seti, retp) assert(event == EventCode.SUBTASK) - assert(lower_heap.memory[retp] == subi2) - assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED) + assert(consumer_heap.memory[retp] == subi2) + assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED) canon_subtask_drop(subi1) canon_subtask_drop(subi2) @@ -1238,7 +1242,7 @@ def core_func(args): def on_start(): return [] def on_resolve(results): pass - lift_and_run(mk_opts(), inst, ft, core_func, on_start, on_resolve) + lift_and_run(mk_opts(), consumer_inst, ft, core_func, on_start, on_resolve) class HostSource(ReadableStream): @@ -1399,13 +1403,13 @@ def consume(self, n): def test_eager_stream_completion(): store = Store() - ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) inst = ComponentInstance(store) mem = bytearray(20) opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False) - def host_import(on_start, on_resolve, caller): + ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) + def host_func(caller, on_start, on_resolve, wait_until): args = on_start() assert(len(args) == 1) assert(isinstance(args[0], ReadableStream)) @@ -1419,7 +1423,7 @@ def add10(): outgoing.write(vs) outgoing.drop() threading.Thread(target = add10).start() - return mk_done_task(caller) + host_func_inst = mk_host_func(store, host_func, ft) src_stream = HostSource(U8Type(), [1,2,3,4,5,6,7,8], chunk=4) def 
on_start(): @@ -1445,7 +1449,7 @@ def core_func(args): [packed] = canon_stream_new(StreamType(U8Type())) rsi3,wsi3 = unpack_new_ends(packed) retp = 12 - [ret] = store.lower(host_import, ft, opts, inst)([rsi3, retp]) + [ret] = store.lower(host_func_inst, ft, opts, inst)([rsi3, retp]) assert(ret == Subtask.State.RETURNED) rsi4 = mem[retp] [ret] = canon_stream_write(StreamType(U8Type()), opts, wsi3, 0, 4) @@ -1482,7 +1486,6 @@ def core_func(args): def test_async_stream_ops(): store = Store() - ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) inst = ComponentInstance(store) mem = bytearray(24) opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) @@ -1490,34 +1493,32 @@ def test_async_stream_ops(): host_import_incoming = None host_import_outgoing = None - def host_import(on_start, on_resolve, caller): - def thread_func(): - thread = current_thread() - nonlocal host_import_incoming, host_import_outgoing - args = thread.task.start() - assert(len(args) == 1) - assert(isinstance(args[0], ReadableStream)) - host_import_incoming = HostSink(args[0], chunk=4, remain = 0) - host_import_outgoing = HostSource(U8Type(), [], chunk=4, destroy_if_empty=False) - thread.task.return_([host_import_outgoing]) - while True: - vs = None - results_ready = RacyBool(False) - def consume_results(): - nonlocal vs - vs = host_import_incoming.consume(4) - results_ready.set() - threading.Thread(target = consume_results).start() - thread.wait_until(results_ready.is_set) - if vs: - for i in range(len(vs)): - vs[i] += 10 - else: - break - host_import_outgoing.write(vs) - host_import_outgoing.destroy_once_empty() - - return mk_task(on_start, on_resolve, caller, thread_func) + ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())], async_ = True) + def host_func(caller, on_start, on_resolve, wait_until): + nonlocal host_import_incoming, host_import_outgoing + args = on_start() + assert(len(args) == 1) + assert(isinstance(args[0], ReadableStream)) + host_import_incoming = 
HostSink(args[0], chunk=4, remain = 0) + host_import_outgoing = HostSource(U8Type(), [], chunk=4, destroy_if_empty=False) + on_resolve([host_import_outgoing]) + while True: + vs = None + results_ready = RacyBool(False) + def consume_results(): + nonlocal vs + vs = host_import_incoming.consume(4) + results_ready.set() + threading.Thread(target = consume_results).start() + wait_until(results_ready.is_set) + if vs: + for i in range(len(vs)): + vs[i] += 10 + else: + break + host_import_outgoing.write(vs) + host_import_outgoing.destroy_once_empty() + host_func_inst = mk_host_func(store, host_func, ft) src_stream = HostSource(U8Type(), [], chunk=4, destroy_if_empty = False) def on_start(): @@ -1550,7 +1551,7 @@ def core_func(args): assert(mem[0:4] == b'\x01\x02\x03\x04') [packed] = canon_stream_new(StreamType(U8Type())) rsi3,wsi3 = unpack_new_ends(packed) - [ret] = store.lower(host_import, ft, opts, inst)([rsi3, retp]) + [ret] = store.lower(host_func_inst, ft, opts, inst)([rsi3, retp]) assert(ret == Subtask.State.RETURNED) rsi4 = mem[16] assert(rsi4 == 4) @@ -1640,12 +1641,12 @@ def test_receive_own_stream(): opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) host_ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) - def host_import(on_start, on_resolve, caller): + def host_func(caller, on_start, on_resolve, wait_until): args = on_start() assert(len(args) == 1) assert(isinstance(args[0], ReadableStream)) on_resolve(args) - return mk_done_task(caller) + host_func_inst = mk_host_func(store, host_func, host_ft) def core_func(args): assert(len(args) == 0) @@ -1656,7 +1657,7 @@ def core_func(args): [ret] = canon_stream_write(StreamType(U8Type()), opts, wsi, 0, 4) assert(ret == definitions.BLOCKED) retp = 8 - [ret] = store.lower(host_import, host_ft, opts, inst)([rsi, retp]) + [ret] = store.lower(host_func_inst, host_ft, opts, inst)([rsi, retp]) assert(ret == Subtask.State.RETURNED) rsi2 = int.from_bytes(mem[retp : retp+4], 'little', signed=False) assert(rsi2 == 
1) @@ -1676,28 +1677,28 @@ def test_host_partial_reads_writes(): store = Store() mem = bytearray(20) opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) - inst = ComponentInstance(Store()) + inst = ComponentInstance(store) src = HostSource(U8Type(), [1,2,3,4], chunk=2, destroy_if_empty = False) source_ft = FuncType([], [StreamType(U8Type())]) - def host_source(on_start, on_resolve, caller): + def host_source_func(caller, on_start, on_resolve, wait_until): [] = on_start() on_resolve([src]) - return mk_done_task(caller) + host_source_func_inst = mk_host_func(store, host_source_func, source_ft) dst = None sink_ft = FuncType([StreamType(U8Type())], []) - def host_sink(on_start, on_resolve, caller): + def host_sink_func(caller, on_start, on_resolve, wait_until): nonlocal dst [s] = on_start() dst = HostSink(s, chunk=1, remain=2) on_resolve([]) - return mk_done_task(caller) + host_sink_func_inst = mk_host_func(store, host_sink_func, sink_ft) def core_func(args): assert(len(args) == 0) retp = 4 - [ret] = store.lower(host_source, source_ft, opts, inst)([retp]) + [ret] = store.lower(host_source_func_inst, source_ft, opts, inst)([retp]) assert(ret == Subtask.State.RETURNED) rsi = mem[retp] assert(rsi == 1) @@ -1726,7 +1727,7 @@ def core_func(args): rsi,wsi = unpack_new_ends(packed) assert(rsi == 1) assert(wsi == 3) - [ret] = store.lower(host_sink, sink_ft, opts, inst)([rsi]) + [ret] = store.lower(host_sink_func_inst, sink_ft, opts, inst)([rsi]) assert(ret == Subtask.State.RETURNED) mem[0:6] = b'\x01\x02\x03\x04\x05\x06' [ret] = canon_stream_write(StreamType(U8Type()), opts, wsi, 0, 6) @@ -1757,9 +1758,10 @@ def on_resolve(results): assert(len(results) == 0) def test_wasm_to_wasm_stream(): store = Store() + root_inst = ComponentInstance(store) fut1, fut2, fut3, fut4 = RacyBool(False), RacyBool(False), RacyBool(False), RacyBool(False) - inst1 = ComponentInstance(store) + inst1 = ComponentInstance(store, root_inst) mem1 = bytearray(24) opts1 = mk_opts(memory=MemInst(mem1, 
'i32'), async_=True) ft1 = FuncType([], [StreamType(U8Type())]) @@ -1824,7 +1826,7 @@ def core_func1(args): func1 = store.lift(core_func1, ft1, opts1, inst1) - inst2 = ComponentInstance(store) + inst2 = ComponentInstance(store, root_inst) heap2 = Heap(24) mem2 = heap2.memory opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True) @@ -1890,9 +1892,10 @@ def core_func2(args): def test_wasm_to_wasm_stream_empty(): store = Store() + root_inst = ComponentInstance(store) fut1, fut2, fut3, fut4 = RacyBool(False), RacyBool(False), RacyBool(False), RacyBool(False) - inst1 = ComponentInstance(store) + inst1 = ComponentInstance(store, root_inst) mem1 = bytearray(24) opts1 = mk_opts(memory=MemInst(mem1, 'i32'), async_=True) ft1 = FuncType([], [StreamType(None)]) @@ -1937,7 +1940,7 @@ def core_func1(args): func1 = store.lift(core_func1, ft1, opts1, inst1) - inst2 = ComponentInstance(store) + inst2 = ComponentInstance(store, root_inst) heap2 = Heap(10) mem2 = heap2.memory opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True) @@ -1995,21 +1998,21 @@ def test_cancel_copy(): host_ft1 = FuncType([StreamType(U8Type())],[]) host_sink = None - def host_func1(on_start, on_resolve, caller): + def host_func1(caller, on_start, on_resolve, wait_until): nonlocal host_sink [stream] = on_start() host_sink = HostSink(stream, 2, remain = 0) on_resolve([]) - return mk_done_task(caller) + host_func1_inst = mk_host_func(store, host_func1, host_ft1) host_ft2 = FuncType([], [StreamType(U8Type())]) host_source = None - def host_func2(on_start, on_resolve, caller): + def host_func2(caller, on_start, on_resolve, wait_until): nonlocal host_source [] = on_start() host_source = HostSource(U8Type(), [], chunk=2, destroy_if_empty = False) on_resolve([host_source]) - return mk_done_task(caller) + host_func2_inst = mk_host_func(store, host_func2, host_ft2) lift_opts = mk_opts() def core_func(args): @@ -2017,7 +2020,7 @@ def core_func(args): 
[packed] = canon_stream_new(StreamType(U8Type())) rsi,wsi = unpack_new_ends(packed) - [ret] = store.lower(host_func1, host_ft1, lower_opts, inst)([rsi]) + [ret] = store.lower(host_func1_inst, host_ft1, lower_opts, inst)([rsi]) assert(ret == Subtask.State.RETURNED) mem[0:4] = b'\x0a\x0b\x0c\x0d' [ret] = canon_stream_write(StreamType(U8Type()), lower_opts, wsi, 0, 4) @@ -2034,7 +2037,7 @@ def core_func(args): [packed] = canon_stream_new(StreamType(U8Type())) rsi,wsi = unpack_new_ends(packed) - [ret] = store.lower(host_func1, host_ft1, lower_opts, inst)([rsi]) + [ret] = store.lower(host_func1_inst, host_ft1, lower_opts, inst)([rsi]) assert(ret == Subtask.State.RETURNED) mem[0:4] = b'\x01\x02\x03\x04' [ret] = canon_stream_write(StreamType(U8Type()), lower_opts, wsi, 0, 4) @@ -2050,7 +2053,7 @@ def core_func(args): assert(host_sink.consume(100) is None) retp = 16 - [ret] = store.lower(host_func2, host_ft2, lower_opts, inst)([retp]) + [ret] = store.lower(host_func2_inst, host_ft2, lower_opts, inst)([retp]) assert(ret == Subtask.State.RETURNED) rsi = mem[retp] [ret] = canon_stream_read(StreamType(U8Type()), lower_opts, rsi, 0, 4) @@ -2060,7 +2063,7 @@ def core_func(args): assert(n == 0 and result == CopyResult.CANCELLED) [] = canon_stream_drop_readable(StreamType(U8Type()), rsi) - [ret] = store.lower(host_func2, host_ft2, lower_opts, inst)([retp]) + [ret] = store.lower(host_func2_inst, host_ft2, lower_opts, inst)([retp]) assert(ret == Subtask.State.RETURNED) rsi = mem[retp] [ret] = canon_stream_read(StreamType(U8Type()), lower_opts, rsi, 0, 4) @@ -2148,19 +2151,17 @@ def test_futures(): mem = bytearray(24) lower_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) - host_ft1 = FuncType([FutureType(U8Type())],[FutureType(U8Type())]) - def host_func(on_start, on_resolve, caller): - def thread_func(): - thread = current_thread() - [future] = thread.task.start() - outgoing = HostFutureSource(U8Type()) - thread.task.return_([outgoing]) - incoming = HostFutureSink(U8Type()) 
- future.read(None, incoming, lambda why:()) - thread.wait_until(incoming.has_v.is_set) - assert(incoming.v == 42) - outgoing.set_result(43) - return mk_task(on_start, on_resolve, caller, thread_func) + host_ft1 = FuncType([FutureType(U8Type())],[FutureType(U8Type())], async_ = True) + def host_func(caller, on_start, on_resolve, wait_until): + [future] = on_start() + outgoing = HostFutureSource(U8Type()) + on_resolve([outgoing]) + incoming = HostFutureSink(U8Type()) + future.read(None, incoming, lambda why:()) + wait_until(incoming.has_v.is_set) + assert(incoming.v == 42) + outgoing.set_result(43) + host_func_inst = mk_host_func(store, host_func, host_ft1) lift_opts = mk_opts() def core_func(args): @@ -2169,7 +2170,7 @@ def core_func(args): [packed] = canon_future_new(FutureType(U8Type())) rfi,wfi = unpack_new_ends(packed) retp = 16 - [ret] = store.lower(host_func, host_ft1, lower_opts, inst)([rfi, retp]) + [ret] = store.lower(host_func_inst, host_ft1, lower_opts, inst)([rfi, retp]) assert(ret == Subtask.State.RETURNED) rfi = mem[retp] @@ -2196,7 +2197,7 @@ def core_func(args): [packed] = canon_future_new(FutureType(U8Type())) rfi,wfi = unpack_new_ends(packed) - [ret] = store.lower(host_func, host_ft1, lower_opts, inst)([rfi, retp]) + [ret] = store.lower(host_func_inst, host_ft1, lower_opts, inst)([rfi, retp]) assert(ret == Subtask.State.RETURNED) rfi = mem[retp] @@ -2236,12 +2237,13 @@ def core_func(args): def test_cancel_subtask(): store = Store() + root_inst = ComponentInstance(store) ft = FuncType([U8Type()], [U8Type()], async_ = True) callee_heap = Heap(10) callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = True) sync_callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = False) - callee_inst = ComponentInstance(store) + callee_inst = ComponentInstance(store, root_inst) def core_callee1(args): assert(False) @@ -2279,18 +2281,16 @@ def core_callee3(args): callee3 = store.lift(core_callee3, ft, callee_opts, callee_inst) host_fut4 = 
RacyBool(False) - def host_import4(on_start, on_resolve, caller): - def thread_func(): - thread = current_thread() - args = thread.task.start() - assert(len(args) == 1) - assert(args[0] == 42) - thread.wait_until(host_fut4.is_set) - thread.task.return_([43]) - return mk_task(on_start, on_resolve, caller, thread_func) + def host_func4(caller, on_start, on_resolve, wait_until): + args = on_start() + assert(len(args) == 1) + assert(args[0] == 42) + wait_until(host_fut4.is_set) + on_resolve([43]) + host_func4_inst = mk_host_func(store, host_func4, ft) def core_callee4(args): [x] = args - [result] = store.lower(host_import4, ft, sync_callee_opts, callee_inst)([42]) + [result] = store.lower(host_func4_inst, ft, sync_callee_opts, callee_inst)([42]) assert(result == 43) try: [] = canon_task_cancel() @@ -2307,21 +2307,18 @@ def core_callee4(args): callee4 = store.lift(core_callee4, ft, callee_opts, callee_inst) host_fut5 = RacyBool(False) - def host_import5(on_start, on_resolve, caller): - def thread_func(): - thread = current_thread() - args = thread.task.start() - assert(len(args) == 1) - assert(args[0] == 42) - thread.wait_until(host_fut5.is_set) - assert(thread.task.state == Task.State.PENDING_CANCEL) - thread.wait_until(host_fut5.is_set) - thread.task.return_([43]) - return mk_task(on_start, on_resolve, caller, thread_func) + def host_func5(caller, on_start, on_resolve, wait_until): + args = on_start() + assert(len(args) == 1) + assert(args[0] == 42) + wait_until(host_fut5.is_set) + wait_until(host_fut5.is_set) + on_resolve([43]) + host_func5_inst = mk_host_func(store, host_func5, ft) def core_callee5(args): [x] = args assert(x == 13) - [ret] = store.lower(host_import5, ft, callee_opts, callee_inst)([42, 0]) + [ret] = store.lower(host_func5_inst, ft, callee_opts, callee_inst)([42, 0]) state,subi = unpack_result(ret) assert(state == Subtask.State.STARTED) [ret] = canon_subtask_cancel(False, subi) @@ -2378,7 +2375,7 @@ def core_callee6(args): caller_heap = Heap(20) 
caller_opts = mk_opts(MemInst(caller_heap.memory, 'i32'), async_ = True) - caller_inst = ComponentInstance(store) + caller_inst = ComponentInstance(store, root_inst) def core_caller(args): [x] = args @@ -2616,7 +2613,7 @@ def test_async_flat_params(): inst = ComponentInstance(store) ft1 = FuncType([F32Type(), F64Type(), U32Type(), S64Type()],[]) - def f1(on_start, on_resolve, caller): + def f1(caller, on_start, on_resolve, wait_until): args = on_start() assert(len(args) == 4) assert(args[0] == 1.1) @@ -2624,33 +2621,33 @@ def f1(on_start, on_resolve, caller): assert(args[2] == 3) assert(args[3] == 4) on_resolve([]) - return mk_done_task(caller) + f1_inst = mk_host_func(store, f1, ft1) ft2 = FuncType([U32Type(),U8Type(),U8Type(),U8Type()],[]) - def f2(on_start, on_resolve, caller): + def f2(caller, on_start, on_resolve, wait_until): args = on_start() assert(len(args) == 4) assert(args == [1,2,3,4]) on_resolve([]) - return mk_done_task(caller) + f2_inst = mk_host_func(store, f2, ft2) ft3 = FuncType([U32Type(),U8Type(),U8Type(),U8Type(),U8Type()],[]) - def f3(on_start, on_resolve, caller): + def f3(caller, on_start, on_resolve, wait_until): args = on_start() assert(len(args) == 5) assert(args == [1,2,3,4,5]) on_resolve([]) - return mk_done_task(caller) + f3_inst = mk_host_func(store, f3, ft3) def core_func(args): - [ret] = store.lower(f1, ft1, opts, inst)([1.1, 2.2, 3, 4]) + [ret] = store.lower(f1_inst, ft1, opts, inst)([1.1, 2.2, 3, 4]) assert(ret == Subtask.State.RETURNED) - [ret] = store.lower(f2, ft2, opts, inst)([1,2,3,4]) + [ret] = store.lower(f2_inst, ft2, opts, inst)([1,2,3,4]) assert(ret == Subtask.State.RETURNED) heap.memory[12:20] = b'\x01\x00\x00\x00\x02\x03\x04\x05' - [ret] = store.lower(f3, ft3, opts, inst)([12]) + [ret] = store.lower(f3_inst, ft3, opts, inst)([12]) assert(ret == Subtask.State.RETURNED) canon_task_return([], opts, []) @@ -2735,7 +2732,8 @@ def on_resolve(v): def test_thread_cancel_callback(): store = Store() - producer_inst = 
ComponentInstance(store) + root_inst = ComponentInstance(store) + producer_inst = ComponentInstance(store, root_inst) producer_ft = FuncType([], [U32Type()], async_ = True) producer_opts1 = mk_opts(async_ = True) @@ -2762,7 +2760,7 @@ def core_producer_callback2(args): producer_opts2.callback = core_producer_callback2 producer_callee2 = store.lift(core_producer2, producer_ft, producer_opts2, producer_inst) - consumer_inst = ComponentInstance(store) + consumer_inst = ComponentInstance(store, root_inst) consumer_ft = FuncType([], [], async_ = True) consumer_mem = bytearray(24) consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True) @@ -2803,45 +2801,6 @@ def core_consumer(args): lift_and_run(mk_opts(), consumer_inst, consumer_ft, core_consumer, lambda:[], lambda _:()) -def test_reentrance(): - def mk_task(supertask, inst): - t = Supertask() - t.supertask = supertask - t.inst = inst - return t - - store = Store() - root_task = mk_task(None, None) - - c1 = ComponentInstance(store, None) - c2 = ComponentInstance(store, None) - c1_task = mk_task(root_task, c1) - assert(call_might_be_recursive(mk_task(c1_task, None), c1)) - assert(not call_might_be_recursive(mk_task(c1_task, None), c2)) - c1c2_task = mk_task(c1_task, c2) - assert(call_might_be_recursive(mk_task(c1c2_task, None), c1)) - assert(call_might_be_recursive(mk_task(c1c2_task, None), c2)) - c1host_task = mk_task(c1_task, None) - assert(call_might_be_recursive(mk_task(c1host_task, None), c1)) - assert(not call_might_be_recursive(mk_task(c1host_task, None), c2)) - - p = ComponentInstance(store, None) - c1 = ComponentInstance(store, p) - c2 = ComponentInstance(store, p) - c3 = ComponentInstance(store, None) - c1_task = mk_task(root_task, c1) - assert(call_might_be_recursive(c1_task, p)) - c1c2_task = mk_task(c1_task, c2) - assert(call_might_be_recursive(c1c2_task, p)) - c1c2host_task = mk_task(c1c2_task, None) - assert(call_might_be_recursive(c1c2host_task, p)) - 
assert(call_might_be_recursive(c1c2host_task, c1)) - assert(call_might_be_recursive(c1c2host_task, c2)) - p_task = mk_task(root_task, p) - assert(call_might_be_recursive(p_task, c1)) - assert(call_might_be_recursive(p_task, c2)) - - test_roundtrips() test_handles() test_async_to_async() @@ -2867,6 +2826,5 @@ def mk_task(supertask, inst): test_async_flat_params() test_threads() test_thread_cancel_callback() -test_reentrance() print("All tests passed") From 2167015174bb655826b6fcae26150871a054ba56 Mon Sep 17 00:00:00 2001 From: Luke Wagner Date: Tue, 19 May 2026 14:40:11 -0500 Subject: [PATCH 2/2] Add a future TODO bullet regarding the async call stack --- design/mvp/Concurrency.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md index 8c8ea7b4..339ebb18 100644 --- a/design/mvp/Concurrency.md +++ b/design/mvp/Concurrency.md @@ -321,12 +321,14 @@ always a well-defined async call stack. The async call stack is not currently observable to running components, except that it may nondeterministically appear as part of the callstack stored in -`error-context` πŸ“. Instead, the async call stack is meant to provide better -backtraces when debugging, profiling and tracing. While particular languages can -and do maintain their own async call stacks in core wasm state, without the -Component Model's async call stack, linkage *between* different languages would -be lost at component boundaries, leading to a loss of overall context in -multi-component applications. +`error-context` πŸ“. (In the [future](#TODO), functionality could be added to +allow a [donut wrapping] parent to follow the async call stack from a child's +import call to a child's export call.) Instead, the async call stack is +currently used to provide backtraces when debugging, profiling, tracing and +logging. 
While particular languages can and do maintain their own async call +stacks in core wasm state, without the Component Model's async call stack, +linkage *between* different languages would be lost at component boundaries, +leading to a loss of overall context in multi-component applications. There is an important gap between the Component Model's minimal form of Structured Concurrency and the Structured Concurrency support that appears in @@ -1478,6 +1480,9 @@ comes after: * allow a parent component to perform [JSPI]-like suspension of the sync calls of its child components, thereby allowing the parent to implement the child's sync import calls in terms of the parent's `async` imports. +* allow a [donut wrapping] parent component to ask which of a child's export + calls a particular child import call is associated with (e.g., for logging + purposes). * add a `strict-callback` option that adds extra trapping conditions to provide the semantic guarantees needed for engines to statically avoid fiber creation at component-to-component `async` call boundaries