From c40385c14bf803b8b32054b27ed035893d7e5a44 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Thu, 28 May 2026 15:27:41 -0700 Subject: [PATCH 01/18] Add builds schema with _default sentinel key (Phase 1: schema only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a top-level builds section on RunConfig that lets users declare multiple independent execution units (pipelines x devices x components) in one workflow config. * Add BuildConfigPartial / BuildConfig and a merge_build_default helper in olive/engine/config.py. _default lives inside builds as a sentinel key whose partial fields are merged into every sibling build with full-replace semantics (lists are not deep-merged). * Add builds: dict[str, BuildConfig] to RunConfig with an expand_build_defaults before-validator that pops _default and merges it into siblings, plus a validate_builds_references after-validator that checks pipeline/host/target/evaluator string refs resolve to known entries. * Schema-only change: the engine runner does not yet act on builds. Existing workflows without builds keep their current behavior. * Add 8 unit tests in test/workflows/test_run_config_builds.py covering the merge, override, full-replace, missing-field, invalid-ref, absent-builds and empty-default cases. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/engine/config.py | 88 +++++++++++++ olive/workflows/run/config.py | 75 ++++++++++- test/workflows/test_run_config_builds.py | 156 +++++++++++++++++++++++ 3 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 test/workflows/test_run_config_builds.py diff --git a/olive/engine/config.py b/olive/engine/config.py index fabef33eb3..22e323ace9 100644 --- a/olive/engine/config.py +++ b/olive/engine/config.py @@ -19,6 +19,9 @@ # list of all pruned configs PRUNED_CONFIGS = (FAILED_CONFIG, INVALID_CONFIG) +# sentinel key inside `builds` that holds partial defaults applied to all sibling builds +BUILD_DEFAULT_KEY = "_default" + class EngineConfig(ConfigBase): model_config = ConfigDict(extra="forbid") @@ -90,3 +93,88 @@ class RunPassConfig(AbstractPassConfig): " If not provided, use the engine's evaluator." ), ) + + +class BuildConfigPartial(ConfigBase): + """Partial build configuration. + + All fields are optional. Used as the schema for the ``_default`` sentinel inside ``builds`` + and as the unmerged form of every sibling entry before defaults are applied. + """ + + model_config = ConfigDict(extra="forbid") + + components: Optional[list[str]] = Field( + None, + description=( + "Names of input model components this build operates on. Each name must match an entry in the input" + " model's ``model_component_names``. When omitted, the build runs on the full input model." + " When a single name is given, the build receives the unwrapped component handler instead of a one-element" + " composite." + ), + ) + pipeline: Optional[list[str]] = Field( + None, + description=( + "Ordered list of pass names (referencing entries in the top-level ``passes`` dict) that form this build's" + " pipeline." 
+ ), + ) + output_dir: Optional[str] = Field( + None, + description="Directory where this build's final model artifacts get saved.", + ) + host: Optional[Union[SystemConfig, str]] = Field( + None, + description=( + "Host system override for this build. If a string, must refer to a system config under ``systems``." + " If omitted, the engine's host is used." + ), + ) + target: Optional[Union[SystemConfig, str]] = Field( + None, + description=( + "Target system override for this build. If a string, must refer to a system config under ``systems``." + " If omitted, the engine's target is used." + ), + ) + evaluator: Optional[Union[OliveEvaluatorConfig, str]] = Field( + None, + description=( + "Evaluator override for this build. If a string, must refer to an evaluator config under ``evaluators``." + " If omitted, the engine's evaluator is used." + ), + ) + search_strategy: Optional[Union[SearchStrategyConfig, bool]] = Field( + None, + description="Search strategy override for this build. If omitted, the engine's search strategy is used.", + ) + + +class BuildConfig(BuildConfigPartial): + """Full build configuration after defaults have been merged. + + ``pipeline`` and ``output_dir`` are required post-merge; the other fields remain optional and + fall back to the engine-level configuration when not provided. + """ + + pipeline: list[str] = Field( + ..., + description=( + "Ordered list of pass names (referencing entries in the top-level ``passes`` dict) that form this build's" + " pipeline." + ), + ) + output_dir: str = Field( + ..., + description="Directory where this build's final model artifacts get saved.", + ) + + +def merge_build_default(default_partial: dict, sibling: dict) -> dict: + """Merge ``_default`` partial values into a sibling build dict. + + Sibling values fully override default values (no deep merge). Returns a new dict; inputs are + not mutated. 
+ """ + return {**{k: v for k, v in default_partial.items() if v is not None}, **sibling} diff --git a/olive/workflows/run/config.py b/olive/workflows/run/config.py index ff641fd728..5b4f899a29 100644 --- a/olive/workflows/run/config.py +++ b/olive/workflows/run/config.py @@ -15,7 +15,14 @@ from olive.data.container.dummy_data_container import TRANSFORMER_DUMMY_DATA_CONTAINER from olive.data.container.huggingface_container import HuggingfaceContainer from olive.engine import Engine -from olive.engine.config import EngineConfig, RunPassConfig +from olive.engine.config import ( + BUILD_DEFAULT_KEY, + BuildConfig, + BuildConfigPartial, + EngineConfig, + RunPassConfig, + merge_build_default, +) from olive.engine.packaging.packaging_config import PackagingConfig from olive.evaluator.olive_evaluator import OliveEvaluatorConfig from olive.model import ModelConfig @@ -146,6 +153,44 @@ class RunConfig(NestedConfig): ), ) passes: dict[str, list[RunPassConfig]] = Field(default_factory=dict, description="Pass configurations.") + builds: dict[str, BuildConfig] = Field( + default_factory=dict, + description=( + "Build configurations. Each entry declares an independent execution unit (a pipeline of passes optionally" + " scoped to a subset of input model components and overriding host/target/evaluator). The reserved" + " ``_default`` key holds partial defaults that are merged into every sibling build (sibling values fully" + " replace defaults; no deep merge). When ``builds`` is omitted, the workflow behaves as before and runs the" + " ``passes`` dict as a single implicit pipeline in its declared order." 
+ ), + ) + + @model_validator(mode="before") + @classmethod + def expand_build_defaults(cls, values): + """Pop ``builds._default`` and merge its partial fields into every sibling build.""" + if values is None: + return {} + if not isinstance(values, dict): + return values + builds = values.get("builds") + if not builds or not isinstance(builds, dict): + return values + default_raw = builds.pop(BUILD_DEFAULT_KEY, None) + if default_raw is None: + return values + # validate default as partial schema (catches unknown keys / wrong types early) + default_partial = BuildConfigPartial.model_validate(default_raw).model_dump(exclude_none=True) + if not default_partial: + # `_default: {}` is a no-op + return values + if BUILD_DEFAULT_KEY in default_partial: + raise ValueError(f"Nested {BUILD_DEFAULT_KEY!r} inside builds._default is not allowed.") + for name, sibling in list(builds.items()): + if not isinstance(sibling, dict): + continue + builds[name] = merge_build_default(default_partial, sibling) + values["builds"] = builds + return values @model_validator(mode="before") @classmethod @@ -184,6 +229,34 @@ def validate_python_environment_paths(self): # noqa: N804 # model_validator mo _validate_python_environment_path(systems) return self + @model_validator(mode="after") + def validate_builds_references(self): # noqa: N804 # model_validator mode="after" uses self + """Verify each build's pipeline / host / target / evaluator references resolve to a known entry.""" + if not self.builds: + return self + pass_names = set(self.passes or {}) + system_names = set(self.systems or {}) + evaluator_names = set(self.evaluators or {}) + for build_name, build in self.builds.items(): + for pass_ref in build.pipeline: + if pass_ref not in pass_names: + raise ValueError( + f"Build {build_name!r} pipeline references unknown pass {pass_ref!r}." + f" Known passes: {sorted(pass_names)}." 
+ ) + for field_name, registry, registry_label in ( + ("host", system_names, "systems"), + ("target", system_names, "systems"), + ("evaluator", evaluator_names, "evaluators"), + ): + value = getattr(build, field_name) + if isinstance(value, str) and value not in registry: + raise ValueError( + f"Build {build_name!r} {field_name} references unknown entry {value!r}." + f" Known {registry_label}: {sorted(registry)}." + ) + return self + @field_validator("data_configs", mode="before") @classmethod def validate_data_config_names(cls, v): diff --git a/test/workflows/test_run_config_builds.py b/test/workflows/test_run_config_builds.py new file mode 100644 index 0000000000..656da3f5b8 --- /dev/null +++ b/test/workflows/test_run_config_builds.py @@ -0,0 +1,156 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from copy import deepcopy + +import pytest +from pydantic import ValidationError + +from olive.workflows.run.config import RunConfig + +# pylint: disable=attribute-defined-outside-init + + +class TestRunConfigBuilds: + @pytest.fixture(autouse=True) + def setup(self): + self.template = { + "input_model": { + "type": "HfModel", + "model_path": "dummy_model", + "task": "dummy_task", + }, + "systems": { + "local_system": {"type": "LocalSystem", "accelerators": [{"device": "cpu"}]}, + "other_system": {"type": "LocalSystem", "accelerators": [{"device": "gpu"}]}, + }, + "passes": { + "convert": {"type": "OnnxConversion"}, + "tune": {"type": "OrtSessionParamsTuning"}, + }, + "evaluate_input_model": False, + } + + def _build_config(self, builds): + config_dict = deepcopy(self.template) + config_dict["builds"] = builds + return config_dict + + def test_builds_absent_keeps_existing_behavior(self): + # Sanity: when `builds` is omitted, RunConfig validates and the field defaults 
to an empty dict. + run_config = RunConfig.model_validate(deepcopy(self.template)) + assert run_config.builds == {} + assert "convert" in run_config.passes + assert "tune" in run_config.passes + + def test_builds_default_merge_basic(self): + # `_default` partial fields should be merged into every sibling that omits them. + config_dict = self._build_config( + { + "_default": {"host": "local_system", "target": "local_system"}, + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": {"pipeline": ["convert", "tune"], "output_dir": "out/second"}, + } + ) + run_config = RunConfig.model_validate(config_dict) + assert set(run_config.builds) == {"first", "second"}, "the `_default` sentinel must be removed after merge" + assert run_config.builds["first"].host == "local_system" + assert run_config.builds["first"].target == "local_system" + assert run_config.builds["second"].host == "local_system" + assert run_config.builds["second"].target == "local_system" + + def test_builds_override_default(self): + # Sibling values should fully override `_default` values. + config_dict = self._build_config( + { + "_default": {"host": "local_system", "target": "local_system"}, + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": { + "pipeline": ["convert"], + "output_dir": "out/second", + "host": "other_system", + "target": "other_system", + }, + } + ) + run_config = RunConfig.model_validate(config_dict) + assert run_config.builds["first"].host == "local_system" + assert run_config.builds["second"].host == "other_system" + assert run_config.builds["second"].target == "other_system" + + def test_builds_default_pipeline_full_replace(self): + # Lists from `_default` should be fully replaced (not concatenated) by sibling values. 
+ config_dict = self._build_config( + { + "_default": { + "pipeline": ["convert", "tune"], + "components": ["text_encoder"], + "output_dir": "out/default", + }, + "override": { + "pipeline": ["convert"], + "components": ["unet"], + "output_dir": "out/override", + }, + "inherit": {}, + } + ) + run_config = RunConfig.model_validate(config_dict) + assert run_config.builds["override"].pipeline == ["convert"] + assert run_config.builds["override"].components == ["unet"] + assert run_config.builds["inherit"].pipeline == ["convert", "tune"] + assert run_config.builds["inherit"].components == ["text_encoder"] + assert run_config.builds["inherit"].output_dir == "out/default" + + def test_builds_missing_pipeline_after_merge_errors(self): + # If neither `_default` nor the sibling supply `pipeline`/`output_dir`, validation fails. + config_dict = self._build_config( + { + "_default": {"host": "local_system"}, + "broken": {"components": ["text_encoder"]}, + } + ) + with pytest.raises(ValidationError, match="pipeline"): + RunConfig.model_validate(config_dict) + + def test_builds_invalid_pipeline_ref_errors(self): + # Pass names in `pipeline` must exist in the top-level `passes` dict. + config_dict = self._build_config( + { + "broken": { + "pipeline": ["convert", "no_such_pass"], + "output_dir": "out/broken", + }, + } + ) + with pytest.raises(ValidationError, match="unknown pass"): + RunConfig.model_validate(config_dict) + + def test_builds_invalid_host_ref_errors(self): + # String host/target refs must exist in the top-level `systems` dict. + config_dict = self._build_config( + { + "broken": { + "pipeline": ["convert"], + "output_dir": "out/broken", + "host": "no_such_system", + }, + } + ) + with pytest.raises(ValidationError, match="unknown entry"): + RunConfig.model_validate(config_dict) + + def test_builds_empty_default_is_noop(self): + # `_default: {}` should validate cleanly and leave siblings unchanged. 
+ config_dict = self._build_config( + { + "_default": {}, + "only": {"pipeline": ["convert"], "output_dir": "out/only"}, + } + ) + run_config = RunConfig.model_validate(config_dict) + assert set(run_config.builds) == {"only"} + assert run_config.builds["only"].pipeline == ["convert"] + assert run_config.builds["only"].host is None From e9cfd95c89b6550ea7d1a99577a9cb71020344c0 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Thu, 28 May 2026 15:45:38 -0700 Subject: [PATCH 02/18] Implement builds runner with per-component slicing (Phase 2) Execute the builds schema added in Phase 1. * Add CompositeModelHandler.select_components(names) that returns the unwrapped child handler when one name is given and a sliced CompositeModelHandler otherwise. Unknown names raise a clear error. * Add ModelConfig.select_components(names) so the runner can slice a composite input config without materializing the full handler. * Add a builds-aware execution branch in olive/workflows/run/run.py. When builds is non-empty, the runner: validates components against the composite input model, then loops over builds. For each build it builds a per-build engine config (host/target/evaluator/search_strategy overrides resolved against systems/evaluators), a per-build pipeline subset from passes in the order declared by pipeline, the per-build accelerator spec, and calls engine.run with build.output_dir. Returns dict[build_name -> WorkflowOutput]. The no-builds path is unchanged and still returns a single WorkflowOutput. * Tests: - 7 new composite handler / ModelConfig select_components cases in test/model/test_composite_model.py. - 7 new runner smoke tests in test/workflows/test_run_builds.py with mocked Engine.run covering: no-builds backward compat, multi-build dispatch, pipeline-subset ordering, per-build output_dir, host/target override, non-composite + components error, unknown component error. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/model/config/model_config.py | 35 ++++++ olive/model/handler/composite.py | 27 +++++ olive/workflows/run/run.py | 136 ++++++++++++++++++++-- test/model/test_composite_model.py | 82 +++++++++++++ test/workflows/test_run_builds.py | 177 +++++++++++++++++++++++++++++ 5 files changed, 445 insertions(+), 12 deletions(-) create mode 100644 test/workflows/test_run_builds.py diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index d6eac90043..ad577905f1 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -43,6 +43,41 @@ def create_model(self): cls = get_model_handler(self.type) return cls(**self.config) + def select_components(self, names: list[str]) -> "ModelConfig": + """Return a new ModelConfig holding only the named components of a CompositeModel. + + Returns the unwrapped child component ``ModelConfig`` when exactly one name is given; + returns a new ``CompositeModel`` ``ModelConfig`` containing the subset (in the requested + order) otherwise. Raises ``ValueError`` if invoked on a non-composite model or if any + name is missing from ``model_component_names``. + """ + if self.type != "compositemodel": + raise ValueError( + f"select_components is only supported on CompositeModel input configs (got type {self.type!r})." + ) + if not names: + raise ValueError("select_components requires a non-empty list of names.") + component_names = self.config.get("model_component_names") or [] + model_components = self.config.get("model_components") or [] + if len(component_names) != len(model_components): + raise ValueError("CompositeModel config has mismatched model_components and model_component_names lengths.") + missing = [n for n in names if n not in component_names] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. 
Available components: {list(component_names)}.") + component_map = dict(zip(component_names, model_components)) + selected = [deepcopy(component_map[n]) for n in names] + if len(selected) == 1: + child = selected[0] + if isinstance(child, ModelConfig): + return child + return ModelConfig.model_validate(child) + new_config = { + **{k: v for k, v in self.config.items() if k not in ("model_components", "model_component_names")}, + "model_components": selected, + "model_component_names": list(names), + } + return ModelConfig(type=self.type, config=new_config) + def get_model_id(self): for v in self.config.values(): if callable(v): diff --git a/olive/model/handler/composite.py b/olive/model/handler/composite.py index c52bd1a315..e3332ce38e 100644 --- a/olive/model/handler/composite.py +++ b/olive/model/handler/composite.py @@ -77,6 +77,33 @@ def to_json(self, check_object: bool = False): def get_model_components(self) -> list[tuple[str, OliveModelHandler]]: return zip(self.model_component_names, self.model_components) + def select_components(self, names: list[str]) -> "OliveModelHandler": + """Return a handler holding only the named components. + + Returns the unwrapped child handler when exactly one name is given; returns a new + ``CompositeModelHandler`` containing the subset (in the requested order) otherwise. + Raises ``ValueError`` if any name is missing from ``model_component_names``. + """ + if not names: + raise ValueError("select_components requires a non-empty list of names.") + missing = [n for n in names if n not in self.model_component_names] + if missing: + raise ValueError( + f"Unknown component name(s) {missing}. Available components: {list(self.model_component_names)}." 
+ ) + component_map = dict(zip(self.model_component_names, self._model_components)) + selected = [component_map[n] for n in names] + if len(selected) == 1: + child = selected[0] + child.model_attributes = {**(self.model_attributes or {}), **(child.model_attributes or {})} + return child + return CompositeModelHandler( + model_components=selected, + model_component_names=list(names), + model_path=self.model_path, + model_attributes=self.model_attributes, + ) + def load_model(self, rank: int = None, cache_model: bool = True): raise NotImplementedError diff --git a/olive/workflows/run/run.py b/olive/workflows/run/run.py index 89100e1c1c..4e421db435 100644 --- a/olive/workflows/run/run.py +++ b/olive/workflows/run/run.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging +from collections import OrderedDict from copy import deepcopy from pathlib import Path from typing import TYPE_CHECKING, Optional, Union @@ -13,10 +14,10 @@ from olive.package_config import OlivePackageConfig from olive.systems.accelerator_creator import create_accelerator from olive.systems.common import SystemType -from olive.workflows.run.config import RunConfig +from olive.workflows.run.config import RunConfig, RunEngineConfig if TYPE_CHECKING: - from olive.engine.config import RunPassConfig + from olive.engine.config import BuildConfig, RunPassConfig logger = logging.getLogger(__name__) @@ -111,21 +112,16 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig): logger.warning("ORT log severity level configuration ignored since the module isn't installed.") olive_config = run_config.to_json() + + if run_config.builds: + return _run_builds(package_config, run_config, olive_config) + engine = run_config.engine.create_engine(package_config, workflow_id) engine.cache.cache_olive_config(olive_config) # check if target is not used used_passes_configs = get_used_passes_configs(run_config) - 
target_not_used = ( - # no evaluator given (also implies no search) - engine.evaluator_config is None - # no pass specific evaluator - # no pass needs to run on target - and all( - pass_config.evaluator is None and not get_run_on_target(package_config, pass_config) - for pass_config in used_passes_configs - ) - ) + target_not_used = _compute_target_not_used(package_config, engine.evaluator_config, used_passes_configs) is_ep_required = is_execution_provider_required(run_config, package_config) accelerator_spec = create_accelerator( @@ -147,6 +143,122 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig): ) +def _run_builds(package_config: OlivePackageConfig, run_config: RunConfig, olive_config: dict) -> dict: + """Run every entry in ``run_config.builds`` as an independent workflow. + + Returns a ``dict[str, WorkflowOutput]`` keyed by build name. Each build gets its own engine, + pipeline subset (from ``passes`` in the order declared by ``pipeline``), input model slice + (via ``select_components`` when ``components`` is set) and host/target/evaluator overrides. 
+ """ + _validate_build_components(run_config) + workflow_id = run_config.workflow_id + outputs: dict = OrderedDict() + for build_name, build in run_config.builds.items(): + logger.info("Running build %s", build_name) + engine_config = _make_build_engine_config(run_config, build) + engine = engine_config.create_engine(package_config, f"{workflow_id}_{build_name}") + engine.cache.cache_olive_config(olive_config) + + pipeline_subset: dict[str, list[RunPassConfig]] = OrderedDict() + for pass_name in build.pipeline: + pipeline_subset[pass_name] = run_config.passes[pass_name] + + used_passes_configs = [p for passes in pipeline_subset.values() for p in passes] + target_not_used = _compute_target_not_used(package_config, engine.evaluator_config, used_passes_configs) + is_ep_required = _is_execution_provider_required_for_passes(package_config, used_passes_configs) or ( + engine.evaluator_config is not None + and engine_config.evaluate_input_model + and run_config.input_model.type.lower() == "onnxmodel" + ) + accelerator_spec = create_accelerator( + engine.target_config, skip_supported_eps_check=target_not_used, is_ep_required=is_ep_required + ) + + input_model = run_config.input_model + if build.components: + input_model = input_model.select_components(build.components) + + engine.set_input_passes_configs(pipeline_subset) + outputs[build_name] = engine.run( + input_model, + accelerator_spec, + engine_config.packaging_config, + build.output_dir, + engine_config.evaluate_input_model, + engine_config.log_to_file, + engine_config.log_severity_level, + ) + return outputs + + +def _validate_build_components(run_config: RunConfig) -> None: + """Verify ``build.components`` names exist in the composite input model's components.""" + needs_component_check = any(build.components for build in run_config.builds.values()) + if not needs_component_check: + return + if run_config.input_model.type != "compositemodel": + bad = [name for name, build in run_config.builds.items() if 
build.components] + raise ValueError( + f"Builds {bad} declare `components` but the input model is not a CompositeModel" + f" (got type {run_config.input_model.type!r})." + ) + available = list(run_config.input_model.config.get("model_component_names") or []) + for build_name, build in run_config.builds.items(): + if not build.components: + continue + missing = [n for n in build.components if n not in available] + if missing: + raise ValueError( + f"Build {build_name!r} references unknown component(s) {missing}. Available components: {available}." + ) + + +def _make_build_engine_config(run_config: RunConfig, build: "BuildConfig") -> RunEngineConfig: + """Clone the parent engine config and apply this build's host/target/evaluator/search overrides.""" + engine_dump = run_config.engine.model_dump() + systems = run_config.systems or {} + evaluators = run_config.evaluators or {} + if build.host is not None: + engine_dump["host"] = _resolve_build_ref(build.host, systems, "system") + if build.target is not None: + engine_dump["target"] = _resolve_build_ref(build.target, systems, "system") + if build.evaluator is not None: + engine_dump["evaluator"] = _resolve_build_ref(build.evaluator, evaluators, "evaluator") + if build.search_strategy is not None: + # search_strategy can be a bool or a SearchStrategyConfig; pydantic round-trips either form. + ss = build.search_strategy + engine_dump["search_strategy"] = ss if isinstance(ss, bool) else ss.model_dump() + return RunEngineConfig.model_validate(engine_dump) + + +def _resolve_build_ref(value, registry: dict, label: str): + """Resolve a string reference into the named entry; pass through dict/model instances unchanged.""" + if isinstance(value, str): + if value not in registry: + raise ValueError(f"Unknown {label} reference {value!r}. 
Known: {sorted(registry)}.") + entry = registry[value] + return entry.model_dump() if hasattr(entry, "model_dump") else deepcopy(entry) + if hasattr(value, "model_dump"): + return value.model_dump() + return deepcopy(value) + + +def _compute_target_not_used( + package_config: OlivePackageConfig, + evaluator_config, + pass_configs: list["RunPassConfig"], +) -> bool: + return evaluator_config is None and all( + pc.evaluator is None and not get_run_on_target(package_config, pc) for pc in pass_configs + ) + + +def _is_execution_provider_required_for_passes( + package_config: OlivePackageConfig, pass_configs: list["RunPassConfig"] +) -> bool: + return any(package_config.is_onnx_module(pc.type) for pc in pass_configs) + + def run( run_config: Union[str, Path, dict], list_required_packages: bool = False, diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index ca3fcd1f40..ebc561ee54 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -42,3 +42,85 @@ def test_composite_model(as_handler): assert composite_json["config"]["model_components"][0]["config"]["model_attributes"] == {"attr0": "value0"} model_config = ModelConfig.from_json(composite_json) assert model_config.type == CompositeModelHandler.model_type + + +def _build_composite_handler(): + return CompositeModelHandler( + [get_onnx_model(), get_onnx_model(), get_onnx_model()], + ["text_encoder", "unet", "vae_decoder"], + model_attributes={"shared": "value"}, + ) + + +def test_select_components_single_returns_unwrapped_child(): + composite = _build_composite_handler() + selected = composite.select_components(["unet"]) + assert isinstance(selected, ONNXModelHandler) + # parent attributes should be inherited by the unwrapped child + assert selected.model_attributes == {"shared": "value"} + + +def test_select_components_multiple_returns_subset_composite(): + composite = _build_composite_handler() + selected = 
composite.select_components(["vae_decoder", "text_encoder"]) + assert isinstance(selected, CompositeModelHandler) + # order from the call is preserved + assert list(selected.model_component_names) == ["vae_decoder", "text_encoder"] + + +def test_select_components_unknown_name_raises(): + composite = _build_composite_handler() + with pytest.raises(ValueError, match="Unknown component"): + composite.select_components(["no_such_component"]) + + +def test_select_components_empty_list_raises(): + composite = _build_composite_handler() + with pytest.raises(ValueError, match="non-empty"): + composite.select_components([]) + + +def test_model_config_select_components_single_returns_child_config(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + }, + } + ) + selected = composite_config.select_components(["unet"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + assert selected.config["model_path"] == "b.onnx" + + +def test_model_config_select_components_multiple_returns_composite_config(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "c.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet", "vae_decoder"], + }, + } + ) + selected = composite_config.select_components(["vae_decoder", "text_encoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "compositemodel" + assert list(selected.config["model_component_names"]) == ["vae_decoder", "text_encoder"] + assert [c["config"]["model_path"] for c in 
selected.config["model_components"]] == ["c.onnx", "a.onnx"] + + +def test_model_config_select_components_on_non_composite_raises(): + onnx_config = ModelConfig.model_validate({"type": "ONNXModel", "config": {"model_path": "a.onnx"}}) + with pytest.raises(ValueError, match="only supported on CompositeModel"): + onnx_config.select_components(["any"]) diff --git a/test/workflows/test_run_builds.py b/test/workflows/test_run_builds.py new file mode 100644 index 0000000000..391f984bed --- /dev/null +++ b/test/workflows/test_run_builds.py @@ -0,0 +1,177 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import sys +from copy import deepcopy +from unittest.mock import MagicMock, patch + +import pytest + +from olive.workflows import run as olive_run +from test.utils import get_pytorch_model_io_config, pytorch_model_loader + +# pylint: disable=attribute-defined-outside-init + +PT_MODEL = { + "type": "PyTorchModel", + "config": { + "model_loader": pytorch_model_loader, + "io_config": get_pytorch_model_io_config(), + }, +} + + +class TestRunBuilds: + @pytest.fixture(autouse=True) + def setup(self, tmp_path): + self.cache_dir = tmp_path / "cache" + self.template = { + "input_model": PT_MODEL, + "systems": { + "cpu_system": {"type": "LocalSystem", "accelerators": [{"device": "cpu"}]}, + "gpu_system": {"type": "LocalSystem", "accelerators": [{"device": "gpu"}]}, + }, + "passes": { + "convert": {"type": "OnnxConversion"}, + "tune": {"type": "OrtSessionParamsTuning"}, + }, + "engine": { + "evaluate_input_model": False, + "cache_dir": str(self.cache_dir), + }, + } + + def _patch_engine_and_acc(self): + run_mock = MagicMock(return_value=MagicMock(name="WorkflowOutput")) + acc_mock = MagicMock(name="accelerator_spec") + engine_run_patch = patch("olive.engine.engine.Engine.run", 
run_mock) + accelerator_patch = patch.object(sys.modules[olive_run.__module__], "create_accelerator", return_value=acc_mock) + return run_mock, acc_mock, engine_run_patch, accelerator_patch + + def test_builds_no_builds_keeps_single_workflow_output(self): + # Sanity: with no `builds`, run() still returns the single WorkflowOutput from engine.run. + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + with engine_run_patch, acc_patch: + result = olive_run(config) + assert run_mock.call_count == 1 + assert not isinstance(result, dict) + + def test_builds_runs_each_build_once_and_returns_dict(self): + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["builds"] = { + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": {"pipeline": ["convert", "tune"], "output_dir": "out/second"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert run_mock.call_count == 2 + assert isinstance(result, dict) + assert set(result) == {"first", "second"} + + def test_builds_passes_per_build_pipeline_subset_in_declared_order(self): + # `tune` declared first in passes but second in pipeline; engine should receive [convert, tune]. 
+ run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + captured: list = [] + + def capture_input_passes(self, pass_configs): + captured.append(list(pass_configs)) + + config = deepcopy(self.template) + config["passes"] = { + "tune": {"type": "OrtSessionParamsTuning"}, + "convert": {"type": "OnnxConversion"}, + } + config["builds"] = { + "only": {"pipeline": ["convert", "tune"], "output_dir": "out/only"}, + } + with ( + engine_run_patch, + acc_patch, + patch("olive.engine.engine.Engine.set_input_passes_configs", capture_input_passes), + ): + olive_run(config) + assert run_mock.call_count == 1 + assert captured == [["convert", "tune"]] + + def test_builds_uses_per_build_output_dir(self): + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["builds"] = { + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": {"pipeline": ["convert"], "output_dir": "out/second"}, + } + with engine_run_patch, acc_patch: + olive_run(config) + output_dirs = [call.args[3] for call in run_mock.call_args_list] + assert output_dirs == ["out/first", "out/second"] + + def test_builds_host_target_override_applied_per_build(self): + # Captures the SystemConfig passed to create_accelerator for each build. 
+ run_mock, acc_mock, engine_run_patch, _ = self._patch_engine_and_acc() + seen_targets: list = [] + + def fake_create_accelerator(system_config, **kwargs): + seen_targets.append(system_config.config.accelerators[0].device.lower()) + return acc_mock + + config = deepcopy(self.template) + config["builds"] = { + "cpu_build": { + "pipeline": ["convert"], + "output_dir": "out/cpu", + "host": "cpu_system", + "target": "cpu_system", + }, + "gpu_build": { + "pipeline": ["convert"], + "output_dir": "out/gpu", + "host": "gpu_system", + "target": "gpu_system", + }, + } + with ( + engine_run_patch, + patch.object(sys.modules[olive_run.__module__], "create_accelerator", side_effect=fake_create_accelerator), + ): + olive_run(config) + assert run_mock.call_count == 2 + assert seen_targets == ["cpu", "gpu"] + + def test_builds_components_on_non_composite_input_raises(self): + config = deepcopy(self.template) + config["builds"] = { + "broken": { + "pipeline": ["convert"], + "output_dir": "out/broken", + "components": ["text_encoder"], + }, + } + with pytest.raises(ValueError, match="not a CompositeModel"): + olive_run(config) + + def test_builds_components_unknown_name_raises(self): + composite_input = { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + }, + } + config = deepcopy(self.template) + config["input_model"] = composite_input + config["builds"] = { + "bad": { + "pipeline": ["convert"], + "output_dir": "out/bad", + "components": ["no_such_component"], + }, + } + with pytest.raises(ValueError, match="unknown component"): + olive_run(config) From 6d21061e038a8eabf0131f9f371b460bd33b3f6d Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 2 Jun 2026 12:56:06 -0700 Subject: [PATCH 03/18] update model config --- olive/model/config/model_config.py | 16 ++++++++++++++++ 1 file changed, 
16 insertions(+) diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index ad577905f1..8f93ae14a6 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -5,6 +5,7 @@ import logging from copy import deepcopy from pathlib import Path +from typing import Optional from pydantic import Field, field_validator @@ -43,6 +44,21 @@ def create_model(self): cls = get_model_handler(self.type) return cls(**self.config) + def get_components(self) -> Optional[list[str]]: + """Return the list of component names exposed by this input model, or None if single-component. + + * ``CompositeModel`` -> the configured ``model_component_names`` list. + * ``DiffusersModel`` -> the variant-specific exportable components (via the handler). + * Anything else -> ``None`` (single-component model). + """ + model_type = self.type + if model_type == "compositemodel": + return list(self.config.get("model_component_names") or []) + if model_type == "diffusersmodel": + handler = self.create_model() + return [str(c) for c in handler.get_exportable_components()] + return None + def select_components(self, names: list[str]) -> "ModelConfig": """Return a new ModelConfig holding only the named components of a CompositeModel. 
From aa06608525608708385662407ec5b13b923388d3 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Mon, 15 Jun 2026 23:21:16 -0700 Subject: [PATCH 04/18] add design doc --- multi-component-model-architecture-design.md | 339 +++++++++++++++++++ olive/cli/api.py | 7 +- olive/cli/base.py | 9 +- olive/model/config/model_config.py | 28 +- olive/workflows/run/config.py | 10 + olive/workflows/run/run.py | 19 +- test/model/test_composite_model.py | 42 +++ test/workflows/test_run_config_builds.py | 10 + 8 files changed, 441 insertions(+), 23 deletions(-) create mode 100644 multi-component-model-architecture-design.md diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md new file mode 100644 index 0000000000..91016e688c --- /dev/null +++ b/multi-component-model-architecture-design.md @@ -0,0 +1,339 @@ +# Design: Multi-Component Model Optimization in Olive + + +## 1. Problem Statement + +Olive needs to optimize **multi-component models** (different components → different optimizations) **and** produce **multiple target-specific outputs** from one config. Motivating cases: + +- **Multi-component models:** + - **VLM / multimodal HF models** — quantize the language decoder (e.g. GPTQ int4) while keeping the vision tower / projector at higher precision. + - **Diffusion models** (SD / SDXL / SD3 / FLUX) — optimize text encoders, the diffusion backbone (UNet/transformer), and VAE differently. + - **Future multi-component families** — without Olive learning every architecture's naming convention. +- **Multi-device / multi-EP builds** — produce several target-specific outputs from a single config, e.g. an OpenVINO **GPU** build and an OpenVINO **NPU** build of the same model, each with its own conversion/quantization/encapsulation pipeline and `host`/`target`. + + +## 2. 
Approach + +### 2.1 How components are obtained + +#### Option A — Query Mobius (preferred) + +Olive calls Mobius at runtime to inspect the model: + +```python +components = mobius.inspect_components(model_path_or_id, task=None, trust_remote_code=False) +``` + +- **Pros:** + - always in sync with Mobius's own architecture support; + - no per-model maintenance in Olive; + - covers any model Mobius can export, including new ones; single source of truth shared with the exporter. +- **Cons:** + - hard runtime dependency on `mobius-ai` even for the optimization step; + - coupled to Mobius versions (names/fields may shift) + + +#### Option B — Olive-maintained YAML registry + +Olive ships a YAML file enumerating the components of common models, keyed by `model_type` / architecture. Two component description styles appear, matching the two families: + +- **HF/VLM components** only need a **submodule path** to slice the component out of one model. `name` (for `builds.components`) plus `source.path` (where the submodule lives) is enough; `kind` is optional (only used for pass↔kind validation): + +```yaml +# olive/model/component_registry.yaml +llava: + components: + - { name: decoder, kind: decoder, source: { path: "model.language_model" } } + - { name: vision_encoder, kind: vision_encoder, source: { path: "model.vision_tower" } } + - { name: embedding, kind: embedding, source: { path: "model.language_model.embed_tokens" } } +``` + +- **Diffusion components** reuse existing Diffusion model components yaml file: + +```yaml +stable-diffusion: # SD 1.5 family (identified by model_index.json) + type: DiffusersModel + components: + - name: text_encoder + kind: text_encoder + loader: { component: text_encoder } # DiffusersModel.get_component("text_encoder") + io_config: + input_names: [input_ids] + output_names: [last_hidden_state, pooler_output] + dynamic_axes: { input_ids: { 0: batch, 1: sequence } } + dummy_inputs: text_encoder # generate_diffusers_dummy_inputs(...) 
+ - name: vae_encoder + kind: vae_encoder + loader: { component: vae, patch: get_vae_encoder } # olive.model.utils.diffusers_utils.get_vae_encoder + io_config: + input_names: [sample, return_dict] + output_names: [latent_sample] + dynamic_axes: { sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_encoder + - name: vae_decoder + kind: vae_decoder + loader: { component: vae, patch: get_vae_decoder } # olive.model.utils.diffusers_utils.get_vae_decoder + io_config: + input_names: [latent_sample, return_dict] + output_names: [sample] + dynamic_axes: { latent_sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_decoder + - name: unet + kind: diffusion_backbone + loader: { component: unet } + io_config: + input_names: [sample, timestep, encoder_hidden_states, return_dict] + output_names: [out_sample] + dynamic_axes: + sample: { 0: batch, 1: channels, 2: height, 3: width } + timestep: { 0: batch } + encoder_hidden_states: { 0: batch, 1: sequence } + dummy_inputs: unet + # SDXL adds text_encoder_2 (kind: text_encoder) and extra UNet inputs (text_embeds, time_ids); + # SD3 / FLUX replace `unet` with `transformer` (kind: diffusion_backbone). +``` + + +- **Pros:** + - no runtime Mobius dependency for the optimization step; + - works offline; + - human-readable, reviewable, and overridable by users (drop-in extra entries); + - stable across Mobius versions; + - users can add an unsupported model without code changes. +- **Cons:** + - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); + - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); + - duplicates knowledge that also lives in Mobius. + +### 2.2 CompositeModel as the internal IR + +Whichever option supplies the plan, Olive normalizes it into a `CompositeModel` whose components carry `component_source_path` and `component_kind` in `model_attributes`. 
This reuses existing `CompositeModel` serialization and per-pass fan-out (`olive/passes/olive_pass.py`). + +--- + +## 3. The `builds` Schema + +A build is a named optimization unit: + +```python +class BuildNode: + components: list[str] | None # component names; omitted ⇒ full model + pipeline: list[str] # ordered pass names from the top-level `passes` + output_dir: str + input: str | list[str] | None # optional: take the model from another build's output(s) + host: SystemConfig | str | None + target: SystemConfig | str | None + evaluator: OliveEvaluatorConfig | str | None + search_strategy: SearchStrategyConfig | bool | None +``` + +Semantics: + +- **`components`** selects named components from the resolved `CompositeModel`. Omitted ⇒ run on the full model. A single name unwraps to that component; multiple names produce a sub-composite. +- **`pipeline`** lists pass names from the shared top-level `passes` dict, composed per build. Different builds reuse the same pass definitions in different orders/subsets. +- **`input`** (optional) sets the build's starting model: omitted ⇒ the top-level `input_model`; `"<build_name>"` ⇒ another build's output; `["a","b"]` ⇒ multiple build outputs (for a merge step). When the upstream output is a `CompositeModel` (e.g. an export build's package), `components` selects which of its components this build optimizes. This is what lets one exported composite fan out into different per-component builds. +- **`host`/`target`/`evaluator`/`search_strategy`** override engine defaults per build. + +From one `input_model`, several builds produce several outputs: +- **Per-component builds** — each build optimizes a different `components` subset (one model in → one output per component). +- **Per-target builds** — builds omit `components` and differ by `host`/`target`/`pipeline`, one output per device/EP. + +A build may also take its model from another build's output via `input` — e.g. 
an export build emits a `CompositeModel` and downstream builds optimize its components. + +--- + +## 4. Config Examples + +### 4.1 Basic shape — independent sibling builds + +Shared `passes`; each build picks a component and composes its own `pipeline`. Each build writes one optimized folder. The same shape is used whether the user optimizes ONNX components after export (Flow A) or PyTorch components before export (Flow B). + +```jsonc +{ + "input_model": { "type": "DiffusersModel", "model_path": "stabilityai/stable-diffusion-xl-base-1.0" }, + "systems": { + "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } ] } + }, + "data_configs": [ + { "name": "quantize_data_config", "user_script": "user_script.py", + "load_dataset_config": { "type": "local_dataset" }, + "dataloader_config": { "type": "quantize_data_loader", "data_num": 100 } } + ], + "passes": { + "convert": { "type": "OnnxConversion", "target_opset": 17 }, + "optimize_clip": { "type": "OrtTransformersOptimization", "model_type": "clip", "float16": true }, + "optimize_vae": { "type": "OrtTransformersOptimization", "model_type": "vae", "float16": true }, + "optimize_unet": { "type": "OrtTransformersOptimization", "model_type": "unet", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "quantize_data_config" } + }, + "builds": { + "text_encoder": { "components": ["text_encoder"], "pipeline": ["convert", "optimize_clip", "quantization"], "output_dir": "out/text_encoder", "evaluator": "common_evaluator" }, + "vae_encoder": { "components": ["vae_encoder"], "pipeline": ["convert", "optimize_vae", "quantization"], "output_dir": "out/vae_encoder", "evaluator": "common_evaluator" }, + "vae_decoder": { "components": ["vae_decoder"], "pipeline": ["convert", "optimize_vae", "quantization"], "output_dir": "out/vae_decoder", "evaluator": "common_evaluator" }, + "unet": { "components": ["unet"], "pipeline": 
["convert", "optimize_unet", "quantization"], "output_dir": "out/unet", "evaluator": "common_evaluator" } + } +} +``` + +Each build writes one optimized component under its `output_dir`. + + +### 4.2 Component optimization — two flows + + +#### Flow A — export to ONNX model first, then per-component optimization + +Export with `MobiusBuilder`, which takes an `HfModel` and returns a `CompositeModel`. There are two ways to connect export to per-component optimization. + +**Option 1 — one config (export build + `input` dependency).** The export build produces the composite; downstream builds reference it via `input` and each select a component. Unreferenced components stay as exported. + +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "" }, + "data_configs": [ + { "name": "calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } + ], + "passes": { + "export": { "type": "MobiusBuilder", "precision": "fp16", "runtime": "ort-genai" }, + "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } + }, + "builds": { + "export": { "pipeline": ["export"], "output_dir": "out/pkg" }, + "decoder": { "input": "export", "components": ["decoder"], "pipeline": ["transformer_opt", "quantization"], "output_dir": "out/decoder" }, + "vision_encoder": { "input": "export", "components": ["vision_encoder"], "pipeline": ["transformer_opt"], "output_dir": "out/vision_encoder" } + } +} +``` + +- Pros: + - One single config file. + - One step for the whole model optimization. +- Cons: + - Complex DAG logic. + - Needs `input` dependency. + - User needs to know the component names first (from a new Olive CLI, where Olive will get them from Mobius). + + +**Option 2 — two steps (CLI export, then load the folder).** Export with the CLI, then point `input_model` at the exported directory. + +Step 1 — export. 
Each component lands in its own subfolder: + +```powershell +olive capture-onnx-graph --model_name_or_path <model_name_or_path> --use_mobius_builder --output_path exported_pkg +# exported_pkg/decoder/model.onnx, exported_pkg/vision_encoder/model.onnx, exported_pkg/embedding/model.onnx +``` + +Step 2 — point `input_model` at that directory. Olive loads it as a `CompositeModel`, taking each **subfolder name as the component name**. Plain sibling builds, no `input` dependency: + +```jsonc +{ + "input_model": { "type": "CompositeModel", "model_path": "exported_pkg" }, + "data_configs": [ + { "name": "calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } + ], + "passes": { + "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } + }, + "builds": { + "decoder": { "components": ["decoder"], "pipeline": ["transformer_opt", "quantization"], "output_dir": "out/decoder" }, + "vision_encoder": { "components": ["vision_encoder"], "pipeline": ["transformer_opt"], "output_dir": "out/vision_encoder" } + } +} +``` + +Each subfolder is a standard local ONNX model Olive already loads. The only new piece is aggregating a directory of per-component subfolders into a `CompositeModel` whose component names come from the folder names. + +- Pros: + - Clear config file, no DAG. + - User doesn't need to call a different CLI to get the component names. +- Cons: + - 2 steps. + - User needs to read the output model folder to get the component names. + +#### Flow B — optimize first, then export (recommended) + +Use this flow for PyTorch-stage optimization (e.g. GPTQ on the decoder) **before** export. Three explicit user steps: + +**(a) Optimize each component**. Only the components the user wants to optimize need a build. 
+ +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "" }, + "data_configs": [ { "name": "decoder_calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } ], + "passes": { + "decoder_quant": { "type": "Gptq", "bits": 4, "group_size": 128, "data_config": "decoder_calib" } + }, + "builds": { + "decoder": { "components": ["decoder"], "pipeline": ["decoder_quant"], "output_dir": "out/decoder" } + } +} +``` + +**(b) Converge the optimized component(s) into one complete HF model directory.** The recommended form is **in-place**: the optimization runs on the full model and quantizes only the selected submodule, so step (a)'s `output_dir` is already a complete HF directory with the decoder quantized. + +> **`builds.components` means different things for the two families:** +> - **Diffusion:** slice this component out and optimize it independently → independent ONNX artifact. +> - **VLM:** locate and optimize this submodule inside the full model, output the full model → one complete HF directory. + +**(c) Export with the existing `capture-onnx-graph` CLI + Mobius builder.** `--use_mobius_builder`, takes `--model_name_or_path` as one complete HF model directory and lets Mobius re-identify and export the multi-component package. + +```powershell +olive capture-onnx-graph ` + --model_name_or_path local_folder ` + --use_mobius_builder ` + --output_path out\pkg +``` + +**(c) requires a quant format bridge.** Olive saves `quant_method="olive"` with **uint8** packing; Mobius's `preprocess_gptq_weights` expects `quant_method="gptq"`/`"awq"` with **int32** packing. A conversion (or a Mobius `"olive"` branch) is required for Mobius to load the quantized weights. + + +### 4.3 Per-target builds — multi-device / multi-EP from one config + +The **same** `builds` schema produces several target-specific outputs without any `components`. Each build differs only by `host`/`target` and its `pipeline`; shared `passes` are composed per target. 
This is the OpenVINO GPU + NPU case (Qwen2.5-Coder). + +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "Qwen/Qwen2.5-Coder-7B-Instruct" }, + "systems": { + "ov_gpu": { "type": "LocalSystem", "accelerators": [ { "device": "gpu", "execution_providers": ["OpenVINOExecutionProvider"] } ] }, + "ov_npu": { "type": "LocalSystem", "accelerators": [ { "device": "npu", "execution_providers": ["OpenVINOExecutionProvider"] } ] } + }, + "passes": { + "optimum_convert_gpu": { "type": "OpenVINOOptimumConversion", "extra_args": { "device": "gpu", "task": "text-generation-with-past" }, "ov_quant_config": { "weight_format": "int4", "group_size": 128, "ratio": 0.8 } }, + "optimum_convert_npu": { "type": "OpenVINOOptimumConversion", "extra_args": { "device": "npu" }, "ov_quant_config": { "weight_format": "int4", "group_size": 128, "dataset": "wikitext2", "ratio": 1, "sym": true, "backup_precision": "int8_asym" } }, + "io_update": { "type": "OpenVINOIoUpdate", "static": false, "reuse_cache": true }, + "encapsulation_gpu": { "type": "OpenVINOEncapsulation", "target_device": "gpu", "ov_version": "2025.1", "reuse_cache": true }, + "encapsulation_npu": { "type": "OpenVINOEncapsulation", "target_device": "npu", "ov_version": "2025.2", "reuse_cache": true, "genai_config_override": { "model": { "context_length": 4224 } } } + }, + "builds": { + "gpu": { "host": "ov_gpu", "target": "ov_gpu", "search_strategy": false, "pipeline": ["optimum_convert_gpu", "io_update", "encapsulation_gpu"], "output_dir": "gpu_output" }, + "npu": { "host": "ov_npu", "target": "ov_npu", "search_strategy": false, "pipeline": ["optimum_convert_npu", "io_update", "encapsulation_npu"], "output_dir": "npu_output" } + } +} +``` + +--- + +## 5. Low Level Design + +This section covers details needs to be handled in low level. + +- Sibling builds share no mutable state except read-only config (`passes`, `systems`) and the on-disk cache directory. 
Parallelizing is a scheduling change: one build's execution body can run concurrently with another's. +- If we choose the `input` dependency option, Olive needs to handle the builds DAG internally. +- Shared cache safety + - Cache keys will be namespaced by `workflow_id` and build name (`"{workflow_id}_{build_name}"`). + - Writes to the shared cache **directory** (footprints, saved models) must be atomic or land in per-build subdirectories; shared-cache upload (if enabled) must be concurrency-safe. +- Result aggregation and failure handling + - Results remain a `dict[str, WorkflowOutput]` keyed by build name, assembled as workers complete. + - A failure in one build does **not** abort siblings; record per-build success/failure and surface a summary. + - For the DAG variant, a failed upstream build causes its dependents to be skipped and marked (no partial/corrupt merges). + +## 6. Open Questions + + +- Should the YAML registry (Option B) be hand-authored, generated from Mobius, or both (generated then user-overridable)? +- Should component resolution run for every HfModel/DiffusersModel, or only when a build references `components`? +- After per-component optimization, what is the cleanest way to assemble the optimized weights into a single model that `capture-onnx-graph --use_mobius_builder` can consume (merged checkpoint folder vs. in-place weight swap)? +- For diffusion, is per-component sibling output sufficient, or is a final "collect into one package" export also wanted? 
\ No newline at end of file diff --git a/olive/cli/api.py b/olive/cli/api.py index cae2963264..fbbcdcb90f 100644 --- a/olive/cli/api.py +++ b/olive/cli/api.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- import inspect from argparse import ArgumentParser, Namespace -from typing import Any +from typing import Any, Union from olive.cli.benchmark import BenchmarkCommand from olive.cli.capture_onnx import CaptureOnnxGraphCommand @@ -300,7 +300,7 @@ def benchmark(model_name_or_path: str, **kwargs) -> WorkflowOutput: return _run_unified_command(BenchmarkCommand, **kwargs) -def run(run_config: str, **kwargs) -> WorkflowOutput: +def run(run_config: str, **kwargs) -> Union[WorkflowOutput, dict[str, WorkflowOutput]]: """Run a workflow. Args: @@ -308,7 +308,8 @@ def run(run_config: str, **kwargs) -> WorkflowOutput: **kwargs: All other CLI arguments supported by extract-adapters command Returns: - WorkflowOutput: Contains tuning results + WorkflowOutput for a single-pipeline workflow, or a ``dict[str, WorkflowOutput]`` keyed by build + name when the config declares ``builds``. """ kwargs["run_config"] = run_config diff --git a/olive/cli/base.py b/olive/cli/base.py index 5c831618b8..0c56bc9140 100644 --- a/olive/cli/base.py +++ b/olive/cli/base.py @@ -94,7 +94,14 @@ def _run_workflow(self): workflow_output = olive_run(run_config) if getattr(self.args, "test", None) not in (None, False): mark_test_output_path(self.args.output_path) - if not workflow_output.has_output_model(): + if isinstance(workflow_output, dict): + # `builds` workflows return one WorkflowOutput per build keyed by build name. + for build_name, build_output in workflow_output.items(): + if build_output is None or not build_output.has_output_model(): + print(f"Build {build_name!r}: no output model produced. 
Please check the log for details.") + else: + print(f"Build {build_name!r}: model is saved under {self.args.output_path}") + elif not workflow_output.has_output_model(): print("No output model produced. Please check the log for details.") else: print(f"Model is saved at {self.args.output_path}") diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index 8f93ae14a6..05cbfa0a10 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -45,18 +45,16 @@ def create_model(self): return cls(**self.config) def get_components(self) -> Optional[list[str]]: - """Return the list of component names exposed by this input model, or None if single-component. + """Return the component names that builds can target, or None for a single-component model. - * ``CompositeModel`` -> the configured ``model_component_names`` list. - * ``DiffusersModel`` -> the variant-specific exportable components (via the handler). - * Anything else -> ``None`` (single-component model). + Only ``CompositeModel`` exposes selectable components (its configured ``model_component_names``). + Every other model type -- including ``DiffusersModel``, which cannot yet be sliced via + ``select_components`` -- returns ``None`` and is treated as a single-component model. Keeping this + in sync with ``select_components`` ensures build component validation never accepts a name that + ``select_components`` would later reject. 
""" - model_type = self.type - if model_type == "compositemodel": + if self.type == "compositemodel": return list(self.config.get("model_component_names") or []) - if model_type == "diffusersmodel": - handler = self.create_model() - return [str(c) for c in handler.get_exportable_components()] return None def select_components(self, names: list[str]) -> "ModelConfig": @@ -83,10 +81,20 @@ def select_components(self, names: list[str]) -> "ModelConfig": component_map = dict(zip(component_names, model_components)) selected = [deepcopy(component_map[n]) for n in names] if len(selected) == 1: + # Unwrap to the child handler config, inheriting the composite's shared model_attributes so a + # single-component build keeps parent context (matches CompositeModelHandler.select_components). child = selected[0] + parent_attributes = self.config.get("model_attributes") or {} if isinstance(child, ModelConfig): + merged = {**parent_attributes, **(child.config.get("model_attributes") or {})} + if merged: + child.config["model_attributes"] = merged return child - return ModelConfig.model_validate(child) + child_config = dict(child.get("config") or {}) + merged = {**parent_attributes, **(child_config.get("model_attributes") or {})} + if merged: + child_config["model_attributes"] = merged + return ModelConfig.model_validate({**child, "config": child_config}) new_config = { **{k: v for k, v in self.config.items() if k not in ("model_components", "model_component_names")}, "model_components": selected, diff --git a/olive/workflows/run/config.py b/olive/workflows/run/config.py index 5b4f899a29..da28d3042b 100644 --- a/olive/workflows/run/config.py +++ b/olive/workflows/run/config.py @@ -255,6 +255,16 @@ def validate_builds_references(self): # noqa: N804 # model_validator mode="aft f"Build {build_name!r} {field_name} references unknown entry {value!r}." f" Known {registry_label}: {sorted(registry)}." 
) + engine_search_strategy = self.engine.search_strategy if self.engine else None + engine_evaluator = self.engine.evaluator if self.engine else None + for build_name, build in self.builds.items(): + effective_search = build.search_strategy if build.search_strategy is not None else engine_search_strategy + effective_evaluator = build.evaluator if build.evaluator is not None else engine_evaluator + if effective_search and effective_evaluator is None: + raise ValueError( + f"Build {build_name!r} enables search but resolves to no evaluator. Provide an evaluator at the" + " build or engine level, or disable search." + ) return self @field_validator("data_configs", mode="before") diff --git a/olive/workflows/run/run.py b/olive/workflows/run/run.py index 4e421db435..d3bd0c758d 100644 --- a/olive/workflows/run/run.py +++ b/olive/workflows/run/run.py @@ -161,23 +161,24 @@ def _run_builds(package_config: OlivePackageConfig, run_config: RunConfig, olive pipeline_subset: dict[str, list[RunPassConfig]] = OrderedDict() for pass_name in build.pipeline: - pipeline_subset[pass_name] = run_config.passes[pass_name] + # deepcopy so each build engine owns its pass configs; Engine.initialize mutates them in place. 
+ pipeline_subset[pass_name] = deepcopy(run_config.passes[pass_name]) + + input_model = run_config.input_model + if build.components: + input_model = input_model.select_components(build.components) used_passes_configs = [p for passes in pipeline_subset.values() for p in passes] target_not_used = _compute_target_not_used(package_config, engine.evaluator_config, used_passes_configs) is_ep_required = _is_execution_provider_required_for_passes(package_config, used_passes_configs) or ( engine.evaluator_config is not None and engine_config.evaluate_input_model - and run_config.input_model.type.lower() == "onnxmodel" + and input_model.type.lower() == "onnxmodel" ) accelerator_spec = create_accelerator( engine.target_config, skip_supported_eps_check=target_not_used, is_ep_required=is_ep_required ) - input_model = run_config.input_model - if build.components: - input_model = input_model.select_components(build.components) - engine.set_input_passes_configs(pipeline_subset) outputs[build_name] = engine.run( input_model, @@ -192,17 +193,17 @@ def _run_builds(package_config: OlivePackageConfig, run_config: RunConfig, olive def _validate_build_components(run_config: RunConfig) -> None: - """Verify ``build.components`` names exist in the composite input model's components.""" + """Verify ``build.components`` names exist in the input model's selectable components.""" needs_component_check = any(build.components for build in run_config.builds.values()) if not needs_component_check: return - if run_config.input_model.type != "compositemodel": + available = run_config.input_model.get_components() + if available is None: bad = [name for name, build in run_config.builds.items() if build.components] raise ValueError( f"Builds {bad} declare `components` but the input model is not a CompositeModel" f" (got type {run_config.input_model.type!r})." 
) - available = list(run_config.input_model.config.get("model_component_names") or []) for build_name, build in run_config.builds.items(): if not build.components: continue diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index ebc561ee54..4f64feefc7 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -124,3 +124,45 @@ def test_model_config_select_components_on_non_composite_raises(): onnx_config = ModelConfig.model_validate({"type": "ONNXModel", "config": {"model_path": "a.onnx"}}) with pytest.raises(ValueError, match="only supported on CompositeModel"): onnx_config.select_components(["any"]) + + +def test_model_config_select_components_single_inherits_parent_attributes(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx", "model_attributes": {"child": "c"}}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + "model_attributes": {"shared": "s", "child": "parent"}, + }, + } + ) + selected = composite_config.select_components(["text_encoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + # parent-only keys are inherited; child keys win on conflict + assert selected.config["model_attributes"] == {"shared": "s", "child": "c"} + + +def test_model_config_get_components_returns_none_for_non_composite(): + onnx_config = ModelConfig.model_validate({"type": "ONNXModel", "config": {"model_path": "a.onnx"}}) + assert onnx_config.get_components() is None + + +def test_model_config_get_components_returns_names_for_composite(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + 
"model_component_names": ["text_encoder", "unet"], + }, + } + ) + assert composite_config.get_components() == ["text_encoder", "unet"] diff --git a/test/workflows/test_run_config_builds.py b/test/workflows/test_run_config_builds.py index 656da3f5b8..016ec31e86 100644 --- a/test/workflows/test_run_config_builds.py +++ b/test/workflows/test_run_config_builds.py @@ -154,3 +154,13 @@ def test_builds_empty_default_is_noop(self): assert set(run_config.builds) == {"only"} assert run_config.builds["only"].pipeline == ["convert"] assert run_config.builds["only"].host is None + + def test_builds_search_without_evaluator_errors(self): + # Enabling search on a build with no build- or engine-level evaluator must fail validation. + config_dict = self._build_config( + { + "only": {"pipeline": ["convert"], "output_dir": "out/only", "search_strategy": True}, + } + ) + with pytest.raises(ValidationError, match="no evaluator"): + RunConfig.model_validate(config_dict) From 5ed2a6720ed102f8617e00785841abe66a39afce Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 10:51:26 -0700 Subject: [PATCH 05/18] update doc --- multi-component-model-architecture-design.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md index 91016e688c..d503b575c1 100644 --- a/multi-component-model-architecture-design.md +++ b/multi-component-model-architecture-design.md @@ -316,7 +316,7 @@ The **same** `builds` schema produces several target-specific outputs without an --- -## 5. Low Level Design +## 5. Low Level Details This section covers details needs to be handled in low level. 
From 581e0967f29fc48a6d0d6748afaf52d598414a3c Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 11:17:43 -0700 Subject: [PATCH 06/18] update docs --- multi-component-model-architecture-design.md | 204 +++++++++---------- 1 file changed, 99 insertions(+), 105 deletions(-) diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md index d503b575c1..0d04cee2f8 100644 --- a/multi-component-model-architecture-design.md +++ b/multi-component-model-architecture-design.md @@ -14,104 +14,7 @@ Olive needs to optimize **multi-component models** (different components → dif ## 2. Approach -### 2.1 How components are obtained - -#### Option A — Query Mobius (preferred) - -Olive calls Mobius at runtime to inspect the model: - -```python -components = mobius.inspect_components(model_path_or_id, task=None, trust_remote_code=False) -``` - -- **Pros:** - - always in sync with Mobius's own architecture support; - - no per-model maintenance in Olive; - - covers any model Mobius can export, including new ones; single source of truth shared with the exporter. -- **Cons:** - - hard runtime dependency on `mobius-ai` even for the optimization step; - - coupled to Mobius versions (names/fields may shift) - - -#### Option B — Olive-maintained YAML registry - -Olive ships a YAML file enumerating the components of common models, keyed by `model_type` / architecture. Two component description styles appear, matching the two families: - -- **HF/VLM components** only need a **submodule path** to slice the component out of one model. 
`name` (for `builds.components`) plus `source.path` (where the submodule lives) is enough; `kind` is optional (only used for pass↔kind validation): - -```yaml -# olive/model/component_registry.yaml -llava: - components: - - { name: decoder, kind: decoder, source: { path: "model.language_model" } } - - { name: vision_encoder, kind: vision_encoder, source: { path: "model.vision_tower" } } - - { name: embedding, kind: embedding, source: { path: "model.language_model.embed_tokens" } } -``` - -- **Diffusion components** reuse existing Diffusion model components yaml file: - -```yaml -stable-diffusion: # SD 1.5 family (identified by model_index.json) - type: DiffusersModel - components: - - name: text_encoder - kind: text_encoder - loader: { component: text_encoder } # DiffusersModel.get_component("text_encoder") - io_config: - input_names: [input_ids] - output_names: [last_hidden_state, pooler_output] - dynamic_axes: { input_ids: { 0: batch, 1: sequence } } - dummy_inputs: text_encoder # generate_diffusers_dummy_inputs(...) 
- - name: vae_encoder - kind: vae_encoder - loader: { component: vae, patch: get_vae_encoder } # olive.model.utils.diffusers_utils.get_vae_encoder - io_config: - input_names: [sample, return_dict] - output_names: [latent_sample] - dynamic_axes: { sample: { 0: batch, 1: channels, 2: height, 3: width } } - dummy_inputs: vae_encoder - - name: vae_decoder - kind: vae_decoder - loader: { component: vae, patch: get_vae_decoder } # olive.model.utils.diffusers_utils.get_vae_decoder - io_config: - input_names: [latent_sample, return_dict] - output_names: [sample] - dynamic_axes: { latent_sample: { 0: batch, 1: channels, 2: height, 3: width } } - dummy_inputs: vae_decoder - - name: unet - kind: diffusion_backbone - loader: { component: unet } - io_config: - input_names: [sample, timestep, encoder_hidden_states, return_dict] - output_names: [out_sample] - dynamic_axes: - sample: { 0: batch, 1: channels, 2: height, 3: width } - timestep: { 0: batch } - encoder_hidden_states: { 0: batch, 1: sequence } - dummy_inputs: unet - # SDXL adds text_encoder_2 (kind: text_encoder) and extra UNet inputs (text_embeds, time_ids); - # SD3 / FLUX replace `unet` with `transformer` (kind: diffusion_backbone). -``` - - -- **Pros:** - - no runtime Mobius dependency for the optimization step; - - works offline; - - human-readable, reviewable, and overridable by users (drop-in extra entries); - - stable across Mobius versions; - - users can add an unsupported model without code changes. -- **Cons:** - - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); - - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); - - duplicates knowledge that also lives in Mobius. - -### 2.2 CompositeModel as the internal IR - -Whichever option supplies the plan, Olive normalizes it into a `CompositeModel` whose components carry `component_source_path` and `component_kind` in `model_attributes`. 
This reuses existing `CompositeModel` serialization and per-pass fan-out (`olive/passes/olive_pass.py`). - ---- - -## 3. The `builds` Schema +### The `builds` Schema A build is a named optimization unit: @@ -142,9 +45,9 @@ A build may also take its model from another build's output via `input` — e.g. --- -## 4. Config Examples +## 3. Config Examples -### 4.1 Basic shape — independent sibling builds +### 3.1 Basic shape — independent sibling builds Shared `passes`; each build picks a component and composes its own `pipeline`. Each build writes one optimized folder. The same shape is used whether the user optimizes ONNX components after export (Flow A) or PyTorch components before export (Flow B). @@ -178,7 +81,7 @@ Shared `passes`; each build picks a component and composes its own `pipeline`. E Each build writes one optimized component under its `output_dir`. -### 4.2 Component optimization — two flows +### 3.2 Component optimization — two flows #### Flow A — export to ONNX model first, then per-component optimization @@ -254,7 +157,98 @@ Each subfolder is a standard local ONNX model Olive already loads. The only new #### Flow B — optimize first, then export (recommended) -For PyTorch-stage optimization (e.g. GPTQ on the decoder) **before** export. Three explicit user steps; +For PyTorch-stage optimization (e.g. GPTQ on the decoder) **before** export. + +##### How components are obtained + +###### Option A — Query Mobius (preferred) + +Olive calls Mobius at runtime to inspect the model: + +```python +components = mobius.inspect_components(model_path_or_id, task=None, trust_remote_code=False) +``` + +- **Pros:** + - always in sync with Mobius's own architecture support; + - no per-model maintenance in Olive; + - covers any model Mobius can export, including new ones; single source of truth shared with the exporter. 
+- **Cons:** + - hard runtime dependency on `mobius-ai` even for the optimization step; + - coupled to Mobius versions (names/fields may shift) + + +###### Option B — Olive-maintained YAML registry + +Olive ships a YAML file enumerating the components of common models, keyed by `model_type` / architecture. Two component description styles appear, matching the two families: + +- **HF/VLM components** only need a **submodule path** to slice the component out of one model. `name` (for `builds.components`) plus `source.path` (where the submodule lives) is enough; `kind` is optional (only used for pass↔kind validation): + +```yaml +# olive/model/component_registry.yaml +llava: + components: + - { name: decoder, kind: decoder, source: { path: "model.language_model" } } + - { name: vision_encoder, kind: vision_encoder, source: { path: "model.vision_tower" } } + - { name: embedding, kind: embedding, source: { path: "model.language_model.embed_tokens" } } +``` + +- **Diffusion components** reuse existing Diffusion model components yaml file: + +```yaml +stable-diffusion: # SD 1.5 family (identified by model_index.json) + type: DiffusersModel + components: + - name: text_encoder + kind: text_encoder + loader: { component: text_encoder } # DiffusersModel.get_component("text_encoder") + io_config: + input_names: [input_ids] + output_names: [last_hidden_state, pooler_output] + dynamic_axes: { input_ids: { 0: batch, 1: sequence } } + dummy_inputs: text_encoder # generate_diffusers_dummy_inputs(...) 
+ - name: vae_encoder + kind: vae_encoder + loader: { component: vae, patch: get_vae_encoder } # olive.model.utils.diffusers_utils.get_vae_encoder + io_config: + input_names: [sample, return_dict] + output_names: [latent_sample] + dynamic_axes: { sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_encoder + - name: vae_decoder + kind: vae_decoder + loader: { component: vae, patch: get_vae_decoder } # olive.model.utils.diffusers_utils.get_vae_decoder + io_config: + input_names: [latent_sample, return_dict] + output_names: [sample] + dynamic_axes: { latent_sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_decoder + - name: unet + kind: diffusion_backbone + loader: { component: unet } + io_config: + input_names: [sample, timestep, encoder_hidden_states, return_dict] + output_names: [out_sample] + dynamic_axes: + sample: { 0: batch, 1: channels, 2: height, 3: width } + timestep: { 0: batch } + encoder_hidden_states: { 0: batch, 1: sequence } + dummy_inputs: unet + # SDXL adds text_encoder_2 (kind: text_encoder) and extra UNet inputs (text_embeds, time_ids); + # SD3 / FLUX replace `unet` with `transformer` (kind: diffusion_backbone). +``` + + +- **Pros:** + - no runtime Mobius dependency for the optimization step; + - works offline; + - human-readable, reviewable, and overridable by users (drop-in extra entries); + - stable across Mobius versions; + - users can add an unsupported model without code changes. +- **Cons:** + - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); + - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); + - duplicates knowledge that also lives in Mobius. **(a) Optimize each component**. Only the components the user wants to optimize need a build. 
@@ -289,7 +283,7 @@ olive capture-onnx-graph ` **(c) requires a quant format bridge.** Olive saves `quant_method="olive"` with **uint8** packing; Mobius's `preprocess_gptq_weights` expects `quant_method="gptq"`/`"awq"` with **int32** packing. A conversion (or a Mobius `"olive"` branch) is required for Mobius to load the quantized weights. -### 4.3 Per-target builds — multi-device / multi-EP from one config +### 3.3 Per-target builds — multi-device / multi-EP from one config The **same** `builds` schema produces several target-specific outputs without any `components`. Each build differs only by `host`/`target` and its `pipeline`; shared `passes` are composed per target. This is the OpenVINO GPU + NPU case (Qwen2.5-Coder). @@ -316,7 +310,7 @@ The **same** `builds` schema produces several target-specific outputs without an --- -## 5. Low Level Details +## 4. Low Level Details This section covers details needs to be handled in low level. @@ -330,7 +324,7 @@ This section covers details needs to be handled in low level. - A failure in one build does **not** abort siblings; record per-build success/failure and surface a summary. - For the DAG variant, a failed upstream build causes its dependents to be skipped and marked (no partial/corrupt merges). -## 6. Open Questions +## 5. Open Questions - Should the YAML registry (Option B) be hand-authored, generated from Mobius, or both (generated then user-overridable)? From ec305f476da8606d34f4d760d9c88197087769a6 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 11:21:11 -0700 Subject: [PATCH 07/18] Implement component resolution for builds: directory composite + Mobius HfModel Support the two component-discovery paths from the multi-component design: - Flow A Option 2 (two steps): load a Mobius export directory as a CompositeModel, using per-component subfolder names as component names. 
Adds discover_onnx_components() and directory auto-discovery in CompositeModelHandler and ModelConfig.get_components/select_components. - Flow B (optimize then export): resolve an HfModel's components by querying Mobius (olive/common/mobius_utils.inspect_components, lazy import). HfModel.get_components returns Mobius component names; select_components tags the chosen component's submodule path in model_attributes for PyTorch-stage per-component passes. No 'input' build dependency is used. Build component validation updated for both sources. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/common/mobius_utils.py | 89 +++++++++++++++++++++++ olive/model/config/model_config.py | 110 +++++++++++++++++++++++++---- olive/model/handler/composite.py | 41 ++++++++++- olive/model/utils/onnx_utils.py | 34 +++++++++ olive/workflows/run/run.py | 4 +- test/model/test_composite_model.py | 92 ++++++++++++++++++++++++ test/workflows/test_run_builds.py | 47 +++++++++++- 7 files changed, 398 insertions(+), 19 deletions(-) create mode 100644 olive/common/mobius_utils.py diff --git a/olive/common/mobius_utils.py b/olive/common/mobius_utils.py new file mode 100644 index 0000000000..dbb296ff4d --- /dev/null +++ b/olive/common/mobius_utils.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Helpers for obtaining a model's component plan from mobius. + +Mobius owns the per-architecture knowledge of which components a model exposes (e.g. a VLM's +``decoder`` / ``vision_encoder`` / ``embedding``), how each maps back to a submodule, and the role +of each component. Olive consumes that plan to drive per-component builds without re-implementing the +architecture-specific logic. 
+ +``mobius-ai`` is imported lazily so Olive keeps working when it is not installed; only the code paths +that actually need a component plan for a Hugging Face model require it. +""" + +import logging +from dataclasses import dataclass, field +from typing import Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class ComponentInfo: + """A single component returned by a component source. + + Attributes: + name: Stable, user-facing component name used in ``builds.components``. + kind: Component role/kind (e.g. ``decoder``, ``vision_encoder``). Optional; used for + pass/component compatibility validation. + source_path: Dotted submodule path locating the component inside the full model + (e.g. ``model.language_model``). Used to slice the component for PyTorch-stage passes. + + """ + + name: str + kind: Optional[str] = None + source_path: Optional[str] = None + metadata: dict = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: "ComponentInfo | dict") -> "ComponentInfo": + if isinstance(data, ComponentInfo): + return data + source = data.get("source") or {} + return cls( + name=data["name"], + kind=data.get("kind"), + source_path=data.get("source_path") or source.get("path"), + metadata={k: v for k, v in data.items() if k not in ("name", "kind", "source", "source_path")}, + ) + + +def inspect_components( + model_name_or_path: str, + task: Optional[str] = None, + trust_remote_code: bool = False, +) -> list[ComponentInfo]: + """Return the component plan for a Hugging Face model by querying mobius. + + Args: + model_name_or_path: Hugging Face model id or local path. + task: Optional task hint passed to mobius. + trust_remote_code: Whether to trust remote code when mobius loads the config. + + Returns: + A list of :class:`ComponentInfo`. An empty list means the model is single-component + (no separable components). + + Raises: + ImportError: If ``mobius-ai`` is not installed. 
+ + """ + try: + import mobius + except ImportError as exc: + raise ImportError( + "mobius-ai is required to resolve model components for a Hugging Face model. " + "Install with: pip install mobius-ai" + ) from exc + + raw_components = mobius.inspect_components( + model_name_or_path, + task=task, + trust_remote_code=trust_remote_code, + ) + components = [ComponentInfo.from_dict(c) for c in raw_components] + logger.debug("mobius.inspect_components(%s) -> %s", model_name_or_path, [c.name for c in components]) + return components diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index 05cbfa0a10..b2b615c073 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -47,32 +47,81 @@ def create_model(self): def get_components(self) -> Optional[list[str]]: """Return the component names that builds can target, or None for a single-component model. - Only ``CompositeModel`` exposes selectable components (its configured ``model_component_names``). - Every other model type -- including ``DiffusersModel``, which cannot yet be sliced via - ``select_components`` -- returns ``None`` and is treated as a single-component model. Keeping this - in sync with ``select_components`` ensures build component validation never accepts a name that - ``select_components`` would later reject. + * ``CompositeModel`` -> its configured ``model_component_names``, or, when the config only + points at a directory of per-component ONNX subfolders, the discovered subfolder names. + * ``HfModel`` -> the components reported by mobius (``mobius.inspect_components``), or None + when the model is single-component / mobius reports nothing. + * Anything else -> ``None`` (single-component model). 
""" if self.type == "compositemodel": - return list(self.config.get("model_component_names") or []) + names = list(self.config.get("model_component_names") or []) + if names: + return names + return [name for name, _ in self._discover_composite_components()] + if self.type == "hfmodel": + return self._get_hf_components() or None return None + def _discover_composite_components(self) -> list[tuple[str, str]]: + """Discover ``(name, onnx_relpath)`` from a directory-based composite, or empty list.""" + from olive.model.utils.onnx_utils import discover_onnx_components + + model_path = self.config.get("model_path") + if not model_path or not Path(str(model_path)).is_dir(): + return [] + return discover_onnx_components(str(model_path)) + + def _get_hf_components(self) -> list[str]: + """Return component names for an HfModel by querying mobius, or empty list.""" + from olive.common.mobius_utils import inspect_components + + model_path = self.config.get("model_path") + if not model_path: + return [] + load_kwargs = self.config.get("load_kwargs") or {} + components = inspect_components( + model_path, + task=self.config.get("task"), + trust_remote_code=bool(load_kwargs.get("trust_remote_code")), + ) + return [c.name for c in components] + def select_components(self, names: list[str]) -> "ModelConfig": - """Return a new ModelConfig holding only the named components of a CompositeModel. + """Return a new ModelConfig holding only the named components. - Returns the unwrapped child component ``ModelConfig`` when exactly one name is given; - returns a new ``CompositeModel`` ``ModelConfig`` containing the subset (in the requested - order) otherwise. Raises ``ValueError`` if invoked on a non-composite model or if any - name is missing from ``model_component_names``. + * ``CompositeModel`` -> the unwrapped child component ``ModelConfig`` when exactly one name + is given, otherwise a new ``CompositeModel`` ``ModelConfig`` containing the subset (in the + requested order). 
Directory-based composites are discovered first. + * ``HfModel`` -> a copy of this config tagged with the selected component's submodule path + (from the mobius plan) in ``model_attributes['component_source_path']``, so a PyTorch-stage + pass can slice that submodule while the saved output stays a complete HF directory. + + Raises ``ValueError`` if any name is missing from the available components. """ + if self.type == "hfmodel": + return self._select_hf_component(names) if self.type != "compositemodel": raise ValueError( - f"select_components is only supported on CompositeModel input configs (got type {self.type!r})." + f"select_components is only supported on CompositeModel or HfModel input configs " + f"(got type {self.type!r})." ) if not names: raise ValueError("select_components requires a non-empty list of names.") - component_names = self.config.get("model_component_names") or [] - model_components = self.config.get("model_components") or [] + component_names = list(self.config.get("model_component_names") or []) + model_components = list(self.config.get("model_components") or []) + if not component_names: + discovered = self._discover_composite_components() + if not discovered: + raise ValueError( + "CompositeModel config has no model_components and model_path is not a directory of " + "per-component ONNX subfolders." 
+ ) + component_names = [name for name, _ in discovered] + model_path = self.config.get("model_path") + model_components = [ + {"type": "ONNXModel", "config": {"model_path": str(model_path), "onnx_file_name": onnx_rel}} + for _, onnx_rel in discovered + ] if len(component_names) != len(model_components): raise ValueError("CompositeModel config has mismatched model_components and model_component_names lengths.") missing = [n for n in names if n not in component_names] @@ -102,6 +151,39 @@ def select_components(self, names: list[str]) -> "ModelConfig": } return ModelConfig(type=self.type, config=new_config) + def _select_hf_component(self, names: list[str]) -> "ModelConfig": + """Select a single Hf component (by mobius source path) for PyTorch-stage optimization.""" + if not names: + raise ValueError("select_components requires a non-empty list of names.") + if len(names) != 1: + raise ValueError( + f"HfModel components must be optimized one at a time; got {names}. Use a separate build per component." + ) + from olive.common.mobius_utils import inspect_components + + model_path = self.config.get("model_path") + load_kwargs = self.config.get("load_kwargs") or {} + components = inspect_components( + model_path, + task=self.config.get("task"), + trust_remote_code=bool(load_kwargs.get("trust_remote_code")), + ) + by_name = {c.name: c for c in components} + missing = [n for n in names if n not in by_name] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. 
Available components: {list(by_name)}.") + component = by_name[names[0]] + + new_config = deepcopy(self.config) + attributes = dict(new_config.get("model_attributes") or {}) + attributes["component_name"] = component.name + if component.kind is not None: + attributes["component_kind"] = component.kind + if component.source_path is not None: + attributes["component_source_path"] = component.source_path + new_config["model_attributes"] = attributes + return ModelConfig(type=self.type, config=new_config) + def get_model_id(self): for v in self.config.values(): if callable(v): diff --git a/olive/model/handler/composite.py b/olive/model/handler/composite.py index e3332ce38e..d41ad8e7e5 100644 --- a/olive/model/handler/composite.py +++ b/olive/model/handler/composite.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging +from pathlib import Path from typing import Any, Optional, Union from olive.common.config_utils import serialize_to_json, validate_config @@ -32,8 +33,8 @@ class CompositeModelHandler(OliveModelHandler): def __init__( self, - model_components: list[Union[OliveModelHandler, dict[str, Any]]], - model_component_names: list[str], + model_components: Optional[list[Union[OliveModelHandler, dict[str, Any]]]] = None, + model_component_names: Optional[list[str]] = None, model_path: OLIVE_RESOURCE_ANNOTATIONS = None, model_attributes: Optional[dict[str, Any]] = None, ): @@ -43,6 +44,19 @@ def __init__( model_file_format=ModelFileFormat.COMPOSITE_MODEL, model_attributes=model_attributes, ) + + # When components are not provided but model_path is a directory of per-component ONNX + # subfolders (e.g. a mobius export package), discover them using the subfolder names as + # component names. This supports loading an exported package directly as a CompositeModel. 
+ if not model_components: + discovered = self._discover_components(model_path) + if not discovered: + raise ValueError( + "CompositeModelHandler requires model_components, or a model_path directory containing " + "per-component ONNX subfolders." + ) + model_components, model_component_names = discovered + self._model_components = [ validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in model_components ] @@ -53,6 +67,29 @@ def __init__( assert len(self._model_components) == len(model_component_names), "Number of components and names must match" self.model_component_names = model_component_names + @staticmethod + def _discover_components( + model_path: OLIVE_RESOURCE_ANNOTATIONS, + ) -> Optional[tuple[list[dict[str, Any]], list[str]]]: + """Build component configs from a directory of per-component ONNX subfolders. + + Returns ``(model_components, model_component_names)`` or ``None`` if discovery is not + applicable (model_path is not a local directory of component subfolders). 
+ """ + from olive.model.utils.onnx_utils import discover_onnx_components + + if not model_path or not Path(str(model_path)).is_dir(): + return None + discovered = discover_onnx_components(str(model_path)) + if not discovered: + return None + names = [name for name, _ in discovered] + components = [ + {"type": "ONNXModel", "config": {"model_path": str(model_path), "onnx_file_name": onnx_rel}} + for _, onnx_rel in discovered + ] + return components, names + @property def model_components(self): for m in self._model_components: diff --git a/olive/model/utils/onnx_utils.py b/olive/model/utils/onnx_utils.py index 73be98a115..f1c6d5f3e9 100644 --- a/olive/model/utils/onnx_utils.py +++ b/olive/model/utils/onnx_utils.py @@ -63,6 +63,40 @@ def get_onnx_file_path(model_path: str, onnx_file_name: Optional[str] = None) -> raise ValueError(f"No .onnx file found in the model folder {model_path}.") +def discover_onnx_components(model_dir: str) -> list[tuple[str, str]]: + """Discover per-component ONNX subfolders in a directory. + + A multi-component ONNX package (e.g. produced by ``capture-onnx-graph --use_mobius_builder``) + lays out each component in its own subfolder, with a ``model.onnx`` inside: + + model_dir/decoder/model.onnx + model_dir/vision_encoder/model.onnx + model_dir/embedding/model.onnx + + Args: + model_dir: Directory that contains one subfolder per component. + + Returns: + A list of ``(component_name, onnx_file_relpath)`` tuples sorted by component name, where + ``component_name`` is the subfolder name and ``onnx_file_relpath`` is the path to the + component's ``.onnx`` file relative to ``model_dir``. Empty if no component subfolders are + found. 
+ + """ + model_dir_path = Path(model_dir) + if not model_dir_path.is_dir(): + return [] + + components: list[tuple[str, str]] = [] + for sub_dir in sorted(p for p in model_dir_path.iterdir() if p.is_dir()): + onnx_files = list(sub_dir.glob("*.onnx")) + if len(onnx_files) == 1: + components.append((sub_dir.name, f"{sub_dir.name}/{onnx_files[0].name}")) + elif (sub_dir / "model.onnx").exists(): + components.append((sub_dir.name, f"{sub_dir.name}/model.onnx")) + return components + + def get_additional_file_path(model_dir: str, file_name: str) -> Optional[str]: """Get the full path to the additional file. diff --git a/olive/workflows/run/run.py b/olive/workflows/run/run.py index d3bd0c758d..0da23c78fc 100644 --- a/olive/workflows/run/run.py +++ b/olive/workflows/run/run.py @@ -198,10 +198,10 @@ def _validate_build_components(run_config: RunConfig) -> None: if not needs_component_check: return available = run_config.input_model.get_components() - if available is None: + if not available: bad = [name for name, build in run_config.builds.items() if build.components] raise ValueError( - f"Builds {bad} declare `components` but the input model is not a CompositeModel" + f"Builds {bad} declare `components` but the input model exposes no selectable components" f" (got type {run_config.input_model.type!r})." 
) for build_name, build in run_config.builds.items(): diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index 4f64feefc7..ed2902e8ce 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -166,3 +166,95 @@ def test_model_config_get_components_returns_names_for_composite(): } ) assert composite_config.get_components() == ["text_encoder", "unet"] + + +def _make_export_package(root): + """Create a mobius-style export package: one subfolder per component with a model.onnx.""" + for name in ["decoder", "vision_encoder", "embedding"]: + comp_dir = root / name + comp_dir.mkdir(parents=True) + (comp_dir / "model.onnx").write_bytes(b"onnx") + return root + + +def test_discover_onnx_components_reads_subfolders(tmp_path): + from olive.model.utils.onnx_utils import discover_onnx_components + + _make_export_package(tmp_path) + discovered = discover_onnx_components(str(tmp_path)) + assert [name for name, _ in discovered] == ["decoder", "embedding", "vision_encoder"] + assert dict(discovered)["decoder"] == "decoder/model.onnx" + + +def test_discover_onnx_components_empty_for_flat_dir(tmp_path): + from olive.model.utils.onnx_utils import discover_onnx_components + + (tmp_path / "model.onnx").write_bytes(b"onnx") + assert discover_onnx_components(str(tmp_path)) == [] + + +def test_composite_handler_discovers_components_from_directory(tmp_path): + _make_export_package(tmp_path) + handler = CompositeModelHandler(model_path=str(tmp_path)) + assert list(handler.model_component_names) == ["decoder", "embedding", "vision_encoder"] + for _, component in handler.get_model_components(): + assert isinstance(component, ONNXModelHandler) + + +def test_model_config_get_components_discovers_directory_composite(tmp_path): + _make_export_package(tmp_path) + config = ModelConfig.model_validate({"type": "CompositeModel", "config": {"model_path": str(tmp_path)}}) + assert config.get_components() == ["decoder", "embedding", 
"vision_encoder"] + + +def test_model_config_select_components_discovers_directory_composite(tmp_path): + _make_export_package(tmp_path) + config = ModelConfig.model_validate({"type": "CompositeModel", "config": {"model_path": str(tmp_path)}}) + selected = config.select_components(["decoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + assert selected.config["onnx_file_name"] == "decoder/model.onnx" + + +def test_model_config_get_components_hfmodel_queries_mobius(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr( + mobius_utils, + "inspect_components", + lambda *a, **k: [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + mobius_utils.ComponentInfo(name="vision_encoder", kind="vision_encoder", source_path="model.vision_tower"), + ], + ) + config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + assert config.get_components() == ["decoder", "vision_encoder"] + + +def test_model_config_select_components_hfmodel_tags_source_path(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr( + mobius_utils, + "inspect_components", + lambda *a, **k: [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + ], + ) + config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + selected = config.select_components(["decoder"]) + assert selected.type == "hfmodel" + assert selected.config["model_path"] == "some/vlm" + attrs = selected.config["model_attributes"] + assert attrs["component_name"] == "decoder" + assert attrs["component_kind"] == "decoder" + assert attrs["component_source_path"] == "model.language_model" + + +def test_model_config_select_components_hfmodel_multiple_names_raises(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr(mobius_utils, "inspect_components", lambda *a, **k: []) + 
config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + with pytest.raises(ValueError, match="one at a time"): + config.select_components(["decoder", "vision_encoder"]) diff --git a/test/workflows/test_run_builds.py b/test/workflows/test_run_builds.py index 391f984bed..a2588ae9bd 100644 --- a/test/workflows/test_run_builds.py +++ b/test/workflows/test_run_builds.py @@ -150,7 +150,7 @@ def test_builds_components_on_non_composite_input_raises(self): "components": ["text_encoder"], }, } - with pytest.raises(ValueError, match="not a CompositeModel"): + with pytest.raises(ValueError, match="no selectable components"): olive_run(config) def test_builds_components_unknown_name_raises(self): @@ -175,3 +175,48 @@ def test_builds_components_unknown_name_raises(self): } with pytest.raises(ValueError, match="unknown component"): olive_run(config) + + def test_builds_directory_composite_input_runs_per_component(self, tmp_path): + # Flow A Option 2: a mobius export directory loads as a CompositeModel, + # subfolder names become component names, sibling builds optimize each. 
+ for name in ["decoder", "vision_encoder"]: + comp_dir = tmp_path / "exported_pkg" / name + comp_dir.mkdir(parents=True) + (comp_dir / "model.onnx").write_bytes(b"onnx") + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = {"type": "CompositeModel", "config": {"model_path": str(tmp_path / "exported_pkg")}} + config["builds"] = { + "decoder": {"components": ["decoder"], "pipeline": ["convert"], "output_dir": "out/decoder"}, + "vision_encoder": {"components": ["vision_encoder"], "pipeline": ["convert"], "output_dir": "out/vision"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert set(result) == {"decoder", "vision_encoder"} + assert run_mock.call_count == 2 + + def test_builds_hfmodel_components_resolved_via_mobius(self): + # Flow B: HfModel input; component names + source paths come from mobius. + from olive.common import mobius_utils + + def fake_inspect(*_args, **_kwargs): + return [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + mobius_utils.ComponentInfo(name="vision_encoder", kind="vision_encoder", source_path="model.vision"), + ] + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = {"type": "HfModel", "config": {"model_path": "some/vlm"}} + config["builds"] = { + "decoder": {"components": ["decoder"], "pipeline": ["convert"], "output_dir": "out/decoder"}, + } + with ( + engine_run_patch, + acc_patch, + patch.object(mobius_utils, "inspect_components", side_effect=fake_inspect), + ): + result = olive_run(config) + assert set(result) == {"decoder"} + assert run_mock.call_count == 1 From 98adeccb1099ac141c0d3f521bb6edd3841fdf8b Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 11:45:49 -0700 Subject: [PATCH 08/18] MobiusBuilder: log every component name and path for multi-component exports When mobius 
exports a multi-component model, log each component's name and its ONNX file path so the export layout is visible in the run log. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/passes/onnx/mobius_model_builder.py | 6 ++++ test/passes/onnx/test_mobius_model_builder.py | 30 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/olive/passes/onnx/mobius_model_builder.py b/olive/passes/onnx/mobius_model_builder.py index 7b7e52bb34..5c83812864 100644 --- a/olive/passes/onnx/mobius_model_builder.py +++ b/olive/passes/onnx/mobius_model_builder.py @@ -203,6 +203,7 @@ def _run_for_config( # Multi-component model (VLMs, encoder-decoders, diffusion pipelines): # mobius saves each component to //model.onnx. components = [] + component_paths: list[tuple[str, str]] = [] for key in package_keys: component_dir = output_dir / key onnx_path = component_dir / "model.onnx" @@ -211,6 +212,7 @@ def _run_for_config( f"MobiusBuilder: expected output file not found: {onnx_path}. " f"mobius.build() may have failed silently for component '{key}'." 
) + component_paths.append((key, str(onnx_path))) additional_files = sorted( {str(fp) for fp in component_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"} ) @@ -228,6 +230,10 @@ def _run_for_config( ) ) + logger.info("MobiusBuilder: exported multi-component model with %d components:", len(component_paths)) + for component_name, component_path in component_paths: + logger.info("MobiusBuilder: component '%s' -> %s", component_name, component_path) + return CompositeModelHandler( model_components=components, model_component_names=package_keys, diff --git a/test/passes/onnx/test_mobius_model_builder.py b/test/passes/onnx/test_mobius_model_builder.py index cd3d7338d8..7ed8834d06 100644 --- a/test/passes/onnx/test_mobius_model_builder.py +++ b/test/passes/onnx/test_mobius_model_builder.py @@ -254,13 +254,32 @@ def test_genai_artifacts_in_multi_component(tmp_path): def test_multi_component_returns_composite_handler(tmp_path): """Multi-component package (VLM) → CompositeModelHandler with one component per key.""" + import logging + out = tmp_path / "out" keys = ["model", "vision", "embedding"] pkg = _fake_pkg(keys, out) - with _patch_build(pkg): - p = _make_pass() - result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) + # olive disables logger propagation (olive/__init__.py), so attach a capture handler + # directly to the mobius builder logger. 
+ records: list[logging.LogRecord] = [] + + class _Capture(logging.Handler): + def emit(self, record): + records.append(record) + + mb_logger = logging.getLogger("olive.passes.onnx.mobius_model_builder") + handler = _Capture(level=logging.INFO) + prev_level = mb_logger.level + mb_logger.setLevel(logging.INFO) + mb_logger.addHandler(handler) + try: + with _patch_build(pkg): + p = _make_pass() + result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) + finally: + mb_logger.removeHandler(handler) + mb_logger.setLevel(prev_level) assert isinstance(result, CompositeModelHandler) assert result.model_component_names == keys @@ -269,6 +288,11 @@ def test_multi_component_returns_composite_handler(tmp_path): for comp in components: assert isinstance(comp, ONNXModelHandler) + # every component name and its onnx path are logged + messages = [rec.getMessage() for rec in records] + for key in keys: + assert any(f"'{key}'" in msg and "model.onnx" in msg for msg in messages) + # --------------------------------------------------------------------------- # EP auto-detection tests From 7bd240fe0bc3a3809f5a2c9fe6aee39e179b9d97 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 14:02:46 -0700 Subject: [PATCH 09/18] update docs --- multi-component-model-architecture-design.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md index 0d04cee2f8..fd41c1436b 100644 --- a/multi-component-model-architecture-design.md +++ b/multi-component-model-architecture-design.md @@ -118,7 +118,7 @@ Export with `MobiusBuilder`, which takes an `HfModel` and returns a `CompositeMo - User needs to know the components names first (from a new Olive CLI, where Olive will get it from Mobius) -**Option 2 — two steps (CLI export, then load the folder).** Export with the CLI, then point `input_model` at the exported directory. 
+**Option 2 — two steps (CLI export, then load the folder).** Export with the CLI, then point `input_model` at the exported directory. (preferred) Step 1 — export. Each component lands in its own subfolder: From 2dfc1a7e90da60e3a8745a0fc4902f0f665fed7a Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 18:02:04 -0700 Subject: [PATCH 10/18] Fix Olive consumption of mobius ComponentInfo objects mobius.inspect_components returns frozen ComponentInfo dataclasses, but Olive's coercion only handled its own ComponentInfo or a plain dict and crashed calling .get() on a mobius object. Broaden ComponentInfo.coerce (renamed from from_dict) to also accept duck-typed objects exposing name/kind/source_path. Add test/common/test_mobius_utils.py covering the object, dict, passthrough, and missing-mobius paths that the existing inspect_components mocks never exercised. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/common/mobius_utils.py | 28 +++++++++---- test/common/test_mobius_utils.py | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 test/common/test_mobius_utils.py diff --git a/olive/common/mobius_utils.py b/olive/common/mobius_utils.py index dbb296ff4d..e971029580 100644 --- a/olive/common/mobius_utils.py +++ b/olive/common/mobius_utils.py @@ -39,15 +39,27 @@ class ComponentInfo: metadata: dict = field(default_factory=dict) @classmethod - def from_dict(cls, data: "ComponentInfo | dict") -> "ComponentInfo": - if isinstance(data, ComponentInfo): + def coerce(cls, data: "ComponentInfo | dict | object") -> "ComponentInfo": + """Normalize a component from any source into an Olive :class:`ComponentInfo`. + + Accepts an existing Olive ``ComponentInfo`` (returned as-is), a mapping following the + component contract, or a duck-typed object exposing ``name``/``kind``/``source_path`` + attributes (e.g. a ``mobius`` ``ComponentInfo`` dataclass). 
+ """ + if isinstance(data, cls): return data - source = data.get("source") or {} + if isinstance(data, dict): + source = data.get("source") or {} + return cls( + name=data["name"], + kind=data.get("kind"), + source_path=data.get("source_path") or source.get("path"), + metadata={k: v for k, v in data.items() if k not in ("name", "kind", "source", "source_path")}, + ) return cls( - name=data["name"], - kind=data.get("kind"), - source_path=data.get("source_path") or source.get("path"), - metadata={k: v for k, v in data.items() if k not in ("name", "kind", "source", "source_path")}, + name=data.name, + kind=getattr(data, "kind", None), + source_path=getattr(data, "source_path", None), ) @@ -84,6 +96,6 @@ def inspect_components( task=task, trust_remote_code=trust_remote_code, ) - components = [ComponentInfo.from_dict(c) for c in raw_components] + components = [ComponentInfo.coerce(c) for c in raw_components] logger.debug("mobius.inspect_components(%s) -> %s", model_name_or_path, [c.name for c in components]) return components diff --git a/test/common/test_mobius_utils.py b/test/common/test_mobius_utils.py new file mode 100644 index 0000000000..dbdda9cecd --- /dev/null +++ b/test/common/test_mobius_utils.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import dataclasses +import sys +import types + +import pytest + +from olive.common.mobius_utils import ComponentInfo, inspect_components + + +@dataclasses.dataclass(frozen=True) +class _MobiusLikeComponent: + """Mirror of mobius' frozen ``ComponentInfo`` dataclass (no mapping interface).""" + + name: str + kind: str + + +def test_coerce_returns_same_instance_when_already_componentinfo(): + component = ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model") + + assert ComponentInfo.coerce(component) is component + + +def test_coerce_reads_contract_dict(): + component = ComponentInfo.coerce( + {"name": "decoder", "kind": "decoder", "source": {"path": "model.language_model"}, "extra": 1} + ) + + assert component.name == "decoder" + assert component.kind == "decoder" + assert component.source_path == "model.language_model" + assert component.metadata == {"extra": 1} + + +def test_coerce_reads_duck_typed_object_when_object_has_no_mapping_interface(): + # A mobius ComponentInfo is a frozen dataclass and does not implement ``.get``. 
+ component = ComponentInfo.coerce(_MobiusLikeComponent(name="vision_encoder", kind="encoder")) + + assert component.name == "vision_encoder" + assert component.kind == "encoder" + assert component.source_path is None + + +def test_inspect_components_coerces_mobius_objects(monkeypatch): + fake_mobius = types.ModuleType("mobius") + fake_mobius.inspect_components = lambda model_name_or_path, task=None, trust_remote_code=False: [ + _MobiusLikeComponent(name="decoder", kind="decoder"), + _MobiusLikeComponent(name="vision_encoder", kind="encoder"), + _MobiusLikeComponent(name="embedding", kind="embedding"), + ] + monkeypatch.setitem(sys.modules, "mobius", fake_mobius) + + components = inspect_components("fake/llava") + + assert all(isinstance(c, ComponentInfo) for c in components) + assert [(c.name, c.kind, c.source_path) for c in components] == [ + ("decoder", "decoder", None), + ("vision_encoder", "encoder", None), + ("embedding", "embedding", None), + ] + + +def test_inspect_components_raises_importerror_when_mobius_missing(monkeypatch): + monkeypatch.setitem(sys.modules, "mobius", None) + + with pytest.raises(ImportError, match="mobius-ai is required"): + inspect_components("fake/llava") From f6472dd730ae9b9dd3e62f4b26e8a5539ce9bc0c Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 16 Jun 2026 18:02:15 -0700 Subject: [PATCH 11/18] Fix mobius stub fixture to not shadow real mobius The autouse _stub_mobius_module fixture guarded on whether mobius was already in sys.modules, which is False when real mobius is installed but not yet imported at fixture setup. It then injected a non-package stub that shadowed real mobius, breaking test_write_genai_config_requires_real_mobius (imports mobius.integrations). This was masked in CI where mobius is absent and the test is skipped. Guard on _HAS_REAL_MOBIUS instead so the fixture is a true no-op when mobius is installed. 
Also drop the log-capture assertion from test_multi_component_returns_composite_handler (logging stays in the pass). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/passes/onnx/test_mobius_model_builder.py | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/test/passes/onnx/test_mobius_model_builder.py b/test/passes/onnx/test_mobius_model_builder.py index 7ed8834d06..edc4a5c77e 100644 --- a/test/passes/onnx/test_mobius_model_builder.py +++ b/test/passes/onnx/test_mobius_model_builder.py @@ -33,7 +33,7 @@ def _stub_mobius_module(): The stub is only injected when mobius is absent; if the real package is installed, this fixture is a no-op. """ - if "mobius" in sys.modules: + if _HAS_REAL_MOBIUS: yield return fake = types.ModuleType("mobius") @@ -254,32 +254,13 @@ def test_genai_artifacts_in_multi_component(tmp_path): def test_multi_component_returns_composite_handler(tmp_path): """Multi-component package (VLM) → CompositeModelHandler with one component per key.""" - import logging - out = tmp_path / "out" keys = ["model", "vision", "embedding"] pkg = _fake_pkg(keys, out) - # olive disables logger propagation (olive/__init__.py), so attach a capture handler - # directly to the mobius builder logger. 
- records: list[logging.LogRecord] = [] - - class _Capture(logging.Handler): - def emit(self, record): - records.append(record) - - mb_logger = logging.getLogger("olive.passes.onnx.mobius_model_builder") - handler = _Capture(level=logging.INFO) - prev_level = mb_logger.level - mb_logger.setLevel(logging.INFO) - mb_logger.addHandler(handler) - try: - with _patch_build(pkg): - p = _make_pass() - result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) - finally: - mb_logger.removeHandler(handler) - mb_logger.setLevel(prev_level) + with _patch_build(pkg): + p = _make_pass() + result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) assert isinstance(result, CompositeModelHandler) assert result.model_component_names == keys @@ -288,11 +269,6 @@ def emit(self, record): for comp in components: assert isinstance(comp, ONNXModelHandler) - # every component name and its onnx path are logged - messages = [rec.getMessage() for rec in records] - for key in keys: - assert any(f"'{key}'" in msg and "model.onnx" in msg for msg in messages) - # --------------------------------------------------------------------------- # EP auto-detection tests From e98e0fd6b5c5e0289736c3e195bea009db41c292 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Mon, 22 Jun 2026 13:32:14 -0700 Subject: [PATCH 12/18] Wire DiffusersModel into builds component resolution builds.components could only target CompositeModel (directory) and HfModel (via mobius); a DiffusersModel input fell through to get_components()/select_components() returning None, so the design's per-component diffusion example failed validation. Add an optional components filter to DiffusersModelHandler (restricts get_exportable_components to a subset in canonical variant order) and resolve/select diffusion components in ModelConfig. select_components scopes the handler so each build's conversion emits just that component's ONNX, with later passes auto-mapping over the single-component composite. 
Variant detection only reads config files, so resolution stays cheap at validation time. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- olive/model/config/model_config.py | 34 ++++++++++++++++++++++++ olive/model/handler/diffusers.py | 21 +++++++++++++-- test/model/test_composite_model.py | 42 ++++++++++++++++++++++++++++++ test/model/test_diffusers_model.py | 40 ++++++++++++++++++++++++++++ test/workflows/test_run_builds.py | 39 +++++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 2 deletions(-) diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index b2b615c073..7afc5da41f 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -60,6 +60,8 @@ def get_components(self) -> Optional[list[str]]: return [name for name, _ in self._discover_composite_components()] if self.type == "hfmodel": return self._get_hf_components() or None + if self.type == "diffusersmodel": + return self._get_diffusers_components() or None return None def _discover_composite_components(self) -> list[tuple[str, str]]: @@ -86,6 +88,19 @@ def _get_hf_components(self) -> list[str]: ) return [c.name for c in components] + def _get_diffusers_components(self) -> list[str]: + """Return the exportable component names for a DiffusersModel, or empty list. + + Reads the variant's component layout from the handler (which only inspects config files, + not weights). When the config is already scoped to a subset via ``components``, returns that + subset; the top-level input model carries no such scope, so ``builds`` sees the full set. + """ + model_path = self.config.get("model_path") + if not model_path: + return [] + handler = self.create_model() + return [str(c) for c in handler.get_exportable_components()] + def select_components(self, names: list[str]) -> "ModelConfig": """Return a new ModelConfig holding only the named components. 
@@ -100,6 +115,8 @@ def select_components(self, names: list[str]) -> "ModelConfig": """ if self.type == "hfmodel": return self._select_hf_component(names) + if self.type == "diffusersmodel": + return self._select_diffusers_components(names) if self.type != "compositemodel": raise ValueError( f"select_components is only supported on CompositeModel or HfModel input configs " @@ -184,6 +201,23 @@ def _select_hf_component(self, names: list[str]) -> "ModelConfig": new_config["model_attributes"] = attributes return ModelConfig(type=self.type, config=new_config) + def _select_diffusers_components(self, names: list[str]) -> "ModelConfig": + """Scope a DiffusersModel to the named exportable components. + + Returns a copy of the config with ``components`` set to the requested subset (in the + variant's canonical order). The scoped handler exports only those components, so a build's + conversion pass produces just that component's ONNX while subsequent passes map over it. + """ + if not names: + raise ValueError("select_components requires a non-empty list of names.") + available = self._get_diffusers_components() + missing = [n for n in names if n not in available] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. Available components: {available}.") + new_config = deepcopy(self.config) + new_config["components"] = [name for name in available if name in set(names)] + return ModelConfig(type=self.type, config=new_config) + def get_model_id(self): for v in self.config.values(): if callable(v): diff --git a/olive/model/handler/diffusers.py b/olive/model/handler/diffusers.py index e7f13f4f9b..da119f7ba4 100644 --- a/olive/model/handler/diffusers.py +++ b/olive/model/handler/diffusers.py @@ -34,7 +34,7 @@ class DiffusersModelHandler(OliveModelHandler): """ resource_keys: tuple[str, ...] = ("model_path", "adapter_path") - json_config_keys: tuple[str, ...] = ("model_variant", "load_kwargs") + json_config_keys: tuple[str, ...] 
= ("model_variant", "load_kwargs", "components") def __init__( self, @@ -42,6 +42,7 @@ def __init__( model_variant: Union[str, DiffusersModelVariant] = DiffusersModelVariant.AUTO, load_kwargs: Optional[dict[str, Any]] = None, adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None, + components: Optional[list[str]] = None, model_attributes: Optional[dict[str, Any]] = None, ): """Initialize DiffusersModelHandler. @@ -51,6 +52,10 @@ def __init__( model_variant: Model variant: 'sd15', 'sdxl', 'flux', or 'auto' for auto-detection. load_kwargs: Additional kwargs for loading the model (e.g., torch_dtype, variant). adapter_path: Path to LoRA adapter weights. + components: Optional subset of exportable component names this handler is scoped to + (e.g. ``["text_encoder"]``). When set, ``get_exportable_components`` returns only + these (in variant order); used by ``builds.components`` to optimize one component at + a time. When ``None``, all of the variant's components are exportable. model_attributes: Additional model attributes. """ @@ -67,6 +72,7 @@ def __init__( self.model_variant = DiffusersModelVariant(model_variant) self.load_kwargs = load_kwargs or {} + self.components = list(components) if components else None self._pipeline = None @property @@ -307,7 +313,18 @@ def get_exportable_components(self) -> list[DC]: } if variant not in variant_components: raise ValueError(f"Unknown model variant: {variant}") - return variant_components[variant] + full = variant_components[variant] + if self.components: + requested = list(self.components) + available = {str(c) for c in full} + unknown = [name for name in requested if name not in available] + if unknown: + raise ValueError( + f"Unknown component(s) {unknown} for variant {variant}. Available: {sorted(available)}." 
+ ) + # preserve the variant's canonical component order + return [c for c in full if str(c) in set(requested)] + return full def get_pipeline_type(self) -> DiffusersModelVariant: """Get the pipeline type for OnnxConfig lookup. diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index ed2902e8ce..2d0fd96dff 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -258,3 +258,45 @@ def test_model_config_select_components_hfmodel_multiple_names_raises(monkeypatc config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) with pytest.raises(ValueError, match="one at a time"): config.select_components(["decoder", "vision_encoder"]) + + +def _make_diffusers_dir(tmp_path): + """Create a minimal local diffusers dir so is_valid_diffusers_model passes offline.""" + (tmp_path / "model_index.json").write_text("{}") + return tmp_path + + +def test_model_config_get_components_diffusersmodel(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sdxl"}} + ) + assert config.get_components() == [ + "text_encoder", + "text_encoder_2", + "unet", + "vae_encoder", + "vae_decoder", + ] + + +def test_model_config_select_components_diffusersmodel_scopes_subset(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sdxl"}} + ) + selected = config.select_components(["unet", "text_encoder"]) + assert selected.type == "diffusersmodel" + # preserved in the variant's canonical order, not the requested order + assert selected.config["components"] == ["text_encoder", "unet"] + # the scoped config now exposes only the selected components + assert selected.get_components() == ["text_encoder", "unet"] + + +def 
test_model_config_select_components_diffusersmodel_unknown_raises(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sd"}} + ) + with pytest.raises(ValueError, match="Unknown component name"): + config.select_components(["text_encoder_2"]) # SDXL-only; not in SD diff --git a/test/model/test_diffusers_model.py b/test/model/test_diffusers_model.py index acbe6113a4..4a146b52dc 100644 --- a/test/model/test_diffusers_model.py +++ b/test/model/test_diffusers_model.py @@ -207,3 +207,43 @@ def test_adapter_path_property(self): def test_adapter_path_property_none(self): model = DiffusersModelHandler(model_path=self.model_path, model_variant=DiffusersModelVariant.SD) assert model.adapter_path is None + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_returns_full_variant_set_when_unscoped(self, mock_is_valid): + model = DiffusersModelHandler(model_path=self.model_path, model_variant=DiffusersModelVariant.SDXL) + assert [str(c) for c in model.get_exportable_components()] == [ + "text_encoder", + "text_encoder_2", + "unet", + "vae_encoder", + "vae_decoder", + ] + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_filters_to_scoped_subset_in_variant_order(self, mock_is_valid): + # requested out of order; result must follow the variant's canonical order + model = DiffusersModelHandler( + model_path=self.model_path, + model_variant=DiffusersModelVariant.SDXL, + components=["unet", "text_encoder"], + ) + assert [str(c) for c in model.get_exportable_components()] == ["text_encoder", "unet"] + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_raises_for_unknown_component(self, mock_is_valid): + model = DiffusersModelHandler( + 
model_path=self.model_path, + model_variant=DiffusersModelVariant.SD, + components=["text_encoder_2"], # SDXL-only; not in SD + ) + with pytest.raises(ValueError, match="Unknown component"): + model.get_exportable_components() + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_to_json_round_trips_components(self, mock_is_valid): + model = DiffusersModelHandler( + model_path=self.model_path, + model_variant=DiffusersModelVariant.SDXL, + components=["text_encoder"], + ) + assert model.to_json()["config"]["components"] == ["text_encoder"] diff --git a/test/workflows/test_run_builds.py b/test/workflows/test_run_builds.py index a2588ae9bd..03687180ca 100644 --- a/test/workflows/test_run_builds.py +++ b/test/workflows/test_run_builds.py @@ -220,3 +220,42 @@ def fake_inspect(*_args, **_kwargs): result = olive_run(config) assert set(result) == {"decoder"} assert run_mock.call_count == 1 + + def test_builds_diffusersmodel_per_component(self, tmp_path): + # §3.1: DiffusersModel input; each build scopes the pipeline to one exportable component. 
+ model_dir = tmp_path / "sdxl" + model_dir.mkdir(parents=True) + (model_dir / "model_index.json").write_text("{}") + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = { + "type": "DiffusersModel", + "config": {"model_path": str(model_dir), "model_variant": "sdxl"}, + } + config["builds"] = { + "text_encoder": {"components": ["text_encoder"], "pipeline": ["convert"], "output_dir": "out/te"}, + "unet": {"components": ["unet"], "pipeline": ["convert"], "output_dir": "out/unet"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert set(result) == {"text_encoder", "unet"} + assert run_mock.call_count == 2 + + def test_builds_diffusersmodel_unknown_component_raises(self, tmp_path): + model_dir = tmp_path / "sd" + model_dir.mkdir(parents=True) + (model_dir / "model_index.json").write_text("{}") + + _, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = { + "type": "DiffusersModel", + "config": {"model_path": str(model_dir), "model_variant": "sd"}, + } + config["builds"] = { + # text_encoder_2 is SDXL-only; not a component of the SD variant + "te2": {"components": ["text_encoder_2"], "pipeline": ["convert"], "output_dir": "out/te2"}, + } + with engine_run_patch, acc_patch, pytest.raises(ValueError, match="unknown component"): + olive_run(config) From eec5da63e3013dcbf5943c98c7908887845a89bd Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Mon, 22 Jun 2026 15:22:58 -0700 Subject: [PATCH 13/18] add recipe --- .../diffusion_per_component_sdxl.json | 51 +++++++++++++++++++ .../flow_a_composite_package.json | 45 ++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 multi_comp_recipe/diffusion_per_component_sdxl.json create mode 100644 multi_comp_recipe/flow_a_composite_package.json diff --git a/multi_comp_recipe/diffusion_per_component_sdxl.json 
b/multi_comp_recipe/diffusion_per_component_sdxl.json new file mode 100644 index 0000000000..5f02b10565 --- /dev/null +++ b/multi_comp_recipe/diffusion_per_component_sdxl.json @@ -0,0 +1,51 @@ +{ + "input_model": { + "type": "DiffusersModel", + "config": { + "model_path": "stabilityai/stable-diffusion-xl-base-1.0", + "model_variant": "sdxl" + } + }, + "systems": { + "local_gpu": { + "type": "LocalSystem", + "accelerators": [ + { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } + ] + } + }, + "passes": { + "convert": { "type": "OnnxConversion", "target_opset": 17 }, + "optimize_clip": { "type": "OrtTransformersOptimization", "model_type": "clip", "float16": true }, + "optimize_unet": { "type": "OrtTransformersOptimization", "model_type": "unet", "float16": true }, + "optimize_vae": { "type": "OrtTransformersOptimization", "model_type": "vae", "float16": true } + }, + "engine": { + "host": "local_gpu", + "target": "local_gpu", + "evaluate_input_model": false, + "cache_dir": "cache" + }, + "builds": { + "text_encoder": { + "components": ["text_encoder"], + "pipeline": ["convert", "optimize_clip"], + "output_dir": "out/text_encoder" + }, + "text_encoder_2": { + "components": ["text_encoder_2"], + "pipeline": ["convert", "optimize_clip"], + "output_dir": "out/text_encoder_2" + }, + "unet": { + "components": ["unet"], + "pipeline": ["convert", "optimize_unet"], + "output_dir": "out/unet" + }, + "vae_decoder": { + "components": ["vae_decoder"], + "pipeline": ["convert", "optimize_vae"], + "output_dir": "out/vae_decoder" + } + } +} diff --git a/multi_comp_recipe/flow_a_composite_package.json b/multi_comp_recipe/flow_a_composite_package.json new file mode 100644 index 0000000000..fd6b6ecf0f --- /dev/null +++ b/multi_comp_recipe/flow_a_composite_package.json @@ -0,0 +1,45 @@ +{ + "input_model": { + "type": "CompositeModel", + "config": { + "model_path": "exported_pkg" + } + }, + "systems": { + "local_gpu": { + "type": "LocalSystem", + "accelerators": [ + { 
"device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } + ] + } + }, + "data_configs": [ + { + "name": "calib", + "user_script": "user_script.py", + "load_dataset_config": { "type": "local_dataset" } + } + ], + "passes": { + "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } + }, + "engine": { + "host": "local_gpu", + "target": "local_gpu", + "evaluate_input_model": false, + "cache_dir": "cache" + }, + "builds": { + "decoder": { + "components": ["decoder"], + "pipeline": ["transformer_opt", "quantization"], + "output_dir": "out/decoder" + }, + "vision_encoder": { + "components": ["vision_encoder"], + "pipeline": ["transformer_opt"], + "output_dir": "out/vision_encoder" + } + } +} From 7c7bbb97c6481795c99414234ad619a263611712 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Mon, 22 Jun 2026 15:56:16 -0700 Subject: [PATCH 14/18] Add multi-component optimization recipes (Flow A: export then optimize) Two runnable Flow A recipes plus a README documenting the two-step flow (CLI Mobius export, then an Olive config whose builds optimize each exported component): SD3 (transformer + VAE encoder/decoder; Mobius skips the CLIP/T5 text encoders) and Qwen3-VL-2B-Instruct (decoder/vision_encoder/embedding, the exact names Mobius produces). Both validate against RunConfig. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- multi_comp_recipe/README.md | 128 ++++++++++++++++++ .../diffusion_per_component_sdxl.json | 51 ------- .../flow_a_composite_package.json | 45 ------ .../sd3_optimize_components.json | 43 ++++++ .../vlm_optimize_components.json | 43 ++++++ 5 files changed, 214 insertions(+), 96 deletions(-) create mode 100644 multi_comp_recipe/README.md delete mode 100644 multi_comp_recipe/diffusion_per_component_sdxl.json delete mode 100644 multi_comp_recipe/flow_a_composite_package.json create mode 100644 multi_comp_recipe/sd3_optimize_components.json create mode 100644 multi_comp_recipe/vlm_optimize_components.json diff --git a/multi_comp_recipe/README.md b/multi_comp_recipe/README.md new file mode 100644 index 0000000000..a82b47a95b --- /dev/null +++ b/multi_comp_recipe/README.md @@ -0,0 +1,128 @@ +# Multi-Component Model Optimization Recipes + +These recipes demonstrate **Flow A — export first, then per-component optimization**: export a +multi-component model to ONNX once, then run a single Olive config whose `builds` apply a +**different pipeline to each component**. + +The flow is two explicit steps: + +1. **Export** the model to a directory of per-component ONNX subfolders using the Olive CLI with the + Mobius builder. +2. **Optimize** by pointing an Olive config at that directory; each component subfolder becomes a + selectable component that a `build` can target. + +There is no need to memorize component names: each exported component lives in its own folder, and +Olive loads the export directory as a `CompositeModel` whose **component names are the subfolder +names**. + +--- + +## Prerequisites + +``` +pip install olive-ai +pip install mobius-ai +``` + +Exporting a diffusion pipeline also needs `diffusers`/`transformers` and access to the model on +Hugging Face (Stable Diffusion 3 is a gated model — accept its license and `huggingface-cli login` +first). 
+ +--- + +## Recipe 1 — Stable Diffusion 3 (`sd3_optimize_components.json`) + +### Step 1 — Export with the CLI + +``` +olive capture-onnx-graph --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers --use_mobius_builder --output_path exported_pkg +``` + +Mobius exports each neural-network component to its own subfolder: + +``` +exported_pkg/ + transformer/model.onnx # MMDiT denoising backbone + vae_encoder/model.onnx + vae_decoder/model.onnx +``` + +> **Note.** Mobius's diffusers builder exports the **transformer** backbone and the **VAE** +> (encoder + decoder). The CLIP/T5 **text encoders are not exported by Mobius** and are left to the +> original pipeline. The exact subfolders depend on the pipeline; the optimize config below only +> needs `builds` for the components you actually want to optimize. + +### Step 2 — Optimize each component + +Run from the directory that contains `exported_pkg/`: + +``` +olive run --config sd3_optimize_components.json +``` + +This applies a different pipeline per component: + +| component | pipeline | intent | +|---------------|--------------------|------------------------------------------| +| `transformer` | `dynamic_quant` | INT8-quantize the heavy denoising backbone | +| `vae_encoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | +| `vae_decoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | + +Output: + +``` +out/transformer/ # INT8 transformer +out/vae_encoder/ # FP16 VAE encoder +out/vae_decoder/ # FP16 VAE decoder +``` + +Each build writes one optimized component; components without a build stay as exported. + +--- + +## Recipe 2 — Vision-Language Model (`vlm_optimize_components.json`) + +Same two-step Flow A for a VLM, using `Qwen/Qwen3-VL-2B-Instruct`. 
+ +### Step 1 — Export + +``` +olive capture-onnx-graph --model_name_or_path Qwen/Qwen3-VL-2B-Instruct --use_mobius_builder --output_path exported_vlm_pkg +``` + +Mobius exports this model as three components, each in its own subfolder: + +``` +exported_vlm_pkg/ + decoder/model.onnx + vision_encoder/model.onnx + embedding/model.onnx +``` + +### Step 2 — Optimize + +``` +olive run --config vlm_optimize_components.json +``` + +| component | pipeline | intent | +|------------------|-----------------|-------------------------------------| +| `decoder` | `dynamic_quant` | INT8-quantize the language decoder | +| `vision_encoder` | `to_fp16` | keep the vision tower in FP16 | +| `embedding` | `to_fp16` | keep the embedding in FP16 | + +> The three component names (`decoder`, `vision_encoder`, `embedding`) are exactly what Mobius +> produces for `Qwen/Qwen3-VL-2B-Instruct`. For a different VLM, adjust the component names in the +> config to match the subfolder names your export actually produced. + +--- + +## Notes + +- The passes here (`OnnxFloatToFloat16`, `OnnxDynamicQuantization`) are **illustrative** and chosen + to run without calibration data. Swap in `OrtTransformersOptimization`, `OnnxStaticQuantization` + (with a `data_config`), or other ONNX passes for production-quality optimization. +- The recipes target the **CPU** EP so they run anywhere. For GPU deployment, change the + `execution_providers` to e.g. `["CUDAExecutionProvider"]` and the device to `"gpu"`. +- `builds.components` selects which exported components to optimize. Only the components with a build + are touched; the rest remain as exported. 
diff --git a/multi_comp_recipe/diffusion_per_component_sdxl.json b/multi_comp_recipe/diffusion_per_component_sdxl.json deleted file mode 100644 index 5f02b10565..0000000000 --- a/multi_comp_recipe/diffusion_per_component_sdxl.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "input_model": { - "type": "DiffusersModel", - "config": { - "model_path": "stabilityai/stable-diffusion-xl-base-1.0", - "model_variant": "sdxl" - } - }, - "systems": { - "local_gpu": { - "type": "LocalSystem", - "accelerators": [ - { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } - ] - } - }, - "passes": { - "convert": { "type": "OnnxConversion", "target_opset": 17 }, - "optimize_clip": { "type": "OrtTransformersOptimization", "model_type": "clip", "float16": true }, - "optimize_unet": { "type": "OrtTransformersOptimization", "model_type": "unet", "float16": true }, - "optimize_vae": { "type": "OrtTransformersOptimization", "model_type": "vae", "float16": true } - }, - "engine": { - "host": "local_gpu", - "target": "local_gpu", - "evaluate_input_model": false, - "cache_dir": "cache" - }, - "builds": { - "text_encoder": { - "components": ["text_encoder"], - "pipeline": ["convert", "optimize_clip"], - "output_dir": "out/text_encoder" - }, - "text_encoder_2": { - "components": ["text_encoder_2"], - "pipeline": ["convert", "optimize_clip"], - "output_dir": "out/text_encoder_2" - }, - "unet": { - "components": ["unet"], - "pipeline": ["convert", "optimize_unet"], - "output_dir": "out/unet" - }, - "vae_decoder": { - "components": ["vae_decoder"], - "pipeline": ["convert", "optimize_vae"], - "output_dir": "out/vae_decoder" - } - } -} diff --git a/multi_comp_recipe/flow_a_composite_package.json b/multi_comp_recipe/flow_a_composite_package.json deleted file mode 100644 index fd6b6ecf0f..0000000000 --- a/multi_comp_recipe/flow_a_composite_package.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "input_model": { - "type": "CompositeModel", - "config": { - "model_path": "exported_pkg" - } - }, - 
"systems": { - "local_gpu": { - "type": "LocalSystem", - "accelerators": [ - { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } - ] - } - }, - "data_configs": [ - { - "name": "calib", - "user_script": "user_script.py", - "load_dataset_config": { "type": "local_dataset" } - } - ], - "passes": { - "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, - "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } - }, - "engine": { - "host": "local_gpu", - "target": "local_gpu", - "evaluate_input_model": false, - "cache_dir": "cache" - }, - "builds": { - "decoder": { - "components": ["decoder"], - "pipeline": ["transformer_opt", "quantization"], - "output_dir": "out/decoder" - }, - "vision_encoder": { - "components": ["vision_encoder"], - "pipeline": ["transformer_opt"], - "output_dir": "out/vision_encoder" - } - } -} diff --git a/multi_comp_recipe/sd3_optimize_components.json b/multi_comp_recipe/sd3_optimize_components.json new file mode 100644 index 0000000000..c1d8829fd5 --- /dev/null +++ b/multi_comp_recipe/sd3_optimize_components.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "CompositeModel", + "config": { + "model_path": "exported_pkg" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { "device": "cpu", "execution_providers": ["CPUExecutionProvider"] } + ] + } + }, + "passes": { + "to_fp16": { "type": "OnnxFloatToFloat16" }, + "dynamic_quant": { "type": "OnnxDynamicQuantization" } + }, + "engine": { + "host": "local_system", + "target": "local_system", + "evaluate_input_model": false, + "cache_dir": "cache" + }, + "builds": { + "transformer": { + "components": ["transformer"], + "pipeline": ["dynamic_quant"], + "output_dir": "out/transformer" + }, + "vae_encoder": { + "components": ["vae_encoder"], + "pipeline": ["to_fp16"], + "output_dir": "out/vae_encoder" + }, + "vae_decoder": { + "components": ["vae_decoder"], + "pipeline": ["to_fp16"], + 
"output_dir": "out/vae_decoder" + } + } +} diff --git a/multi_comp_recipe/vlm_optimize_components.json b/multi_comp_recipe/vlm_optimize_components.json new file mode 100644 index 0000000000..9260fd6807 --- /dev/null +++ b/multi_comp_recipe/vlm_optimize_components.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "CompositeModel", + "config": { + "model_path": "exported_vlm_pkg" + } + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ + { "device": "cpu", "execution_providers": ["CPUExecutionProvider"] } + ] + } + }, + "passes": { + "to_fp16": { "type": "OnnxFloatToFloat16" }, + "dynamic_quant": { "type": "OnnxDynamicQuantization" } + }, + "engine": { + "host": "local_system", + "target": "local_system", + "evaluate_input_model": false, + "cache_dir": "cache" + }, + "builds": { + "decoder": { + "components": ["decoder"], + "pipeline": ["dynamic_quant"], + "output_dir": "out/decoder" + }, + "vision_encoder": { + "components": ["vision_encoder"], + "pipeline": ["to_fp16"], + "output_dir": "out/vision_encoder" + }, + "embedding": { + "components": ["embedding"], + "pipeline": ["to_fp16"], + "output_dir": "out/embedding" + } + } +} From 1f7b6570efb72ab5ed044c502d202d723d4c224e Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 23 Jun 2026 20:59:13 +0000 Subject: [PATCH 15/18] add recipes --- multi_comp_recipe/.gitignore | 2 + .../sd3_optimize_components.json | 71 +++++++++++++++++-- olive/cli/capture_onnx.py | 10 +-- olive/passes/onnx/mobius_model_builder.py | 16 +++-- 4 files changed, 84 insertions(+), 15 deletions(-) create mode 100644 multi_comp_recipe/.gitignore diff --git a/multi_comp_recipe/.gitignore b/multi_comp_recipe/.gitignore new file mode 100644 index 0000000000..4796a2c8e0 --- /dev/null +++ b/multi_comp_recipe/.gitignore @@ -0,0 +1,2 @@ +exported_pkg +out \ No newline at end of file diff --git a/multi_comp_recipe/sd3_optimize_components.json b/multi_comp_recipe/sd3_optimize_components.json index c1d8829fd5..d265b403ef 
100644 --- a/multi_comp_recipe/sd3_optimize_components.json +++ b/multi_comp_recipe/sd3_optimize_components.json @@ -9,13 +9,72 @@ "local_system": { "type": "LocalSystem", "accelerators": [ - { "device": "cpu", "execution_providers": ["CPUExecutionProvider"] } + { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } ] } }, "passes": { - "to_fp16": { "type": "OnnxFloatToFloat16" }, - "dynamic_quant": { "type": "OnnxDynamicQuantization" } + "optimize_transformer": { + "type": "OrtTransformersOptimization", + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": ["RandomNormalLike"] + }, + "optimize_vae": { + "type": "OrtTransformersOptimization", + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, 
+ "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": ["RandomNormalLike"], + "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } + } }, "engine": { "host": "local_system", @@ -26,17 +85,17 @@ "builds": { "transformer": { "components": ["transformer"], - "pipeline": ["dynamic_quant"], + "pipeline": ["optimize_transformer"], "output_dir": "out/transformer" }, "vae_encoder": { "components": ["vae_encoder"], - "pipeline": ["to_fp16"], + "pipeline": ["optimize_vae"], "output_dir": "out/vae_encoder" }, "vae_decoder": { "components": ["vae_decoder"], - "pipeline": ["to_fp16"], + "pipeline": ["optimize_vae"], "output_dir": "out/vae_decoder" } } diff --git a/olive/cli/capture_onnx.py b/olive/cli/capture_onnx.py index 25349e4b30..5581b8cd22 100644 --- a/olive/cli/capture_onnx.py +++ b/olive/cli/capture_onnx.py @@ -194,12 +194,14 @@ def run(self): def _get_run_config(self, tempdir: str) -> dict: config = deepcopy(TEMPLATE) + is_diffusers = is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False + if self.args.use_mobius_builder: - input_model_config = get_input_model_config(self.args) + if is_diffusers: + input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) + else: + input_model_config = get_input_model_config(self.args) else: - is_diffusers = ( - is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False - ) if is_diffusers: input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) else: diff --git a/olive/passes/onnx/mobius_model_builder.py b/olive/passes/onnx/mobius_model_builder.py index 5c83812864..e94cceb20c 100644 --- a/olive/passes/onnx/mobius_model_builder.py +++ b/olive/passes/onnx/mobius_model_builder.py @@ -15,6 +15,7 @@ from olive.hardware.constants import EXECUTION_PROVIDER_TO_MOBIUS_EP, ExecutionProvider from olive.model import 
HfModelHandler, ONNXModelHandler from olive.model.handler.composite import CompositeModelHandler +from olive.model.handler.diffusers import DiffusersModelHandler from olive.passes import Pass from olive.passes.olive_pass import PassConfigParam @@ -111,7 +112,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon def _run_for_config( self, - model: HfModelHandler, + model: HfModelHandler | DiffusersModelHandler, config: type[BasePassConfig], output_model_path: str, ) -> ONNXModelHandler | CompositeModelHandler: @@ -122,8 +123,10 @@ def _run_for_config( "mobius-ai is required to run MobiusBuilder. Install with: pip install mobius-ai" ) from exc - if not isinstance(model, HfModelHandler): - raise ValueError(f"MobiusBuilder requires an HfModelHandler input, got {type(model).__name__}.") + if not isinstance(model, (HfModelHandler, DiffusersModelHandler)): + raise ValueError( + f"MobiusBuilder requires an HfModelHandler or DiffusersModelHandler input, got {type(model).__name__}." + ) # Map Olive EP to mobius EP. If unsupported/unknown, fall back to mobius default EP. requested_ep = self.accelerator_spec.execution_provider @@ -137,10 +140,12 @@ def _run_for_config( ) dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32") - model_id: str = model.model_name_or_path + model_id: str = model.model_name_or_path if isinstance(model, HfModelHandler) else str(model.model_path) # Read trust_remote_code from the model's HuggingFace load kwargs. 
- trust_remote_code: bool = model.get_load_kwargs().get("trust_remote_code", False) + trust_remote_code: bool = ( + model.get_load_kwargs().get("trust_remote_code", False) if isinstance(model, HfModelHandler) else False + ) logger.info( "MobiusBuilder: building '%s' (ep=%s, dtype=%s)", @@ -240,6 +245,7 @@ def _run_for_config( model_path=str(output_dir), model_attributes={ "mobius_package_keys": package_keys, + "no_flatten": True, **(model.model_attributes or {}), }, ) From 817b6b65ced03b80e69f4316f82d63e11defbee2 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Wed, 24 Jun 2026 04:29:43 +0000 Subject: [PATCH 16/18] update recipe --- multi_comp_recipe/.gitignore | 1 + multi_comp_recipe/README.md | 86 +++++++++++++++++ multi_comp_recipe/sd3_inference.py | 144 +++++++++++++++++++++++++++++ multi_comp_recipe/vlm_inference.py | 111 ++++++++++++++++++++++ 4 files changed, 342 insertions(+) create mode 100644 multi_comp_recipe/sd3_inference.py create mode 100644 multi_comp_recipe/vlm_inference.py diff --git a/multi_comp_recipe/.gitignore b/multi_comp_recipe/.gitignore index 4796a2c8e0..febffbef13 100644 --- a/multi_comp_recipe/.gitignore +++ b/multi_comp_recipe/.gitignore @@ -1,2 +1,3 @@ exported_pkg +exported_vlm_pkg out \ No newline at end of file diff --git a/multi_comp_recipe/README.md b/multi_comp_recipe/README.md index a82b47a95b..31091f22bf 100644 --- a/multi_comp_recipe/README.md +++ b/multi_comp_recipe/README.md @@ -78,6 +78,32 @@ out/vae_decoder/ # FP16 VAE decoder Each build writes one optimized component; components without a build stay as exported. 
+### Step 3 — Inference + +Run end-to-end image generation with the exported ONNX transformer: + +``` +python sd3_inference.py --prompt "A photo of a cat sitting on a windowsill" --steps 28 --output result.png +``` + +The inference script (`sd3_inference.py`) uses: +- **Text encoding**: PyTorch CLIP-L, CLIP-G, and T5-XXL (run once, not in the denoising loop) +- **Denoising**: ONNX Runtime `InferenceSession` with the exported transformer (28 steps) +- **VAE decoding**: PyTorch `AutoencoderKL` to decode latents into a 512×512 image + +Options: +``` +--prompt TEXT Text prompt for image generation +--steps N Number of denoising steps (default: 28) +--seed N Random seed (default: 42) +--output PATH Output image path (default: sd3_output.png) +--onnx_dir DIR Path to exported transformer directory (default: out/transformer) +``` + +> **Note.** The CLIP/T5 text encoders and the VAE are run via PyTorch since Mobius does not export +> them. Only the transformer (the compute-heavy denoising backbone that runs N steps) is in ONNX. +> SD3 is a gated model — you need `huggingface-cli login` or set `HF_TOKEN` for the text encoders. + --- ## Recipe 2 — Vision-Language Model (`vlm_optimize_components.json`) @@ -115,6 +141,66 @@ olive run --config vlm_optimize_components.json > produces for `Qwen/Qwen3-VL-2B-Instruct`. For a different VLM, adjust the component names in the > config to match the subfolder names your export actually produced. +### Step 3 — Inference with ORT GenAI + +Run text generation with the exported ONNX models using **onnxruntime-genai**: + +```bash +# Text-only +python vlm_inference.py --prompt "The capital of France is" + +# With image input +python vlm_inference.py --prompt "Describe this image." 
--image photo.jpg + +# Custom settings +python vlm_inference.py --model_dir exported_vlm_pkg --max_new_tokens 256 +``` + +The inference script (`vlm_inference.py`) uses ORT GenAI which handles: +- **Tokenization**: Built-in tokenizer from saved HF tokenizer files +- **Embedding**: ONNX `embedding/model.onnx` (token embed + image/audio feature mixing) +- **Vision encoding**: ONNX `vision_encoder/model.onnx` (when `--image` is provided) +- **Decoding**: ONNX `decoder/model.onnx` with KV cache (autoregressive generation) + +Options: +``` +--prompt TEXT Text prompt +--image PATH Optional image file for multimodal input +--max_new_tokens N Maximum tokens to generate (default: 128) +--model_dir DIR Path to exported model directory (default: exported_vlm_pkg) +``` + +#### Setup requirements + +The export directory needs these files alongside the ONNX models: + +``` +exported_vlm_pkg/ + genai_config.json # Model type, I/O mappings, search config + tokenizer.json # HF tokenizer + tokenizer_config.json + vision_processor.json # Vision preprocessing config + audio_processor.json # Audio preprocessing config (for Phi-4-multimodal) + decoder/model.onnx + vision_encoder/model.onnx + embedding/model.onnx + audio_encoder/model.onnx # Optional (Phi-4-multimodal) +``` + +To create `genai_config.json` and tokenizer files after export: + +```python +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-multimodal-instruct", trust_remote_code=True) +tokenizer.save_pretrained("exported_vlm_pkg") +``` + +For the `genai_config.json` structure, see the +[Mobius phi4mm example](https://github.com/microsoft/mobius/blob/main/examples/phi4mm_ort_genai.py) +which writes the config automatically. + +> **Note.** Install `onnxruntime-genai` (`pip install onnxruntime-genai`) to use this script. 
+ --- ## Notes diff --git a/multi_comp_recipe/sd3_inference.py b/multi_comp_recipe/sd3_inference.py new file mode 100644 index 0000000000..c84a504a32 --- /dev/null +++ b/multi_comp_recipe/sd3_inference.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +"""SD3 end-to-end inference using ONNX transformer + PyTorch text encoders and VAE. + +Usage: + python sd3_inference.py --prompt "A photo of a cat sitting on a windowsill" + python sd3_inference.py --prompt "A futuristic city" --steps 50 --output city.png +""" + +import argparse +import os + +import numpy as np +import onnxruntime as ort +import torch +from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler +from PIL import Image +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast + +MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers" +ONNX_DIR = "out/transformer" + + +def encode_text(prompt: str, model_id: str) -> tuple[np.ndarray, np.ndarray]: + """Encode prompt using CLIP-L, CLIP-G, and T5-XXL text encoders. 
+ + Returns: + encoder_hidden_states: [1, 410, 4096] + pooled_projections: [1, 2048] + """ + tokenizer_l = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer") + text_encoder_l = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.float32) + + tokenizer_g = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer_2") + text_encoder_g = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder_2", torch_dtype=torch.float32) + + tokenizer_t5 = T5TokenizerFast.from_pretrained(model_id, subfolder="tokenizer_3") + text_encoder_t5 = T5EncoderModel.from_pretrained(model_id, subfolder="text_encoder_3", torch_dtype=torch.float32) + + with torch.no_grad(): + # CLIP-L + tokens_l = tokenizer_l(prompt, padding="max_length", max_length=77, return_tensors="pt", truncation=True) + out_l = text_encoder_l(**tokens_l, output_hidden_states=True) + clip_l_hidden = out_l.hidden_states[-2] # [1, 77, 768] + clip_l_pooled = out_l.pooler_output # [1, 768] + + # CLIP-G + tokens_g = tokenizer_g(prompt, padding="max_length", max_length=77, return_tensors="pt", truncation=True) + out_g = text_encoder_g(**tokens_g, output_hidden_states=True) + clip_g_hidden = out_g.hidden_states[-2] # [1, 77, 1280] + clip_g_pooled = out_g.pooler_output # [1, 1280] + + # T5-XXL + tokens_t5 = tokenizer_t5(prompt, padding="max_length", max_length=256, return_tensors="pt", truncation=True) + t5_hidden = text_encoder_t5(**tokens_t5).last_hidden_state # [1, 256, 4096] + + # Pad CLIP outputs to 4096 and concatenate + clip_l_padded = torch.nn.functional.pad(clip_l_hidden, (0, 4096 - 768)) # [1, 77, 4096] + clip_g_padded = torch.nn.functional.pad(clip_g_hidden, (0, 4096 - 1280)) # [1, 77, 4096] + encoder_hidden_states = torch.cat([clip_l_padded, clip_g_padded, t5_hidden], dim=1) # [1, 410, 4096] + pooled_projections = torch.cat([clip_l_pooled, clip_g_pooled], dim=-1) # [1, 2048] + + return encoder_hidden_states.numpy(), pooled_projections.numpy() + + +def denoise( + 
onnx_path: str, + encoder_hidden_states: np.ndarray, + pooled_projections: np.ndarray, + scheduler: FlowMatchEulerDiscreteScheduler, + latent_shape: tuple = (1, 16, 64, 64), + seed: int = 42, +) -> torch.Tensor: + """Run the denoising loop using the ONNX transformer.""" + sess = ort.InferenceSession(onnx_path) + + torch.manual_seed(seed) + latents = torch.randn(latent_shape) + + for i, t in enumerate(scheduler.timesteps): + noise_pred = sess.run( + None, + { + "sample": latents.numpy(), + "timestep": np.array([t.item()], dtype=np.int64), + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_projections, + }, + )[0] + latents = scheduler.step(torch.from_numpy(noise_pred), t, latents, return_dict=False)[0] + if i % 7 == 0: + print(f" Step {i}/{len(scheduler.timesteps)}, t={t.item():.1f}") + + return latents + + +def decode_latents(latents: torch.Tensor, model_id: str) -> np.ndarray: + """Decode latents to image using the VAE decoder.""" + vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) + with torch.no_grad(): + latents_scaled = latents / vae.config.scaling_factor + vae.config.shift_factor + image = vae.decode(latents_scaled, return_dict=False)[0] + image = (image / 2 + 0.5).clamp(0, 1) + image = image.permute(0, 2, 3, 1).numpy()[0] + return (image * 255).astype(np.uint8) + + +def main(): + parser = argparse.ArgumentParser(description="SD3 inference with ONNX transformer") + parser.add_argument("--prompt", default="A photo of a cat sitting on a windowsill") + parser.add_argument("--steps", type=int, default=28) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--output", default="sd3_output.png") + parser.add_argument("--model_id", default=MODEL_ID) + parser.add_argument("--onnx_dir", default=ONNX_DIR) + args = parser.parse_args() + + onnx_path = os.path.join(args.onnx_dir, "model.onnx") + if not os.path.exists(onnx_path): + print(f"Error: ONNX model not found at 
{onnx_path}") + print("Run: olive capture-onnx-graph --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers " + "--use_mobius_builder --output_path out") + return + + print(f"Prompt: {args.prompt}") + print(f"Steps: {args.steps}, Seed: {args.seed}") + + print("\n1. Encoding text...") + encoder_hidden_states, pooled_projections = encode_text(args.prompt, args.model_id) + print(f" encoder_hidden_states: {encoder_hidden_states.shape}") + print(f" pooled_projections: {pooled_projections.shape}") + + print("\n2. Denoising...") + scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler") + scheduler.set_timesteps(args.steps) + latents = denoise(onnx_path, encoder_hidden_states, pooled_projections, scheduler, seed=args.seed) + + print("\n3. Decoding latents...") + image = decode_latents(latents, args.model_id) + Image.fromarray(image).save(args.output) + print(f"\nSaved: {args.output} ({image.shape[1]}x{image.shape[0]})") + + +if __name__ == "__main__": + main() diff --git a/multi_comp_recipe/vlm_inference.py b/multi_comp_recipe/vlm_inference.py new file mode 100644 index 0000000000..04a0e66639 --- /dev/null +++ b/multi_comp_recipe/vlm_inference.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +"""VLM (Phi-4-multimodal) inference using ORT GenAI with exported ONNX models. + +Usage: + # Text-only + python vlm_inference.py --prompt "The capital of France is" + + # With image + python vlm_inference.py --prompt "Describe this image." --image photo.jpg + + # Custom model directory + python vlm_inference.py --model_dir exported_vlm_pkg --prompt "What is 2+2?" 
+""" + +import argparse +import os + +import onnxruntime_genai as og + + +def generate_text(model_dir: str, prompt: str, max_new_tokens: int = 128) -> str: + """Run text-only generation.""" + model = og.Model(model_dir) + tokenizer = og.Tokenizer(model) + + input_ids = tokenizer.encode(prompt) + params = og.GeneratorParams(model) + params.set_search_options(max_length=len(input_ids) + max_new_tokens) + + generator = og.Generator(model, params) + generator.append_tokens(input_ids) + + tokenizer_stream = tokenizer.create_stream() + generated = [] + while not generator.is_done(): + generator.generate_next_token() + token = generator.get_next_tokens()[0] + generated.append(token) + print(tokenizer_stream.decode(token), end="", flush=True) + if len(generated) >= max_new_tokens: + break + + print() + del generator + return tokenizer.decode(generated) + + +def generate_with_image(model_dir: str, prompt: str, image_path: str, max_new_tokens: int = 128) -> str: + """Run multimodal generation with image input.""" + model = og.Model(model_dir) + tokenizer = og.Tokenizer(model) + processor = model.create_multimodal_processor() + + images = og.Images.open(image_path) + inputs = processor(prompt, images=images) + + params = og.GeneratorParams(model) + params.set_search_options(max_length=4096) + + generator = og.Generator(model, params) + generator.set_inputs(inputs) + + tokenizer_stream = tokenizer.create_stream() + generated = [] + while not generator.is_done(): + generator.generate_next_token() + token = generator.get_next_tokens()[0] + generated.append(token) + print(tokenizer_stream.decode(token), end="", flush=True) + if len(generated) >= max_new_tokens: + break + + print() + del generator + return tokenizer.decode(generated) + + +def main(): + parser = argparse.ArgumentParser(description="VLM inference with ORT GenAI") + parser.add_argument("--prompt", default="The capital of France is") + parser.add_argument("--image", default=None, help="Path to an image file for vision 
input") + parser.add_argument("--max_new_tokens", type=int, default=128) + parser.add_argument("--model_dir", default="exported_vlm_pkg") + args = parser.parse_args() + + genai_config = os.path.join(args.model_dir, "genai_config.json") + if not os.path.exists(genai_config): + print(f"Error: genai_config.json not found in {args.model_dir}") + print("Run export first:") + print(" olive capture-onnx-graph --model_name_or_path microsoft/Phi-4-multimodal-instruct " + "--use_mobius_builder --trust_remote_code --output_path exported_vlm_pkg") + print("Then create genai_config.json and save tokenizer (see README.md).") + return + + print(f"Model: {args.model_dir}") + print(f"Prompt: {args.prompt}") + if args.image: + print(f"Image: {args.image}") + print("-" * 50) + + if args.image: + output = generate_with_image(args.model_dir, args.prompt, args.image, args.max_new_tokens) + else: + output = generate_text(args.model_dir, args.prompt, args.max_new_tokens) + + print("-" * 50) + print(f"Output: {output}") + + +if __name__ == "__main__": + main() From cf3b0ab050cfa78891a46b193f774bd0f8555969 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Wed, 24 Jun 2026 05:09:43 +0000 Subject: [PATCH 17/18] Update SD3 inference to use all-ONNX pipeline Now that mobius exports text encoders (CLIP-L, CLIP-G, T5-XXL), update the inference script to use ONNX Runtime for all components instead of PyTorch text encoders and VAE. Only tokenizers and the scheduler (pure logic, no neural network) remain as Python dependencies. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- multi_comp_recipe/README.md | 34 ++++---- multi_comp_recipe/sd3_inference.py | 124 ++++++++++++++++------------- 2 files changed, 88 insertions(+), 70 deletions(-) diff --git a/multi_comp_recipe/README.md b/multi_comp_recipe/README.md index 31091f22bf..71dc40ded6 100644 --- a/multi_comp_recipe/README.md +++ b/multi_comp_recipe/README.md @@ -42,14 +42,15 @@ Mobius exports each neural-network component to its own subfolder: ``` exported_pkg/ + text_encoder/model.onnx # CLIP-L text encoder + text_encoder_2/model.onnx # CLIP-G text encoder + text_encoder_3/model.onnx # T5-XXL text encoder transformer/model.onnx # MMDiT denoising backbone vae_encoder/model.onnx vae_decoder/model.onnx ``` -> **Note.** Mobius's diffusers builder exports the **transformer** backbone and the **VAE** -> (encoder + decoder). The CLIP/T5 **text encoders are not exported by Mobius** and are left to the -> original pipeline. The exact subfolders depend on the pipeline; the optimize config below only +> **Note.** The exact subfolders depend on the pipeline; the optimize config below only > needs `builds` for the components you actually want to optimize. 
### Step 2 — Optimize each component @@ -62,16 +63,18 @@ olive run --config sd3_optimize_components.json This applies a different pipeline per component: -| component | pipeline | intent | -|---------------|--------------------|------------------------------------------| -| `transformer` | `dynamic_quant` | INT8-quantize the heavy denoising backbone | -| `vae_encoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | -| `vae_decoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | +| component | pipeline | intent | +|------------------|--------------------|------------------------------------------| +| `transformer` | `dynamic_quant` | INT8-quantize the heavy denoising backbone | +| `text_encoder_3` | `to_fp16` | keep T5-XXL in FP16 | +| `vae_encoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | +| `vae_decoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | Output: ``` out/transformer/ # INT8 transformer +out/text_encoder_3/ # FP16 T5-XXL out/vae_encoder/ # FP16 VAE encoder out/vae_decoder/ # FP16 VAE decoder ``` @@ -80,16 +83,16 @@ Each build writes one optimized component; components without a build stay as ex ### Step 3 — Inference -Run end-to-end image generation with the exported ONNX transformer: +Run end-to-end image generation with the exported ONNX models: ``` python sd3_inference.py --prompt "A photo of a cat sitting on a windowsill" --steps 28 --output result.png ``` The inference script (`sd3_inference.py`) uses: -- **Text encoding**: PyTorch CLIP-L, CLIP-G, and T5-XXL (run once, not in the denoising loop) -- **Denoising**: ONNX Runtime `InferenceSession` with the exported transformer (28 steps) -- **VAE decoding**: PyTorch `AutoencoderKL` to decode latents into a 512×512 image +- **Text encoding**: ONNX Runtime with exported CLIP-L, CLIP-G, and T5-XXL encoders (run once) +- **Denoising**: ONNX Runtime with the exported SD3 transformer (28 steps) +- **VAE decoding**: ONNX Runtime with the exported VAE decoder 
Options: ``` @@ -97,12 +100,11 @@ Options: --steps N Number of denoising steps (default: 28) --seed N Random seed (default: 42) --output PATH Output image path (default: sd3_output.png) ---onnx_dir DIR Path to exported transformer directory (default: out/transformer) +--onnx_dir DIR Path to exported model directory (default: exported_sd3_full2) ``` -> **Note.** The CLIP/T5 text encoders and the VAE are run via PyTorch since Mobius does not export -> them. Only the transformer (the compute-heavy denoising backbone that runs N steps) is in ONNX. -> SD3 is a gated model — you need `huggingface-cli login` or set `HF_TOKEN` for the text encoders. +> **Note.** SD3 is a gated model — you need `huggingface-cli login` or set `HF_TOKEN` to export. +> The tokenizers (CLIP and T5) still run via the `transformers` library. --- diff --git a/multi_comp_recipe/sd3_inference.py b/multi_comp_recipe/sd3_inference.py index c84a504a32..1a8323cfd5 100644 --- a/multi_comp_recipe/sd3_inference.py +++ b/multi_comp_recipe/sd3_inference.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -"""SD3 end-to-end inference using ONNX transformer + PyTorch text encoders and VAE. +"""SD3 end-to-end inference using all ONNX components (text encoders + transformer + VAE). Usage: python sd3_inference.py --prompt "A photo of a cat sitting on a windowsill" @@ -12,58 +12,65 @@ import numpy as np import onnxruntime as ort import torch -from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler +from diffusers import FlowMatchEulerDiscreteScheduler from PIL import Image -from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast +from transformers import CLIPTokenizer, T5TokenizerFast MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers" -ONNX_DIR = "out/transformer" +ONNX_DIR = "exported_sd3_full2" -def encode_text(prompt: str, model_id: str) -> tuple[np.ndarray, np.ndarray]: - """Encode prompt using CLIP-L, CLIP-G, and T5-XXL text encoders. 
+def encode_text(prompt: str, onnx_dir: str, model_id: str) -> tuple[np.ndarray, np.ndarray]: + """Encode prompt using ONNX CLIP-L, CLIP-G, and T5-XXL text encoders. Returns: encoder_hidden_states: [1, 410, 4096] pooled_projections: [1, 2048] """ + # Load tokenizers (lightweight, no model weights) tokenizer_l = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer") - text_encoder_l = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.float32) - tokenizer_g = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer_2") - text_encoder_g = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder_2", torch_dtype=torch.float32) - tokenizer_t5 = T5TokenizerFast.from_pretrained(model_id, subfolder="tokenizer_3") - text_encoder_t5 = T5EncoderModel.from_pretrained(model_id, subfolder="text_encoder_3", torch_dtype=torch.float32) - - with torch.no_grad(): - # CLIP-L - tokens_l = tokenizer_l(prompt, padding="max_length", max_length=77, return_tensors="pt", truncation=True) - out_l = text_encoder_l(**tokens_l, output_hidden_states=True) - clip_l_hidden = out_l.hidden_states[-2] # [1, 77, 768] - clip_l_pooled = out_l.pooler_output # [1, 768] - # CLIP-G - tokens_g = tokenizer_g(prompt, padding="max_length", max_length=77, return_tensors="pt", truncation=True) - out_g = text_encoder_g(**tokens_g, output_hidden_states=True) - clip_g_hidden = out_g.hidden_states[-2] # [1, 77, 1280] - clip_g_pooled = out_g.pooler_output # [1, 1280] - - # T5-XXL - tokens_t5 = tokenizer_t5(prompt, padding="max_length", max_length=256, return_tensors="pt", truncation=True) - t5_hidden = text_encoder_t5(**tokens_t5).last_hidden_state # [1, 256, 4096] + # Load ONNX sessions + sess_l = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder", "model.onnx")) + sess_g = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder_2", "model.onnx")) + sess_t5 = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder_3", "model.onnx")) + + # CLIP-L + 
tokens_l = tokenizer_l(prompt, padding="max_length", max_length=77, return_tensors="np", truncation=True) + out_l = sess_l.run(None, { + "input_ids": tokens_l["input_ids"].astype(np.int64), + "attention_mask": tokens_l["attention_mask"].astype(np.int64), + }) + clip_l_hidden = out_l[0] # last_hidden_state [1, 77, 768] + clip_l_pooled = out_l[1] # text_embeds [1, 768] + + # CLIP-G + tokens_g = tokenizer_g(prompt, padding="max_length", max_length=77, return_tensors="np", truncation=True) + out_g = sess_g.run(None, { + "input_ids": tokens_g["input_ids"].astype(np.int64), + "attention_mask": tokens_g["attention_mask"].astype(np.int64), + }) + clip_g_hidden = out_g[0] # last_hidden_state [1, 77, 1280] + clip_g_pooled = out_g[1] # text_embeds [1, 1280] + + # T5-XXL + tokens_t5 = tokenizer_t5(prompt, padding="max_length", max_length=256, return_tensors="np", truncation=True) + out_t5 = sess_t5.run(None, {"input_ids": tokens_t5["input_ids"].astype(np.int64)}) + t5_hidden = out_t5[0] # last_hidden_state [1, 256, 4096] # Pad CLIP outputs to 4096 and concatenate - clip_l_padded = torch.nn.functional.pad(clip_l_hidden, (0, 4096 - 768)) # [1, 77, 4096] - clip_g_padded = torch.nn.functional.pad(clip_g_hidden, (0, 4096 - 1280)) # [1, 77, 4096] - encoder_hidden_states = torch.cat([clip_l_padded, clip_g_padded, t5_hidden], dim=1) # [1, 410, 4096] - pooled_projections = torch.cat([clip_l_pooled, clip_g_pooled], dim=-1) # [1, 2048] + clip_l_padded = np.pad(clip_l_hidden, ((0, 0), (0, 0), (0, 4096 - 768))) # [1, 77, 4096] + clip_g_padded = np.pad(clip_g_hidden, ((0, 0), (0, 0), (0, 4096 - 1280))) # [1, 77, 4096] + encoder_hidden_states = np.concatenate([clip_l_padded, clip_g_padded, t5_hidden], axis=1) # [1, 410, 4096] + pooled_projections = np.concatenate([clip_l_pooled, clip_g_pooled], axis=-1) # [1, 2048] - return encoder_hidden_states.numpy(), pooled_projections.numpy() + return encoder_hidden_states.astype(np.float32), pooled_projections.astype(np.float32) def denoise( - 
onnx_path: str, + onnx_dir: str, encoder_hidden_states: np.ndarray, pooled_projections: np.ndarray, scheduler: FlowMatchEulerDiscreteScheduler, @@ -71,7 +78,7 @@ def denoise( seed: int = 42, ) -> torch.Tensor: """Run the denoising loop using the ONNX transformer.""" - sess = ort.InferenceSession(onnx_path) + sess = ort.InferenceSession(os.path.join(onnx_dir, "transformer", "model.onnx")) torch.manual_seed(seed) latents = torch.randn(latent_shape) @@ -93,19 +100,25 @@ def denoise( return latents -def decode_latents(latents: torch.Tensor, model_id: str) -> np.ndarray: - """Decode latents to image using the VAE decoder.""" - vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) - with torch.no_grad(): - latents_scaled = latents / vae.config.scaling_factor + vae.config.shift_factor - image = vae.decode(latents_scaled, return_dict=False)[0] - image = (image / 2 + 0.5).clamp(0, 1) - image = image.permute(0, 2, 3, 1).numpy()[0] +def decode_latents(latents: torch.Tensor, onnx_dir: str) -> np.ndarray: + """Decode latents to image using the ONNX VAE decoder.""" + sess = ort.InferenceSession(os.path.join(onnx_dir, "vae_decoder", "model.onnx")) + + # SD3 VAE scaling: latents / scaling_factor + shift_factor + # SD3 defaults: scaling_factor=1.5305, shift_factor=0.0609 + scaling_factor = 1.5305 + shift_factor = 0.0609 + latents_scaled = latents / scaling_factor + shift_factor + + output = sess.run(None, {"latent_sample": latents_scaled.numpy()})[0] + # output: [1, 3, H, W] in [-1, 1] + image = (output / 2 + 0.5).clip(0, 1) + image = np.transpose(image[0], (1, 2, 0)) # [H, W, 3] return (image * 255).astype(np.uint8) def main(): - parser = argparse.ArgumentParser(description="SD3 inference with ONNX transformer") + parser = argparse.ArgumentParser(description="SD3 all-ONNX inference") parser.add_argument("--prompt", default="A photo of a cat sitting on a windowsill") parser.add_argument("--steps", type=int, default=28) 
parser.add_argument("--seed", type=int, default=42) @@ -114,28 +127,31 @@ def main(): parser.add_argument("--onnx_dir", default=ONNX_DIR) args = parser.parse_args() - onnx_path = os.path.join(args.onnx_dir, "model.onnx") - if not os.path.exists(onnx_path): - print(f"Error: ONNX model not found at {onnx_path}") - print("Run: olive capture-onnx-graph --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers " - "--use_mobius_builder --output_path out") + # Verify exported model exists + transformer_path = os.path.join(args.onnx_dir, "transformer", "model.onnx") + if not os.path.exists(transformer_path): + print(f"Error: ONNX model not found at {args.onnx_dir}/") + print("Run: olive capture-onnx-graph --model_name_or_path " + "stabilityai/stable-diffusion-3-medium-diffusers " + "--use_mobius_builder --output_path exported_sd3_full2") return print(f"Prompt: {args.prompt}") print(f"Steps: {args.steps}, Seed: {args.seed}") + print(f"ONNX dir: {args.onnx_dir}") - print("\n1. Encoding text...") - encoder_hidden_states, pooled_projections = encode_text(args.prompt, args.model_id) + print("\n1. Encoding text (ONNX CLIP-L + CLIP-G + T5-XXL)...") + encoder_hidden_states, pooled_projections = encode_text(args.prompt, args.onnx_dir, args.model_id) print(f" encoder_hidden_states: {encoder_hidden_states.shape}") print(f" pooled_projections: {pooled_projections.shape}") - print("\n2. Denoising...") + print("\n2. Denoising (ONNX SD3 transformer)...") scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler") scheduler.set_timesteps(args.steps) - latents = denoise(onnx_path, encoder_hidden_states, pooled_projections, scheduler, seed=args.seed) + latents = denoise(args.onnx_dir, encoder_hidden_states, pooled_projections, scheduler, seed=args.seed) - print("\n3. Decoding latents...") - image = decode_latents(latents, args.model_id) + print("\n3. 
Decoding latents (ONNX VAE decoder)...") + image = decode_latents(latents, args.onnx_dir) Image.fromarray(image).save(args.output) print(f"\nSaved: {args.output} ({image.shape[1]}x{image.shape[0]})") From 97f424c9bd538d978aeb094d6a09d3ce63008c04 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 24 Jun 2026 18:04:57 +0000 Subject: [PATCH 18/18] Fix lint failures: JSON formatting, editorconfig, RUFF T201, PYLINT C1803 --- multi-component-model-architecture-design.md | 30 +-- multi_comp_recipe/.gitignore | 2 +- .../sd3_optimize_components.json | 182 ++++++++---------- multi_comp_recipe/vlm_inference.py | 6 +- .../vlm_optimize_components.json | 55 ++---- pyproject.toml | 1 + test/model/test_composite_model.py | 2 +- 7 files changed, 123 insertions(+), 155 deletions(-) diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md index fd41c1436b..878df8efbc 100644 --- a/multi-component-model-architecture-design.md +++ b/multi-component-model-architecture-design.md @@ -159,7 +159,7 @@ Each subfolder is a standard local ONNX model Olive already loads. The only new For PyTorch-stage optimization (e.g. GPTQ on the decoder) **before** export. -##### How components are obtained +##### How components are obtained ###### Option A — Query Mobius (preferred) @@ -169,12 +169,12 @@ Olive calls Mobius at runtime to inspect the model: components = mobius.inspect_components(model_path_or_id, task=None, trust_remote_code=False) ``` -- **Pros:** - - always in sync with Mobius's own architecture support; - - no per-model maintenance in Olive; +- **Pros:** + - always in sync with Mobius's own architecture support; + - no per-model maintenance in Olive; - covers any model Mobius can export, including new ones; single source of truth shared with the exporter. 
-- **Cons:** - - hard runtime dependency on `mobius-ai` even for the optimization step; +- **Cons:** + - hard runtime dependency on `mobius-ai` even for the optimization step; - coupled to Mobius versions (names/fields may shift) @@ -239,15 +239,15 @@ stable-diffusion: # SD 1.5 family (identified by model_inde ``` -- **Pros:** - - no runtime Mobius dependency for the optimization step; - - works offline; - - human-readable, reviewable, and overridable by users (drop-in extra entries); - - stable across Mobius versions; +- **Pros:** + - no runtime Mobius dependency for the optimization step; + - works offline; + - human-readable, reviewable, and overridable by users (drop-in extra entries); + - stable across Mobius versions; - users can add an unsupported model without code changes. -- **Cons:** - - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); - - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); +- **Cons:** + - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); + - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); - duplicates knowledge that also lives in Mobius. **(a) Optimize each component**. Only the components the user wants to optimize need a build. @@ -330,4 +330,4 @@ This section covers details needs to be handled in low level. - Should the YAML registry (Option B) be hand-authored, generated from Mobius, or both (generated then user-overridable)? - Should component resolution run for every HfModel/DiffusersModel, or only when a build references `components`? - After per-component optimization, what is the cleanest way to assemble the optimized weights into a single model that `capture-onnx-graph --use_mobius_builder` can consume (merged checkpoint folder vs. in-place weight swap)? 
-- For diffusion, is per-component sibling output sufficient, or is a final "collect into one package" export also wanted? \ No newline at end of file +- For diffusion, is per-component sibling output sufficient, or is a final "collect into one package" export also wanted? diff --git a/multi_comp_recipe/.gitignore b/multi_comp_recipe/.gitignore index febffbef13..47ec7124bd 100644 --- a/multi_comp_recipe/.gitignore +++ b/multi_comp_recipe/.gitignore @@ -1,3 +1,3 @@ exported_pkg exported_vlm_pkg -out \ No newline at end of file +out diff --git a/multi_comp_recipe/sd3_optimize_components.json b/multi_comp_recipe/sd3_optimize_components.json index d265b403ef..8551d5325f 100644 --- a/multi_comp_recipe/sd3_optimize_components.json +++ b/multi_comp_recipe/sd3_optimize_components.json @@ -1,102 +1,90 @@ { - "input_model": { - "type": "CompositeModel", - "config": { - "model_path": "exported_pkg" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "accelerators": [ - { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } - ] - } - }, - "passes": { - "optimize_transformer": { - "type": "OrtTransformersOptimization", - "model_type": "unet", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"] - }, - "optimize_vae": { - "type": 
"OrtTransformersOptimization", - "model_type": "vae", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { "GroupNorm": [0, 1, 2] } - } - }, - "engine": { - "host": "local_system", - "target": "local_system", - "evaluate_input_model": false, - "cache_dir": "cache" - }, - "builds": { - "transformer": { - "components": ["transformer"], - "pipeline": ["optimize_transformer"], - "output_dir": "out/transformer" + "input_model": { "type": "CompositeModel", "config": { "model_path": "exported_pkg" } }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] + } }, - "vae_encoder": { - "components": ["vae_encoder"], - "pipeline": ["optimize_vae"], - "output_dir": "out/vae_encoder" + "passes": { + "optimize_transformer": { + "type": "OrtTransformersOptimization", + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + 
"enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": [ "RandomNormalLike" ] + }, + "optimize_vae": { + "type": "OrtTransformersOptimization", + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": [ "RandomNormalLike" ], + "force_fp16_inputs": { "GroupNorm": [ 0, 1, 2 ] } + } }, - "vae_decoder": { - "components": ["vae_decoder"], - "pipeline": ["optimize_vae"], - "output_dir": "out/vae_decoder" + "engine": { "host": "local_system", "target": "local_system", "evaluate_input_model": false, "cache_dir": "cache" }, + "builds": { + "transformer": { + "components": [ "transformer" ], + "pipeline": [ "optimize_transformer" ], + "output_dir": "out/transformer" + }, + "vae_encoder": { + "components": [ "vae_encoder" ], + "pipeline": [ "optimize_vae" ], + "output_dir": "out/vae_encoder" + }, + "vae_decoder": { + "components": [ "vae_decoder" ], + "pipeline": [ "optimize_vae" ], + "output_dir": "out/vae_decoder" + } } - } } diff --git 
a/multi_comp_recipe/vlm_inference.py b/multi_comp_recipe/vlm_inference.py index 04a0e66639..17bcbd9ee7 100644 --- a/multi_comp_recipe/vlm_inference.py +++ b/multi_comp_recipe/vlm_inference.py @@ -87,8 +87,10 @@ def main(): if not os.path.exists(genai_config): print(f"Error: genai_config.json not found in {args.model_dir}") print("Run export first:") - print(" olive capture-onnx-graph --model_name_or_path microsoft/Phi-4-multimodal-instruct " - "--use_mobius_builder --trust_remote_code --output_path exported_vlm_pkg") + print( + " olive capture-onnx-graph --model_name_or_path microsoft/Phi-4-multimodal-instruct " + "--use_mobius_builder --trust_remote_code --output_path exported_vlm_pkg" + ) print("Then create genai_config.json and save tokenizer (see README.md).") return diff --git a/multi_comp_recipe/vlm_optimize_components.json b/multi_comp_recipe/vlm_optimize_components.json index 9260fd6807..89e4d47d51 100644 --- a/multi_comp_recipe/vlm_optimize_components.json +++ b/multi_comp_recipe/vlm_optimize_components.json @@ -1,43 +1,20 @@ { - "input_model": { - "type": "CompositeModel", - "config": { - "model_path": "exported_vlm_pkg" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "accelerators": [ - { "device": "cpu", "execution_providers": ["CPUExecutionProvider"] } - ] - } - }, - "passes": { - "to_fp16": { "type": "OnnxFloatToFloat16" }, - "dynamic_quant": { "type": "OnnxDynamicQuantization" } - }, - "engine": { - "host": "local_system", - "target": "local_system", - "evaluate_input_model": false, - "cache_dir": "cache" - }, - "builds": { - "decoder": { - "components": ["decoder"], - "pipeline": ["dynamic_quant"], - "output_dir": "out/decoder" - }, - "vision_encoder": { - "components": ["vision_encoder"], - "pipeline": ["to_fp16"], - "output_dir": "out/vision_encoder" + "input_model": { "type": "CompositeModel", "config": { "model_path": "exported_vlm_pkg" } }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { 
"device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] + } }, - "embedding": { - "components": ["embedding"], - "pipeline": ["to_fp16"], - "output_dir": "out/embedding" + "passes": { "to_fp16": { "type": "OnnxFloatToFloat16" }, "dynamic_quant": { "type": "OnnxDynamicQuantization" } }, + "engine": { "host": "local_system", "target": "local_system", "evaluate_input_model": false, "cache_dir": "cache" }, + "builds": { + "decoder": { "components": [ "decoder" ], "pipeline": [ "dynamic_quant" ], "output_dir": "out/decoder" }, + "vision_encoder": { + "components": [ "vision_encoder" ], + "pipeline": [ "to_fp16" ], + "output_dir": "out/vision_encoder" + }, + "embedding": { "components": [ "embedding" ], "pipeline": [ "to_fp16" ], "output_dir": "out/embedding" } } - } } diff --git a/pyproject.toml b/pyproject.toml index 52a945e7d6..00fcd8b94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -191,3 +191,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator", "pydantic.m "test/**" = ["INP001"] "scripts/**" = ["INP001"] "olive/cli/**" = ["T201"] +"multi_comp_recipe/**" = ["T201"] diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index 2d0fd96dff..725300855c 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -190,7 +190,7 @@ def test_discover_onnx_components_empty_for_flat_dir(tmp_path): from olive.model.utils.onnx_utils import discover_onnx_components (tmp_path / "model.onnx").write_bytes(b"onnx") - assert discover_onnx_components(str(tmp_path)) == [] + assert not discover_onnx_components(str(tmp_path)) def test_composite_handler_discovers_components_from_directory(tmp_path):