diff --git a/multi-component-model-architecture-design.md b/multi-component-model-architecture-design.md new file mode 100644 index 0000000000..878df8efbc --- /dev/null +++ b/multi-component-model-architecture-design.md @@ -0,0 +1,333 @@ +# Design: Multi-Component Model Optimization in Olive + + +## 1. Problem Statement + +Olive needs to optimize **multi-component models** (different components → different optimizations) **and** produce **multiple target-specific outputs** from one config. Motivating cases: + +- **Multi-component models:** + - **VLM / multimodal HF models** — quantize the language decoder (e.g. GPTQ int4) while keeping the vision tower / projector at higher precision. + - **Diffusion models** (SD / SDXL / SD3 / FLUX) — optimize text encoders, the diffusion backbone (UNet/transformer), and VAE differently. + - **Future multi-component families** — without Olive learning every architecture's naming convention. +- **Multi-device / multi-EP builds** — produce several target-specific outputs from a single config, e.g. an OpenVINO **GPU** build and an OpenVINO **NPU** build of the same model, each with its own conversion/quantization/encapsulation pipeline and `host`/`target`. + + +## 2. Approach + +### The `builds` Schema + +A build is a named optimization unit: + +```python +class BuildNode: + components: list[str] | None # component names; omitted ⇒ full model + pipeline: list[str] # ordered pass names from the top-level `passes` + output_dir: str + input: str | list[str] | None # optional: take the model from another build's output(s) + host: SystemConfig | str | None + target: SystemConfig | str | None + evaluator: OliveEvaluatorConfig | str | None + search_strategy: SearchStrategyConfig | bool | None +``` + +Semantics: + +- **`components`** selects named components from the resolved `CompositeModel`. Omitted ⇒ run on the full model. A single name unwraps to that component; multiple names produce a sub-composite. 
+- **`pipeline`** lists pass names from the shared top-level `passes` dict, composed per build. Different builds reuse the same pass definitions in different orders/subsets. +- **`input`** (optional) sets the build's starting model: omitted ⇒ the top-level `input_model`; `"<build_name>"` ⇒ another build's output; `["a","b"]` ⇒ multiple build outputs (for a merge step). When the upstream output is a `CompositeModel` (e.g. an export build's package), `components` selects which of its components this build optimizes. This is what lets one exported composite fan out into different per-component builds. +- **`host`/`target`/`evaluator`/`search_strategy`** override engine defaults per build. + +From one `input_model`, several builds produce several outputs: +- **Per-component builds** — each build optimizes a different `components` subset (one model in → one output per component). +- **Per-target builds** — builds omit `components` and differ by `host`/`target`/`pipeline`, one output per device/EP. + +A build may also take its model from another build's output via `input` — e.g. an export build emits a `CompositeModel` and downstream builds optimize its components. + +--- + +## 3. Config Examples + +### 3.1 Basic shape — independent sibling builds + +Shared `passes`; each build picks a component and composes its own `pipeline`. Each build writes one optimized folder. The same shape is used whether the user optimizes ONNX components after export (Flow A) or PyTorch components before export (Flow B). 
+ +```jsonc +{ + "input_model": { "type": "DiffusersModel", "model_path": "stabilityai/stable-diffusion-xl-base-1.0" }, + "systems": { + "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu", "execution_providers": ["CUDAExecutionProvider"] } ] } + }, + "data_configs": [ + { "name": "quantize_data_config", "user_script": "user_script.py", + "load_dataset_config": { "type": "local_dataset" }, + "dataloader_config": { "type": "quantize_data_loader", "data_num": 100 } } + ], + "passes": { + "convert": { "type": "OnnxConversion", "target_opset": 17 }, + "optimize_clip": { "type": "OrtTransformersOptimization", "model_type": "clip", "float16": true }, + "optimize_vae": { "type": "OrtTransformersOptimization", "model_type": "vae", "float16": true }, + "optimize_unet": { "type": "OrtTransformersOptimization", "model_type": "unet", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "quantize_data_config" } + }, + "builds": { + "text_encoder": { "components": ["text_encoder"], "pipeline": ["convert", "optimize_clip", "quantization"], "output_dir": "out/text_encoder", "evaluator": "common_evaluator" }, + "vae_encoder": { "components": ["vae_encoder"], "pipeline": ["convert", "optimize_vae", "quantization"], "output_dir": "out/vae_encoder", "evaluator": "common_evaluator" }, + "vae_decoder": { "components": ["vae_decoder"], "pipeline": ["convert", "optimize_vae", "quantization"], "output_dir": "out/vae_decoder", "evaluator": "common_evaluator" }, + "unet": { "components": ["unet"], "pipeline": ["convert", "optimize_unet", "quantization"], "output_dir": "out/unet", "evaluator": "common_evaluator" } + } +} +``` + +Each build writes one optimized component under its `output_dir`. + + +### 3.2 Component optimization — two flows + + +#### Flow A — export to ONNX model first, then per-component optimization + +Export with `MobiusBuilder`, which takes an `HfModel` and returns a `CompositeModel`. 
There are two ways to connect export to per-component optimization. + +**Option 1 — one config (export build + `input` dependency).** The export build produces the composite; downstream builds reference it via `input` and each select a component. Unreferenced components stay as exported. + +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "" }, + "data_configs": [ + { "name": "calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } + ], + "passes": { + "export": { "type": "MobiusBuilder", "precision": "fp16", "runtime": "ort-genai" }, + "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } + }, + "builds": { + "export": { "pipeline": ["export"], "output_dir": "out/pkg" }, + "decoder": { "input": "export", "components": ["decoder"], "pipeline": ["transformer_opt", "quantization"], "output_dir": "out/decoder" }, + "vision_encoder": { "input": "export", "components": ["vision_encoder"], "pipeline": ["transformer_opt"], "output_dir": "out/vision_encoder" } + } +} +``` + +- Pros: + - One single config file. + - One step for the whole model optimization. +- Cons: + - Complex DAG logic. + - Needs `input` dependency. + - The user needs to know the component names up front (from a new Olive CLI command that gets them from Mobius). + + +**Option 2 — two steps (CLI export, then load the folder).** Export with the CLI, then point `input_model` at the exported directory. (preferred) + +Step 1 — export. Each component lands in its own subfolder: + +```powershell +olive capture-onnx-graph --model_name_or_path <hf_model_id_or_path> --use_mobius_builder --output_path exported_pkg +# exported_pkg/decoder/model.onnx, exported_pkg/vision_encoder/model.onnx, exported_pkg/embedding/model.onnx +``` + +Step 2 — point `input_model` at that directory. Olive loads it as a `CompositeModel`, taking each **subfolder name as the component name**. 
Plain sibling builds, no `input` dependency: + +```jsonc +{ + "input_model": { "type": "CompositeModel", "model_path": "exported_pkg" }, + "data_configs": [ + { "name": "calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } + ], + "passes": { + "transformer_opt": { "type": "OrtTransformersOptimization", "float16": true }, + "quantization": { "type": "OnnxStaticQuantization", "data_config": "calib" } + }, + "builds": { + "decoder": { "components": ["decoder"], "pipeline": ["transformer_opt", "quantization"], "output_dir": "out/decoder" }, + "vision_encoder": { "components": ["vision_encoder"], "pipeline": ["transformer_opt"], "output_dir": "out/vision_encoder" } + } +} +``` + +Each subfolder is a standard local ONNX model Olive already loads. The only new piece is aggregating a directory of per-component subfolders into a `CompositeModel` whose component names come from the folder names. + +- Pros: + - Clear config file, no DAG. + - User doesn't need to call a different CLI to get the components name. +- Cons: + - 2 steps. + - User needs to read output model folder to get components name. + +#### Flow B — optimize first, then export (recommended) + +For PyTorch-stage optimization (e.g. GPTQ on the decoder) **before** export. + +##### How components are obtained + +###### Option A — Query Mobius (preferred) + +Olive calls Mobius at runtime to inspect the model: + +```python +components = mobius.inspect_components(model_path_or_id, task=None, trust_remote_code=False) +``` + +- **Pros:** + - always in sync with Mobius's own architecture support; + - no per-model maintenance in Olive; + - covers any model Mobius can export, including new ones; single source of truth shared with the exporter. 
+- **Cons:** + - hard runtime dependency on `mobius-ai` even for the optimization step; + - coupled to Mobius versions (names/fields may shift) + + +###### Option B — Olive-maintained YAML registry + +Olive ships a YAML file enumerating the components of common models, keyed by `model_type` / architecture. Two component description styles appear, matching the two families: + +- **HF/VLM components** only need a **submodule path** to slice the component out of one model. `name` (for `builds.components`) plus `source.path` (where the submodule lives) is enough; `kind` is optional (only used for pass↔kind validation): + +```yaml +# olive/model/component_registry.yaml +llava: + components: + - { name: decoder, kind: decoder, source: { path: "model.language_model" } } + - { name: vision_encoder, kind: vision_encoder, source: { path: "model.vision_tower" } } + - { name: embedding, kind: embedding, source: { path: "model.language_model.embed_tokens" } } +``` + +- **Diffusion components** reuse existing Diffusion model components yaml file: + +```yaml +stable-diffusion: # SD 1.5 family (identified by model_index.json) + type: DiffusersModel + components: + - name: text_encoder + kind: text_encoder + loader: { component: text_encoder } # DiffusersModel.get_component("text_encoder") + io_config: + input_names: [input_ids] + output_names: [last_hidden_state, pooler_output] + dynamic_axes: { input_ids: { 0: batch, 1: sequence } } + dummy_inputs: text_encoder # generate_diffusers_dummy_inputs(...) 
+ - name: vae_encoder + kind: vae_encoder + loader: { component: vae, patch: get_vae_encoder } # olive.model.utils.diffusers_utils.get_vae_encoder + io_config: + input_names: [sample, return_dict] + output_names: [latent_sample] + dynamic_axes: { sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_encoder + - name: vae_decoder + kind: vae_decoder + loader: { component: vae, patch: get_vae_decoder } # olive.model.utils.diffusers_utils.get_vae_decoder + io_config: + input_names: [latent_sample, return_dict] + output_names: [sample] + dynamic_axes: { latent_sample: { 0: batch, 1: channels, 2: height, 3: width } } + dummy_inputs: vae_decoder + - name: unet + kind: diffusion_backbone + loader: { component: unet } + io_config: + input_names: [sample, timestep, encoder_hidden_states, return_dict] + output_names: [out_sample] + dynamic_axes: + sample: { 0: batch, 1: channels, 2: height, 3: width } + timestep: { 0: batch } + encoder_hidden_states: { 0: batch, 1: sequence } + dummy_inputs: unet + # SDXL adds text_encoder_2 (kind: text_encoder) and extra UNet inputs (text_embeds, time_ids); + # SD3 / FLUX replace `unet` with `transformer` (kind: diffusion_backbone). +``` + + +- **Pros:** + - no runtime Mobius dependency for the optimization step; + - works offline; + - human-readable, reviewable, and overridable by users (drop-in extra entries); + - stable across Mobius versions; + - users can add an unsupported model without code changes. +- **Cons:** + - must be **maintained by Olive** as new architectures appear (the same per-architecture maintenance Mobius already does); + - risk of drifting out of sync with Mobius's actual export expectations (e.g. `export_key`s, weight prefixes); + - duplicates knowledge that also lives in Mobius. + +**(a) Optimize each component**. Only the components the user wants to optimize need a build. 
+ +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "" }, + "data_configs": [ { "name": "decoder_calib", "user_script": "user_script.py", "load_dataset_config": { "type": "local_dataset" } } ], + "passes": { + "decoder_quant": { "type": "Gptq", "bits": 4, "group_size": 128, "data_config": "decoder_calib" } + }, + "builds": { + "decoder": { "components": ["decoder"], "pipeline": ["decoder_quant"], "output_dir": "out/decoder" } + } +} +``` + +**(b) Converge the optimized component(s) into one complete HF model directory.** The recommended form is **in-place**: the optimization runs on the full model and quantizes only the selected submodule, so step (a)'s `output_dir` is already a complete HF directory with the decoder quantized. + +> **`builds.components` means different things for the two families:** +> - **Diffusion:** slice this component out and optimize it independently → independent ONNX artifact. +> - **VLM:** locate and optimize this submodule inside the full model, output the full model → one complete HF directory. + +**(c) Export with the existing `capture-onnx-graph` CLI + Mobius builder.** `--use_mobius_builder`, takes `--model_name_or_path` as one complete HF model directory and lets Mobius re-identify and export the multi-component package. + +```powershell +olive capture-onnx-graph ` + --model_name_or_path local_folder ` + --use_mobius_builder ` + --output_path out\pkg +``` + +**(c) requires a quant format bridge.** Olive saves `quant_method="olive"` with **uint8** packing; Mobius's `preprocess_gptq_weights` expects `quant_method="gptq"`/`"awq"` with **int32** packing. A conversion (or a Mobius `"olive"` branch) is required for Mobius to load the quantized weights. + + +### 3.3 Per-target builds — multi-device / multi-EP from one config + +The **same** `builds` schema produces several target-specific outputs without any `components`. Each build differs only by `host`/`target` and its `pipeline`; shared `passes` are composed per target. 
This is the OpenVINO GPU + NPU case (Qwen2.5-Coder). + +```jsonc +{ + "input_model": { "type": "HfModel", "model_path": "Qwen/Qwen2.5-Coder-7B-Instruct" }, + "systems": { + "ov_gpu": { "type": "LocalSystem", "accelerators": [ { "device": "gpu", "execution_providers": ["OpenVINOExecutionProvider"] } ] }, + "ov_npu": { "type": "LocalSystem", "accelerators": [ { "device": "npu", "execution_providers": ["OpenVINOExecutionProvider"] } ] } + }, + "passes": { + "optimum_convert_gpu": { "type": "OpenVINOOptimumConversion", "extra_args": { "device": "gpu", "task": "text-generation-with-past" }, "ov_quant_config": { "weight_format": "int4", "group_size": 128, "ratio": 0.8 } }, + "optimum_convert_npu": { "type": "OpenVINOOptimumConversion", "extra_args": { "device": "npu" }, "ov_quant_config": { "weight_format": "int4", "group_size": 128, "dataset": "wikitext2", "ratio": 1, "sym": true, "backup_precision": "int8_asym" } }, + "io_update": { "type": "OpenVINOIoUpdate", "static": false, "reuse_cache": true }, + "encapsulation_gpu": { "type": "OpenVINOEncapsulation", "target_device": "gpu", "ov_version": "2025.1", "reuse_cache": true }, + "encapsulation_npu": { "type": "OpenVINOEncapsulation", "target_device": "npu", "ov_version": "2025.2", "reuse_cache": true, "genai_config_override": { "model": { "context_length": 4224 } } } + }, + "builds": { + "gpu": { "host": "ov_gpu", "target": "ov_gpu", "search_strategy": false, "pipeline": ["optimum_convert_gpu", "io_update", "encapsulation_gpu"], "output_dir": "gpu_output" }, + "npu": { "host": "ov_npu", "target": "ov_npu", "search_strategy": false, "pipeline": ["optimum_convert_npu", "io_update", "encapsulation_npu"], "output_dir": "npu_output" } + } +} +``` + +--- + +## 4. Low Level Details + +This section covers details needs to be handled in low level. + +- Sibling builds share no mutable state except read-only config (`passes`, `systems`) and the on-disk cache directory. 
Parallelizing is a scheduling change: one build's execution body can run concurrently with another's. +- If we choose `input` dependency option, Olive needs to handle builds DAG internally. +- Shared cache safety + - Cache keys will be namespaced by `workflow_id` (`"{workflow_id}_{build_name}"`). + - Writes to the shared cache **directory** (footprints, saved models) must be atomic or land in per-build subdirectories; shared-cache upload (if enabled) must be concurrency-safe. +- Result aggregation and failure handling + - Results remain a `dict[str, WorkflowOutput]` keyed by build name, assembled as workers complete. + - A failure in one build does **not** abort siblings; record per-build success/failure and surface a summary. + - For the DAG variant, a failed upstream build causes its dependents to be skipped and marked (no partial/corrupt merges). + +## 5. Open Questions + + +- Should the YAML registry (Option B) be hand-authored, generated from Mobius, or both (generated then user-overridable)? +- Should component resolution run for every HfModel/DiffusersModel, or only when a build references `components`? +- After per-component optimization, what is the cleanest way to assemble the optimized weights into a single model that `capture-onnx-graph --use_mobius_builder` can consume (merged checkpoint folder vs. in-place weight swap)? +- For diffusion, is per-component sibling output sufficient, or is a final "collect into one package" export also wanted? 
diff --git a/multi_comp_recipe/.gitignore b/multi_comp_recipe/.gitignore new file mode 100644 index 0000000000..47ec7124bd --- /dev/null +++ b/multi_comp_recipe/.gitignore @@ -0,0 +1,3 @@ +exported_pkg +exported_vlm_pkg +out diff --git a/multi_comp_recipe/README.md b/multi_comp_recipe/README.md new file mode 100644 index 0000000000..71dc40ded6 --- /dev/null +++ b/multi_comp_recipe/README.md @@ -0,0 +1,216 @@ +# Multi-Component Model Optimization Recipes + +These recipes demonstrate **Flow A — export first, then per-component optimization**: export a +multi-component model to ONNX once, then run a single Olive config whose `builds` apply a +**different pipeline to each component**. + +The flow is two explicit steps: + +1. **Export** the model to a directory of per-component ONNX subfolders using the Olive CLI with the + Mobius builder. +2. **Optimize** by pointing an Olive config at that directory; each component subfolder becomes a + selectable component that a `build` can target. + +There is no need to memorize component names: each exported component lives in its own folder, and +Olive loads the export directory as a `CompositeModel` whose **component names are the subfolder +names**. + +--- + +## Prerequisites + +``` +pip install olive-ai +pip install mobius-ai +``` + +Exporting a diffusion pipeline also needs `diffusers`/`transformers` and access to the model on +Hugging Face (Stable Diffusion 3 is a gated model — accept its license and `huggingface-cli login` +first). 
+ +--- + +## Recipe 1 — Stable Diffusion 3 (`sd3_optimize_components.json`) + +### Step 1 — Export with the CLI + +``` +olive capture-onnx-graph --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers --use_mobius_builder --output_path exported_pkg +``` + +Mobius exports each neural-network component to its own subfolder: + +``` +exported_pkg/ + text_encoder/model.onnx # CLIP-L text encoder + text_encoder_2/model.onnx # CLIP-G text encoder + text_encoder_3/model.onnx # T5-XXL text encoder + transformer/model.onnx # MMDiT denoising backbone + vae_encoder/model.onnx + vae_decoder/model.onnx +``` + +> **Note.** The exact subfolders depend on the pipeline; the optimize config below only +> needs `builds` for the components you actually want to optimize. + +### Step 2 — Optimize each component + +Run from the directory that contains `exported_pkg/`: + +``` +olive run --config sd3_optimize_components.json +``` + +This applies a different pipeline per component: + +| component | pipeline | intent | +|------------------|--------------------|------------------------------------------| +| `transformer` | `dynamic_quant` | INT8-quantize the heavy denoising backbone | +| `text_encoder_3` | `to_fp16` | keep T5-XXL in FP16 | +| `vae_encoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | +| `vae_decoder` | `to_fp16` | keep the VAE in FP16 to preserve quality | + +Output: + +``` +out/transformer/ # INT8 transformer +out/text_encoder_3/ # FP16 T5-XXL +out/vae_encoder/ # FP16 VAE encoder +out/vae_decoder/ # FP16 VAE decoder +``` + +Each build writes one optimized component; components without a build stay as exported. 
+ +### Step 3 — Inference + +Run end-to-end image generation with the exported ONNX models (pass `--onnx_dir` explicitly, because the script's default is not the `exported_pkg` directory created in Step 1): + +``` +python sd3_inference.py --onnx_dir exported_pkg --prompt "A photo of a cat sitting on a windowsill" --steps 28 --output result.png +``` + +The inference script (`sd3_inference.py`) uses: +- **Text encoding**: ONNX Runtime with exported CLIP-L, CLIP-G, and T5-XXL encoders (run once) +- **Denoising**: ONNX Runtime with the exported SD3 transformer (28 steps) +- **VAE decoding**: ONNX Runtime with the exported VAE decoder + +Options: +``` +--prompt TEXT Text prompt for image generation +--steps N Number of denoising steps (default: 28) +--seed N Random seed (default: 42) +--output PATH Output image path (default: sd3_output.png) +--onnx_dir DIR Path to exported model directory (default: exported_sd3_full2) +``` + +> **Note.** SD3 is a gated model — you need `huggingface-cli login` or set `HF_TOKEN` to export. +> The tokenizers (CLIP and T5) still run via the `transformers` library. + +--- + +## Recipe 2 — Vision-Language Model (`vlm_optimize_components.json`) + +Same two-step Flow A for a VLM, using `Qwen/Qwen3-VL-2B-Instruct`. + +### Step 1 — Export + +``` +olive capture-onnx-graph --model_name_or_path Qwen/Qwen3-VL-2B-Instruct --use_mobius_builder --output_path exported_vlm_pkg +``` + +Mobius exports this model as three components, each in its own subfolder: + +``` +exported_vlm_pkg/ + decoder/model.onnx + vision_encoder/model.onnx + embedding/model.onnx +``` + +### Step 2 — Optimize + +``` +olive run --config vlm_optimize_components.json +``` + +| component | pipeline | intent | +|------------------|-----------------|-------------------------------------| +| `decoder` | `dynamic_quant` | INT8-quantize the language decoder | +| `vision_encoder` | `to_fp16` | keep the vision tower in FP16 | +| `embedding` | `to_fp16` | keep the embedding in FP16 | + +> The three component names (`decoder`, `vision_encoder`, `embedding`) are exactly what Mobius +> produces for `Qwen/Qwen3-VL-2B-Instruct`. 
For a different VLM, adjust the component names in the +> config to match the subfolder names your export actually produced. + +### Step 3 — Inference with ORT GenAI + +Run text generation with the exported ONNX models using **onnxruntime-genai**: + +```bash +# Text-only +python vlm_inference.py --prompt "The capital of France is" + +# With image input +python vlm_inference.py --prompt "Describe this image." --image photo.jpg + +# Custom settings +python vlm_inference.py --model_dir exported_vlm_pkg --max_new_tokens 256 +``` + +The inference script (`vlm_inference.py`) uses ORT GenAI which handles: +- **Tokenization**: Built-in tokenizer from saved HF tokenizer files +- **Embedding**: ONNX `embedding/model.onnx` (token embed + image/audio feature mixing) +- **Vision encoding**: ONNX `vision_encoder/model.onnx` (when `--image` is provided) +- **Decoding**: ONNX `decoder/model.onnx` with KV cache (autoregressive generation) + +Options: +``` +--prompt TEXT Text prompt +--image PATH Optional image file for multimodal input +--max_new_tokens N Maximum tokens to generate (default: 128) +--model_dir DIR Path to exported model directory (default: exported_vlm_pkg) +``` + +#### Setup requirements + +The export directory needs these files alongside the ONNX models: + +``` +exported_vlm_pkg/ + genai_config.json # Model type, I/O mappings, search config + tokenizer.json # HF tokenizer + tokenizer_config.json + vision_processor.json # Vision preprocessing config + audio_processor.json # Audio preprocessing config (for Phi-4-multimodal) + decoder/model.onnx + vision_encoder/model.onnx + embedding/model.onnx + audio_encoder/model.onnx # Optional (Phi-4-multimodal) +``` + +To create the tokenizer files after export (load the tokenizer of the same model you exported): + +```python +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-VL-2B-Instruct", trust_remote_code=True) +tokenizer.save_pretrained("exported_vlm_pkg") +``` + +For the `genai_config.json` 
structure, see the +[Mobius phi4mm example](https://github.com/microsoft/mobius/blob/main/examples/phi4mm_ort_genai.py) +which writes the config automatically. + +> **Note.** Install `onnxruntime-genai` (`pip install onnxruntime-genai`) to use this script. + +--- + +## Notes + +- The passes here (`OnnxFloatToFloat16`, `OnnxDynamicQuantization`) are **illustrative** and chosen + to run without calibration data. Swap in `OrtTransformersOptimization`, `OnnxStaticQuantization` + (with a `data_config`), or other ONNX passes for production-quality optimization. +- The recipes target the **CPU** EP so they run anywhere. For GPU deployment, change the + `execution_providers` to e.g. `["CUDAExecutionProvider"]` and the device to `"gpu"`. +- `builds.components` selects which exported components to optimize. Only the components with a build + are touched; the rest remain as exported. diff --git a/multi_comp_recipe/sd3_inference.py b/multi_comp_recipe/sd3_inference.py new file mode 100644 index 0000000000..1a8323cfd5 --- /dev/null +++ b/multi_comp_recipe/sd3_inference.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +"""SD3 end-to-end inference using all ONNX components (text encoders + transformer + VAE). + +Usage: + python sd3_inference.py --prompt "A photo of a cat sitting on a windowsill" + python sd3_inference.py --prompt "A futuristic city" --steps 50 --output city.png +""" + +import argparse +import os + +import numpy as np +import onnxruntime as ort +import torch +from diffusers import FlowMatchEulerDiscreteScheduler +from PIL import Image +from transformers import CLIPTokenizer, T5TokenizerFast + +MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers" +ONNX_DIR = "exported_sd3_full2" + + +def encode_text(prompt: str, onnx_dir: str, model_id: str) -> tuple[np.ndarray, np.ndarray]: + """Encode prompt using ONNX CLIP-L, CLIP-G, and T5-XXL text encoders. 
+ + Returns: + encoder_hidden_states: [1, 410, 4096] + pooled_projections: [1, 2048] + """ + # Load tokenizers (lightweight, no model weights) + tokenizer_l = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer") + tokenizer_g = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer_2") + tokenizer_t5 = T5TokenizerFast.from_pretrained(model_id, subfolder="tokenizer_3") + + # Load ONNX sessions + sess_l = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder", "model.onnx")) + sess_g = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder_2", "model.onnx")) + sess_t5 = ort.InferenceSession(os.path.join(onnx_dir, "text_encoder_3", "model.onnx")) + + # CLIP-L + tokens_l = tokenizer_l(prompt, padding="max_length", max_length=77, return_tensors="np", truncation=True) + out_l = sess_l.run(None, { + "input_ids": tokens_l["input_ids"].astype(np.int64), + "attention_mask": tokens_l["attention_mask"].astype(np.int64), + }) + clip_l_hidden = out_l[0] # last_hidden_state [1, 77, 768] + clip_l_pooled = out_l[1] # text_embeds [1, 768] + + # CLIP-G + tokens_g = tokenizer_g(prompt, padding="max_length", max_length=77, return_tensors="np", truncation=True) + out_g = sess_g.run(None, { + "input_ids": tokens_g["input_ids"].astype(np.int64), + "attention_mask": tokens_g["attention_mask"].astype(np.int64), + }) + clip_g_hidden = out_g[0] # last_hidden_state [1, 77, 1280] + clip_g_pooled = out_g[1] # text_embeds [1, 1280] + + # T5-XXL + tokens_t5 = tokenizer_t5(prompt, padding="max_length", max_length=256, return_tensors="np", truncation=True) + out_t5 = sess_t5.run(None, {"input_ids": tokens_t5["input_ids"].astype(np.int64)}) + t5_hidden = out_t5[0] # last_hidden_state [1, 256, 4096] + + # Pad CLIP outputs to 4096 and concatenate + clip_l_padded = np.pad(clip_l_hidden, ((0, 0), (0, 0), (0, 4096 - 768))) # [1, 77, 4096] + clip_g_padded = np.pad(clip_g_hidden, ((0, 0), (0, 0), (0, 4096 - 1280))) # [1, 77, 4096] + encoder_hidden_states = 
np.concatenate([clip_l_padded, clip_g_padded, t5_hidden], axis=1) # [1, 410, 4096] + pooled_projections = np.concatenate([clip_l_pooled, clip_g_pooled], axis=-1) # [1, 2048] + + return encoder_hidden_states.astype(np.float32), pooled_projections.astype(np.float32) + + +def denoise( + onnx_dir: str, + encoder_hidden_states: np.ndarray, + pooled_projections: np.ndarray, + scheduler: FlowMatchEulerDiscreteScheduler, + latent_shape: tuple = (1, 16, 64, 64), + seed: int = 42, +) -> torch.Tensor: + """Run the denoising loop using the ONNX transformer.""" + sess = ort.InferenceSession(os.path.join(onnx_dir, "transformer", "model.onnx")) + + torch.manual_seed(seed) + latents = torch.randn(latent_shape) + + for i, t in enumerate(scheduler.timesteps): + noise_pred = sess.run( + None, + { + "sample": latents.numpy(), + "timestep": np.array([t.item()], dtype=np.int64), + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_projections, + }, + )[0] + latents = scheduler.step(torch.from_numpy(noise_pred), t, latents, return_dict=False)[0] + if i % 7 == 0: + print(f" Step {i}/{len(scheduler.timesteps)}, t={t.item():.1f}") + + return latents + + +def decode_latents(latents: torch.Tensor, onnx_dir: str) -> np.ndarray: + """Decode latents to image using the ONNX VAE decoder.""" + sess = ort.InferenceSession(os.path.join(onnx_dir, "vae_decoder", "model.onnx")) + + # SD3 VAE scaling: latents / scaling_factor + shift_factor + # SD3 defaults: scaling_factor=1.5305, shift_factor=0.0609 + scaling_factor = 1.5305 + shift_factor = 0.0609 + latents_scaled = latents / scaling_factor + shift_factor + + output = sess.run(None, {"latent_sample": latents_scaled.numpy()})[0] + # output: [1, 3, H, W] in [-1, 1] + image = (output / 2 + 0.5).clip(0, 1) + image = np.transpose(image[0], (1, 2, 0)) # [H, W, 3] + return (image * 255).astype(np.uint8) + + +def main(): + parser = argparse.ArgumentParser(description="SD3 all-ONNX inference") + parser.add_argument("--prompt", 
default="A photo of a cat sitting on a windowsill") + parser.add_argument("--steps", type=int, default=28) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--output", default="sd3_output.png") + parser.add_argument("--model_id", default=MODEL_ID) + parser.add_argument("--onnx_dir", default=ONNX_DIR) + args = parser.parse_args() + + # Verify exported model exists + transformer_path = os.path.join(args.onnx_dir, "transformer", "model.onnx") + if not os.path.exists(transformer_path): + print(f"Error: ONNX model not found at {args.onnx_dir}/") + print("Run: olive capture-onnx-graph --model_name_or_path " + "stabilityai/stable-diffusion-3-medium-diffusers " + "--use_mobius_builder --output_path exported_sd3_full2") + return + + print(f"Prompt: {args.prompt}") + print(f"Steps: {args.steps}, Seed: {args.seed}") + print(f"ONNX dir: {args.onnx_dir}") + + print("\n1. Encoding text (ONNX CLIP-L + CLIP-G + T5-XXL)...") + encoder_hidden_states, pooled_projections = encode_text(args.prompt, args.onnx_dir, args.model_id) + print(f" encoder_hidden_states: {encoder_hidden_states.shape}") + print(f" pooled_projections: {pooled_projections.shape}") + + print("\n2. Denoising (ONNX SD3 transformer)...") + scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler") + scheduler.set_timesteps(args.steps) + latents = denoise(args.onnx_dir, encoder_hidden_states, pooled_projections, scheduler, seed=args.seed) + + print("\n3. 
Decoding latents (ONNX VAE decoder)...") + image = decode_latents(latents, args.onnx_dir) + Image.fromarray(image).save(args.output) + print(f"\nSaved: {args.output} ({image.shape[1]}x{image.shape[0]})") + + +if __name__ == "__main__": + main() diff --git a/multi_comp_recipe/sd3_optimize_components.json b/multi_comp_recipe/sd3_optimize_components.json new file mode 100644 index 0000000000..8551d5325f --- /dev/null +++ b/multi_comp_recipe/sd3_optimize_components.json @@ -0,0 +1,90 @@ +{ + "input_model": { "type": "CompositeModel", "config": { "model_path": "exported_pkg" } }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] + } + }, + "passes": { + "optimize_transformer": { + "type": "OrtTransformersOptimization", + "model_type": "unet", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": [ "RandomNormalLike" ] + }, + "optimize_vae": { + "type": "OrtTransformersOptimization", + "model_type": "vae", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "use_multi_head_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": 
true, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": true, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": true, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "group_norm_channels_last": false + }, + "force_fp32_ops": [ "RandomNormalLike" ], + "force_fp16_inputs": { "GroupNorm": [ 0, 1, 2 ] } + } + }, + "engine": { "host": "local_system", "target": "local_system", "evaluate_input_model": false, "cache_dir": "cache" }, + "builds": { + "transformer": { + "components": [ "transformer" ], + "pipeline": [ "optimize_transformer" ], + "output_dir": "out/transformer" + }, + "vae_encoder": { + "components": [ "vae_encoder" ], + "pipeline": [ "optimize_vae" ], + "output_dir": "out/vae_encoder" + }, + "vae_decoder": { + "components": [ "vae_decoder" ], + "pipeline": [ "optimize_vae" ], + "output_dir": "out/vae_decoder" + } + } +} diff --git a/multi_comp_recipe/vlm_inference.py b/multi_comp_recipe/vlm_inference.py new file mode 100644 index 0000000000..17bcbd9ee7 --- /dev/null +++ b/multi_comp_recipe/vlm_inference.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +"""VLM (Phi-4-multimodal) inference using ORT GenAI with exported ONNX models. + +Usage: + # Text-only + python vlm_inference.py --prompt "The capital of France is" + + # With image + python vlm_inference.py --prompt "Describe this image." --image photo.jpg + + # Custom model directory + python vlm_inference.py --model_dir exported_vlm_pkg --prompt "What is 2+2?" 
+""" + +import argparse +import os + +import onnxruntime_genai as og + + +def generate_text(model_dir: str, prompt: str, max_new_tokens: int = 128) -> str: + """Run text-only generation.""" + model = og.Model(model_dir) + tokenizer = og.Tokenizer(model) + + input_ids = tokenizer.encode(prompt) + params = og.GeneratorParams(model) + params.set_search_options(max_length=len(input_ids) + max_new_tokens) + + generator = og.Generator(model, params) + generator.append_tokens(input_ids) + + tokenizer_stream = tokenizer.create_stream() + generated = [] + while not generator.is_done(): + generator.generate_next_token() + token = generator.get_next_tokens()[0] + generated.append(token) + print(tokenizer_stream.decode(token), end="", flush=True) + if len(generated) >= max_new_tokens: + break + + print() + del generator + return tokenizer.decode(generated) + + +def generate_with_image(model_dir: str, prompt: str, image_path: str, max_new_tokens: int = 128) -> str: + """Run multimodal generation with image input.""" + model = og.Model(model_dir) + tokenizer = og.Tokenizer(model) + processor = model.create_multimodal_processor() + + images = og.Images.open(image_path) + inputs = processor(prompt, images=images) + + params = og.GeneratorParams(model) + params.set_search_options(max_length=4096) + + generator = og.Generator(model, params) + generator.set_inputs(inputs) + + tokenizer_stream = tokenizer.create_stream() + generated = [] + while not generator.is_done(): + generator.generate_next_token() + token = generator.get_next_tokens()[0] + generated.append(token) + print(tokenizer_stream.decode(token), end="", flush=True) + if len(generated) >= max_new_tokens: + break + + print() + del generator + return tokenizer.decode(generated) + + +def main(): + parser = argparse.ArgumentParser(description="VLM inference with ORT GenAI") + parser.add_argument("--prompt", default="The capital of France is") + parser.add_argument("--image", default=None, help="Path to an image file for vision 
input") + parser.add_argument("--max_new_tokens", type=int, default=128) + parser.add_argument("--model_dir", default="exported_vlm_pkg") + args = parser.parse_args() + + genai_config = os.path.join(args.model_dir, "genai_config.json") + if not os.path.exists(genai_config): + print(f"Error: genai_config.json not found in {args.model_dir}") + print("Run export first:") + print( + " olive capture-onnx-graph --model_name_or_path microsoft/Phi-4-multimodal-instruct " + "--use_mobius_builder --trust_remote_code --output_path exported_vlm_pkg" + ) + print("Then create genai_config.json and save tokenizer (see README.md).") + return + + print(f"Model: {args.model_dir}") + print(f"Prompt: {args.prompt}") + if args.image: + print(f"Image: {args.image}") + print("-" * 50) + + if args.image: + output = generate_with_image(args.model_dir, args.prompt, args.image, args.max_new_tokens) + else: + output = generate_text(args.model_dir, args.prompt, args.max_new_tokens) + + print("-" * 50) + print(f"Output: {output}") + + +if __name__ == "__main__": + main() diff --git a/multi_comp_recipe/vlm_optimize_components.json b/multi_comp_recipe/vlm_optimize_components.json new file mode 100644 index 0000000000..89e4d47d51 --- /dev/null +++ b/multi_comp_recipe/vlm_optimize_components.json @@ -0,0 +1,20 @@ +{ + "input_model": { "type": "CompositeModel", "config": { "model_path": "exported_vlm_pkg" } }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] + } + }, + "passes": { "to_fp16": { "type": "OnnxFloatToFloat16" }, "dynamic_quant": { "type": "OnnxDynamicQuantization" } }, + "engine": { "host": "local_system", "target": "local_system", "evaluate_input_model": false, "cache_dir": "cache" }, + "builds": { + "decoder": { "components": [ "decoder" ], "pipeline": [ "dynamic_quant" ], "output_dir": "out/decoder" }, + "vision_encoder": { + "components": [ "vision_encoder" ], + "pipeline": [ 
"to_fp16" ], + "output_dir": "out/vision_encoder" + }, + "embedding": { "components": [ "embedding" ], "pipeline": [ "to_fp16" ], "output_dir": "out/embedding" } + } +} diff --git a/olive/cli/api.py b/olive/cli/api.py index cae2963264..fbbcdcb90f 100644 --- a/olive/cli/api.py +++ b/olive/cli/api.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- import inspect from argparse import ArgumentParser, Namespace -from typing import Any +from typing import Any, Union from olive.cli.benchmark import BenchmarkCommand from olive.cli.capture_onnx import CaptureOnnxGraphCommand @@ -300,7 +300,7 @@ def benchmark(model_name_or_path: str, **kwargs) -> WorkflowOutput: return _run_unified_command(BenchmarkCommand, **kwargs) -def run(run_config: str, **kwargs) -> WorkflowOutput: +def run(run_config: str, **kwargs) -> Union[WorkflowOutput, dict[str, WorkflowOutput]]: """Run a workflow. Args: @@ -308,7 +308,8 @@ def run(run_config: str, **kwargs) -> WorkflowOutput: **kwargs: All other CLI arguments supported by extract-adapters command Returns: - WorkflowOutput: Contains tuning results + WorkflowOutput for a single-pipeline workflow, or a ``dict[str, WorkflowOutput]`` keyed by build + name when the config declares ``builds``. """ kwargs["run_config"] = run_config diff --git a/olive/cli/base.py b/olive/cli/base.py index 50f1e55bfd..182f73e119 100644 --- a/olive/cli/base.py +++ b/olive/cli/base.py @@ -151,8 +151,16 @@ def _run_workflow(self): workflow_output = olive_run(run_config) if getattr(self.args, "test", None) not in (None, False): mark_test_output_path(self.args.output_path) - save_discrepancy_check_results(workflow_output, self.args.output_path) - if not workflow_output.has_output_model(): + if not isinstance(workflow_output, dict): + save_discrepancy_check_results(workflow_output, self.args.output_path) + if isinstance(workflow_output, dict): + # `builds` workflows return one WorkflowOutput per build keyed by build name. 
+ for build_name, build_output in workflow_output.items(): + if build_output is None or not build_output.has_output_model(): + print(f"Build {build_name!r}: no output model produced. Please check the log for details.") + else: + print(f"Build {build_name!r}: model is saved under {self.args.output_path}") + elif not workflow_output.has_output_model(): print("No output model produced. Please check the log for details.") else: print(f"Model is saved at {self.args.output_path}") diff --git a/olive/cli/capture_onnx.py b/olive/cli/capture_onnx.py index 262a29ab08..fb067f893b 100644 --- a/olive/cli/capture_onnx.py +++ b/olive/cli/capture_onnx.py @@ -199,12 +199,14 @@ def run(self): def _get_run_config(self, tempdir: str) -> dict: config = deepcopy(TEMPLATE) + is_diffusers = is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False + if self.args.use_mobius_builder: - input_model_config = get_input_model_config(self.args) + if is_diffusers: + input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) + else: + input_model_config = get_input_model_config(self.args) else: - is_diffusers = ( - is_valid_diffusers_model(self.args.model_name_or_path) if self.args.model_name_or_path else False - ) if is_diffusers: input_model_config = get_diffusers_input_model(self.args, self.args.model_name_or_path) else: diff --git a/olive/common/mobius_utils.py b/olive/common/mobius_utils.py new file mode 100644 index 0000000000..e971029580 --- /dev/null +++ b/olive/common/mobius_utils.py @@ -0,0 +1,101 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Helpers for obtaining a model's component plan from mobius. + +Mobius owns the per-architecture knowledge of which components a model exposes (e.g. 
a VLM's +``decoder`` / ``vision_encoder`` / ``embedding``), how each maps back to a submodule, and the role +of each component. Olive consumes that plan to drive per-component builds without re-implementing the +architecture-specific logic. + +``mobius-ai`` is imported lazily so Olive keeps working when it is not installed; only the code paths +that actually need a component plan for a Hugging Face model require it. +""" + +import logging +from dataclasses import dataclass, field +from typing import Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class ComponentInfo: + """A single component returned by a component source. + + Attributes: + name: Stable, user-facing component name used in ``builds.components``. + kind: Component role/kind (e.g. ``decoder``, ``vision_encoder``). Optional; used for + pass/component compatibility validation. + source_path: Dotted submodule path locating the component inside the full model + (e.g. ``model.language_model``). Used to slice the component for PyTorch-stage passes. + + """ + + name: str + kind: Optional[str] = None + source_path: Optional[str] = None + metadata: dict = field(default_factory=dict) + + @classmethod + def coerce(cls, data: "ComponentInfo | dict | object") -> "ComponentInfo": + """Normalize a component from any source into an Olive :class:`ComponentInfo`. + + Accepts an existing Olive ``ComponentInfo`` (returned as-is), a mapping following the + component contract, or a duck-typed object exposing ``name``/``kind``/``source_path`` + attributes (e.g. a ``mobius`` ``ComponentInfo`` dataclass). 
+ """ + if isinstance(data, cls): + return data + if isinstance(data, dict): + source = data.get("source") or {} + return cls( + name=data["name"], + kind=data.get("kind"), + source_path=data.get("source_path") or source.get("path"), + metadata={k: v for k, v in data.items() if k not in ("name", "kind", "source", "source_path")}, + ) + return cls( + name=data.name, + kind=getattr(data, "kind", None), + source_path=getattr(data, "source_path", None), + ) + + +def inspect_components( + model_name_or_path: str, + task: Optional[str] = None, + trust_remote_code: bool = False, +) -> list[ComponentInfo]: + """Return the component plan for a Hugging Face model by querying mobius. + + Args: + model_name_or_path: Hugging Face model id or local path. + task: Optional task hint passed to mobius. + trust_remote_code: Whether to trust remote code when mobius loads the config. + + Returns: + A list of :class:`ComponentInfo`. An empty list means the model is single-component + (no separable components). + + Raises: + ImportError: If ``mobius-ai`` is not installed. + + """ + try: + import mobius + except ImportError as exc: + raise ImportError( + "mobius-ai is required to resolve model components for a Hugging Face model. 
" + "Install with: pip install mobius-ai" + ) from exc + + raw_components = mobius.inspect_components( + model_name_or_path, + task=task, + trust_remote_code=trust_remote_code, + ) + components = [ComponentInfo.coerce(c) for c in raw_components] + logger.debug("mobius.inspect_components(%s) -> %s", model_name_or_path, [c.name for c in components]) + return components diff --git a/olive/engine/config.py b/olive/engine/config.py index fabef33eb3..22e323ace9 100644 --- a/olive/engine/config.py +++ b/olive/engine/config.py @@ -19,6 +19,9 @@ # list of all pruned configs PRUNED_CONFIGS = (FAILED_CONFIG, INVALID_CONFIG) +# sentinel key inside `builds` that holds partial defaults applied to all sibling builds +BUILD_DEFAULT_KEY = "_default" + class EngineConfig(ConfigBase): model_config = ConfigDict(extra="forbid") @@ -90,3 +93,88 @@ class RunPassConfig(AbstractPassConfig): " If not provided, use the engine's evaluator." ), ) + + +class BuildConfigPartial(ConfigBase): + """Partial build configuration. + + All fields are optional. Used as the schema for the ``_default`` sentinel inside ``builds`` + and as the unmerged form of every sibling entry before defaults are applied. + """ + + model_config = ConfigDict(extra="forbid") + + components: Optional[list[str]] = Field( + None, + description=( + "Names of input model components this build operates on. Each name must match an entry in the input" + " model's ``model_component_names``. When omitted, the build runs on the full input model." + " When a single name is given, the build receives the unwrapped component handler instead of a one-element" + " composite." + ), + ) + pipeline: Optional[list[str]] = Field( + None, + description=( + "Ordered list of pass names (referencing entries in the top-level ``passes`` dict) that form this build's" + " pipeline." 
+ ), + ) + output_dir: Optional[str] = Field( + None, + description="Directory where this build's final model artifacts get saved.", + ) + host: Optional[Union[SystemConfig, str]] = Field( + None, + description=( + "Host system override for this build. If a string, must refer to a system config under ``systems``." + " If omitted, the engine's host is used." + ), + ) + target: Optional[Union[SystemConfig, str]] = Field( + None, + description=( + "Target system override for this build. If a string, must refer to a system config under ``systems``." + " If omitted, the engine's target is used." + ), + ) + evaluator: Optional[Union[OliveEvaluatorConfig, str]] = Field( + None, + description=( + "Evaluator override for this build. If a string, must refer to an evaluator config under ``evaluators``." + " If omitted, the engine's evaluator is used." + ), + ) + search_strategy: Optional[Union[SearchStrategyConfig, bool]] = Field( + None, + description="Search strategy override for this build. If omitted, the engine's search strategy is used.", + ) + + +class BuildConfig(BuildConfigPartial): + """Full build configuration after defaults have been merged. + + ``pipeline`` and ``output_dir`` are required post-merge; the other fields remain optional and + fall back to the engine-level configuration when not provided. + """ + + pipeline: list[str] = Field( + ..., + description=( + "Ordered list of pass names (referencing entries in the top-level ``passes`` dict) that form this build's" + " pipeline." + ), + ) + output_dir: str = Field( + ..., + description="Directory where this build's final model artifacts get saved.", + ) + + +def merge_build_default(default_partial: dict, sibling: dict) -> dict: + """Merge ``_default`` partial values into a sibling build dict. + + Sibling values fully override default values (no deep merge). Returns a new dict; inputs are + not mutated. 
+ """ + return {**{k: v for k, v in default_partial.items() if v is not None}, **sibling} diff --git a/olive/model/config/model_config.py b/olive/model/config/model_config.py index d6eac90043..7afc5da41f 100644 --- a/olive/model/config/model_config.py +++ b/olive/model/config/model_config.py @@ -5,6 +5,7 @@ import logging from copy import deepcopy from pathlib import Path +from typing import Optional from pydantic import Field, field_validator @@ -43,6 +44,180 @@ def create_model(self): cls = get_model_handler(self.type) return cls(**self.config) + def get_components(self) -> Optional[list[str]]: + """Return the component names that builds can target, or None for a single-component model. + + * ``CompositeModel`` -> its configured ``model_component_names``, or, when the config only + points at a directory of per-component ONNX subfolders, the discovered subfolder names. + * ``HfModel`` -> the components reported by mobius (``mobius.inspect_components``), or None + when the model is single-component / mobius reports nothing. + * Anything else -> ``None`` (single-component model). 
+ """ + if self.type == "compositemodel": + names = list(self.config.get("model_component_names") or []) + if names: + return names + return [name for name, _ in self._discover_composite_components()] + if self.type == "hfmodel": + return self._get_hf_components() or None + if self.type == "diffusersmodel": + return self._get_diffusers_components() or None + return None + + def _discover_composite_components(self) -> list[tuple[str, str]]: + """Discover ``(name, onnx_relpath)`` from a directory-based composite, or empty list.""" + from olive.model.utils.onnx_utils import discover_onnx_components + + model_path = self.config.get("model_path") + if not model_path or not Path(str(model_path)).is_dir(): + return [] + return discover_onnx_components(str(model_path)) + + def _get_hf_components(self) -> list[str]: + """Return component names for an HfModel by querying mobius, or empty list.""" + from olive.common.mobius_utils import inspect_components + + model_path = self.config.get("model_path") + if not model_path: + return [] + load_kwargs = self.config.get("load_kwargs") or {} + components = inspect_components( + model_path, + task=self.config.get("task"), + trust_remote_code=bool(load_kwargs.get("trust_remote_code")), + ) + return [c.name for c in components] + + def _get_diffusers_components(self) -> list[str]: + """Return the exportable component names for a DiffusersModel, or empty list. + + Reads the variant's component layout from the handler (which only inspects config files, + not weights). When the config is already scoped to a subset via ``components``, returns that + subset; the top-level input model carries no such scope, so ``builds`` sees the full set. 
+ """ + model_path = self.config.get("model_path") + if not model_path: + return [] + handler = self.create_model() + return [str(c) for c in handler.get_exportable_components()] + + def select_components(self, names: list[str]) -> "ModelConfig": + """Return a new ModelConfig holding only the named components. + + * ``CompositeModel`` -> the unwrapped child component ``ModelConfig`` when exactly one name + is given, otherwise a new ``CompositeModel`` ``ModelConfig`` containing the subset (in the + requested order). Directory-based composites are discovered first. + * ``HfModel`` -> a copy of this config tagged with the selected component's submodule path + (from the mobius plan) in ``model_attributes['component_source_path']``, so a PyTorch-stage + pass can slice that submodule while the saved output stays a complete HF directory. + + Raises ``ValueError`` if any name is missing from the available components. + """ + if self.type == "hfmodel": + return self._select_hf_component(names) + if self.type == "diffusersmodel": + return self._select_diffusers_components(names) + if self.type != "compositemodel": + raise ValueError( + f"select_components is only supported on CompositeModel or HfModel input configs " + f"(got type {self.type!r})." + ) + if not names: + raise ValueError("select_components requires a non-empty list of names.") + component_names = list(self.config.get("model_component_names") or []) + model_components = list(self.config.get("model_components") or []) + if not component_names: + discovered = self._discover_composite_components() + if not discovered: + raise ValueError( + "CompositeModel config has no model_components and model_path is not a directory of " + "per-component ONNX subfolders." 
+ ) + component_names = [name for name, _ in discovered] + model_path = self.config.get("model_path") + model_components = [ + {"type": "ONNXModel", "config": {"model_path": str(model_path), "onnx_file_name": onnx_rel}} + for _, onnx_rel in discovered + ] + if len(component_names) != len(model_components): + raise ValueError("CompositeModel config has mismatched model_components and model_component_names lengths.") + missing = [n for n in names if n not in component_names] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. Available components: {list(component_names)}.") + component_map = dict(zip(component_names, model_components)) + selected = [deepcopy(component_map[n]) for n in names] + if len(selected) == 1: + # Unwrap to the child handler config, inheriting the composite's shared model_attributes so a + # single-component build keeps parent context (matches CompositeModelHandler.select_components). + child = selected[0] + parent_attributes = self.config.get("model_attributes") or {} + if isinstance(child, ModelConfig): + merged = {**parent_attributes, **(child.config.get("model_attributes") or {})} + if merged: + child.config["model_attributes"] = merged + return child + child_config = dict(child.get("config") or {}) + merged = {**parent_attributes, **(child_config.get("model_attributes") or {})} + if merged: + child_config["model_attributes"] = merged + return ModelConfig.model_validate({**child, "config": child_config}) + new_config = { + **{k: v for k, v in self.config.items() if k not in ("model_components", "model_component_names")}, + "model_components": selected, + "model_component_names": list(names), + } + return ModelConfig(type=self.type, config=new_config) + + def _select_hf_component(self, names: list[str]) -> "ModelConfig": + """Select a single Hf component (by mobius source path) for PyTorch-stage optimization.""" + if not names: + raise ValueError("select_components requires a non-empty list of names.") + if len(names) != 
1: + raise ValueError( + f"HfModel components must be optimized one at a time; got {names}. Use a separate build per component." + ) + from olive.common.mobius_utils import inspect_components + + model_path = self.config.get("model_path") + load_kwargs = self.config.get("load_kwargs") or {} + components = inspect_components( + model_path, + task=self.config.get("task"), + trust_remote_code=bool(load_kwargs.get("trust_remote_code")), + ) + by_name = {c.name: c for c in components} + missing = [n for n in names if n not in by_name] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. Available components: {list(by_name)}.") + component = by_name[names[0]] + + new_config = deepcopy(self.config) + attributes = dict(new_config.get("model_attributes") or {}) + attributes["component_name"] = component.name + if component.kind is not None: + attributes["component_kind"] = component.kind + if component.source_path is not None: + attributes["component_source_path"] = component.source_path + new_config["model_attributes"] = attributes + return ModelConfig(type=self.type, config=new_config) + + def _select_diffusers_components(self, names: list[str]) -> "ModelConfig": + """Scope a DiffusersModel to the named exportable components. + + Returns a copy of the config with ``components`` set to the requested subset (in the + variant's canonical order). The scoped handler exports only those components, so a build's + conversion pass produces just that component's ONNX while subsequent passes map over it. + """ + if not names: + raise ValueError("select_components requires a non-empty list of names.") + available = self._get_diffusers_components() + missing = [n for n in names if n not in available] + if missing: + raise ValueError(f"Unknown component name(s) {missing}. 
Available components: {available}.") + new_config = deepcopy(self.config) + new_config["components"] = [name for name in available if name in set(names)] + return ModelConfig(type=self.type, config=new_config) + def get_model_id(self): for v in self.config.values(): if callable(v): diff --git a/olive/model/handler/composite.py b/olive/model/handler/composite.py index c52bd1a315..d41ad8e7e5 100644 --- a/olive/model/handler/composite.py +++ b/olive/model/handler/composite.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging +from pathlib import Path from typing import Any, Optional, Union from olive.common.config_utils import serialize_to_json, validate_config @@ -32,8 +33,8 @@ class CompositeModelHandler(OliveModelHandler): def __init__( self, - model_components: list[Union[OliveModelHandler, dict[str, Any]]], - model_component_names: list[str], + model_components: Optional[list[Union[OliveModelHandler, dict[str, Any]]]] = None, + model_component_names: Optional[list[str]] = None, model_path: OLIVE_RESOURCE_ANNOTATIONS = None, model_attributes: Optional[dict[str, Any]] = None, ): @@ -43,6 +44,19 @@ def __init__( model_file_format=ModelFileFormat.COMPOSITE_MODEL, model_attributes=model_attributes, ) + + # When components are not provided but model_path is a directory of per-component ONNX + # subfolders (e.g. a mobius export package), discover them using the subfolder names as + # component names. This supports loading an exported package directly as a CompositeModel. + if not model_components: + discovered = self._discover_components(model_path) + if not discovered: + raise ValueError( + "CompositeModelHandler requires model_components, or a model_path directory containing " + "per-component ONNX subfolders." 
+ ) + model_components, model_component_names = discovered + self._model_components = [ validate_config(m, ModelConfig).create_model() if isinstance(m, dict) else m for m in model_components ] @@ -53,6 +67,29 @@ def __init__( assert len(self._model_components) == len(model_component_names), "Number of components and names must match" self.model_component_names = model_component_names + @staticmethod + def _discover_components( + model_path: OLIVE_RESOURCE_ANNOTATIONS, + ) -> Optional[tuple[list[dict[str, Any]], list[str]]]: + """Build component configs from a directory of per-component ONNX subfolders. + + Returns ``(model_components, model_component_names)`` or ``None`` if discovery is not + applicable (model_path is not a local directory of component subfolders). + """ + from olive.model.utils.onnx_utils import discover_onnx_components + + if not model_path or not Path(str(model_path)).is_dir(): + return None + discovered = discover_onnx_components(str(model_path)) + if not discovered: + return None + names = [name for name, _ in discovered] + components = [ + {"type": "ONNXModel", "config": {"model_path": str(model_path), "onnx_file_name": onnx_rel}} + for _, onnx_rel in discovered + ] + return components, names + @property def model_components(self): for m in self._model_components: @@ -77,6 +114,33 @@ def to_json(self, check_object: bool = False): def get_model_components(self) -> list[tuple[str, OliveModelHandler]]: return zip(self.model_component_names, self.model_components) + def select_components(self, names: list[str]) -> "OliveModelHandler": + """Return a handler holding only the named components. + + Returns the unwrapped child handler when exactly one name is given; returns a new + ``CompositeModelHandler`` containing the subset (in the requested order) otherwise. + Raises ``ValueError`` if any name is missing from ``model_component_names``. 
+ """ + if not names: + raise ValueError("select_components requires a non-empty list of names.") + missing = [n for n in names if n not in self.model_component_names] + if missing: + raise ValueError( + f"Unknown component name(s) {missing}. Available components: {list(self.model_component_names)}." + ) + component_map = dict(zip(self.model_component_names, self._model_components)) + selected = [component_map[n] for n in names] + if len(selected) == 1: + child = selected[0] + child.model_attributes = {**(self.model_attributes or {}), **(child.model_attributes or {})} + return child + return CompositeModelHandler( + model_components=selected, + model_component_names=list(names), + model_path=self.model_path, + model_attributes=self.model_attributes, + ) + def load_model(self, rank: int = None, cache_model: bool = True): raise NotImplementedError diff --git a/olive/model/handler/diffusers.py b/olive/model/handler/diffusers.py index e7f13f4f9b..da119f7ba4 100644 --- a/olive/model/handler/diffusers.py +++ b/olive/model/handler/diffusers.py @@ -34,7 +34,7 @@ class DiffusersModelHandler(OliveModelHandler): """ resource_keys: tuple[str, ...] = ("model_path", "adapter_path") - json_config_keys: tuple[str, ...] = ("model_variant", "load_kwargs") + json_config_keys: tuple[str, ...] = ("model_variant", "load_kwargs", "components") def __init__( self, @@ -42,6 +42,7 @@ def __init__( model_variant: Union[str, DiffusersModelVariant] = DiffusersModelVariant.AUTO, load_kwargs: Optional[dict[str, Any]] = None, adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None, + components: Optional[list[str]] = None, model_attributes: Optional[dict[str, Any]] = None, ): """Initialize DiffusersModelHandler. @@ -51,6 +52,10 @@ def __init__( model_variant: Model variant: 'sd15', 'sdxl', 'flux', or 'auto' for auto-detection. load_kwargs: Additional kwargs for loading the model (e.g., torch_dtype, variant). adapter_path: Path to LoRA adapter weights. 
def discover_onnx_components(model_dir: str) -> list[tuple[str, str]]:
    """Discover per-component ONNX subfolders in a directory.

    A multi-component ONNX package (e.g. produced by ``capture-onnx-graph --use_mobius_builder``)
    lays out each component in its own subfolder, with a ``model.onnx`` inside:

        model_dir/decoder/model.onnx
        model_dir/vision_encoder/model.onnx
        model_dir/embedding/model.onnx

    A subfolder is treated as a component when it holds exactly one ``.onnx`` file (any name),
    or, if it holds several, when one of them is named ``model.onnx``. Only regular files are
    considered; a directory whose name ends in ``.onnx`` is never reported as a model file.

    Args:
        model_dir: Directory that contains one subfolder per component.

    Returns:
        A list of ``(component_name, onnx_file_relpath)`` tuples sorted by component name, where
        ``component_name`` is the subfolder name and ``onnx_file_relpath`` is the path to the
        component's ``.onnx`` file relative to ``model_dir``. Empty if ``model_dir`` is not a
        directory or no component subfolders are found.

    """
    model_dir_path = Path(model_dir)
    if not model_dir_path.is_dir():
        return []

    components: list[tuple[str, str]] = []
    for sub_dir in sorted(p for p in model_dir_path.iterdir() if p.is_dir()):
        # glob() matches directories too, so keep regular files only.
        onnx_files = [candidate for candidate in sub_dir.glob("*.onnx") if candidate.is_file()]
        if len(onnx_files) == 1:
            components.append((sub_dir.name, f"{sub_dir.name}/{onnx_files[0].name}"))
        elif (sub_dir / "model.onnx").is_file():
            # several .onnx files: fall back to the conventional file name
            components.append((sub_dir.name, f"{sub_dir.name}/model.onnx"))
    return components
diff --git a/olive/passes/onnx/mobius_model_builder.py b/olive/passes/onnx/mobius_model_builder.py index e2d10fc0c4..5ad9033c93 100644 --- a/olive/passes/onnx/mobius_model_builder.py +++ b/olive/passes/onnx/mobius_model_builder.py @@ -15,6 +15,7 @@ from olive.hardware.constants import EXECUTION_PROVIDER_TO_MOBIUS_EP, ExecutionProvider from olive.model import HfModelHandler, ONNXModelHandler from olive.model.handler.composite import CompositeModelHandler +from olive.model.handler.diffusers import DiffusersModelHandler from olive.passes import Pass from olive.passes.olive_pass import PassConfigParam @@ -95,7 +96,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon def _run_for_config( self, - model: HfModelHandler, + model: HfModelHandler | DiffusersModelHandler, config: type[BasePassConfig], output_model_path: str, ) -> ONNXModelHandler | CompositeModelHandler: @@ -106,8 +107,10 @@ def _run_for_config( "mobius-ai is required to run MobiusBuilder. Install with: pip install mobius-ai" ) from exc - if not isinstance(model, HfModelHandler): - raise ValueError(f"MobiusBuilder requires an HfModelHandler input, got {type(model).__name__}.") + if not isinstance(model, (HfModelHandler, DiffusersModelHandler)): + raise ValueError( + f"MobiusBuilder requires an HfModelHandler or DiffusersModelHandler input, got {type(model).__name__}." + ) # Map Olive EP to mobius EP. If unsupported/unknown, fall back to mobius default EP. requested_ep = self.accelerator_spec.execution_provider @@ -121,10 +124,12 @@ def _run_for_config( ) dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32") - model_id: str = model.model_name_or_path + model_id: str = model.model_name_or_path if isinstance(model, HfModelHandler) else str(model.model_path) # Read trust_remote_code from the model's HuggingFace load kwargs. 
- trust_remote_code: bool = model.get_load_kwargs().get("trust_remote_code", False) + trust_remote_code: bool = ( + model.get_load_kwargs().get("trust_remote_code", False) if isinstance(model, HfModelHandler) else False + ) logger.info( "MobiusBuilder: building '%s' (ep=%s, dtype=%s)", @@ -187,6 +192,7 @@ def _run_for_config( # sidecar files (genai_config.json, tokenizer.json, image_processor.json, # audio_feature_extraction.json) at output_dir root. components = [] + component_paths: list[tuple[str, str]] = [] for key in package_keys: component_dir = output_dir / key onnx_path = component_dir / "model.onnx" @@ -195,6 +201,7 @@ def _run_for_config( f"MobiusBuilder: expected output file not found: {onnx_path}. " f"mobius.build() may have failed silently for component '{key}'." ) + component_paths.append((key, str(onnx_path))) # Per-component additional files: only files that live inside the # component's own directory. Shared sidecars (genai_config, tokenizer, # image_processor) are attached to the composite handler below so @@ -214,6 +221,10 @@ def _run_for_config( ) ) + logger.info("MobiusBuilder: exported multi-component model with %d components:", len(component_paths)) + for component_name, component_path in component_paths: + logger.info("MobiusBuilder: component '%s' -> %s", component_name, component_path) + return CompositeModelHandler( model_components=components, model_component_names=package_keys, diff --git a/olive/workflows/run/config.py b/olive/workflows/run/config.py index ff641fd728..da28d3042b 100644 --- a/olive/workflows/run/config.py +++ b/olive/workflows/run/config.py @@ -15,7 +15,14 @@ from olive.data.container.dummy_data_container import TRANSFORMER_DUMMY_DATA_CONTAINER from olive.data.container.huggingface_container import HuggingfaceContainer from olive.engine import Engine -from olive.engine.config import EngineConfig, RunPassConfig +from olive.engine.config import ( + BUILD_DEFAULT_KEY, + BuildConfig, + BuildConfigPartial, + EngineConfig, + 
@model_validator(mode="before")
@classmethod
def expand_build_defaults(cls, values):
    """Merge ``builds._default`` into every sibling build and drop the reserved key.

    Runs before field validation on the raw input. The ``_default`` entry is validated against
    ``BuildConfigPartial`` and merged into each sibling that is still a plain dict via
    ``merge_build_default``; siblings of any other type are left as they are.

    The caller's input is never modified: when a ``_default`` key is present, the returned
    mapping and its ``builds`` mapping are shallow copies.

    Returns:
        ``{}`` when ``values`` is ``None``; ``values`` unchanged when it is not a dict, has no
        dict-valued non-empty ``builds``, or ``builds`` has no ``_default`` key; otherwise a
        copy of ``values`` whose ``builds`` has ``_default`` removed and defaults applied.

    Raises:
        ValueError: If the validated default itself contains the reserved ``_default`` key.
    """
    if values is None:
        return {}
    if not isinstance(values, dict):
        return values
    builds = values.get("builds")
    if not builds or not isinstance(builds, dict):
        return values
    if BUILD_DEFAULT_KEY not in builds:
        return values

    # Shallow-copy before popping/rewriting so the raw config dict the caller passed in
    # (which may be reused or serialized afterwards) is left untouched.
    builds = dict(builds)
    values = {**values, "builds": builds}

    default_raw = builds.pop(BUILD_DEFAULT_KEY)
    if default_raw is None:
        return values
    # validate default as partial schema (catches unknown keys / wrong types early)
    default_partial = BuildConfigPartial.model_validate(default_raw).model_dump(exclude_none=True)
    if not default_partial:
        # `_default: {}` is a no-op
        return values
    if BUILD_DEFAULT_KEY in default_partial:
        raise ValueError(f"Nested {BUILD_DEFAULT_KEY!r} inside builds._default is not allowed.")
    for name, sibling in list(builds.items()):
        if not isinstance(sibling, dict):
            continue
        builds[name] = merge_build_default(default_partial, sibling)
    return values
+ ) + return self + @field_validator("data_configs", mode="before") @classmethod def validate_data_config_names(cls, v): diff --git a/olive/workflows/run/run.py b/olive/workflows/run/run.py index 89100e1c1c..0da23c78fc 100644 --- a/olive/workflows/run/run.py +++ b/olive/workflows/run/run.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging +from collections import OrderedDict from copy import deepcopy from pathlib import Path from typing import TYPE_CHECKING, Optional, Union @@ -13,10 +14,10 @@ from olive.package_config import OlivePackageConfig from olive.systems.accelerator_creator import create_accelerator from olive.systems.common import SystemType -from olive.workflows.run.config import RunConfig +from olive.workflows.run.config import RunConfig, RunEngineConfig if TYPE_CHECKING: - from olive.engine.config import RunPassConfig + from olive.engine.config import BuildConfig, RunPassConfig logger = logging.getLogger(__name__) @@ -111,21 +112,16 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig): logger.warning("ORT log severity level configuration ignored since the module isn't installed.") olive_config = run_config.to_json() + + if run_config.builds: + return _run_builds(package_config, run_config, olive_config) + engine = run_config.engine.create_engine(package_config, workflow_id) engine.cache.cache_olive_config(olive_config) # check if target is not used used_passes_configs = get_used_passes_configs(run_config) - target_not_used = ( - # no evaluator given (also implies no search) - engine.evaluator_config is None - # no pass specific evaluator - # no pass needs to run on target - and all( - pass_config.evaluator is None and not get_run_on_target(package_config, pass_config) - for pass_config in used_passes_configs - ) - ) + target_not_used = _compute_target_not_used(package_config, engine.evaluator_config, used_passes_configs) is_ep_required = 
is_execution_provider_required(run_config, package_config) accelerator_spec = create_accelerator( @@ -147,6 +143,123 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig): ) +def _run_builds(package_config: OlivePackageConfig, run_config: RunConfig, olive_config: dict) -> dict: + """Run every entry in ``run_config.builds`` as an independent workflow. + + Returns a ``dict[str, WorkflowOutput]`` keyed by build name. Each build gets its own engine, + pipeline subset (from ``passes`` in the order declared by ``pipeline``), input model slice + (via ``select_components`` when ``components`` is set) and host/target/evaluator overrides. + """ + _validate_build_components(run_config) + workflow_id = run_config.workflow_id + outputs: dict = OrderedDict() + for build_name, build in run_config.builds.items(): + logger.info("Running build %s", build_name) + engine_config = _make_build_engine_config(run_config, build) + engine = engine_config.create_engine(package_config, f"{workflow_id}_{build_name}") + engine.cache.cache_olive_config(olive_config) + + pipeline_subset: dict[str, list[RunPassConfig]] = OrderedDict() + for pass_name in build.pipeline: + # deepcopy so each build engine owns its pass configs; Engine.initialize mutates them in place. 
+ pipeline_subset[pass_name] = deepcopy(run_config.passes[pass_name]) + + input_model = run_config.input_model + if build.components: + input_model = input_model.select_components(build.components) + + used_passes_configs = [p for passes in pipeline_subset.values() for p in passes] + target_not_used = _compute_target_not_used(package_config, engine.evaluator_config, used_passes_configs) + is_ep_required = _is_execution_provider_required_for_passes(package_config, used_passes_configs) or ( + engine.evaluator_config is not None + and engine_config.evaluate_input_model + and input_model.type.lower() == "onnxmodel" + ) + accelerator_spec = create_accelerator( + engine.target_config, skip_supported_eps_check=target_not_used, is_ep_required=is_ep_required + ) + + engine.set_input_passes_configs(pipeline_subset) + outputs[build_name] = engine.run( + input_model, + accelerator_spec, + engine_config.packaging_config, + build.output_dir, + engine_config.evaluate_input_model, + engine_config.log_to_file, + engine_config.log_severity_level, + ) + return outputs + + +def _validate_build_components(run_config: RunConfig) -> None: + """Verify ``build.components`` names exist in the input model's selectable components.""" + needs_component_check = any(build.components for build in run_config.builds.values()) + if not needs_component_check: + return + available = run_config.input_model.get_components() + if not available: + bad = [name for name, build in run_config.builds.items() if build.components] + raise ValueError( + f"Builds {bad} declare `components` but the input model exposes no selectable components" + f" (got type {run_config.input_model.type!r})." + ) + for build_name, build in run_config.builds.items(): + if not build.components: + continue + missing = [n for n in build.components if n not in available] + if missing: + raise ValueError( + f"Build {build_name!r} references unknown component(s) {missing}. Available components: {available}." 
+ ) + + +def _make_build_engine_config(run_config: RunConfig, build: "BuildConfig") -> RunEngineConfig: + """Clone the parent engine config and apply this build's host/target/evaluator/search overrides.""" + engine_dump = run_config.engine.model_dump() + systems = run_config.systems or {} + evaluators = run_config.evaluators or {} + if build.host is not None: + engine_dump["host"] = _resolve_build_ref(build.host, systems, "system") + if build.target is not None: + engine_dump["target"] = _resolve_build_ref(build.target, systems, "system") + if build.evaluator is not None: + engine_dump["evaluator"] = _resolve_build_ref(build.evaluator, evaluators, "evaluator") + if build.search_strategy is not None: + # search_strategy can be a bool or a SearchStrategyConfig; pydantic round-trips either form. + ss = build.search_strategy + engine_dump["search_strategy"] = ss if isinstance(ss, bool) else ss.model_dump() + return RunEngineConfig.model_validate(engine_dump) + + +def _resolve_build_ref(value, registry: dict, label: str): + """Resolve a string reference into the named entry; pass through dict/model instances unchanged.""" + if isinstance(value, str): + if value not in registry: + raise ValueError(f"Unknown {label} reference {value!r}. 
Known: {sorted(registry)}.") + entry = registry[value] + return entry.model_dump() if hasattr(entry, "model_dump") else deepcopy(entry) + if hasattr(value, "model_dump"): + return value.model_dump() + return deepcopy(value) + + +def _compute_target_not_used( + package_config: OlivePackageConfig, + evaluator_config, + pass_configs: list["RunPassConfig"], +) -> bool: + return evaluator_config is None and all( + pc.evaluator is None and not get_run_on_target(package_config, pc) for pc in pass_configs + ) + + +def _is_execution_provider_required_for_passes( + package_config: OlivePackageConfig, pass_configs: list["RunPassConfig"] +) -> bool: + return any(package_config.is_onnx_module(pc.type) for pc in pass_configs) + + def run( run_config: Union[str, Path, dict], list_required_packages: bool = False, diff --git a/pyproject.toml b/pyproject.toml index 52a945e7d6..00fcd8b94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -191,3 +191,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator", "pydantic.m "test/**" = ["INP001"] "scripts/**" = ["INP001"] "olive/cli/**" = ["T201"] +"multi_comp_recipe/**" = ["T201"] diff --git a/test/common/test_mobius_utils.py b/test/common/test_mobius_utils.py new file mode 100644 index 0000000000..dbdda9cecd --- /dev/null +++ b/test/common/test_mobius_utils.py @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import dataclasses +import sys +import types + +import pytest + +from olive.common.mobius_utils import ComponentInfo, inspect_components + + +@dataclasses.dataclass(frozen=True) +class _MobiusLikeComponent: + """Mirror of mobius' frozen ``ComponentInfo`` dataclass (no mapping interface).""" + + name: str + kind: str + + +def test_coerce_returns_same_instance_when_already_componentinfo(): + component = ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model") + + assert ComponentInfo.coerce(component) is component + + +def test_coerce_reads_contract_dict(): + component = ComponentInfo.coerce( + {"name": "decoder", "kind": "decoder", "source": {"path": "model.language_model"}, "extra": 1} + ) + + assert component.name == "decoder" + assert component.kind == "decoder" + assert component.source_path == "model.language_model" + assert component.metadata == {"extra": 1} + + +def test_coerce_reads_duck_typed_object_when_object_has_no_mapping_interface(): + # A mobius ComponentInfo is a frozen dataclass and does not implement ``.get``. 
+ component = ComponentInfo.coerce(_MobiusLikeComponent(name="vision_encoder", kind="encoder")) + + assert component.name == "vision_encoder" + assert component.kind == "encoder" + assert component.source_path is None + + +def test_inspect_components_coerces_mobius_objects(monkeypatch): + fake_mobius = types.ModuleType("mobius") + fake_mobius.inspect_components = lambda model_name_or_path, task=None, trust_remote_code=False: [ + _MobiusLikeComponent(name="decoder", kind="decoder"), + _MobiusLikeComponent(name="vision_encoder", kind="encoder"), + _MobiusLikeComponent(name="embedding", kind="embedding"), + ] + monkeypatch.setitem(sys.modules, "mobius", fake_mobius) + + components = inspect_components("fake/llava") + + assert all(isinstance(c, ComponentInfo) for c in components) + assert [(c.name, c.kind, c.source_path) for c in components] == [ + ("decoder", "decoder", None), + ("vision_encoder", "encoder", None), + ("embedding", "embedding", None), + ] + + +def test_inspect_components_raises_importerror_when_mobius_missing(monkeypatch): + monkeypatch.setitem(sys.modules, "mobius", None) + + with pytest.raises(ImportError, match="mobius-ai is required"): + inspect_components("fake/llava") diff --git a/test/model/test_composite_model.py b/test/model/test_composite_model.py index ca3fcd1f40..725300855c 100644 --- a/test/model/test_composite_model.py +++ b/test/model/test_composite_model.py @@ -42,3 +42,261 @@ def test_composite_model(as_handler): assert composite_json["config"]["model_components"][0]["config"]["model_attributes"] == {"attr0": "value0"} model_config = ModelConfig.from_json(composite_json) assert model_config.type == CompositeModelHandler.model_type + + +def _build_composite_handler(): + return CompositeModelHandler( + [get_onnx_model(), get_onnx_model(), get_onnx_model()], + ["text_encoder", "unet", "vae_decoder"], + model_attributes={"shared": "value"}, + ) + + +def test_select_components_single_returns_unwrapped_child(): + composite = 
_build_composite_handler() + selected = composite.select_components(["unet"]) + assert isinstance(selected, ONNXModelHandler) + # parent attributes should be inherited by the unwrapped child + assert selected.model_attributes == {"shared": "value"} + + +def test_select_components_multiple_returns_subset_composite(): + composite = _build_composite_handler() + selected = composite.select_components(["vae_decoder", "text_encoder"]) + assert isinstance(selected, CompositeModelHandler) + # order from the call is preserved + assert list(selected.model_component_names) == ["vae_decoder", "text_encoder"] + + +def test_select_components_unknown_name_raises(): + composite = _build_composite_handler() + with pytest.raises(ValueError, match="Unknown component"): + composite.select_components(["no_such_component"]) + + +def test_select_components_empty_list_raises(): + composite = _build_composite_handler() + with pytest.raises(ValueError, match="non-empty"): + composite.select_components([]) + + +def test_model_config_select_components_single_returns_child_config(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + }, + } + ) + selected = composite_config.select_components(["unet"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + assert selected.config["model_path"] == "b.onnx" + + +def test_model_config_select_components_multiple_returns_composite_config(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "c.onnx"}}, + ], + "model_component_names": 
["text_encoder", "unet", "vae_decoder"], + }, + } + ) + selected = composite_config.select_components(["vae_decoder", "text_encoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "compositemodel" + assert list(selected.config["model_component_names"]) == ["vae_decoder", "text_encoder"] + assert [c["config"]["model_path"] for c in selected.config["model_components"]] == ["c.onnx", "a.onnx"] + + +def test_model_config_select_components_on_non_composite_raises(): + onnx_config = ModelConfig.model_validate({"type": "ONNXModel", "config": {"model_path": "a.onnx"}}) + with pytest.raises(ValueError, match="only supported on CompositeModel"): + onnx_config.select_components(["any"]) + + +def test_model_config_select_components_single_inherits_parent_attributes(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx", "model_attributes": {"child": "c"}}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + "model_attributes": {"shared": "s", "child": "parent"}, + }, + } + ) + selected = composite_config.select_components(["text_encoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + # parent-only keys are inherited; child keys win on conflict + assert selected.config["model_attributes"] == {"shared": "s", "child": "c"} + + +def test_model_config_get_components_returns_none_for_non_composite(): + onnx_config = ModelConfig.model_validate({"type": "ONNXModel", "config": {"model_path": "a.onnx"}}) + assert onnx_config.get_components() is None + + +def test_model_config_get_components_returns_names_for_composite(): + composite_config = ModelConfig.model_validate( + { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": 
{"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + }, + } + ) + assert composite_config.get_components() == ["text_encoder", "unet"] + + +def _make_export_package(root): + """Create a mobius-style export package: one subfolder per component with a model.onnx.""" + for name in ["decoder", "vision_encoder", "embedding"]: + comp_dir = root / name + comp_dir.mkdir(parents=True) + (comp_dir / "model.onnx").write_bytes(b"onnx") + return root + + +def test_discover_onnx_components_reads_subfolders(tmp_path): + from olive.model.utils.onnx_utils import discover_onnx_components + + _make_export_package(tmp_path) + discovered = discover_onnx_components(str(tmp_path)) + assert [name for name, _ in discovered] == ["decoder", "embedding", "vision_encoder"] + assert dict(discovered)["decoder"] == "decoder/model.onnx" + + +def test_discover_onnx_components_empty_for_flat_dir(tmp_path): + from olive.model.utils.onnx_utils import discover_onnx_components + + (tmp_path / "model.onnx").write_bytes(b"onnx") + assert not discover_onnx_components(str(tmp_path)) + + +def test_composite_handler_discovers_components_from_directory(tmp_path): + _make_export_package(tmp_path) + handler = CompositeModelHandler(model_path=str(tmp_path)) + assert list(handler.model_component_names) == ["decoder", "embedding", "vision_encoder"] + for _, component in handler.get_model_components(): + assert isinstance(component, ONNXModelHandler) + + +def test_model_config_get_components_discovers_directory_composite(tmp_path): + _make_export_package(tmp_path) + config = ModelConfig.model_validate({"type": "CompositeModel", "config": {"model_path": str(tmp_path)}}) + assert config.get_components() == ["decoder", "embedding", "vision_encoder"] + + +def test_model_config_select_components_discovers_directory_composite(tmp_path): + _make_export_package(tmp_path) + config = ModelConfig.model_validate({"type": "CompositeModel", "config": {"model_path": str(tmp_path)}}) + selected = 
config.select_components(["decoder"]) + assert isinstance(selected, ModelConfig) + assert selected.type == "onnxmodel" + assert selected.config["onnx_file_name"] == "decoder/model.onnx" + + +def test_model_config_get_components_hfmodel_queries_mobius(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr( + mobius_utils, + "inspect_components", + lambda *a, **k: [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + mobius_utils.ComponentInfo(name="vision_encoder", kind="vision_encoder", source_path="model.vision_tower"), + ], + ) + config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + assert config.get_components() == ["decoder", "vision_encoder"] + + +def test_model_config_select_components_hfmodel_tags_source_path(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr( + mobius_utils, + "inspect_components", + lambda *a, **k: [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + ], + ) + config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + selected = config.select_components(["decoder"]) + assert selected.type == "hfmodel" + assert selected.config["model_path"] == "some/vlm" + attrs = selected.config["model_attributes"] + assert attrs["component_name"] == "decoder" + assert attrs["component_kind"] == "decoder" + assert attrs["component_source_path"] == "model.language_model" + + +def test_model_config_select_components_hfmodel_multiple_names_raises(monkeypatch): + from olive.common import mobius_utils + + monkeypatch.setattr(mobius_utils, "inspect_components", lambda *a, **k: []) + config = ModelConfig.model_validate({"type": "HfModel", "config": {"model_path": "some/vlm"}}) + with pytest.raises(ValueError, match="one at a time"): + config.select_components(["decoder", "vision_encoder"]) + + +def _make_diffusers_dir(tmp_path): + """Create 
a minimal local diffusers dir so is_valid_diffusers_model passes offline.""" + (tmp_path / "model_index.json").write_text("{}") + return tmp_path + + +def test_model_config_get_components_diffusersmodel(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sdxl"}} + ) + assert config.get_components() == [ + "text_encoder", + "text_encoder_2", + "unet", + "vae_encoder", + "vae_decoder", + ] + + +def test_model_config_select_components_diffusersmodel_scopes_subset(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sdxl"}} + ) + selected = config.select_components(["unet", "text_encoder"]) + assert selected.type == "diffusersmodel" + # preserved in the variant's canonical order, not the requested order + assert selected.config["components"] == ["text_encoder", "unet"] + # the scoped config now exposes only the selected components + assert selected.get_components() == ["text_encoder", "unet"] + + +def test_model_config_select_components_diffusersmodel_unknown_raises(tmp_path): + model_dir = _make_diffusers_dir(tmp_path) + config = ModelConfig.model_validate( + {"type": "DiffusersModel", "config": {"model_path": str(model_dir), "model_variant": "sd"}} + ) + with pytest.raises(ValueError, match="Unknown component name"): + config.select_components(["text_encoder_2"]) # SDXL-only; not in SD diff --git a/test/model/test_diffusers_model.py b/test/model/test_diffusers_model.py index acbe6113a4..4a146b52dc 100644 --- a/test/model/test_diffusers_model.py +++ b/test/model/test_diffusers_model.py @@ -207,3 +207,43 @@ def test_adapter_path_property(self): def test_adapter_path_property_none(self): model = DiffusersModelHandler(model_path=self.model_path, model_variant=DiffusersModelVariant.SD) assert model.adapter_path is 
None + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_returns_full_variant_set_when_unscoped(self, mock_is_valid): + model = DiffusersModelHandler(model_path=self.model_path, model_variant=DiffusersModelVariant.SDXL) + assert [str(c) for c in model.get_exportable_components()] == [ + "text_encoder", + "text_encoder_2", + "unet", + "vae_encoder", + "vae_decoder", + ] + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_filters_to_scoped_subset_in_variant_order(self, mock_is_valid): + # requested out of order; result must follow the variant's canonical order + model = DiffusersModelHandler( + model_path=self.model_path, + model_variant=DiffusersModelVariant.SDXL, + components=["unet", "text_encoder"], + ) + assert [str(c) for c in model.get_exportable_components()] == ["text_encoder", "unet"] + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_get_exportable_components_raises_for_unknown_component(self, mock_is_valid): + model = DiffusersModelHandler( + model_path=self.model_path, + model_variant=DiffusersModelVariant.SD, + components=["text_encoder_2"], # SDXL-only; not in SD + ) + with pytest.raises(ValueError, match="Unknown component"): + model.get_exportable_components() + + @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) + def test_to_json_round_trips_components(self, mock_is_valid): + model = DiffusersModelHandler( + model_path=self.model_path, + model_variant=DiffusersModelVariant.SDXL, + components=["text_encoder"], + ) + assert model.to_json()["config"]["components"] == ["text_encoder"] diff --git a/test/passes/onnx/test_mobius_model_builder.py b/test/passes/onnx/test_mobius_model_builder.py index 657ae7f9d0..abacf79109 100644 --- a/test/passes/onnx/test_mobius_model_builder.py +++ b/test/passes/onnx/test_mobius_model_builder.py 
@@ -33,7 +33,7 @@ def _stub_mobius_module(): The stub is only injected when mobius is absent; if the real package is installed, this fixture is a no-op. """ - if "mobius" in sys.modules: + if _HAS_REAL_MOBIUS: yield return fake = types.ModuleType("mobius") diff --git a/test/workflows/test_run_builds.py b/test/workflows/test_run_builds.py new file mode 100644 index 0000000000..03687180ca --- /dev/null +++ b/test/workflows/test_run_builds.py @@ -0,0 +1,261 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import sys +from copy import deepcopy +from unittest.mock import MagicMock, patch + +import pytest + +from olive.workflows import run as olive_run +from test.utils import get_pytorch_model_io_config, pytorch_model_loader + +# pylint: disable=attribute-defined-outside-init + +PT_MODEL = { + "type": "PyTorchModel", + "config": { + "model_loader": pytorch_model_loader, + "io_config": get_pytorch_model_io_config(), + }, +} + + +class TestRunBuilds: + @pytest.fixture(autouse=True) + def setup(self, tmp_path): + self.cache_dir = tmp_path / "cache" + self.template = { + "input_model": PT_MODEL, + "systems": { + "cpu_system": {"type": "LocalSystem", "accelerators": [{"device": "cpu"}]}, + "gpu_system": {"type": "LocalSystem", "accelerators": [{"device": "gpu"}]}, + }, + "passes": { + "convert": {"type": "OnnxConversion"}, + "tune": {"type": "OrtSessionParamsTuning"}, + }, + "engine": { + "evaluate_input_model": False, + "cache_dir": str(self.cache_dir), + }, + } + + def _patch_engine_and_acc(self): + run_mock = MagicMock(return_value=MagicMock(name="WorkflowOutput")) + acc_mock = MagicMock(name="accelerator_spec") + engine_run_patch = patch("olive.engine.engine.Engine.run", run_mock) + accelerator_patch = patch.object(sys.modules[olive_run.__module__], 
"create_accelerator", return_value=acc_mock) + return run_mock, acc_mock, engine_run_patch, accelerator_patch + + def test_builds_no_builds_keeps_single_workflow_output(self): + # Sanity: with no `builds`, run() still returns the single WorkflowOutput from engine.run. + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + with engine_run_patch, acc_patch: + result = olive_run(config) + assert run_mock.call_count == 1 + assert not isinstance(result, dict) + + def test_builds_runs_each_build_once_and_returns_dict(self): + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["builds"] = { + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": {"pipeline": ["convert", "tune"], "output_dir": "out/second"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert run_mock.call_count == 2 + assert isinstance(result, dict) + assert set(result) == {"first", "second"} + + def test_builds_passes_per_build_pipeline_subset_in_declared_order(self): + # `tune` declared first in passes but second in pipeline; engine should receive [convert, tune]. 
+ run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + captured: list = [] + + def capture_input_passes(self, pass_configs): + captured.append(list(pass_configs)) + + config = deepcopy(self.template) + config["passes"] = { + "tune": {"type": "OrtSessionParamsTuning"}, + "convert": {"type": "OnnxConversion"}, + } + config["builds"] = { + "only": {"pipeline": ["convert", "tune"], "output_dir": "out/only"}, + } + with ( + engine_run_patch, + acc_patch, + patch("olive.engine.engine.Engine.set_input_passes_configs", capture_input_passes), + ): + olive_run(config) + assert run_mock.call_count == 1 + assert captured == [["convert", "tune"]] + + def test_builds_uses_per_build_output_dir(self): + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["builds"] = { + "first": {"pipeline": ["convert"], "output_dir": "out/first"}, + "second": {"pipeline": ["convert"], "output_dir": "out/second"}, + } + with engine_run_patch, acc_patch: + olive_run(config) + output_dirs = [call.args[3] for call in run_mock.call_args_list] + assert output_dirs == ["out/first", "out/second"] + + def test_builds_host_target_override_applied_per_build(self): + # Captures the SystemConfig passed to create_accelerator for each build. 
+ run_mock, acc_mock, engine_run_patch, _ = self._patch_engine_and_acc() + seen_targets: list = [] + + def fake_create_accelerator(system_config, **kwargs): + seen_targets.append(system_config.config.accelerators[0].device.lower()) + return acc_mock + + config = deepcopy(self.template) + config["builds"] = { + "cpu_build": { + "pipeline": ["convert"], + "output_dir": "out/cpu", + "host": "cpu_system", + "target": "cpu_system", + }, + "gpu_build": { + "pipeline": ["convert"], + "output_dir": "out/gpu", + "host": "gpu_system", + "target": "gpu_system", + }, + } + with ( + engine_run_patch, + patch.object(sys.modules[olive_run.__module__], "create_accelerator", side_effect=fake_create_accelerator), + ): + olive_run(config) + assert run_mock.call_count == 2 + assert seen_targets == ["cpu", "gpu"] + + def test_builds_components_on_non_composite_input_raises(self): + config = deepcopy(self.template) + config["builds"] = { + "broken": { + "pipeline": ["convert"], + "output_dir": "out/broken", + "components": ["text_encoder"], + }, + } + with pytest.raises(ValueError, match="no selectable components"): + olive_run(config) + + def test_builds_components_unknown_name_raises(self): + composite_input = { + "type": "CompositeModel", + "config": { + "model_components": [ + {"type": "ONNXModel", "config": {"model_path": "a.onnx"}}, + {"type": "ONNXModel", "config": {"model_path": "b.onnx"}}, + ], + "model_component_names": ["text_encoder", "unet"], + }, + } + config = deepcopy(self.template) + config["input_model"] = composite_input + config["builds"] = { + "bad": { + "pipeline": ["convert"], + "output_dir": "out/bad", + "components": ["no_such_component"], + }, + } + with pytest.raises(ValueError, match="unknown component"): + olive_run(config) + + def test_builds_directory_composite_input_runs_per_component(self, tmp_path): + # Flow A Option 2: a mobius export directory loads as a CompositeModel, + # subfolder names become component names, sibling builds optimize each. 
+ for name in ["decoder", "vision_encoder"]: + comp_dir = tmp_path / "exported_pkg" / name + comp_dir.mkdir(parents=True) + (comp_dir / "model.onnx").write_bytes(b"onnx") + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = {"type": "CompositeModel", "config": {"model_path": str(tmp_path / "exported_pkg")}} + config["builds"] = { + "decoder": {"components": ["decoder"], "pipeline": ["convert"], "output_dir": "out/decoder"}, + "vision_encoder": {"components": ["vision_encoder"], "pipeline": ["convert"], "output_dir": "out/vision"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert set(result) == {"decoder", "vision_encoder"} + assert run_mock.call_count == 2 + + def test_builds_hfmodel_components_resolved_via_mobius(self): + # Flow B: HfModel input; component names + source paths come from mobius. + from olive.common import mobius_utils + + def fake_inspect(*_args, **_kwargs): + return [ + mobius_utils.ComponentInfo(name="decoder", kind="decoder", source_path="model.language_model"), + mobius_utils.ComponentInfo(name="vision_encoder", kind="vision_encoder", source_path="model.vision"), + ] + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = {"type": "HfModel", "config": {"model_path": "some/vlm"}} + config["builds"] = { + "decoder": {"components": ["decoder"], "pipeline": ["convert"], "output_dir": "out/decoder"}, + } + with ( + engine_run_patch, + acc_patch, + patch.object(mobius_utils, "inspect_components", side_effect=fake_inspect), + ): + result = olive_run(config) + assert set(result) == {"decoder"} + assert run_mock.call_count == 1 + + def test_builds_diffusersmodel_per_component(self, tmp_path): + # §3.1: DiffusersModel input; each build scopes the pipeline to one exportable component. 
+ model_dir = tmp_path / "sdxl" + model_dir.mkdir(parents=True) + (model_dir / "model_index.json").write_text("{}") + + run_mock, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = { + "type": "DiffusersModel", + "config": {"model_path": str(model_dir), "model_variant": "sdxl"}, + } + config["builds"] = { + "text_encoder": {"components": ["text_encoder"], "pipeline": ["convert"], "output_dir": "out/te"}, + "unet": {"components": ["unet"], "pipeline": ["convert"], "output_dir": "out/unet"}, + } + with engine_run_patch, acc_patch: + result = olive_run(config) + assert set(result) == {"text_encoder", "unet"} + assert run_mock.call_count == 2 + + def test_builds_diffusersmodel_unknown_component_raises(self, tmp_path): + model_dir = tmp_path / "sd" + model_dir.mkdir(parents=True) + (model_dir / "model_index.json").write_text("{}") + + _, _, engine_run_patch, acc_patch = self._patch_engine_and_acc() + config = deepcopy(self.template) + config["input_model"] = { + "type": "DiffusersModel", + "config": {"model_path": str(model_dir), "model_variant": "sd"}, + } + config["builds"] = { + # text_encoder_2 is SDXL-only; not a component of the SD variant + "te2": {"components": ["text_encoder_2"], "pipeline": ["convert"], "output_dir": "out/te2"}, + } + with engine_run_patch, acc_patch, pytest.raises(ValueError, match="unknown component"): + olive_run(config) diff --git a/test/workflows/test_run_config_builds.py b/test/workflows/test_run_config_builds.py new file mode 100644 index 0000000000..016ec31e86 --- /dev/null +++ b/test/workflows/test_run_config_builds.py @@ -0,0 +1,166 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
# --------------------------------------------------------------------------

from copy import deepcopy

import pytest
from pydantic import ValidationError

from olive.workflows.run.config import RunConfig

# pylint: disable=attribute-defined-outside-init


class TestRunConfigBuilds:
    """Schema-level checks for the ``builds`` section of ``RunConfig``.

    Every test feeds a plain config dict to ``RunConfig.model_validate`` and asserts either on the
    parsed ``builds`` mapping or on the ``ValidationError`` that is raised.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        """Reset ``self.template`` to a baseline config without a ``builds`` key before each test."""
        hf_input = {
            "type": "HfModel",
            "model_path": "dummy_model",
            "task": "dummy_task",
        }
        systems = {
            "local_system": {"type": "LocalSystem", "accelerators": [{"device": "cpu"}]},
            "other_system": {"type": "LocalSystem", "accelerators": [{"device": "gpu"}]},
        }
        passes = {
            "convert": {"type": "OnnxConversion"},
            "tune": {"type": "OrtSessionParamsTuning"},
        }
        self.template = {
            "input_model": hf_input,
            "systems": systems,
            "passes": passes,
            "evaluate_input_model": False,
        }

    def _build_config(self, builds):
        """Return a deep copy of the baseline template with ``builds`` stored under the ``"builds"`` key."""
        return {**deepcopy(self.template), "builds": builds}

    def test_builds_absent_keeps_existing_behavior(self):
        # Baseline: with no `builds` key the config still validates, `builds` comes back as an
        # empty dict, and both top-level passes are present.
        parsed = RunConfig.model_validate(deepcopy(self.template))
        assert parsed.builds == {}
        for pass_name in ("convert", "tune"):
            assert pass_name in parsed.passes

    def test_builds_default_merge_basic(self):
        # Fields given only in `_default` must show up on every sibling build that omits them.
        builds = {
            "_default": {"host": "local_system", "target": "local_system"},
            "first": {"pipeline": ["convert"], "output_dir": "out/first"},
            "second": {"pipeline": ["convert", "tune"], "output_dir": "out/second"},
        }
        parsed = RunConfig.model_validate(self._build_config(builds))
        assert set(parsed.builds) == {"first", "second"}, "the `_default` sentinel must be removed after merge"
        for build_name in ("first", "second"):
            merged = parsed.builds[build_name]
            assert merged.host == "local_system"
            assert merged.target == "local_system"

    def test_builds_override_default(self):
        # A value set on the sibling wins over the same field in `_default`.
        builds = {
            "_default": {"host": "local_system", "target": "local_system"},
            "first": {"pipeline": ["convert"], "output_dir": "out/first"},
            "second": {
                "pipeline": ["convert"],
                "output_dir": "out/second",
                "host": "other_system",
                "target": "other_system",
            },
        }
        parsed = RunConfig.model_validate(self._build_config(builds))
        inherited = parsed.builds["first"]
        overridden = parsed.builds["second"]
        assert inherited.host == "local_system"
        assert overridden.host == "other_system"
        assert overridden.target == "other_system"

    def test_builds_default_pipeline_full_replace(self):
        # List-valued fields are replaced wholesale by the sibling, never concatenated with
        # the `_default` list; an empty sibling inherits everything.
        builds = {
            "_default": {
                "pipeline": ["convert", "tune"],
                "components": ["text_encoder"],
                "output_dir": "out/default",
            },
            "override": {
                "pipeline": ["convert"],
                "components": ["unet"],
                "output_dir": "out/override",
            },
            "inherit": {},
        }
        parsed = RunConfig.model_validate(self._build_config(builds))
        replacing = parsed.builds["override"]
        inheriting = parsed.builds["inherit"]
        assert replacing.pipeline == ["convert"]
        assert replacing.components == ["unet"]
        assert inheriting.pipeline == ["convert", "tune"]
        assert inheriting.components == ["text_encoder"]
        assert inheriting.output_dir == "out/default"

    def test_builds_missing_pipeline_after_merge_errors(self):
        # Neither `_default` nor the sibling provides `pipeline`/`output_dir`, so validation
        # is expected to fail with an error mentioning `pipeline`.
        incomplete = self._build_config(
            {
                "_default": {"host": "local_system"},
                "broken": {"components": ["text_encoder"]},
            }
        )
        with pytest.raises(ValidationError, match="pipeline"):
            RunConfig.model_validate(incomplete)

    def test_builds_invalid_pipeline_ref_errors(self):
        # A `pipeline` entry that is not a key of the top-level `passes` dict is rejected.
        dangling_pass = self._build_config(
            {
                "broken": {
                    "pipeline": ["convert", "no_such_pass"],
                    "output_dir": "out/broken",
                },
            }
        )
        with pytest.raises(ValidationError, match="unknown pass"):
            RunConfig.model_validate(dangling_pass)

    def test_builds_invalid_host_ref_errors(self):
        # A string `host` that is not a key of the top-level `systems` dict is rejected.
        dangling_host = self._build_config(
            {
                "broken": {
                    "pipeline": ["convert"],
                    "output_dir": "out/broken",
                    "host": "no_such_system",
                },
            }
        )
        with pytest.raises(ValidationError, match="unknown entry"):
            RunConfig.model_validate(dangling_host)

    def test_builds_empty_default_is_noop(self):
        # An empty `_default` validates, is dropped from the result, and adds nothing to siblings.
        builds = {
            "_default": {},
            "only": {"pipeline": ["convert"], "output_dir": "out/only"},
        }
        parsed = RunConfig.model_validate(self._build_config(builds))
        assert set(parsed.builds) == {"only"}
        sole_build = parsed.builds["only"]
        assert sole_build.pipeline == ["convert"]
        assert sole_build.host is None

    def test_builds_search_without_evaluator_errors(self):
        # Turning on `search_strategy` for a build while no evaluator is configured anywhere
        # in this config must be rejected with a "no evaluator" error.
        search_only = self._build_config(
            {
                "only": {"pipeline": ["convert"], "output_dir": "out/only", "search_strategy": True},
            }
        )
        with pytest.raises(ValidationError, match="no evaluator"):
            RunConfig.model_validate(search_only)