diff --git a/.github/scripts/.vale.ini b/.github/scripts/.vale.ini new file mode 100644 index 00000000..b064f62b --- /dev/null +++ b/.github/scripts/.vale.ini @@ -0,0 +1,5 @@ +StylesPath = styles +MinAlertLevel = suggestion + +[*.{md,mdx}] +BasedOnStyles = Vale diff --git a/.github/scripts/CODE_TEST_REPORT.md b/.github/scripts/CODE_TEST_REPORT.md new file mode 100644 index 00000000..041acae0 --- /dev/null +++ b/.github/scripts/CODE_TEST_REPORT.md @@ -0,0 +1,547 @@ +# Code-block test report (per page) + +Generated by `.github/scripts/report.py` from a full run of `extract_code_blocks.py --run`. Every runnable block is executed (test-by-default); `notest` and non-runnable languages (cpp, text, json, ...) show as `skip`. + +_Totals: 74 pages (36 with code, 36 with failures) - 280 blocks: **35 pass**, **245 fail**, 0 skip._ + +## Summary (every page) + +| Page | Owner | Blocks | Pass | Fail | Skip | Status | +|------|-------|-------:|-----:|-----:|-----:|--------| +| README.md | @dwithchenna | 1 | 0 | 1 | 0 | 1 FAIL | +| audio/index.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| audio/parakeet-tdt.mdx | @lakshay048 | 9 | 1 | 8 | 0 | 8 FAIL | +| audio/whisper-asr.mdx | @dwithchenna | 11 | 2 | 9 | 0 | 9 FAIL | +| audio/whisper_cpp.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| gpu-radeon/README.md | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| gpu-radeon/igpu-getting-started.mdx | @dwithchenna | 9 | 0 | 9 | 0 | 9 FAIL | +| gpu-radeon/index.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| gpu-radeon/radeon.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| gpu-radeon/ryzenai_gpu.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| index.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| installation.mdx | @uday610 | 12 | 4 | 8 | 0 | 8 FAIL | +| llms/distilbert-example.mdx | @dwithchenna | 1 | 0 | 1 | 0 | 1 FAIL | +| llms/high_level_python.mdx | @jeremyfowers | 3 | 1 | 2 | 0 | 2 FAIL | +| llms/hybrid_oga.mdx | @uday610 | 15 | 4 | 11 | 0 | 11 FAIL | +| llms/index.mdx | @bconsolvo | 0 | 0 
| 0 | 0 | no code | +| llms/llm-sft-deploy.mdx | @dwithchenna | 5 | 0 | 5 | 0 | 5 FAIL | +| llms/llm_linux.mdx | @lakshay048 | 8 | 3 | 5 | 0 | 5 FAIL | +| llms/oga-cpp-api.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| llms/oga-inference.mdx | @dwithchenna | 3 | 0 | 3 | 0 | 3 FAIL | +| llms/oga_model_prepare.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| llms/oga_op_prepare.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| llms/rag-oga.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| llms/server_interface.mdx | @jeremyfowers | 0 | 0 | 0 | 0 | no code | +| llms/vlm.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| reference/README.md | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| reference/app_development.mdx | @ThomasXilinx | 0 | 0 | 0 | 0 | no code | +| reference/applications.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| reference/ci-dashboard.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| reference/index.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| reference/licenses.mdx | @raholbharadwaj | 0 | 0 | 0 | 0 | no code | +| reference/relnotes.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| reference/versions.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| tools/README.md | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| tools/ai_analyzer.mdx | @savitha-srinivasan | 2 | 0 | 2 | 0 | 2 FAIL | +| tools/index.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| tools/model_quantization.mdx | @uday610 | 1 | 0 | 1 | 0 | 1 FAIL | +| tools/modelrun.mdx | @uday610 | 11 | 1 | 10 | 0 | 10 FAIL | +| tools/npu-check.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| tools/onnx-benchmark.mdx | @dwithchenna | 3 | 0 | 3 | 0 | 3 FAIL | +| tools/ops_support.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| tools/quark-quantization.mdx | @dwithchenna | 17 | 2 | 15 | 0 | 15 FAIL | +| tools/ryzen_ai_libraries.mdx | @uday610 | 0 | 0 | 0 | 0 | no code | +| tools/xrt_smi.mdx | @uday610 | 10 | 1 | 9 | 0 | 9 FAIL | +| vision/cvml-face-detection.mdx | @dwithchenna | 4 | 1 | 3 | 0 | 3 FAIL | +| 
vision/cvml-face-mesh.mdx | @dwithchenna | 5 | 1 | 4 | 0 | 4 FAIL | +| vision/cvml.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| vision/getstartex.mdx | @savitha-srinivasan | 12 | 0 | 12 | 0 | 12 FAIL | +| vision/getting-started-resnet-bf16.mdx | @dwithchenna | 9 | 0 | 9 | 0 | 9 FAIL | +| vision/getting-started-resnet.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| vision/hello-world.mdx | @dwithchenna | 4 | 0 | 4 | 0 | 4 FAIL | +| vision/image-classification.mdx | @dwithchenna | 4 | 0 | 4 | 0 | 4 FAIL | +| vision/index.mdx | @bconsolvo | 0 | 0 | 0 | 0 | no code | +| vision/nemotron-ocr-v2.mdx | @lakshay048 | 12 | 4 | 8 | 0 | 8 FAIL | +| vision/npu-gpu-pipeline.mdx | @dwithchenna | 8 | 1 | 7 | 0 | 7 FAIL | +| vision/sd_demo.mdx | @ThomasXilinx | 0 | 0 | 0 | 0 | no code | +| vision/super_resolution.mdx | @bconsolvo | 7 | 3 | 4 | 0 | 4 FAIL | +| vision/torchvision.mdx | @dwithchenna | 4 | 0 | 4 | 0 | 4 FAIL | +| vision/yolov8m.mdx | @dwithchenna | 21 | 0 | 21 | 0 | 21 FAIL | +| vision/yolov8s-worldv2.mdx | @dwithchenna | 9 | 1 | 8 | 0 | 8 FAIL | +| windows-ml/README.md | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/clip.mdx | @dwithchenna | 9 | 0 | 9 | 0 | 9 FAIL | +| windows-ml/faq.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/googlebert.mdx | @dwithchenna | 6 | 0 | 6 | 0 | 6 FAIL | +| windows-ml/index.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/installation.mdx | @dwithchenna | 5 | 1 | 4 | 0 | 4 FAIL | +| windows-ml/llm.mdx | @dwithchenna | 12 | 3 | 9 | 0 | 9 FAIL | +| windows-ml/model_conversion.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/model_deployment.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/model_support.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/resnet.mdx | @dwithchenna | 11 | 1 | 10 | 0 | 10 FAIL | +| windows-ml/troubleshooting.mdx | @dwithchenna | 0 | 0 | 0 | 0 | no code | +| windows-ml/winml_ep.mdx | @dwithchenna | 8 | 0 | 8 | 0 | 8 FAIL | +| 
windows-ml/winml_example.mdx | @dwithchenna | 9 | 0 | 9 | 0 | 9 FAIL | + +## Per-page detail (pages with code blocks) + +### `docs/README.md` - @dwithchenna - 0 pass / 1 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | bash | FAIL | ⠋ preparing local preview... ⠙ preparing local preview... ⠹ ... | + +### `docs/audio/parakeet-tdt.mdx` - @lakshay048 - 1 pass / 8 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 1 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 2 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 4 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 5 | powershell | FAIL | ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirement... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 11 | powershell | PASS | ['VitisAIExecutionProvider', 'DmlExecutionProvider', 'CPUExecutionProvider'] | +| 12 | powershell | FAIL | ffmpeg : The term 'ffmpeg' is not recognized as the name of a cmdlet, function, script fil... | + +### `docs/audio/whisper-asr.mdx` - @dwithchenna - 2 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | EnvironmentNameNotFound: Could not find conda environment: ryzen-ai-1.4.0 You can list all... | +| 1 | powershell | FAIL | ind path 'C:\Users\bconsolv\AppData\Local\Temp\docs-ci-q1napgd4\docs\audio\whisper' becaus... 
| +| 2 | powershell | FAIL | --'. At line:3 char:5 + --device npu \ + ~~~~~~ Unexpected token 'device' in expression or... | +| 3 | powershell | FAIL | c \ + ~ Missing expression after unary operator '--'. At line:4 char:5 + --input mic \ + ~... | +| 4 | powershell | FAIL | ine:4 char:5 + --eval-dir eval_dataset/LibriSpeech-samples \ + ~~~~~~~~ Unexpected token '... | +| 5 | text | PASS | plain text | +| 6 | json | PASS | valid JSON | +| 7 | json | FAIL | json format error: Extra data: line 1 column 15 (char 14) | +| 8 | powershell | FAIL | At line:2 char:5 + --model openai/whisper-base \ + ~~~~~ Unexpected token 'model' in expre... | +| 9 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 10 | powershell | FAIL | --'. At line:4 char:5 + --device npu \ + ~~~~~~ Unexpected token 'device' in expression or... | + +### `docs/gpu-radeon/igpu-getting-started.mdx` - @dwithchenna - 0 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | bat | FAIL | The syntax of the command is incorrect. | +| 1 | bat | FAIL | (ryzen-ai-1.7.1) | +| 2 | powershell | FAIL | At line:1 char:4 + cd \CNN-examples\iGPU\getting_started + ~ The '<' operator ... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: Error while fin... | +| 4 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: Error while fin... | +| 5 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 6 | powershell | FAIL | timeout after 300s | +| 7 | powershell | FAIL | t.PowerShell.Commands.SetLocationCommand compile.bat : The term 'compile.bat' is not recog... | +| 8 | powershell | FAIL | run.bat : The term 'run.bat' is not recognized as the name of a cmdlet, function, script f... 
| + +### `docs/installation.mdx` - @uday610 - 4 pass / 8 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 3 | python | PASS | Test finished | +| 6 | bash | PASS | WARNING: apt does not have a stable CLI interface. Use with caution in scripts. WARNING: a... | +| 7 | bash | FAIL | es not have a stable CLI interface. Use with caution in scripts. E: Unsupported file ./xrt... | +| 8 | bash | FAIL | bash: line 2: /opt/xilinx/xrt/setup.sh: No such file or directory | +| 9 | bash | FAIL | bash: line 1: xrt-smi: command not found bash: -c: line 3: syntax error near unexpected to... | +| 10 | bash | FAIL | cp: cannot stat 'ryzen_ai-1.7.1.tgz': No such file or directory tar (child): ryzen_ai-1.7.... | +| 11 | bash | FAIL | bash: line 1: TARGET-PATH: No such file or directory bash: line 2: TARGET-PATH: No such fi... | +| 12 | bash | PASS | | +| 13 | bash | FAIL | bash: line 1: TARGET-PATH: No such file or directory bash: line 2: python: command not fou... | +| 14 | bash | FAIL | bash: line 1: Setting: command not found bash: line 3: Test: command not found | +| 15 | bash | FAIL | bash: line 1: TARGET-PATH: No such file or directory | +| 16 | python | PASS | | + +### `docs/llms/distilbert-example.mdx` - @dwithchenna - 0 pass / 1 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | At line:3 char:4 + cd \Transformer-examples\DistilBERT_text_classification_b .... | + +### `docs/llms/high_level_python.mdx` - @jeremyfowers - 1 pass / 2 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | PASS | CondaSystemExit: Exiting. ERROR: pip's dependency resolver does not currently take into ac... | +| 1 | powershell | FAIL | ers.0.attn.o_proj.MatMulNBits.qweight": tensor(uint8),"model.layers.0.attn.o_proj.MatMulNB... | +| 2 | python | FAIL | ers.0.attn.o_proj.MatMulNBits.qweight": tensor(uint8),"model.layers.0.attn.o_proj.MatMulNB... 
| + +### `docs/llms/hybrid_oga.mdx` - @uday610 - 4 pass / 11 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | PASS | oes not currently take into account all the packages that are installed. This behaviour is... | +| 1 | bat | PASS | RyzenAI\1.7.1\deployment\.\onnxruntime_vitisai_ep.dll C:\Program Files\RyzenAI\1.7.1\deplo... | +| 2 | powershell | FAIL | The string is missing the terminator: '. + CategoryInfo : ParserError: (:) [], ParentConta... | +| 3 | powershell | FAIL | -f -l -f amd_genai_prompt_long ... | +| 5 | powershell | FAIL | File not found - amd_genai_prompt_long.txt | +| 6 | powershell | FAIL | At line:1 char:26 + .\model_benchmark.exe -i -f amd_genai_prompt_long ... | +| 7 | powershell | FAIL | The string is missing the terminator: '. + CategoryInfo : ParserError: (:) [], ParentConta... | +| 9 | python | PASS | | +| 10 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-2dwp... | +| 11 | powershell | FAIL | e\model_chat.py" -m python=3.12 -y + ~ The '<' operator is rese... | +| 14 | powershell | PASS | miniforge3\envs\ryzen-ai-1.7.1\Lib\site-packages (from requests->transformers->model-gener... | +| 15 | powershell | FAIL | At line:2 char:11 + git clone + ~ The '<' operator is reserved for future ... | + +### `docs/llms/llm-sft-deploy.mdx` - @dwithchenna - 0 pass / 5 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 1 | powershell | FAIL | At line:1 char:43 + python train.py --lora --lora_qv --hf_dir + ~ ... | +| 2 | powershell | FAIL | At line:1 char:88 + ... el --model_name meta-llama/Llama-3.2-1B --adapter_model_dir -pr -ipl \docs\models-tutorials\vision\quark_quantization + ~ The... 
| +| 3 | powershell | FAIL | At line:1 char:4 + cd /docs/vision/quark_quantization + ~ The '<' operator is ... | +| 4 | python | PASS | | +| 5 | powershell | FAIL | At line:1 char:11 + cd models && python download_ResNet.py + ~~ The token '&&' is not a va... | +| 6 | powershell | FAIL | tar.exe: Error opening archive: Failed to open 'val_images.tar.gz' C:\Users\bconsolv\AppDa... | +| 7 | powershell | FAIL | At line:1 char:19 + mkdir -p val_data && tar -xzf val_images.tar.gz -C val_data + ~~ The t... | +| 8 | python | PASS | led: CPU version of custom ops library compilation failed:Command '['where', 'cl']' return... | +| 9 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-ahcx... | +| 10 | python | FAIL | sors` and will be removed in the next release.  [QUARK-WARNING]: The custom ops l... | +| 11 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-ahcx... | +| 12 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-ahcx... | +| 13 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 14 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-ahcx... | +| 15 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-ahcx... | +| 16 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/tools/xrt_smi.mdx` - @uday610 - 1 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | At line:1 char:28 + xrt-smi examine -f JSON -o + ~ The '<' operator ... | +| 1 | powershell | FAIL | xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script f... 
| +| 4 | json | PASS | valid JSON | +| 5 | powershell | FAIL | xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script f... | +| 7 | powershell | FAIL | xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script f... | +| 9 | powershell | FAIL | xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script f... | +| 11 | powershell | FAIL | xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script f... | +| 13 | powershell | FAIL | At line:1 char:27 + xrt-smi configure --pmode + ~ Th... | +| 1 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 2 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 3 | python | FAIL | File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-9pnraw06\tmpg4lcb358.py", line 1 cache_... | +| 4 | powershell | FAIL | ~~ Unexpected token 'AI' in expression or statement. At line:4 char:7 + [Vitis AI EP] No. ... | +| 5 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 6 | powershell | FAIL | ErrorId : CommandNotFoundException Image : The term 'Image' is not recognized as the name ... | +| 7 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 8 | powershell | FAIL | ErrorId : CommandNotFoundException Image : The term 'Image' is not recognized as the name ... | + +### `docs/vision/hello-world.mdx` - @dwithchenna - 0 pass / 4 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | bat | FAIL | The syntax of the command is incorrect. | +| 1 | bat | FAIL | '#' is not recognized as an internal or external command, operable program or batch file. 
| +| 2 | powershell | FAIL | ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirement... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/image-classification.mdx` - @dwithchenna - 0 pass / 4 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | At line:1 char:4 + cd \CNN-examples\image_classification + ~ The '<' operator ... | +| 1 | powershell | FAIL | use it does not exist. At line:1 char:1 + cd models + ~~~~~~~~~ + CategoryInfo : ObjectNot... | +| 2 | powershell | FAIL | At line:1 char:16 + mkdir val_data && tar -xzf val_images.tar.gz -C val_data + ~~ The toke... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/nemotron-ocr-v2.mdx` - @lakshay048 - 4 pass / 8 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | PASS | 116), 2.60 GiB \| 40.13 MiB/s Filtering content: 95% (111/116), 2.60 GiB \| 33.70 MiB/s Fi... | +| 1 | powershell | PASS | 10) Filtering content: 30% (3/10) Filtering content: 40% (4/10) Filtering content: 50% (5/... | +| 2 | powershell | PASS | CondaSystemExit: Exiting. | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 4 | powershell | FAIL | of a cmdlet, function, script file, or operable program. Check the spelling of the name, o... | +| 5 | powershell | FAIL | All : The term 'All' is not recognized as the name of a cmdlet, function, script file, or ... | +| 6 | powershell | PASS | | +| 7 | powershell | FAIL | ing expression after unary operator '--'. At line:3 char:5 + --vai-config vitisai_config.j... | +| 8 | powershell | FAIL | ing expression after unary operator '--'. At line:4 char:5 + --vai-config vitisai_config.j... 
| +| 9 | powershell | FAIL | ing expression after unary operator '--'. At line:3 char:5 + --vai-config vitisai_config.j... | +| 10 | powershell | FAIL | ing expression after unary operator '--'. At line:3 char:5 + --vai-config vitisai_config.j... | +| 11 | powershell | FAIL | ne:5 char:5 + --image "Images\test\test.jpg" \ + ~~~~~ Unexpected token 'image' in express... | + +### `docs/vision/npu-gpu-pipeline.mdx` - @dwithchenna - 1 pass / 7 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | bat | FAIL | The syntax of the command is incorrect. | +| 1 | bat | FAIL | (ryzen-ai-1.7.1) rem Location of RyzenAI software installation path or default at "C:\Prog... | +| 2 | powershell | FAIL | At line:1 char:4 + cd \demo\NPU-GPU-Pipeline + ~ The '<' operator is reserved ... | +| 3 | powershell | FAIL | ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'stable_diff... | +| 4 | powershell | PASS | %XLNX_VART_FIRMWARE% | +| 5 | powershell | FAIL | Temp\docs-ci-no507gpo\%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win _amd64\vaip_config.json' be... | +| 6 | powershell | FAIL | . At line:1 char:1 + cd stable_diffusion + ~~~~~~~~~~~~~~~~~~~ + CategoryInfo : ObjectNotF... | +| 7 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/super_resolution.mdx` - @bconsolvo - 3 pass / 4 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | PASS | CondaSystemExit: Exiting. | +| 1 | powershell | PASS | Updating files: 89% (17/19) Updating files: 94% (18/19) Updating files: 100% (19/19) Updat... | +| 2 | powershell | FAIL | ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirement... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... 
| +| 4 | powershell | PASS | Cloning into 'sesr-m7-256x256-tiles-amdnpu'... warning: redirecting to https://huggingface... | +| 5 | powershell | FAIL | ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirement... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/torchvision.mdx` - @dwithchenna - 0 pass / 4 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: No module named... | +| 1 | powershell | FAIL | The syntax of the command is incorrect. | +| 2 | powershell | FAIL | At line:1 char:18 + mkdir val_images && tar -xzf val_images.tar.gz -C val_images + ~~ The ... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/yolov8m.mdx` - @dwithchenna - 0 pass / 21 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | [--show-channel-urls] [--file FILE] [--no-default-packages] [--subdir SUBDIR] [--solver {c... | +| 1 | powershell | FAIL | [--show-channel-urls] [--file FILE] [--no-default-packages] [--subdir SUBDIR] [--solver {c... | +| 2 | powershell | FAIL | At line:1 char:4 + cd \docs\models-tutorials\vision\object_detection\yolov8m +... | +| 3 | powershell | FAIL | At line:1 char:4 + cd /docs/vision/object_detection/yolov8m + ~ The '<' operat... | +| 4 | powershell | FAIL | ause it does not exist. At line:1 char:1 + cd models + ~~~~~~~~~ + CategoryInfo : ObjectNo... | +| 5 | powershell | FAIL | Unexpected token 'output_model_path' in expression or statement. At line:4 char:28 + --con... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... 
| +| 7 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 8 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 9 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 10 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 11 | powershell | FAIL | nexpected token 'output_model_path' in expression or statement. At line:4 char:28 + --conf... | +| 12 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 13 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 14 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 15 | powershell | FAIL | --exclude_subgraphs "[/model.22/Concat_3], [ ... + ~ Missing expression after unary operat... | +| 16 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 17 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 18 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 19 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 20 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/vision/yolov8s-worldv2.mdx` - @dwithchenna - 1 pass / 8 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | bat | FAIL | The syntax of the command is incorrect. 
| +| 1 | powershell | PASS | ERROR: pip's dependency resolver does not currently take into account all the packages tha... | +| 2 | powershell | FAIL | .\download.bat : The term '.\download.bat' is not recognized as the name of a cmdlet, func... | +| 3 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 4 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 5 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 7 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 8 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | + +### `docs/windows-ml/clip.mdx` - @dwithchenna - 0 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | CondaSystemExit: Exiting. ERROR: Could not open requirements file: [Errno 2] No such file ... | +| 1 | powershell | FAIL | | +| 2 | powershell | FAIL | cmdlet, function, script file, or operable program. Check the spelling of the name, or if ... | +| 3 | powershell | FAIL | 'windowsappruntimeinstall-x86.exe' is not recognized as the name of a cmdlet, function, sc... | +| 4 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 5 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... 
| +| 7 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 8 | powershell | FAIL | ~ Missing expression after unary operator '-'. At line:36 char:1 + -----------------------... | + +### `docs/windows-ml/googlebert.mdx` - @dwithchenna - 0 pass / 6 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | CondaSystemExit: Exiting. ERROR: Could not open requirements file: [Errno 2] No such file ... | +| 1 | powershell | FAIL | | +| 2 | powershell | FAIL | cmdlet, function, script file, or operable program. Check the spelling of the name, or if ... | +| 3 | powershell | FAIL | 'windowsappruntimeinstall-x86.exe' is not recognized as the name of a cmdlet, function, sc... | +| 4 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 5 | powershell | FAIL | ~ An expression was expected after '('. At line:21 char:232 + ... ch().clone() or sourceTe... | + +### `docs/windows-ml/installation.mdx` - @dwithchenna - 1 pass / 4 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | At line:4 char:4 + cd \WinML\CNN\ResNet + ~ The '<' operator is reserved for f... | +| 1 | powershell | FAIL | | +| 2 | powershell | FAIL | 'windowsappruntimeinstall-x86.exe' is not recognized as the name of a cmdlet, function, sc... | +| 3 | powershell | FAIL | At line:1 char:4 + cd \WinML + ~ The '<' operator is reserved for future use. ... | +| 4 | text | PASS | plain text | + +### `docs/windows-ml/llm.mdx` - @dwithchenna - 3 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | Found an existing package already installed. Trying to upgrade the installed package... No... | +| 1 | powershell | FAIL | No available upgrade found. 
No newer package versions are available from the configured so... | +| 2 | powershell | FAIL | Exception: No models were returned from the Azure Foundry catalog. | +| 3 | powershell | FAIL | Exception: No models were returned from the Azure Foundry catalog. | +| 4 | text | PASS | plain text | +| 5 | powershell | FAIL | ==> WARNING: A newer version of conda exists. <== current version: 26.1.1 latest version: ... | +| 6 | powershell | PASS | Uninstalling sympy-1.13.1: Successfully uninstalled sympy-1.13.1 Attempting uninstall: tor... | +| 7 | bash | FAIL | hon application, it may be easiest to use pipx install xyz, which will manage a virtual en... | +| 8 | powershell | FAIL | olive : The term 'olive' is not recognized as the name of a cmdlet, function, script file,... | +| 9 | powershell | FAIL | ==> WARNING: A newer version of conda exists. <== current version: 26.1.1 latest version: ... | +| 10 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... | +| 11 | text | PASS | plain text | + +### `docs/windows-ml/resnet.mdx` - @dwithchenna - 1 pass / 10 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | powershell | FAIL | CondaSystemExit: Exiting. ERROR: Could not open requirements file: [Errno 2] No such file ... | +| 1 | powershell | FAIL | | +| 2 | powershell | FAIL | cmdlet, function, script file, or operable program. Check the spelling of the name, or if ... | +| 3 | powershell | FAIL | 'windowsappruntimeinstall-x86.exe' is not recognized as the name of a cmdlet, function, sc... | +| 4 | powershell | FAIL | At line:1 char:4 + cd \WinML\CNN\ResNet\model\ + ~ The '<' operator is reserve... | +| 5 | powershell | FAIL | At line:1 char:4 + cd \WinML\CNN\ResNet\python + ~ The '<' operator is reserve... | +| 6 | powershell | FAIL | C:\Users\bconsolv\AppData\Local\miniforge3\envs\ryzen-ai-1.7.1\python.exe: can't open file... 
| +| 8 | python | PASS | | +| 9 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-4g3a... | +| 10 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-4g3a... | +| 11 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-4g3a... | + +### `docs/windows-ml/winml_ep.mdx` - @dwithchenna - 0 pass / 8 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 0 | cpp | FAIL | _doc_block.cpp:1:10: fatal error: winrt/Microsoft.Windows.AI.MachineLearning.h: No such fi... | +| 1 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-c42r... | +| 2 | cpp | FAIL | _doc_block.cpp:3:1: error: ‘Ort’ does not name a type 3 \| Ort::SessionOptions sessionOpti... | +| 3 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-c42r... | +| 4 | cpp | FAIL | _doc_block.cpp:5:10: fatal error: win_onnxruntime_cxx_api.h: No such file or directory 5 \... | +| 5 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-c42r... | +| 6 | cpp | FAIL | d 1 \| bool isCompiledModelAvailable = std::filesystem::exists(compiledModelPath); \| ^~~~... | +| 7 | python | FAIL | Traceback (most recent call last): File "C:\Users\bconsolv\AppData\Local\Temp\docs-ci-c42r... | + +### `docs/windows-ml/winml_example.mdx` - @dwithchenna - 0 pass / 9 fail / 0 skip + +| Block | Lang | Result | Detail | +|------:|------|--------|--------| +| 1 | powershell | FAIL | At line:3 char:4 + cd \WinML\CNN\ResNet + ~ The '<' operator is reserved for f... | +| 2 | powershell | FAIL | | +| 3 | powershell | FAIL | cmdlet, function, script file, or operable program. Check the spelling of the name, or if ... 
| +| 4 | powershell | FAIL | At line:1 char:4 + cd \WinML\CNN\ResNet\model + ~ The '<' operator is reserved... | +| 5 | powershell | FAIL | At line:1 char:4 + cd \WinML\CNN\ResNet\python + ~ The '<' operator is reserve... | +| 6 | powershell | FAIL | 6 + 287, lynx with confidence of 0.00119624 + ~~~~ Unexpected token 'lynx' in expression o... | +| 7 | powershell | FAIL | At line:1 char:4 + cd \WinML\CNN\cpp\CppResnetBuildDemo\ + ~ The '<' operator ... | +| 8 | powershell | FAIL | ognized as the name of a cmdlet, function, script file, or operable program. Check the spe... | +| 9 | powershell | FAIL | soccer' in expression or statement. At line:8 char:5 + 208,Labrador retriever 0.61% + ~ Mi... | + diff --git a/.github/scripts/README.md b/.github/scripts/README.md new file mode 100644 index 00000000..eb170904 --- /dev/null +++ b/.github/scripts/README.md @@ -0,0 +1,262 @@ +# Docs-as-Code CI + +Everything that powers the documentation CI for the Mintlify site in `../../docs`. +This is the **single** doc for the CI - scripts, conventions, runners, reporting, +and deployment. (The only other file you should ever read here is the generated +`CODE_TEST_REPORT.md`.) + +## Table of contents + +1. [Pipeline: execution order (what runs first)](#pipeline-at-a-glance) + - [Workflows (in run order)](#workflows-in-run-order) · [Scripts (in run order)](#scripts-in-run-order) +2. [Code-block testing model (test-by-default)](#code-block-testing-model-test-by-default) +4. [Languages: what runs, what doesn't (incl. C++)](#languages-what-runs-what-doesnt-incl-c) +5. [Authoring conventions](#authoring-conventions) +6. [Runners (hardware execution)](#runners-hardware-execution) +7. [Per-page report & dashboard](#per-page-report--dashboard) +8. [Failure routing (CODEOWNERS + notify)](#failure-routing-codeowners--notify) +9. [End-to-end walkthrough](#end-to-end-walkthrough) +10. [Deploying the site](#deploying-the-site) +11. 
[Run it locally](#run-it-locally) + +--- + +## Pipeline at a glance + +### Execution order (what runs first) + +The pipeline below runs on a push/PR touching `docs/**`. It is ordered +**generate first, then test, then check prose**: ownership and the model tables +(generated content) come first, then the code is extracted and tested, then +links/prose are checked. + +```text +push / PR to docs/** +│ +1. CODEOWNERS & Page Ownership ...................... cloud +│ a. check_owners.py ........ every page has an owner header +│ b. generate_codeowners.py . rebuild docs/CODEOWNERS; fail if stale +│ c. codeowners-validator ... syntax / no-unowned / no-shadow +│ +2. Update Model List ............................... cloud (weekly / on demand) +│ a. fetch_models.py ........ refresh Vision/LLMs/Audio model tables +│ +3. Test Code Samples ............................... cloud → self-hosted +│ a. syntax-check (cloud) .... extract_code_blocks.py --syntax-only +│ └ python syntax + json/yaml/toml/cmake lint +│ b. test-hardware (runner) .. extract_code_blocks.py --run [needs 3a] +│ └ run python/powershell/cmd · bash via WSL · compile C/C++ · lint +│ └ report.py → CODE_TEST_REPORT.md (uploaded artifact) +│ c. record (always) ......... record_run.py → ci-history.json +│ +4. Mintlify Docs Checks ............................ cloud +│ a. validate (mint validate → broken-links) b. external links +│ c. Vale prose d. cspell e. record → ci-history.json +│ +5. Notify Owner On Failure (only if 3 or 4 failed) . cloud + resolve owner → open issue @mention → email NOTIFY_EMAIL (if SMTP_* set) +``` + +> On GitHub, stages 1, 3, and 4 are separate workflows triggered by the same PR, +> so they actually run **concurrently**; the numbering is the intended logical +> order (and within stage 3 the cloud syntax gate truly runs before the hardware +> run via `needs`). Stage 2 runs on a weekly schedule / on demand and +> regenerates the tables the other stages build on. 
Also weekly: **Link Check** +> (external links → opens an issue). Want hard serialization (1 → 2 → 3 → 4)? +> That needs `workflow_run` chaining - say the word. + +### Workflows (in run order) + +| # | Workflow (`.github/workflows/`) | Trigger | Where | +|---|---|---|---| +| 1 | `codeowners.yml` | PR/push `docs/**`,`.github/**` | cloud | +| 2 | `update-model-list.yml` | weekly + manual | cloud | +| 3 | `test-code-samples.yml` | PR/push `docs/**` | cloud → self-hosted | +| 4 | `mintlify-checks.yml` | PR/push `docs/**` | cloud | +| 5 | `notify-owner.yml` | `workflow_run` completed (on failure) | cloud | +| — | `link-check.yml` | weekly + manual | cloud | + +### Scripts (in run order) + +| # | Script | Runs during | What it does | +|---|---|---|---| +| 1 | `check_owners.py` | CODEOWNERS — step 1 | every page has an owner header | +| 2 | `generate_codeowners.py` | CODEOWNERS — step 2 | rebuild `docs/CODEOWNERS` from headers; fail if stale | +| 3 | `fetch_models.py` | Update Model List | refresh the Vision/LLMs/Audio model tables | +| 4 | `extract_code_blocks.py` | Test Code Samples — **syntax-check** (cloud), then **test-hardware** (runner) | parse every block in `docs/**` (`.mdx` + `.md`); `--syntax-only` = python syntax + format lint; `--run` = execute / compile / WSL | +| 5 | `report.py` | Test Code Samples — after test-hardware | run JSON → `CODE_TEST_REPORT.md` + inject the dashboard table | +| 6 | `record_run.py` | end of Test Code Samples **and** Mintlify Docs Checks | append the run (status + per-page result + owner) to `ci-history.json` | +| 7 | `resolve_owner.py` | Notify Owner (helper, also used by 5 & 6) | read the owner GitHub ID from a page header | +| 8 | `notify_owner.py` | Notify Owner — on failure | compose the issue/email body @mentioning owners | + +## Code-block testing model (test-by-default) + +**Every fenced block in a runnable language is executed on every run.** There is +no opt-in. The single opt-out is `notest`. 
+ +```` +```python -> EXECUTED (always; also python-syntax-checked) +```powershell -> EXECUTED (Windows-native) +```bash -> EXECUTED via WSL on Windows +```cpp / ```c -> COMPILED with g++/gcc (run if it has main()) +```json / ```yaml -> LINTED for format validity +```python notest -> the only opt-out: skipped entirely +```` + +Optional **authoring** tags (they do not make testing optional): + +- `npu` / `gpu` / `cpu` - device scope (`--device npu` runs npu-tagged + untagged). +- `timeout=600`, `workdir=examples`, `continue_on_error=true`, `setup=`. +- Page directives in MDX comments: `{/* @os:windows */}…{/* @os:end */}`, + `{/* @device:npu */}…`, `{/* @setup:id=… command="…" */}`, + `{/* @var:id=… device=npu value="…" */}`, `{/* @require: */}`. + +A page's blocks run **in order in one sandbox dir**, so a `git clone` / `cd` in +an early block persists for later blocks, and nothing pollutes the docs tree. + +## Languages: what runs, what doesn't (incl. C++) + +| Fence | Tested how | Where it runs | +|---|---|---| +| `python` | syntax-compiled, then **run** | Ryzen AI conda env (NPU/GPU/CPU providers visible) | +| `powershell` / `pwsh` / `ps1` | **run** | Windows-native (`powershell -NoProfile -Command`) | +| `cmd` / `bat` / `batch` | **run** | Windows-native (`cmd /c`) | +| `bash` / `sh` / `shell` | **run** | **WSL** on Windows (`wsl bash -lc`); native `bash` on Linux | +| `cpp` / `c` | **compiled** with `g++`/`gcc` (and run if it defines `main()`; otherwise `-fsyntax-only`) | WSL on Windows; native on Linux | +| `json` / `yaml` / `toml` / `cmake` / `text` | **linted** for format validity (`json.loads`, `yaml.safe_load`, `tomllib`, paren-balance, plain-text) | in-process (no shell) | +| `mdx` / `ini` / other | skipped | n/a (not runnable or lintable) | + +**Linux vs Windows routing:** `bash`, C/C++, and anything inside an `@os:linux` +scope are Linux work, so on the Windows runner they execute through **WSL** +(Ubuntu). 
Windows blocks (`powershell`, `cmd`) run natively. Pass `--no-wsl` to +skip Linux blocks instead of running them through WSL. A dedicated Ubuntu runner +can replace WSL later with no doc changes (the routing is automatic). + +## Authoring conventions + +1. **Owner header (required)** - first line after frontmatter: + `{/* owner: */}`. Drives CODEOWNERS + failure routing. Default + owner: `@dwithchenna`. +2. **Language tabs** - when the same step exists in multiple languages, show + **Python first**, then **C++**, using Mintlify `…`. +3. **2-level page paths** - `folder/page.mdx` (e.g. `llms/hybrid_oga.mdx`). The + link checker and Mintlify only resolve 2-level page paths; do deeper grouping + in `docs.json` nav, not on disk. (Top-level standalone pages like + `index.mdx` and `installation.mdx` are fine.) +4. **Icons** - only on top-level categories (group `icon` in `docs.json`, or a + frontmatter `icon:` on a top-level page). Never on second-level pages. + +## Runners (hardware execution) + +The `test-hardware` job runs on a **self-hosted runner**. Devices are chosen by +the `DOCS_CI_DEVICES` repo variable (a JSON array) so you never edit the +workflow to add hardware: + +| Variable | Default | Meaning | +|---|---|---| +| `DOCS_CI_DEVICES` | `["halo"]` | runner device labels to target (e.g. `["halo","stxp","krk"]`) | +| `DOCS_CI_OS` | `Windows` | OS label in the runner triple | +| `RYZEN_AI_ENV` | `ryzen-ai-1.7.1` | conda env on the runner with the NPU/GPU/CPU providers | + +A job targets `runs-on: [self-hosted, , ]`. To add a machine: +label it `self-hosted`, the OS, and a device tag, then add that tag to +`DOCS_CI_DEVICES`. + +**Today:** a single local **Strix Halo** box (`AMD Ryzen AI Max+`, NPU present) +is registered to this repo with the label `halo`, so `DOCS_CI_DEVICES=["halo"]` +runs the whole suite end-to-end on real hardware. 
+ +**Shared AMD pool (future):** the AMD Playbooks lab machines (`xsj-aimlab-halo-*`, +`xsj-aimlab-stxp-*`, `…-krk-*`) use the same `[self-hosted, Windows, <device>]` +label scheme. They are registered to an AMD **org runner group**, so to use them +the docs repo must live under the `amd` org and be added to that group (a fork +under a personal account gets `403` and the job queues forever). Then set +`DOCS_CI_DEVICES=["halo","stxp","krk"]`. + +Register the local box (stopgap): repo -> Settings -> Actions -> Runners -> New +self-hosted runner (Windows); add the label `halo`; run as a service +(`./svc.cmd install && ./svc.cmd start`). + +## Per-page report & dashboard + +- **`report.py`** reads a run's JSON (`--output-json` from + `extract_code_blocks.py`) and writes **`CODE_TEST_REPORT.md`** - a summary + table (one row per page: blocks / pass / fail / skip / owner) plus a per-page + detail table (one row per block: `#`, lang, result, short detail). It covers + **every** `.mdx` and `.md`, including pages with no code (`no code`). +- It also injects the summary table into `docs/reference/ci-dashboard.mdx` + between `{/* RESULTS_START */}` / `{/* RESULTS_END */}`, so the published CI + dashboard reflects all pages. +- **`ci-history.json`** is the append-only run log (status + per-page result + + owner) that `record_run.py` writes; the in-repo dashboard + (`.github/scripts/dashboard/index.html`) and the Cursor canvas read it. + +## Failure routing (CODEOWNERS + notify) + +- `generate_codeowners.py` rebuilds `docs/CODEOWNERS` from page headers: a + catch-all default, infra rules, a per-folder default (the folder's dominant + owner), then a per-page rule for every page. `codeowners.yml` fails a PR if + the committed file is stale. +- On a failing run, `notify_owner.py` resolves the owner from the page header + and opens an issue that **@mentions** them - GitHub emails them through its own + system, so no individual email addresses are stored anywhere. 
+- **Plus a full report by email to the shared support DL.** `notify-owner.yml` + also emails the report to `NOTIFY_EMAIL` (repo variable, default + `dl.ryzenai.support@amd.com`). A distribution list is safe to keep public - it + is not a person's address. This email step runs only when the SMTP relay + secrets are configured: `SMTP_SERVER`, `SMTP_PORT`, `SMTP_USERNAME`, + `SMTP_PASSWORD`. Without them, the GitHub issue is still opened; the email is + simply skipped. + +## End-to-end walkthrough + +Take `docs/vision/super_resolution.mdx` (owner `@bconsolvo`), which has a +runnable `python` PSNR block. + +1. A PR edits `docs/**` -> `Mintlify Docs Checks` and `Test Code Samples` run. +2. `mint validate` + `broken-links` confirm the build and links. +3. `extract_code_blocks.py --syntax-only` compiles every python block (cloud). +4. `extract_code_blocks.py --run` executes the PSNR block on the Strix Halo + runner; it prints `PSNR @ MSE=100 -> 28.13 dB` and passes. +5. If it regressed, the page is written to `failed-pages.txt`, `notify-owner` + resolves `@bconsolvo` from the header and opens an issue mentioning them. +6. `generate_codeowners.py` keeps `CODEOWNERS` in sync from the same header, so + review assignment and the notifier always agree. + +## Deploying the site + +The site is hosted by **Mintlify** from the `/docs` subfolder; the GitHub repo +can stay private while the site is public. + +1. mintlify.com -> connect the repo via the Mintlify GitHub App (works on + private repos; scope it to this repo). +2. Dashboard -> Git Settings -> enable **monorepo**, set the docs path to + `/docs` (otherwise it looks for `docs.json` at the root and fails). +3. Push/merge to `main` -> auto rebuild + deploy. Every PR gets a preview build. +4. The public URL (e.g. `https://ryzen-ai-xxxx.mintlify.app`) shows on the + dashboard Overview. + +Notes: GitHub/Discord sidebar links, icons, theme, and the AI "Ask/Copy" menu +live in `docs/docs.json`. 
The "View as Markdown" / Ask-AI routes are produced by +Mintlify's hosted build and 404 under local `mint dev` - expected. + +## Run it locally + +```bash +# 1. Ownership + generated tables first (the pipeline order) +python .github/scripts/check_owners.py +python .github/scripts/generate_codeowners.py +python .github/scripts/fetch_models.py + +# 2. Cloud-equivalent syntax + format check (fast, no hardware) +python .github/scripts/extract_code_blocks.py --syntax-only --docs docs + +# 3. Full hardware run (inside the Ryzen AI conda env), then build the report +conda run -n ryzen-ai-1.7.1 python .github/scripts/extract_code_blocks.py \ + --run --docs docs --output-json report_run.json --failed-pages report_failed.txt +python .github/scripts/report.py --results report_run.json --docs docs \ + --out .github/scripts/CODE_TEST_REPORT.md --dashboard docs/reference/ci-dashboard.mdx + +# Preview the site +cd docs && npx mint dev # http://localhost:3000 +``` diff --git a/.github/scripts/check_owners.py b/.github/scripts/check_owners.py new file mode 100644 index 00000000..5dba80a8 --- /dev/null +++ b/.github/scripts/check_owners.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +""" +Fail CI if any docs page is missing an owner header. + +Every page under docs/ must declare an owner inline: + {/* owner: */} + +This is our guarantee that "every docs page has ownership" - it complements +CODEOWNERS (which routes GitHub review) by recording the owner's GitHub ID on +the page itself. Notifications are sent GitHub-natively by @mentioning the ID. 
def main() -> None:
    """Fail the process when any docs page lacks an owner header.

    Collects every ``*.mdx`` file under ``DOCS`` (recursively) exactly once,
    reads each as UTF-8 (undecodable bytes are replaced rather than raising),
    and checks it against ``OWNER_RE``.

    Prints the offending pages and exits with status 1 if at least one page
    has no owner header; otherwise prints an OK summary and returns normally.
    """
    # Walk the tree a single time so the reported total always describes the
    # same set of files that was actually scanned.
    pages = sorted(DOCS.rglob("*.mdx"))
    total = len(pages)

    missing = [
        page.as_posix()
        for page in pages
        if not OWNER_RE.search(page.read_text(encoding="utf-8", errors="replace"))
    ]

    if missing:
        print(f"ERROR: {len(missing)}/{total} docs pages have no owner header:")
        for m in missing:
            print(f" - {m}")
        print("\nAdd a header: {/* owner: */}")
        sys.exit(1)
    print(f"OK: all {total} docs pages have an owner header.")
\nCheck the spelling of the name, or if a path was included, verify that the path is co" + }, + { + "page": "audio/whisper-asr.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "--'.\nAt line:4 char:5\n+ --device npu \\\n+ ~~~~~~\nUnexpected token 'device' in expression or statement.\nAt line:5 char:5\n+ --input audio_files/1089-134686-0000.wav\n+ ~\nMissing expression aft" + }, + { + "page": "gpu-radeon/igpu-getting-started.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "run.bat : The term 'run.bat' is not recognized as the name of a cmdlet, function, script file, or operable program. \nCheck the spelling of the name, or if a path was included, verify that the path is " + }, + { + "page": "installation.mdx", + "owner_id": "uday610", + "check": "code-execution", + "status": "fail" + }, + { + "page": "llms/distilbert-example.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "At line:3 char:4\n+ cd \\Transformer-examples\\DistilBERT_text_classification_b ...\n+ ~\nThe '<' operator is reserved for future use.\n + CategoryInfo : ParserError: (:) [], Pare" + }, + { + "page": "llms/high_level_python.mdx", + "owner_id": "jeremyfowers", + "check": "code-execution", + "status": "fail", + "detail": "ers.0.attn.o_proj.MatMulNBits.qweight\": tensor(uint8),\"model.layers.0.attn.o_proj.MatMulNBits.scales\": tensor(float16),\"model.layers.0.attn.o_proj.MatMulNBits.qzeros\": tensor(uint8),\"model.layers.0.at" + }, + { + "page": "llms/hybrid_oga.mdx", + "owner_id": "uday610", + "check": "code-execution", + "status": "fail", + "detail": "At line:2 char:11\n+ git clone \n+ ~\nThe '<' operator is reserved for future use.\n + CategoryInfo : ParserError: (:) [], ParentContainsErrorRecordException\n + Ful" + }, + { + "page": "llms/llm-sft-deploy.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "At 
line:1 char:59\n+ python inference.py --quark_safetensors --quant_model_dir \n sess_options = ort.SessionOptions()\n ^^^\nNam" + }, + { + "page": "tools/model_quantization.mdx", + "owner_id": "uday610", + "check": "code-execution", + "status": "fail", + "detail": "Traceback (most recent call last):\n File \"C:\\Users\\bconsolv\\AppData\\Local\\Temp\\docs-ci-w7rvs9w4\\tmppgb6am8v.py\", line 1, in \n quant_config = QuantizationConfig(calibrate_method=PowerOfTwoM" + }, + { + "page": "tools/modelrun.mdx", + "owner_id": "uday610", + "check": "code-execution", + "status": "fail", + "detail": " for member \u2018begin\u2019 in \u2018onnx_model\u2019, which is of non-class type \u2018const char*\u2019\n 15 | std::basic_string(onnx_model.begin(), onnx_model.end()).c_str(),\n | " + }, + { + "page": "tools/onnx-benchmark.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements-win.txt'\n" + }, + { + "page": "tools/quark-quantization.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-ahcxbwh9\\\\quark_quantize.py': [Errno 2] No such file or d" + }, + { + "page": "tools/xrt_smi.mdx", + "owner_id": "uday610", + "check": "code-execution", + "status": "fail", + "detail": "xrt-smi : The term 'xrt-smi' is not recognized as the name of a cmdlet, function, script file, or operable program. \nCheck the spelling of the name, or if a path was included, verify that the path is " + }, + { + "page": "vision/cvml-face-detection.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "ace-detection.exe' is not recognized as the name of a cmdlet, \nfunction, script file, or operable program. 
Check the spelling of the name, or if a path was included, verify that the \npath is correct a" + }, + { + "page": "vision/cvml-face-mesh.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "emesh.exe : The term 'cvml-sample-facemesh.exe' is not recognized as the name of a cmdlet, function, \nscript file, or operable program. Check the spelling of the name, or if a path was included, verif" + }, + { + "page": "vision/getstartex.mdx", + "owner_id": "savitha-srinivasan", + "check": "code-execution", + "status": "fail", + "detail": "resnet_cifar.exe : The term 'resnet_cifar.exe' is not recognized as the name of a cmdlet, function, script file, or \noperable program. Check the spelling of the name, or if a path was included, verify" + }, + { + "page": "vision/getting-started-resnet-bf16.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "ErrorId : CommandNotFoundException\n \nImage : The term 'Image' is not recognized as the name of a cmdlet, function, script file, or operable program. 
Check \nthe spelling of the name, or if a path was i" + }, + { + "page": "vision/hello-world.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-1v2arqri\\\\hello_world.py': [Errno 2] No such file or dire" + }, + { + "page": "vision/image-classification.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-awbb51vb\\\\image_classification.py': [Errno 2] No such fil" + }, + { + "page": "vision/nemotron-ocr-v2.mdx", + "owner_id": "lakshay048", + "check": "code-execution", + "status": "fail", + "detail": "ne:5 char:5\n+ --image \"Images\\test\\test.jpg\" \\\n+ ~~~~~\nUnexpected token 'image' in expression or statement.\nAt line:6 char:5\n+ --vai-config vitisai_config.json\n+ ~\nMissing expression after" + }, + { + "page": "vision/npu-gpu-pipeline.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\pipeline.py': [Errno 2] No such file or directory\n" + }, + { + "page": "vision/super_resolution.mdx", + "owner_id": "bconsolvo", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-z0vtjxao\\\\onnx_inference.py': [Errno 2] No such file or d" + }, + { + "page": "vision/torchvision.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": 
"C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-9cybfihi\\\\classification.py': [Errno 2] No such file or d" + }, + { + "page": "vision/yolov8m.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-hj8z0x1a\\\\run_inference.py': [Errno 2] No such file or di" + }, + { + "page": "vision/yolov8s-worldv2.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "C:\\Users\\bconsolv\\AppData\\Local\\miniforge3\\envs\\ryzen-ai-1.7.1\\python.exe: can't open file 'C:\\\\Users\\\\bconsolv\\\\AppData\\\\Local\\\\Temp\\\\docs-ci-pac6to12\\\\infer_single.py': [Errno 2] No such file or dir" + }, + { + "page": "windows-ml/clip.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": " ~\nMissing expression after unary operator '-'.\nAt line:36 char:1\n+ -------------------------------------+------------+--+-\n+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + }, + { + "page": "windows-ml/googlebert.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": " ~\nAn expression was expected after '('.\nAt line:21 char:232\n+ ... 
ch().clone() or sourceTensor.detach().clone().requires_grad_(True), r ...\n+ " + }, + { + "page": "windows-ml/installation.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail" + }, + { + "page": "windows-ml/llm.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail" + }, + { + "page": "windows-ml/resnet.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "Traceback (most recent call last):\n File \"C:\\Users\\bconsolv\\AppData\\Local\\Temp\\docs-ci-4g3alg45\\tmpjqabtxe3.py\", line 1, in \n session = ort.InferenceSession(model_path, sess_options=sessio" + }, + { + "page": "windows-ml/winml_ep.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "Traceback (most recent call last):\n File \"C:\\Users\\bconsolv\\AppData\\Local\\Temp\\docs-ci-c42ripjy\\tmp3m45vq6b.py\", line 4, in \n model_compiler = ort.ModelCompiler(\n ^^^\nN" + }, + { + "page": "windows-ml/winml_example.mdx", + "owner_id": "dwithchenna", + "check": "code-execution", + "status": "fail", + "detail": "soccer' in expression or statement.\nAt line:8 char:5\n+ 208,Labrador retriever 0.61%\n+ ~\nMissing expression after ','.\nAt line:8 char:5\n+ 208,Labrador retriever 0.61%\n" + } + ] + } + ] +} diff --git a/.github/scripts/cspell.json b/.github/scripts/cspell.json new file mode 100644 index 00000000..e9eab351 --- /dev/null +++ b/.github/scripts/cspell.json @@ -0,0 +1,48 @@ +{ + "version": "0.2", + "language": "en", + "files": ["docs/**/*.mdx"], + "ignorePaths": [ + "docs/images/**", + "docs/assets/**", + "**/node_modules/**" + ], + "ignoreRegExpList": [ + "/\\{/\\*[\\s\\S]*?\\*/\\}/", + "/`[^`]*`/", + "/```[\\s\\S]*?```/" + ], + "words": [ + "Ryzen", + "AMD", + "XDNA", + "Vitis", + "iGPU", + "dGPU", + "ROCm", + "Radeon", + "Mintlify", + "MDX", + "onnx", + "onnxruntime", + "OGA", + "genai", + "Quark", + "Lemonade", + "llama", + "Strix", + "Krackan", + "Phoenix", + 
"Hawk", + "quantization", + "quantized", + "bfloat", + "Whisper", + "Nemotron", + "Parakeet", + "WinML", + "Docusaurus", + "lychee", + "cspell" + ] +} diff --git a/.github/scripts/dashboard/index.html b/.github/scripts/dashboard/index.html new file mode 100644 index 00000000..538323c6 --- /dev/null +++ b/.github/scripts/dashboard/index.html @@ -0,0 +1,98 @@ + + + + + + Ryzen AI Docs - CI Dashboard + + + +

Ryzen AI Docs - CI Dashboard

+

Docs-as-code pipeline: what ran, when, pass/fail, and the owner for each page.

+
Loading ci-history.json ...
+ + + + diff --git a/.github/scripts/extract_code_blocks.py b/.github/scripts/extract_code_blocks.py new file mode 100644 index 00000000..7746ab6b --- /dev/null +++ b/.github/scripts/extract_code_blocks.py @@ -0,0 +1,560 @@ +#!/usr/bin/env python3 +""" +Extract and validate fenced code blocks from Mintlify .mdx docs. + +Design (test-by-default / opt-out): + - EVERY fenced block in a runnable language is EXECUTED by default. + - Add `notest` to the fence info string to skip a block entirely. + - Optional device tags `cpu` / `gpu` / `npu` declare which accelerator a block + needs. With `--device X`, a block runs only if it is untagged (runs + everywhere) or tagged with X. + +This ports the useful ideas from the AMD Playbooks test runner +(amd/playbooks .github/scripts/run_playbook_tests.py) but (1) inverts the +default to "test everything", and (2) uses MDX-valid comment syntax +`{/* ... */}` instead of HTML comments ``. + +================================ Testing model ============================== +TEST BY DEFAULT. Every fenced block written in a runnable language IS EXECUTED +on every run. There is no opt-in tag. The ONLY way to skip a block is `notest`. + + ```python -> EXECUTED (always; also python-syntax-checked) + ```powershell / ```cmd -> EXECUTED (Windows-native) + ```bash / ```sh -> EXECUTED via WSL on Windows (native bash on Linux) + ```cpp / ```c -> COMPILED with g++/gcc (run if it defines main(); + otherwise -fsyntax-only). Via WSL on Windows. + ```json/yaml/toml/cmake/text -> LINTED for format validity (e.g. json.loads) + ```python notest -> the one opt-out: skipped entirely + +Linux blocks (bash, C/C++, anything in @os:linux) run through WSL on the Windows +runner; pass --no-wsl to skip them instead. Only truly unknown fences (mdx, ini, +...) are recorded "skipped" - nothing runnable is silently passed. 
+ +=============================== Authoring extras ============================ +The tags/attributes below are OPTIONAL conveniences for authors (they do NOT +make testing optional). Per-block, in the fence info string (after the language): + ```python npu -> device-scoped (cpu | gpu | npu) + ```python timeout=600 -> per-block timeout (seconds) + ```bash workdir=examples -> run in /examples + ```bash continue_on_error=true -> a failure doesn't fail the page + ```python setup=activate-venv -> run a named @setup first + +Inline marker: + any line ending with `#hide` is executed but meant to be hidden from the + rendered site (shown as [hidden] in CI logs). + +Page-level comment directives (MDX comments, invisible when rendered): + Scope blocks (wrap one or more code blocks): + {/* @os:windows */} ... {/* @os:end */} (windows | linux) + {/* @device:npu */} ... {/* @device:end */} (cpu,gpu,npu lists ok) + Reusable named setup (OS-scoped): + {/* @setup:id=activate-venv command="..." */} + Reusable device-aware variables, referenced in code as ${name}: + {/* @var:id=model device=npu value="..." 
*/} + Inline a shared include (from docs/_includes via _includes/registry.json): + {/* @require:common-install */} + +Outputs: + - --output-json: per-block results + - --failed-pages: pages with >=1 failing block (consumed by notify-owner) + +Usage: + python extract_code_blocks.py --syntax-only # cloud + python extract_code_blocks.py --run # hardware: all + python extract_code_blocks.py --run --device npu # npu + untagged + python extract_code_blocks.py --run --platform windows # os filter +""" + +import argparse +import json +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Optional + +FENCE_RE = re.compile(r"^```([^\n]*)\n(.*?)^```", re.MULTILINE | re.DOTALL) + +RUNNABLE_SHELL_WIN = {"powershell", "pwsh", "ps1", "cmd", "bat", "batch"} +RUNNABLE_SHELL_NIX = {"bash", "sh", "shell"} # executed via WSL on Windows +COMPILE_LANGS = {"cpp", "c++", "cc", "cxx", "c"} # compiled (run if it has main) +FORMAT_LANGS = {"json", "yaml", "yml", "toml", "cmake", "text"} # validated/linted +RUNNABLE = RUNNABLE_SHELL_WIN | RUNNABLE_SHELL_NIX | COMPILE_LANGS | {"python"} +TESTABLE = RUNNABLE | FORMAT_LANGS # everything we check (else skip) +DEVICE_TAGS = {"cpu", "gpu", "npu"} +SKIP_TAG = "notest" + +# MDX comment directives: {/* @tag ... 
# MDX comment directives have the form {/* @tag ... */}.
SETUP_RE = re.compile(r"\{/\*\s*@setup:(.+?)\*/\}")
VAR_RE = re.compile(r"\{/\*\s*@var:(.+?)\*/\}")
REQUIRE_RE = re.compile(r"\{/\*\s*@require:([a-z0-9\-,]+)\s*\*/\}")
OS_OPEN_RE = re.compile(r"\{/\*\s*@os:(windows|linux)\s*\*/\}")
OS_CLOSE_RE = re.compile(r"\{/\*\s*@os:end\s*\*/\}")
# NOTE: `[\w,]+` also matches the literal "end", so this pattern matches the
# closing directive too; find_nested_blocks() filters those matches out.
DEVICE_OPEN_RE = re.compile(r"\{/\*\s*@device:([\w,]+)\s*\*/\}")
DEVICE_CLOSE_RE = re.compile(r"\{/\*\s*@device:end\s*\*/\}")

# key=value or key="value with spaces"
ATTR_RE = re.compile(r'(\w+)=(?:"([^"]*)"|(\S+))')

# Attributes whose textual value is interpreted as a boolean ("true" => True).
_BOOL_ATTRS = frozenset({"continue_on_error", "hidden"})


# --------------------------------------------------------------------------- #
# Parsing helpers
# --------------------------------------------------------------------------- #
def _coerce_attr(key: str, raw: str):
    """Convert a raw attribute string to the type used for `key`.

    `timeout` becomes an int when it parses as one (otherwise the raw text is
    kept), the boolean attributes become True only for a case-insensitive
    "true", and every other key is returned unchanged.
    """
    if key == "timeout":
        try:
            return int(raw)
        except ValueError:
            # Keep the raw text instead of aborting the whole scan on a typo.
            return raw
    if key in _BOOL_ATTRS:
        return raw.lower() == "true"
    return raw


def parse_attr_string(s: str) -> dict:
    """Parse `key=value` / `key="value with spaces"` pairs (comment attrs).

    Returns a dict of the attributes found in `s`; later duplicates of a key
    overwrite earlier ones. A non-integer `timeout` is kept as text rather
    than raising, matching how parse_fence() treats the same input.
    """
    attrs = {}
    for m in ATTR_RE.finditer(s):
        key = m.group(1)
        # group(2) is the quoted form (may be the empty string), group(3) the bare form.
        raw = m.group(2) if m.group(2) is not None else m.group(3)
        attrs[key] = _coerce_attr(key, raw)
    return attrs


def parse_fence(info: str) -> Optional[tuple]:
    """Split a fence info string into (lang, tags:set, attrs:dict).

    Bare words become tags (e.g. notest, npu); `key=value` become attrs.
    Fence values cannot contain spaces - use a named @setup for that.
    Returns None when the info string is empty (a fence with no language).
    """
    tokens = info.strip().split()
    if not tokens:
        return None
    lang = tokens[0].lower()
    tags, attrs = set(), {}
    for token in tokens[1:]:
        if "=" in token:
            key, raw = token.split("=", 1)
            attrs[key] = _coerce_attr(key, raw)
        else:
            tags.add(token.lower())
    return lang, tags, attrs


def find_nested_blocks(content, open_re, close_re):
    """Return (value, start, end) for nested directive blocks, innermost first.

    `value` is group(1) of the opening match, `start` is the offset of the
    opening directive and `end` the offset just past its closing directive.
    Opens and closes are paired with a stack; a close without a pending open
    is ignored, and an open that is never closed produces no block.
    """
    close_spans = [(m.start(), m.end()) for m in close_re.finditer(content)]
    close_starts = {s for s, _ in close_spans}
    events = []  # (pos, kind, value, end)
    for m in open_re.finditer(content):
        if m.start() in close_starts:  # guard permissive open patterns
            continue
        events.append((m.start(), "open", m.group(1), m.end()))
    for s, e in close_spans:
        events.append((s, "close", "", e))
    events.sort(key=lambda ev: ev[0])

    stack, blocks = [], []
    for pos, kind, value, end in events:
        if kind == "open":
            stack.append((value, pos))
        elif kind == "close" and stack:
            open_value, open_pos = stack.pop()
            blocks.append((open_value, open_pos, end))
    # Shortest span first, so a lookup hits the innermost enclosing block.
    blocks.sort(key=lambda b: b[2] - b[1])
    return blocks


def infer_scope(blocks, pos) -> str:
    """Return the value of the first block containing `pos`, else "all".

    `blocks` is expected in the order produced by find_nested_blocks(), so
    the first hit is the innermost enclosing block.
    """
    for value, start, end in blocks:
        if start <= pos < end:
            return value
    return "all"
def resolve_setup(value, defs, platform) -> Optional[str]:
    """Resolve a fence `setup=` value to the command to run first.

    Returns None when no setup was requested. When `value` names an entry in
    `defs`, returns that entry's command for `platform` (None if it has none).
    Any other value is returned unchanged and treated as a raw command.
    """
    if not value:
        return None
    if value in defs:
        return defs[value].get(platform)
    return value  # raw command (backward compatible)


def extract_var_definitions(content: str) -> dict:
    """Collect `@var` directives into {var id: {device or "all": value}}.

    The device comes from an explicit `device=` attribute, else from the
    enclosing `@device` block, else it is "all". A comma-separated device
    list registers the same value for each listed device. Directives
    without an `id` or a `value` attribute are ignored; an empty value is
    accepted.
    """
    defs: dict[str, dict[str, str]] = {}
    device_blocks = find_nested_blocks(content, DEVICE_OPEN_RE, DEVICE_CLOSE_RE)
    for m in VAR_RE.finditer(content):
        attrs = parse_attr_string(m.group(1))
        vid, val = attrs.get("id"), attrs.get("value")
        if not vid or val is None:
            continue
        device_value = attrs.get("device") or infer_scope(device_blocks, m.start())
        per_device = defs.setdefault(vid, {})
        if device_value == "all":
            per_device["all"] = val
        else:
            for d in (x.strip() for x in device_value.split(",")):
                if d:
                    per_device[d] = val
    return defs


# ${name} reference inside a code block.
_VAR_REF_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")


def substitute_vars(code, var_defs, device, where) -> str:
    """Replace `${name}` references in `code` with their `@var` values.

    The value for `device` is preferred, falling back to the "all" value.
    References to names that are not defined in `var_defs` are left as-is
    (they may be ordinary shell variables). `where` only labels the error.

    Raises:
        ValueError: a defined variable has neither a value for `device` nor
            an "all" value.
    """
    if not var_defs:
        return code

    def repl(m):
        name = m.group(1)
        if name not in var_defs:
            return m.group(0)
        mapping = var_defs[name]
        # Test against None, not truthiness: an explicitly empty value
        # (value="") is a valid definition and must not count as missing.
        val = mapping.get(device) if device else None
        if val is None:
            val = mapping.get("all")
        if val is None:
            raise ValueError(
                f"{where}: @var '${{{name}}}' has no value for device "
                f"'{device or ''}' (have: {', '.join(sorted(mapping))})")
        return val

    return _VAR_REF_RE.sub(repl, code)
# --------------------------------------------------------------------------- #
# Execution
# --------------------------------------------------------------------------- #
def strip_hide(code: str) -> str:
    """Drop the trailing `#hide` marker (and the whitespace before it) from lines.

    Lines are re-joined with "\\n", so a trailing newline in `code` is not kept.
    """
    return "\n".join(
        re.sub(r"\s*#hide\s*$", "", line) if line.rstrip().endswith("#hide") else line
        for line in code.splitlines()
    )


def check_python_syntax(code: str) -> tuple[bool, str]:
    """Compile `code` without executing it and report (ok, error detail).

    `compile()` reports invalid source as SyntaxError, but on Python < 3.12
    source containing NUL bytes raises ValueError instead; both are reported
    as a failed check rather than escaping and aborting the scan.
    """
    try:
        compile(code, "", "exec")
    except SyntaxError as e:
        return False, f"SyntaxError: {e}"
    except ValueError as e:
        return False, f"ValueError: {e}"
    return True, ""


_WSL_CACHE: Optional[bool] = None


def wsl_available() -> bool:
    """True if a WSL distro is usable (Windows only). Cached."""
    global _WSL_CACHE
    if _WSL_CACHE is None:
        if sys.platform != "win32":
            _WSL_CACHE = False
        else:
            try:
                _WSL_CACHE = subprocess.run(
                    ["wsl.exe", "-e", "true"], capture_output=True, timeout=30
                ).returncode == 0
            except Exception:  # noqa: BLE001
                # Any failure to launch or finish the probe means "not usable".
                _WSL_CACHE = False
    return _WSL_CACHE


def _run(cmd, timeout, cwd) -> tuple[bool, str]:
    """Run `cmd` and return (exit code == 0, last 500 chars of output).

    stderr is reported when it is non-empty, otherwise stdout.
    subprocess.TimeoutExpired propagates to the caller.
    """
    p = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout,
                       cwd=str(cwd) if cwd else None)
    return p.returncode == 0, (p.stderr or p.stdout)[-500:]


def run_in_wsl(body, timeout, cwd) -> tuple[bool, str]:
    """Run a bash snippet inside WSL, using the Windows sandbox dir as cwd."""
    cmd = ["wsl.exe"]
    if cwd:
        # The directory is handed to wsl.exe via --cd, not as the process cwd.
        cmd += ["--cd", str(cwd)]
    cmd += ["bash", "-lc", body]
    return _run(cmd, timeout, None)


def check_format(lang, code) -> tuple[bool, str]:
    """Lint a non-executable block: verify it really is valid for its language.

    Returns (ok, detail). JSON, YAML and TOML are parsed; YAML and TOML pass
    with a "skipped" detail when their parser cannot be imported. CMake only
    gets a parenthesis-balance check, "text" always passes, and any other
    language passes with an empty detail.
    """
    try:
        if lang == "json":
            json.loads(code)
            return True, "valid JSON"
        if lang in ("yaml", "yml"):
            try:
                import yaml  # type: ignore
            except ImportError:
                return True, "skipped: pyyaml not installed"
            list(yaml.safe_load_all(code))
            return True, "valid YAML"
        if lang == "toml":
            try:
                import tomllib  # type: ignore
            except ImportError:
                return True, "skipped: tomllib unavailable"
            tomllib.loads(code)
            return True, "valid TOML"
        if lang == "cmake":
            if code.count("(") != code.count(")"):
                return False, "CMake: unbalanced parentheses"
            return True, "CMake parentheses balanced"
        if lang == "text":
            return True, "plain text"
    except Exception as e:  # noqa: BLE001
        # Parser errors differ per library; any of them means "invalid".
        return False, f"{lang} format error: {e}"
    return True, ""


def compile_and_run(lang, code, timeout, cwd, use_wsl) -> tuple[bool, str]:
    """Compile a C/C++ block (and run it if it defines main()). Uses WSL
    gcc/g++ on Windows, native gcc/g++ on Linux. Snippets without main() get a
    `-fsyntax-only` compile check.

    The source is written to `_doc_block.c` / `_doc_block.cpp` inside `cwd`.
    Returns (ok, detail); on Windows without WSL it fails with an explanatory
    message instead of attempting a compile.
    """
    is_c = lang == "c"
    comp = "gcc" if is_c else "g++"
    std = "" if is_c else "-std=c++17"
    src = f"_doc_block.{'c' if is_c else 'cpp'}"
    (Path(cwd) / src).write_text(strip_hide(code), encoding="utf-8")
    if re.search(r"\bmain\s*\(", code):
        body = f"{comp} {std} {src} -o _doc_block.out && ./_doc_block.out"
    else:
        body = f"{comp} {std} -fsyntax-only {src}"
    if use_wsl:
        return run_in_wsl(body, timeout, cwd)
    if sys.platform != "win32":
        return _run(["bash", "-lc", body], timeout, cwd)
    return False, "no C/C++ compiler available (need WSL or a native gcc/g++)"
timeout, cwd) + if lang in ("powershell", "pwsh", "ps1"): + body = f"{setup}\n{code}" if setup else code + return _run(["powershell", "-NoProfile", "-Command", body], timeout, cwd) + if lang in ("bat", "cmd", "batch"): + body = code if not setup else "\n".join([setup, code]) + with tempfile.NamedTemporaryFile("w", suffix=".bat", delete=False, + encoding="utf-8", dir=str(cwd)) as f: + f.write(body) + path = f.name + return _run(["cmd", "/c", path], timeout, cwd) + if lang in COMPILE_LANGS: + return compile_and_run(lang, code, timeout, cwd, use_wsl) + return True, "skipped (unsupported lang)" + except subprocess.TimeoutExpired: + return False, f"timeout after {timeout}s" + except Exception as e: # noqa: BLE001 + return False, f"runner error: {e}" + + +# --------------------------------------------------------------------------- # +# Main +# --------------------------------------------------------------------------- # +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--docs", type=Path, default=Path("docs")) + ap.add_argument("--run", action="store_true", help="execute runnable blocks (default-on)") + ap.add_argument("--syntax-only", action="store_true", help="python syntax checks only") + ap.add_argument("--device", choices=sorted(DEVICE_TAGS), default=None, + help="only run blocks tagged with this device (or untagged)") + ap.add_argument("--platform", choices=["windows", "linux"], default=None, + help="host platform for native execution (defaults to host OS)") + ap.add_argument("--no-wsl", action="store_true", + help="do not use WSL; Linux (bash/C/C++/@os:linux) blocks then skip on Windows") + ap.add_argument("--timeout", type=int, default=300) + ap.add_argument("--output-json", type=Path, default=Path("code-results.json")) + ap.add_argument("--failed-pages", type=Path, default=Path("failed-pages.txt")) + args = ap.parse_args() + + platform = args.platform or ("windows" if sys.platform == "win32" else "linux") + host_windows = sys.platform == 
"win32" + use_wsl = (not args.no_wsl) and wsl_available() + linux_ok = (not host_windows) or use_wsl + results, failed_pages = [], set() + + # Scan every Markdown file (.mdx pages AND .md example READMEs) - no doc + # with code blocks is left untested. + docs_files = sorted(set(args.docs.rglob("*.mdx")) | set(args.docs.rglob("*.md"))) + for mdx in docs_files: + raw = mdx.read_text(encoding="utf-8") + content = resolve_requires(raw, args.docs) + rel = mdx.as_posix() + + os_blocks = find_nested_blocks(content, OS_OPEN_RE, OS_CLOSE_RE) + device_blocks = find_nested_blocks(content, DEVICE_OPEN_RE, DEVICE_CLOSE_RE) + setup_defs = extract_setup_definitions(content) + var_defs = extract_var_definitions(content) + + # Per-page sandbox: all of a page's blocks run in ONE temp dir, in order, + # so files/downloads from earlier blocks persist for later blocks + # ("keep the environment open"), and nothing pollutes the docs tree. + page_dir = Path(tempfile.mkdtemp(prefix="docs-ci-")) + + for i, m in enumerate(FENCE_RE.finditer(content)): + parsed = parse_fence(m.group(1)) + if not parsed: + continue + lang, tags, attrs = parsed + code = m.group(2) + pos = m.start() + + if SKIP_TAG in tags: + results.append(_rec(rel, i, lang, tags, False, "skipped", "notest")) + continue + if lang not in TESTABLE: + results.append(_rec(rel, i, lang, tags, False, "skipped", f"{lang}: non-runnable lang")) + continue + + # Which environment must this block run in? Explicit @os scope wins; + # else infer from language (nix shells / C-C++ -> linux; ps/cmd -> windows). 
+ block_os = infer_scope(os_blocks, pos) + if block_os in ("windows", "linux"): + need = block_os + elif lang in (RUNNABLE_SHELL_NIX | COMPILE_LANGS): + need = "linux" + elif lang in RUNNABLE_SHELL_WIN: + need = "windows" + else: + need = "any" + block_platform = need if need in ("windows", "linux") else platform + + # Device filter: fence tags take precedence, else surrounding @device block + block_devices = (tags & DEVICE_TAGS) + if not block_devices: + scoped = infer_scope(device_blocks, pos) + if scoped != "all": + block_devices = {d.strip() for d in scoped.split(",") if d.strip()} + if args.device and block_devices and args.device not in block_devices: + results.append(_rec(rel, i, lang, tags, False, "skipped", f"device!={args.device}")) + continue + + # Resolve var substitutions and setup + try: + code = substitute_vars(code, var_defs, args.device, f"{rel}#block{i}") + except ValueError as e: + failed_pages.add(rel) + results.append(_rec(rel, i, lang, tags, False, "fail", str(e))) + continue + setup = resolve_setup(attrs.get("setup"), setup_defs, block_platform) + timeout = attrs.get("timeout", args.timeout) + workdir = (page_dir / attrs["workdir"]) if attrs.get("workdir") else page_dir + workdir.mkdir(parents=True, exist_ok=True) + cont = attrs.get("continue_on_error", False) + + status, detail, ran = "skipped", "", False + + # Non-executable languages: lint that they're valid (json/yaml/etc.). + if lang in FORMAT_LANGS: + ok, detail = check_format(lang, code) + status = "pass" if ok else "fail" + if not ok and not cont: + failed_pages.add(rel) + results.append(_rec(rel, i, lang, tags, False, status, detail)) + continue + + # Python: always syntax-check (cloud + hardware). + if lang == "python": + ok, detail = check_python_syntax(code) + status = "pass" if ok else "fail" + if not ok and not cont: + failed_pages.add(rel) + + # Execution (hardware/run mode only). 
+ if args.run and not args.syntax_only: + if need == "windows" and not host_windows: + status, detail = "skipped", "needs Windows runner" + elif need == "linux" and not linux_ok: + status, detail = "skipped", "needs Linux/WSL runner" + else: + ok, detail = run_block(lang, code, timeout, setup, workdir, use_wsl) + ran = True + status = "pass" if ok else "fail" + if not ok and not cont: + failed_pages.add(rel) + + results.append(_rec(rel, i, lang, tags, ran, status, detail)) + + shutil.rmtree(page_dir, ignore_errors=True) + + args.output_json.write_text(json.dumps(results, indent=2), encoding="utf-8") + args.failed_pages.write_text("\n".join(sorted(failed_pages)), encoding="utf-8") + + n_fail = len(failed_pages) + n_exec = sum(1 for r in results if r["executed"]) + print(f"Checked {len(results)} blocks across docs " + f"({n_exec} executed); {n_fail} page(s) with failures.") + for r in results: + if r["status"] == "fail": + print(f" FAIL {r['page']} block#{r['block']} ({r['lang']}): {r['detail'][:160]}") + sys.exit(1 if n_fail else 0) + + +def _rec(page, block, lang, tags, executed, status, detail): + return {"page": page, "block": block, "lang": lang, + "tags": sorted(tags), "executed": executed, + "status": status, "detail": detail} + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/fetch_models.py b/.github/scripts/fetch_models.py new file mode 100644 index 00000000..26b98089 --- /dev/null +++ b/.github/scripts/fetch_models.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Regenerate the Ryzen AI model tables from the official Hugging Face collections. + +Writes each category's model-table block (between MODELS_TABLE markers) in +docs/llms/index.mdx, docs/vision/index.mdx, and docs/audio/index.mdx: + - LLMs: combined Hybrid NPU/GPU, NPU 4K, NPU 16K, and NPU-only LFM2 (ONNX) + variants, keyed by model name; the model name links to Hugging Face. 
+ - Vision Models: Stable Diffusion image-generation models (capabilities from + the amd/sd-sandbox project), incl. SD3 / SD3.5. + - Audio Models: Whisper + Parakeet ASR models. + +Live collections are unioned with small curated supplements (SD3/3.5 capability +metadata, audio models) that aren't expressible from collection metadata alone. + +Usage: + python .github/scripts/fetch_models.py +""" + +import json +import re +import sys +import urllib.request +from pathlib import Path + +VERSION = "1.7.1" +DOCS = Path(__file__).resolve().parents[2] / "docs" +# Each top-level category page hosts its own model table between these markers. +LLMS_INDEX = DOCS / "llms" / "index.mdx" +VISION_INDEX = DOCS / "vision" / "index.mdx" +AUDIO_INDEX = DOCS / "audio" / "index.mdx" +START = "{/* MODELS_TABLE_START" +END = "{/* MODELS_TABLE_END */}" + +# (column header, collection slug, id suffix to strip, short cell label) +LLM_COLLECTIONS = [ + ("Hybrid NPU / GPU", "amd/ryzen-ai-171-hybrid", "_rai_1.7.1_hybrid", "Hybrid"), + ("NPU 4K", "amd/ryzen-ai-171-npu-4k", "_rai_1.7.1_npu_4K", "4K"), + ("NPU 16K", "amd/ryzen-ai-171-npu-16k", "_rai_1.7.1_npu_16K", "16K"), +] +LFM2 = ("amd/ryzen-ai-171-npu-lfm2-models", "_rai_1.7.1") # folded into the LLM table +SD_COLLECTION = "amd/ryzen-ai-171-sd-models" + +# Curated Vision (Stable Diffusion) capabilities, keyed by HF id. +# Source: amd/sd-sandbox "Supported Models" table. 
def fetch(slug: str) -> list[str]:
    """Return the ids of the model items in a Hugging Face collection.

    Best effort: any failure (network, HTTP, JSON, unexpected payload) is
    reported as a warning on stderr and yields an empty list.
    """
    url = f"https://huggingface.co/api/collections/{slug}"
    try:
        with urllib.request.urlopen(url, timeout=60) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
        model_ids = []
        for item in payload.get("items", []):
            if item.get("type") == "model":
                model_ids.append(item["id"])
        return model_ids
    except Exception as e:  # noqa: BLE001
        print(f"WARN: could not fetch {slug}: {e}", file=sys.stderr)
        return []


def hf(idv: str) -> str:
    """Return the Hugging Face URL for a repo id."""
    return "https://huggingface.co/{}".format(idv)


def base_name(model_id: str, suffix: str) -> str:
    """Strip the owner prefix and, when present, the trailing `suffix`.

    Only the text before the first "/" counts as the owner; an id without a
    "/" is used whole. An empty suffix strips nothing.
    """
    _owner, slash, repo = model_id.partition("/")
    name = repo if slash else model_id
    if not suffix:
        return name
    return name.removesuffix(suffix)


def yes(flag: bool) -> str:
    """Render a capability flag as a table cell: "Yes" or empty."""
    if flag:
        return "Yes"
    return ""
name -> {label: model_id} + rows: dict[str, dict[str, str]] = {} + for _h, slug, suffix, label in LLM_COLLECTIONS: + for mid in fetch(slug): + rows.setdefault(base_name(mid, suffix), {})[label] = mid + # Fold LFM2 in as the "NPU (ONNX)" column. + lfm2_slug, lfm2_suffix = LFM2 + for mid in fetch(lfm2_slug): + rows.setdefault(base_name(mid, lfm2_suffix), {})["ONNX"] = mid + + labels = [l for _h, _s, _x, l in LLM_COLLECTIONS] + ["ONNX"] + headers = ["Model"] + [h for h, _s, _x, _l in LLM_COLLECTIONS] + ["NPU (ONNX)"] + out = ["| " + " | ".join(headers) + " |", + "| " + " | ".join(["---"] * len(headers)) + " |"] + for base in sorted(rows, key=str.lower): + variants = rows[base] + # Model name links to the first available variant repo. + first = next((variants[l] for l in labels if l in variants), None) + name_cell = f"[{base}]({hf(first)})" if first else base + cells = [name_cell] + for label in labels: + mid = variants.get(label) + tag = {"Hybrid": "Hybrid", "4K": "4K", "16K": "16K", "ONNX": "ONNX"}[label] + cells.append(f"[{tag}]({hf(mid)})" if mid else "") + out.append("| " + " | ".join(cells) + " |") + return "\n".join(out) + + +def vision_table() -> str: + known = {hf_id for hf_id, *_ in VISION_META} + # The amd/ SD3 and SD3.5 mirrors are private; we link the public stabilityai + # repos (in VISION_META) instead, so exclude the private ids from the append. + skip = known | {"amd/stable-diffusion-3-medium-amdnpu", + "amd/stable-diffusion-3.5-medium-amdnpu"} + out = ["| Model | Output Resolution | Text-to-Image | Image-to-Image | ControlNet |", + "| --- | --- | --- | --- | --- |"] + for hf_id, display, res, t2i, i2i, cn in VISION_META: + out.append(f"| [{display}]({hf(hf_id)}) | {res} | {yes(t2i)} | {yes(i2i)} | {yes(cn)} |") + # Append any collection models not already covered by the curated metadata. 
def audio_table() -> str:
    """Render the curated AUDIO entries as a Markdown table."""
    header = ["| Model | Parameters | Task |", "| --- | --- | --- |"]
    rows = [
        f"| [{display}]({hf(hf_id)}) | {params} | {task} |"
        for display, hf_id, params, task in AUDIO
    ]
    return "\n".join(header + rows)


def inject(path: Path, table_md: str) -> None:
    """Replace the MODELS_TABLE marker block in `path` with `table_md`.

    Everything from the start marker through the end marker is rewritten,
    including a regenerated "do not edit by hand" start comment. Exits the
    process with status 1 (without writing) when the markers are not found.
    """
    text = path.read_text(encoding="utf-8")
    pattern = re.compile(re.escape(START) + r".*?" + re.escape(END), re.DOTALL)
    block = (f"{START} - generated by .github/scripts/fetch_models.py; "
             f"do not edit by hand */}}\n\n{table_md}\n\n{END}")
    # A callable replacement keeps backslashes in the table from being
    # interpreted as regex escapes.
    new_text, hits = pattern.subn(lambda _m: block, text)
    if hits == 0:
        print(f"ERROR: markers not found in {path}", file=sys.stderr)
        sys.exit(1)
    path.write_text(new_text, encoding="utf-8")
    print(f"Updated {path.relative_to(DOCS)}")


def main() -> None:
    """Regenerate the model table on each category index page."""
    targets = (
        (LLMS_INDEX, llm_table),
        (VISION_INDEX, vision_table),
        (AUDIO_INDEX, audio_table),
    )
    for index_path, build_table in targets:
        inject(index_path, build_table())
def frontmatter(slug):
    """Return (title, description) read from a page's frontmatter.

    `slug` is the page path under DOCS without the `.mdx` extension. A
    missing file yields (slug, ""); a missing title falls back to the slug
    and a missing description to the empty string.
    """
    page = DOCS / (slug + ".mdx")
    if not page.exists():
        return (slug, "")
    text = page.read_text(encoding="utf-8")
    found = re.search(r"^---\s*(.*?)\s*---", text, re.S)
    header = found.group(1) if found else ""

    def field(key, fallback):
        # One `key: value` line, with optional double quotes around the value.
        hit = re.search(rf'^{key}:\s*"?(.*?)"?\s*$', header, re.M)
        return hit.group(1) if hit else fallback

    return (field("title", slug), field("description", ""))


def flatten(pages):
    """Return every page slug in `pages`, descending into nested groups.

    Strings are kept in order; dict entries are expanded through their
    "pages" list; entries of any other type are dropped.
    """
    slugs = []
    for entry in pages:
        if isinstance(entry, dict):
            slugs += flatten(entry["pages"])
        elif isinstance(entry, str):
            slugs.append(entry)
    return slugs
"\n{/* CARDS_END */}" + ) + new_text = BLOCK_RE.sub(lambda _m: replacement, text) + if new_text != text: + index_file.write_text(new_text, encoding="utf-8") + updated += 1 + print(f" {index_slug}: {len(children)} cards") + + print(f"Updated {updated} index pages.") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/generate_codeowners.py b/.github/scripts/generate_codeowners.py new file mode 100644 index 00000000..a0481936 --- /dev/null +++ b/.github/scripts/generate_codeowners.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +Generate docs/CODEOWNERS from the ACTUAL docs tree - no external spreadsheet. + +Ownership lives in each page as a hidden header (the single source of truth): + + {/* owner: */} + +This script walks every page under docs/ (`.mdx` AND `.md`) and emits, in order: + + 1. a default owner (catch-all), + 2. infra rules (CI config) owned by the docs lead, + 3. one rule per top-level section folder, set to that folder's dominant owner, + 4. a per-page rule for every page (most specific - GitHub uses the LAST match). + +There is intentionally NO hard-coded example/path map: CODEOWNERS always +reflects the current folder structure. Pages without an owner header fall back +to the default owner. 
DEFAULT_OWNER = "dwithchenna"   # catch-all owner for anything unmatched
INFRA_OWNER = "bconsolvo"       # docs lead owns CI / config

# Hidden in-page header: {/* owner: github-id */}, optionally followed by
# "| free text" before the closing */}. The id is captured as group "id",
# which is the name page_owner() reads.
OWNER_RE = re.compile(r"\{/\*\s*owner:\s*(?P<id>[^|*]+?)\s*(?:\|[^*]*)?\*/\}")

COL = 56  # path column width for alignment


def page_owner(path: Path) -> str:
    """Return the owner id declared in the page's header comment.

    Falls back to DEFAULT_OWNER when the file cannot be read or decoded as
    UTF-8, or when it carries no owner header.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Unreadable or non-UTF-8 page: treat it as having no header.
        return DEFAULT_OWNER
    m = OWNER_RE.search(text)
    return m.group("id").strip() if m else DEFAULT_OWNER


def rule(path: str, owner: str) -> str:
    """Format one CODEOWNERS line: the path padded to COL columns, then @owner."""
    return f"{path:<{COL}} @{owner}"
GitHub uses the LAST matching", + "# rule, so the per-page rules at the bottom override the folder defaults.", + "", + "# ----- Default owner (catch-all) -----", + rule("*", DEFAULT_OWNER), + "", + "# ----- CI / configuration (docs lead) -----", + rule("/.github/", INFRA_OWNER), + rule("/docs/docs.json", INFRA_OWNER), + "", + "# ----- Section defaults (dominant owner per top-level folder) -----", + ] + for section in sorted(sections): + dominant = Counter(owners[p] for p in sections[section]).most_common(1)[0][0] + lines.append(rule(f"/docs/{section}/", dominant)) + + lines += ["", "# ----- Top-level pages -----"] + for p in sorted(top_pages, key=lambda p: p.name): + lines.append(rule(f"/docs/{p.name}", owners[p])) + + for section in sorted(sections): + lines += ["", f"# ----- {section}/ -----"] + for p in sorted(sections[section], key=lambda p: p.relative_to(DOCS).as_posix()): + lines.append(rule("/docs/" + p.relative_to(DOCS).as_posix(), owners[p])) + + OUT.write_text("\n".join(lines) + "\n", encoding="utf-8") + n_owners = len(set(owners.values())) + print(f"Wrote {OUT} - {len(pages)} pages, {len(sections)} sections, {n_owners} distinct owners.") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/notify_owner.py b/.github/scripts/notify_owner.py new file mode 100644 index 00000000..9645fc3f --- /dev/null +++ b/.github/scripts/notify_owner.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +""" +Compose a GitHub-native owner notification for failing docs pages. + +Resolves each failing page's owner (GitHub ID from the in-page header) and +writes a Markdown issue body that @mentions the owners. The notify-owner +workflow opens a GitHub issue with this body; the @mention triggers GitHub's +own notification/email to each owner. + +No email addresses and no SMTP are used or stored - notifications ride entirely +on GitHub's notification system, keyed off the owner's GitHub ID. 
def gha_output(**kv):
    """Append `key=value` lines to the file named by $GITHUB_OUTPUT.

    Does nothing when the GITHUB_OUTPUT environment variable is unset or
    empty. Values are written as-is, one per line.
    """
    target = os.environ.get("GITHUB_OUTPUT")
    if target:
        with open(target, "a", encoding="utf-8") as sink:
            sink.writelines(f"{key}={value}\n" for key, value in kv.items())
lines.append(f"### @{oid}") + for p in sorted(owners[oid]): + lines.append(f"- `{p}`") + lines.append("") + if args.notify_email: + lines.append(f"_A copy of this report is emailed to the Ryzen AI support " + f"distribution list ({args.notify_email})._") + lines.append("") + args.body_out.write_text("\n".join(lines), encoding="utf-8") + + assignees = ",".join(sorted(owners)) + gha_output(has_targets="true", assignees=assignees, title=title) + + print("=" * 70) + print(f"OWNER NOTIFICATION (GitHub-native) - {len(pages)} page(s)") + print("=" * 70) + for oid in sorted(owners): + print(f" @{oid}: {len(owners[oid])} page(s)") + print(f"Assignees: {assignees}") + print(f"Title: {title}") + print(f"Body written to: {args.body_out}") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/record_run.py b/.github/scripts/record_run.py new file mode 100644 index 00000000..c6faec06 --- /dev/null +++ b/.github/scripts/record_run.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +""" +Append a CI run record to .github/scripts/ci-history.json. + +Called at the end of each CI workflow. Captures what ran, when, pass/fail, and +per-page results (with the resolved owner) so the dashboard can show history. 
+ +Reads: + - code-results.json (optional) from extract_code_blocks.py for per-page status + - run metadata from flags / GITHUB_* env vars + +Usage: + python .github/scripts/record_run.py --workflow "Test Code Samples" --status success \ + --code-results code-results.json --history .github/scripts/ci-history.json +""" + +import argparse +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from resolve_owner import resolve # noqa: E402 + + +def now_iso() -> str: + return datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds") + + +def pages_from_code_results(path: Path, docs_root: Path) -> list[dict]: + if not path or not path.exists(): + return [] + data = json.loads(path.read_text(encoding="utf-8")) + seen: dict[str, dict] = {} + for r in data: + page = r.get("page", "") + status = r.get("status") + if status not in ("pass", "fail"): + continue + # A page fails if any block fails; an earlier failure's detail is carried over. 
+ prev = seen.get(page) + page_status = "fail" if (status == "fail" or (prev and prev["status"] == "fail")) else "pass" + rel = page.split("docs/", 1)[-1] + owner_id = resolve(docs_root / rel) if (docs_root / rel).exists() else resolve(Path(page)) + seen[page] = (prev or {}) | { + "page": rel, + "owner_id": owner_id, + "check": "code-execution", + "status": page_status, + } + if status == "fail": + seen[page]["detail"] = (r.get("detail") or "")[:200] + return list(seen.values()) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--workflow", required=True) + ap.add_argument("--status", required=True, choices=["success", "failure"]) + ap.add_argument("--job", default=None, help="job name") + ap.add_argument("--script", default=None, help="primary script run") + ap.add_argument("--code-results", type=Path) + ap.add_argument("--docs", type=Path, default=Path("docs")) + ap.add_argument("--history", type=Path, default=Path(".github/scripts/ci-history.json")) + args = ap.parse_args() + + record = { + "run_id": os.environ.get("GITHUB_RUN_ID", f"local-{datetime.now():%Y%m%d-%H%M%S}"), + "workflow": args.workflow, + "event": os.environ.get("GITHUB_EVENT_NAME", "local"), + "actor": os.environ.get("GITHUB_ACTOR", "local"), + "branch": os.environ.get("GITHUB_REF_NAME", "local"), + "sha": (os.environ.get("GITHUB_SHA", "local"))[:12], + "started_at": os.environ.get("RUN_STARTED_AT", now_iso()), + "finished_at": now_iso(), + "status": args.status, + "jobs": [], + "pages": pages_from_code_results(args.code_results, args.docs), + } + if args.job: + record["jobs"].append({"name": args.job, "script": args.script or "", "status": args.status}) + + hist_path = args.history + if hist_path.exists(): + hist = json.loads(hist_path.read_text(encoding="utf-8")) + else: + hist = {"schema": 1, "runs": []} + hist["runs"].insert(0, record) + hist_path.parent.mkdir(parents=True, exist_ok=True) + hist_path.write_text(json.dumps(hist, indent=2) + "\n", encoding="utf-8") + 
print(f"Recorded run {record['run_id']} ({args.workflow}: {args.status}) " + f"with {len(record['pages'])} page result(s).") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/report.py b/.github/scripts/report.py new file mode 100644 index 00000000..34642683 --- /dev/null +++ b/.github/scripts/report.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" +Build a readable per-page code-block report from a run's JSON results. + +Input : the --output-json file from extract_code_blocks.py (per-block records). +Outputs: + - CODE_TEST_REPORT.md : a dashboard - summary table (one row per page) + a + per-page detail table (one row per block: #, lang, result, short detail). + - optionally injects the summary table into a Mintlify page between + {/* RESULTS_START */} and {/* RESULTS_END */} (e.g. the CI dashboard page). + +Covers EVERY .mdx and .md under --docs, including pages with zero code blocks +(shown as "no code"), so nothing is silently missing. + +Usage: + python .github/scripts/report.py --results report_run.json --docs docs \ + --out .github/scripts/CODE_TEST_REPORT.md \ + --dashboard docs/reference/ci-dashboard.mdx +""" + +import argparse +import json +import os +import re +import sys +from collections import defaultdict +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from resolve_owner import resolve # noqa: E402 + +RUNNABLE = {"python", "bash", "sh", "shell", "powershell", "pwsh", "ps1", + "cmd", "bat", "batch"} + + +def clean(detail: str, n: int = 90) -> str: + s = re.sub(r"\s+", " ", (detail or "")).strip().replace("|", "\\|") + return (s[:n] + "...") if len(s) > n else s + + +def result_label(r: dict) -> str: + if r["status"] == "pass": + return "PASS" + if r["status"] == "fail": + return "FAIL" + # skipped + if "notest" in (r.get("tags") or []): + return "skip (notest)" + return f"skip ({r['lang']} not runnable)" + + +def main() -> None: + ap = argparse.ArgumentParser() + 
ap.add_argument("--results", type=Path, default=Path("report_run.json")) + ap.add_argument("--docs", type=Path, default=Path("docs")) + ap.add_argument("--out", type=Path, default=Path(".github/scripts/CODE_TEST_REPORT.md")) + ap.add_argument("--dashboard", type=Path, default=None) + args = ap.parse_args() + + records = json.loads(args.results.read_text(encoding="utf-8")) if args.results.exists() else [] + by_page: dict[str, list[dict]] = defaultdict(list) + for r in records: + by_page[r["page"]].append(r) + + # Every page under docs (so 0-block pages are visible too) + all_pages = sorted( + p.as_posix() for p in (set(args.docs.rglob("*.mdx")) | set(args.docs.rglob("*.md"))) + ) + + rows, details, tot = [], [], defaultdict(int) + for page in all_pages: + blocks = sorted(by_page.get(page, []), key=lambda r: r["block"]) + n_pass = sum(1 for r in blocks if r["status"] == "pass") + n_fail = sum(1 for r in blocks if r["status"] == "fail") + n_skip = sum(1 for r in blocks if r["status"] == "skipped") + owner = resolve(Path(page)) + tot["pass"] += n_pass; tot["fail"] += n_fail; tot["skip"] += n_skip + tot["blocks"] += len(blocks) + + if not blocks: + status = "no code" + elif n_fail: + status = f"{n_fail} FAIL" + elif n_pass: + status = "all pass" + else: + status = "all skip" + rel = page.split("docs/", 1)[-1] + rows.append((rel, owner, len(blocks), n_pass, n_fail, n_skip, status)) + + if blocks: + details.append(f"\n### `{page}` - @{owner} - " + f"{n_pass} pass / {n_fail} fail / {n_skip} skip\n") + details.append("| Block | Lang | Result | Detail |") + details.append("|------:|------|--------|--------|") + for r in blocks: + details.append(f"| {r['block']} | {r['lang']} | {result_label(r)} | {clean(r['detail'])} |") + + pages_with = sum(1 for r in rows if r[2] > 0) + pages_fail = sum(1 for r in rows if r[4] > 0) + + # ---- Summary table (markdown) ---- + summary = ["| Page | Owner | Blocks | Pass | Fail | Skip | Status |", + 
"|------|-------|-------:|-----:|-----:|-----:|--------|"] + for rel, owner, nb, np_, nf, ns, status in rows: + summary.append(f"| {rel} | @{owner} | {nb} | {np_} | {nf} | {ns} | {status} |") + summary_md = "\n".join(summary) + + header = ( + f"_Totals: {len(all_pages)} pages ({pages_with} with code, {pages_fail} with " + f"failures) - {tot['blocks']} blocks: **{tot['pass']} pass**, " + f"**{tot['fail']} fail**, {tot['skip']} skip._" + ) + + out = [ + "# Code-block test report (per page)", + "", + "Generated by `.github/scripts/report.py` from a full run of " + "`extract_code_blocks.py --run`. Every runnable block is executed " + "(test-by-default); `notest` and non-runnable languages (cpp, text, " + "json, ...) show as `skip`.", + "", + header, + "", + "## Summary (every page)", + "", + summary_md, + "", + "## Per-page detail (pages with code blocks)", + *details, + "", + ] + args.out.write_text("\n".join(out) + "\n", encoding="utf-8") + print(f"Wrote {args.out} ({len(all_pages)} pages, {tot['blocks']} blocks).") + + # ---- Inject summary into a Mintlify page between markers ---- + if args.dashboard and args.dashboard.exists(): + text = args.dashboard.read_text(encoding="utf-8") + block = "{/* RESULTS_START - generated by .github/scripts/report.py; do not edit by hand */}\n\n" + \ + header + "\n\n" + summary_md + "\n\n{/* RESULTS_END */}" + new = re.sub(r"\{/\* RESULTS_START.*?\{/\* RESULTS_END \*/\}", lambda _m: block, text, flags=re.S) + if new != text: + args.dashboard.write_text(new, encoding="utf-8") + print(f"Updated {args.dashboard} results table.") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/resolve_owner.py b/.github/scripts/resolve_owner.py new file mode 100644 index 00000000..c2c303a8 --- /dev/null +++ b/.github/scripts/resolve_owner.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +""" +Resolve the owner of a documentation page from its in-file owner header. + +Ownership is the page owner's GitHub ID only (no email). 
Notifications are sent +GitHub-natively by @mentioning the ID, which triggers GitHub's own email. + +Given a .mdx/.md file, print "<github_id>" and emit a GitHub Actions output. +Falls back to the default owner when no header is present. +""" + +import os +import re +import sys +from pathlib import Path + +DEFAULT_OWNER_ID = "dwithchenna" + +# {/* owner: <github_id> */} (an optional legacy "| email" tail is ignored) +OWNER_RE = re.compile(r"\{/\*\s*owner:\s*(?P<id>[^|*]+?)\s*(?:\|[^*]*)?\*/\}") + + +def resolve(path: Path) -> str: + try: + text = path.read_text(encoding="utf-8") + except OSError: + return DEFAULT_OWNER_ID + m = OWNER_RE.search(text) + return m.group("id").strip() if m else DEFAULT_OWNER_ID + + +def main() -> None: + if len(sys.argv) < 2: + print("usage: resolve_owner.py <file>", file=sys.stderr) + sys.exit(2) + owner_id = resolve(Path(sys.argv[1])) + print(owner_id) + gh_out = os.environ.get("GITHUB_OUTPUT") + if gh_out: + with open(gh_out, "a", encoding="utf-8") as f: + f.write(f"owner_id={owner_id}\n") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/codeowners.yml b/.github/workflows/codeowners.yml new file mode 100644 index 00000000..3a02e021 --- /dev/null +++ b/.github/workflows/codeowners.yml @@ -0,0 +1,51 @@ +# ============================================================================= +# CODEOWNERS & Page Ownership +# ============================================================================= +# Guarantees every docs page has an owner, using standard tooling: +# - check_owners.py : every docs/ page must have an inline owner header +# - generate_codeowners.py : regenerate CODEOWNERS and fail if it is stale +# - codeowners-validator : validate CODEOWNERS syntax, no duplicate/shadowed +# patterns, no unowned files, and that listed owners are valid. 
+# ============================================================================= + +name: CODEOWNERS & Page Ownership + +on: + pull_request: + paths: ['docs/**', 'CODEOWNERS', '.github/**'] + push: + branches: [main] + paths: ['docs/**', 'CODEOWNERS', '.github/**'] + workflow_dispatch: + +permissions: + contents: read + +jobs: + page-owner-headers: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Every docs page has an owner header + run: python .github/scripts/check_owners.py + - name: CODEOWNERS is in sync with page headers + run: | + python .github/scripts/generate_codeowners.py + git diff --exit-code docs/CODEOWNERS || { + echo "::error::docs/CODEOWNERS is stale. Run 'python .github/scripts/generate_codeowners.py' and commit."; + exit 1; + } + + codeowners-validator: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Validate CODEOWNERS + uses: mszostok/codeowners-validator@v0.7.4 + with: + checks: "files,syntax,duppatterns" + experimental_checks: "notowned,avoid-shadowing" + github_access_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/link-check.yml b/.github/workflows/link-check.yml new file mode 100644 index 00000000..7f132e43 --- /dev/null +++ b/.github/workflows/link-check.yml @@ -0,0 +1,41 @@ +# ============================================================================= +# Link Check (external) +# ============================================================================= +# Internal links are checked on every PR by the "Mintlify Docs Checks" workflow +# (`mint broken-links`). External links rot over time (e.g. an old version URL +# going 404), so this job also checks EXTERNAL links, anchors, and redirects on +# a weekly schedule and on demand. On failure it opens/updates a tracking issue. 
+# ============================================================================= + +name: Link Check + +on: + schedule: + - cron: '0 8 * * 1' # Mondays 08:00 UTC + workflow_dispatch: + +permissions: + contents: read + issues: write + +jobs: + external-links: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Install Mintlify CLI + run: npm i -g mint + - name: Check all links (internal + external + anchors + redirects) + id: links + working-directory: docs + run: set -o pipefail; mint broken-links --check-external --check-anchors --check-redirects 2>&1 | tee ../link-report.txt + - name: Open an issue if links are broken + if: ${{ failure() && steps.links.outcome == 'failure' }} + uses: peter-evans/create-issue-from-file@v5 + with: + title: "Broken links detected by scheduled link check" + content-filepath: ./link-report.txt + labels: docs, broken-links, automated diff --git a/.github/workflows/mintlify-checks.yml b/.github/workflows/mintlify-checks.yml new file mode 100644 index 00000000..b8ceb035 --- /dev/null +++ b/.github/workflows/mintlify-checks.yml @@ -0,0 +1,150 @@ +# ============================================================================= +# Mintlify Docs Checks +# ============================================================================= +# Cloud-only, fast quality gate for the Mintlify docs in docs/. +# - validate : `mint validate` (build) + `mint broken-links` (internal links) +# - prose : Vale style (warning-level to start, per docs-as-code best practice) +# - spell : cspell typo detection +# - a11y : `mint a11y` accessibility (non-blocking) +# On failure, collect-failures records the changed docs pages and uploads them +# so the Notify Owner workflow can route the failure to the page owner. 
+# ============================================================================= + +name: Mintlify Docs Checks + +on: + pull_request: + paths: ['docs/**'] + push: + branches: [main] + paths: ['docs/**'] + workflow_dispatch: + +permissions: + contents: read + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Install Mintlify CLI + run: npm i -g mint + - name: Validate docs build + working-directory: docs + run: mint validate + - name: Check broken internal links + working-directory: docs + run: mint broken-links + + # Every PR also gets an external + anchor + redirect link check. It is + # non-blocking (continue-on-error) because external sites can be transiently + # unreachable; the weekly "Link Check" workflow is the authoritative gate and + # opens a tracking issue on real failures. + external-links: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Install Mintlify CLI + run: npm i -g mint + - name: Check external links, anchors, and redirects + working-directory: docs + run: mint broken-links --check-external --check-anchors --check-redirects + + a11y: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Install Mintlify CLI + run: npm i -g mint + - name: Accessibility check + working-directory: docs + run: mint a11y + + prose: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Vale prose lint (warning-level) + uses: vale-cli/vale-action@v2 + with: + files: docs + vale_flags: "--config=.github/scripts/.vale.ini" + fail_on_error: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + spell: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Spell check + uses: 
streetsidesoftware/cspell-action@v6 + with: + files: 'docs/**/*.mdx' + config: .github/scripts/cspell.json + + collect-failures: + needs: [validate, prose, spell] + if: failure() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Collect changed docs pages + run: | + base="${{ github.event.pull_request.base.sha || github.event.before }}" + if [ -z "$base" ] || [ "$base" = "0000000000000000000000000000000000000000" ]; then + base="HEAD~1" + fi + git diff --name-only "$base" HEAD -- 'docs/**/*.mdx' > failed-pages.txt || true + echo "Changed docs pages:"; cat failed-pages.txt + - uses: actions/upload-artifact@v4 + with: + name: failed-pages-checks + path: failed-pages.txt + + # ---- Record this run to the CI history (dashboard data) ---- + record: + needs: [validate, prose, spell] + if: always() + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Append run to ci-history.json + env: + RUN_STATUS: ${{ (needs.validate.result == 'success' && needs.prose.result == 'success' && needs.spell.result == 'success') && 'success' || 'failure' }} + run: | + python .github/scripts/record_run.py \ + --workflow "Mintlify Docs Checks" \ + --status "$RUN_STATUS" \ + --history .github/scripts/ci-history.json + - name: Commit history (main) or upload artifact + run: | + if [ "${{ github.ref }}" = "refs/heads/main" ]; then + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add .github/scripts/ci-history.json + git diff --staged --quiet || (git commit -m "ci: record docs CI run [skip ci]" && git push) + fi + - uses: actions/upload-artifact@v4 + with: + name: ci-history-checks + path: .github/scripts/ci-history.json diff --git a/.github/workflows/notify-owner.yml b/.github/workflows/notify-owner.yml new file mode 100644 index 00000000..9d0030aa --- 
/dev/null +++ b/.github/workflows/notify-owner.yml @@ -0,0 +1,122 @@ +# ============================================================================= +# Notify Owner On Failure +# ============================================================================= +# When a docs check fails, route the failure to the page OWNER (resolved from +# the hidden owner header inside the page) using GitHub-native notifications. +# +# - workflow_run : real path. Fires after "Mintlify Docs Checks" or +# "Test Code Samples" complete; only acts on failure. +# Downloads the failed-pages artifacts from that run. +# - workflow_dispatch : demo path. Pick a page to simulate a failure for and +# watch the owner get resolved + notified. +# +# Two notifications, both keyed off the page's hidden owner header: +# 1. GitHub issue whose body @mentions the page owner(s) by GitHub ID (always; +# GitHub emails them through its own system - no individual emails stored). +# 2. A full report emailed to the shared Ryzen AI support distribution list +# (`NOTIFY_EMAIL`, default dl.ryzenai.support@amd.com). A DL is safe to keep +# in the open - it is not an individual's address. This step only runs when +# the SMTP_* secrets are set; otherwise it is skipped and only the issue is +# opened. +# ============================================================================= + +name: Notify Owner On Failure + +on: + workflow_run: + workflows: ["Mintlify Docs Checks", "Test Code Samples"] + types: [completed] + workflow_dispatch: + inputs: + file: + description: "Docs page to simulate a failure for (demo)" + required: false + default: "docs/installation.mdx" + +permissions: + contents: read + actions: read + issues: write + +jobs: + notify: + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'failure' }} + runs-on: ubuntu-latest + env: + # Shared distribution list for the full failure report (safe to be public). 
+ # Override with the NOTIFY_EMAIL repo variable if the DL ever changes. + NOTIFY_EMAIL: ${{ vars.NOTIFY_EMAIL || 'dl.ryzenai.support@amd.com' }} + # SMTP relay for the email step. Set these as repo secrets to enable email; + # if unset, the email step is skipped and only the GitHub issue is opened. + SMTP_SERVER: ${{ secrets.SMTP_SERVER }} + SMTP_PORT: ${{ secrets.SMTP_PORT }} + SMTP_USERNAME: ${{ secrets.SMTP_USERNAME }} + SMTP_PASSWORD: ${{ secrets.SMTP_PASSWORD }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Download failed-pages artifacts from the failed run + if: ${{ github.event_name == 'workflow_run' }} + uses: actions/download-artifact@v4 + with: + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + pattern: failed-pages* + merge-multiple: true + path: ./_failed + + - name: Assemble failing pages list + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "${{ github.event.inputs.file }}" > failed-pages.txt + else + cat ./_failed/*.txt 2>/dev/null | sort -u > failed-pages.txt || true + fi + echo "Failing pages:"; cat failed-pages.txt + + - name: Compose owner notification (resolve owner GitHub IDs) + id: compose + env: + GITHUB_REPOSITORY: ${{ github.repository }} + RUN_URL: ${{ github.event.workflow_run.html_url }} + run: | + python .github/scripts/notify_owner.py \ + --failed-pages failed-pages.txt \ + --notify-email "$NOTIFY_EMAIL" \ + --body-out owner-issue-body.md + + - name: Open issue mentioning the owner(s) + if: ${{ steps.compose.outputs.has_targets == 'true' }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh issue create \ + --repo "${{ github.repository }}" \ + --title "${{ steps.compose.outputs.title }}" \ + --body-file owner-issue-body.md \ + --label "docs,ci-failure" + + # Additionally email the full report to the shared Ryzen AI support DL. 
+ # Skipped automatically when SMTP_* secrets are not configured. + - name: Email full report to the Ryzen AI support DL + if: ${{ steps.compose.outputs.has_targets == 'true' && env.SMTP_SERVER != '' }} + uses: dawidd6/action-send-mail@v3 + with: + server_address: ${{ env.SMTP_SERVER }} + server_port: ${{ env.SMTP_PORT }} + username: ${{ env.SMTP_USERNAME }} + password: ${{ env.SMTP_PASSWORD }} + from: "Ryzen AI Docs CI <${{ env.SMTP_USERNAME }}>" + to: ${{ env.NOTIFY_EMAIL }} + subject: ${{ steps.compose.outputs.title }} + body: "Ryzen AI docs CI failed. Full per-owner report attached (and filed as a GitHub issue). Run: ${{ github.event.workflow_run.html_url }}" + attachments: owner-issue-body.md + + - name: Upload notification body + uses: actions/upload-artifact@v4 + with: + name: owner-notification + path: owner-issue-body.md diff --git a/.github/workflows/test-code-samples.yml b/.github/workflows/test-code-samples.yml new file mode 100644 index 00000000..54f13ffc --- /dev/null +++ b/.github/workflows/test-code-samples.yml @@ -0,0 +1,126 @@ +# ============================================================================= +# Test Code Samples +# ============================================================================= +# The "docs as code" accuracy gate: code in the docs must actually work. +# 1. syntax-check - python syntax checks for every block (cloud, no hardware) +# 2. test-hardware - EXECUTES every runnable block on real AMD hardware via a +# self-hosted runner. Devices are configurable with the +# `DOCS_CI_DEVICES` repo variable (JSON array; default +# ["halo"] = the local Strix Halo box). Add more runners by +# labelling them and extending that variable - no edits +# here. See the "Runners" section of scripts/README.md. +# Failing pages are uploaded so Notify Owner can route to the page owner. 
+# +# Test-by-default (see .github/scripts/extract_code_blocks.py): +# ```python / ```powershell / ```bash -> EXECUTED on every run +# ```python npu -> executed; device-scoped (cpu|gpu|npu) +# ```bash notest -> the only opt-out: skipped +# ```cpp / ```text / ```json -> non-runnable -> recorded "skip" +# ============================================================================= + +name: Test Code Samples + +on: + pull_request: + paths: ['docs/**'] + push: + branches: [main, bconsolvo] + paths: ['docs/**'] + workflow_dispatch: + +permissions: + contents: read + +jobs: + syntax-check: + name: Syntax checks (no hardware) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install linters (YAML format check) + run: pip install pyyaml + - name: Syntax + format check code blocks + run: python .github/scripts/extract_code_blocks.py --syntax-only --docs docs --failed-pages failed-pages-syntax.txt + - uses: actions/upload-artifact@v4 + if: failure() + with: + name: failed-pages-syntax + path: failed-pages-syntax.txt + + test-hardware: + name: Execute on ${{ matrix.hw }} + needs: syntax-check + strategy: + fail-fast: false + matrix: + # Which device labels to run on. Configurable per-repo WITHOUT editing + # this file: set the `DOCS_CI_DEVICES` repo variable to a JSON array, + # e.g. ["halo","stxp","krk"]. Defaults to ["halo"] (the local Strix + # Halo runner). Each value must be a runner label (see RUNNERS section + # of .github/scripts/README.md): halo = Strix Halo, stxp = Strix Point. + hw: ${{ fromJSON(vars.DOCS_CI_DEVICES || '["halo"]') }} + # Runner selection: [self-hosted, <os>, <device>]. OS label is configurable + # via DOCS_CI_OS (default "Windows"); device comes from the matrix above. + runs-on: [self-hosted, "${{ vars.DOCS_CI_OS || 'Windows' }}", "${{ matrix.hw }}"] + env: + # The Ryzen AI conda env on the runner (NPU/GPU/CPU providers installed). 
+ # Override per-repo with the RYZEN_AI_ENV variable. + RYZEN_AI_ENV: ${{ vars.RYZEN_AI_ENV || 'ryzen-ai-1.7.1' }} + steps: + - uses: actions/checkout@v4 + # Execute inside the Ryzen AI conda env so `python` blocks see the NPU + # (VitisAIExecutionProvider), GPU (DmlExecutionProvider), and CPU. + - name: Execute code blocks on hardware + shell: powershell + run: conda run --no-capture-output -n $env:RYZEN_AI_ENV python .github/scripts/extract_code_blocks.py --run --docs docs --output-json code-results-${{ matrix.hw }}.json --failed-pages failed-pages-${{ matrix.hw }}.txt + - name: Build per-page report + if: always() + shell: powershell + run: python .github/scripts/report.py --results code-results-${{ matrix.hw }}.json --docs docs --out CODE_TEST_REPORT-${{ matrix.hw }}.md + - uses: actions/upload-artifact@v4 + if: always() + with: + name: code-report-${{ matrix.hw }} + path: | + code-results-${{ matrix.hw }}.json + CODE_TEST_REPORT-${{ matrix.hw }}.md + failed-pages-${{ matrix.hw }}.txt + + # ---- Record this run to the CI history (dashboard data) ---- + record: + needs: [syntax-check, test-hardware] + if: always() + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Build page results + run: python .github/scripts/extract_code_blocks.py --syntax-only --docs docs --output-json code-results.json --failed-pages failed-pages.txt || true + - name: Append run to ci-history.json + env: + RUN_STATUS: ${{ (needs.syntax-check.result == 'success' && needs.test-hardware.result == 'success') && 'success' || 'failure' }} + run: | + python .github/scripts/record_run.py \ + --workflow "Test Code Samples" \ + --status "$RUN_STATUS" \ + --code-results code-results.json \ + --history .github/scripts/ci-history.json + - name: Commit history (main) or upload artifact + run: | + if [ "${{ github.ref }}" = "refs/heads/main" ]; then + git config user.name 
"github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add .github/scripts/ci-history.json + git diff --staged --quiet || (git commit -m "ci: record docs CI run [skip ci]" && git push) + fi + - uses: actions/upload-artifact@v4 + with: + name: ci-history + path: .github/scripts/ci-history.json diff --git a/.github/workflows/update-model-list.yml b/.github/workflows/update-model-list.yml new file mode 100644 index 00000000..3d9e351c --- /dev/null +++ b/.github/workflows/update-model-list.yml @@ -0,0 +1,47 @@ +# ============================================================================= +# Update Model List +# ============================================================================= +# Keeps the model tables in the category index pages (docs/{llms,vision,audio}/index.mdx) in sync with the official AMD +# Hugging Face collections for the current Ryzen AI version. Runs weekly and on +# demand; opens a PR when the collections change so the owner can review. +# Owner: uday610. +# ============================================================================= + +name: Update Model List + +on: + schedule: + - cron: '0 7 * * 1' # Mondays 07:00 UTC + workflow_dispatch: + +permissions: + contents: write + pull-requests: write + +jobs: + update: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Regenerate model table from Hugging Face + run: python .github/scripts/fetch_models.py + - name: Open PR if the table changed + uses: peter-evans/create-pull-request@v6 + with: + commit-message: "docs: refresh Ryzen AI model table from Hugging Face" + title: "Refresh model table from Hugging Face collections" + body: | + Automated refresh of the model tables in the category index pages + (`docs/llms/index.mdx`, `docs/vision/index.mdx`, `docs/audio/index.mdx`) + from the AMD Hugging Face collections for the current Ryzen AI version. 
+ + @uday610 please review and merge to keep the published model list accurate. + branch: auto/update-model-list + add-paths: | + docs/llms/index.mdx + docs/vision/index.mdx + docs/audio/index.mdx + reviewers: uday610 diff --git a/.gitignore b/.gitignore index 329e9f2b..a279f68a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,57 @@ -*~ -*.swp -__pycache__ -*node_modules* -*.yml +# Dependencies +node_modules/ + +# Python +__pycache__/ +*.pyc +*.pyo +.venv/ +*.egg-info/ + +# CI artifacts / local report scratch +report_run.json +report_failed.txt +report_pages.md + +# Downloaded models +*-onnx-ryzenai-*/ + +# Local model downloads + ported example artifacts (multi-GB; not docs source) +/models/ +/models-tutorials/ + +# Downloaded installers / binaries (pulled at runtime by examples) +*.exe + +# Build artifacts *_Perf.log test.json -benchmark_scripts \ No newline at end of file +benchmark_scripts/ +build_out/ + +# C++ build artifacts (ResNet examples) +configure.txt +passContext.txt +original-info-signature.txt +original-model-signature.txt +output.log + +# Local validation scripts +run_validation.bat +validation-output.txt + +# Editor +*~ +*.swp +.DS_Store + +# Environment +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Logs +npm-debug.log* +yarn-debug.log* +yarn-error.log* diff --git a/.lychee.toml b/.lychee.toml new file mode 100644 index 00000000..39f24ce8 --- /dev/null +++ b/.lychee.toml @@ -0,0 +1,56 @@ +## Lychee link checker configuration +## https://lychee.cli.rs/usage/config/ + +# Check external links (HTTP/HTTPS) +include_verbatim = true + +# Timeout per request (seconds) +timeout = 30 + +# Max retries per link +max_retries = 3 + +# Max concurrent requests +max_concurrency = 8 + +# User-agent string (some sites block bots) +user_agent = "Mozilla/5.0 (compatible; lychee/0.23; +https://github.com/lycheeverse/lychee)" + +# Accept these HTTP status codes as valid +accept = [200, 204, 301, 302, 307, 308] + +# Exclude 
private/internal IPs +exclude_all_private = true + +# Do not check mail addresses +include_mail = false + +# Glob patterns for files to skip +exclude_path = [ + "docs/website/node_modules/**", + "docs/website/.docusaurus/**", + "docs/website/build/**", + "docs/templates/**", +] + +# URL patterns to exclude from checking (regex) +exclude = [ + # AMD websites have HTTP/2 protocol issues with automated checkers + "https://www\\.amd\\.com/.*", + "https://community\\.amd\\.com/.*", + # AMD account-gated downloads (require login) + "https://account\\.amd\\.com/.*", + # Hugging Face model repos (rate-limited / gated) + "https://huggingface\\.co/amd/.*", + "https://huggingface\\.co/collections/.*", + "https://hf\\.co/.*", + # localhost references (used in code examples) + "http://localhost.*", + "http://127\\.0\\.0\\.1.*", + # PyPI simple index (not browsable) + "https://pypi\\.amd\\.com/.*", + # Lunarg URLs with shell variables + ".*\\$UBUNTU_CODENAME.*", + # Reacher API endpoint (not a browsable URL) + "https://api\\.reacher\\.email/.*", +] diff --git a/CNN-examples/getting_started_resnet/Readme.md b/CNN-examples/getting_started_resnet/Readme.md deleted file mode 100644 index b9087cc3..00000000 --- a/CNN-examples/getting_started_resnet/Readme.md +++ /dev/null @@ -1,15 +0,0 @@ - - - - -

Ryzen™ AI ResNet Tutorial

-
- -# Getting Started Example - -This tutorial uses a fine-tuned version of the ResNet model (using the CIFAR-10 dataset) to demonstrate the process of preparing, quantizing, and deploying a model using Ryzen AI Software. The tutorial features deployment using both Python and C++ ONNX runtime code. - -For a walkthrough of this tutorial please follow: - -- [Tutorial for ResNet INT8 model](https://ryzenai.docs.amd.com/en/latest/getstartex.html) -- [Tutorial for ResNet BF16 model](./bf16/README.md) diff --git a/CNN-examples/getting_started_resnet/bf16/app/.gitignore b/CNN-examples/getting_started_resnet/bf16/app/.gitignore deleted file mode 100644 index 9f4078bc..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -build -configure.txt -passContext.txt -original-info-signature.txt -original-model-signature.txt -output.log \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/app/CMakeLists.txt b/CNN-examples/getting_started_resnet/bf16/app/CMakeLists.txt deleted file mode 100644 index 2c3c384e..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/CMakeLists.txt +++ /dev/null @@ -1,70 +0,0 @@ -cmake_minimum_required(VERSION 3.10) - -project(app VERSION 1.0.0 LANGUAGES C CXX) - -set(RYZEN_AI_INSTALLATION_PATH $ENV{RYZEN_AI_INSTALLATION_PATH}) -set(ONNXRUNTIME_ROOTDIR "${RYZEN_AI_INSTALLATION_PATH}/onnxruntime") -set(CONDA_PREFIX "$ENV{CONDA_PREFIX}") - -message(STATUS "RYZEN_AI_INSTALLATION_PATH = ${RYZEN_AI_INSTALLATION_PATH}") -message(STATUS "ONNXRUNTIME_ROOTDIR = ${ONNXRUNTIME_ROOTDIR}") -message(STATUS "CONDA_PREFIX = ${CONDA_PREFIX}") - -set(CMAKE_CONFIGURATION_TYPES Release) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_INSTALL_PREFIX .) -set(CMAKE_PREFIX_PATH .) 
-set(BUILD_SHARED_LIBS OFF) - -add_compile_options( - /Zc:__cplusplus - /wd4100 - /wd4996 - /WX - /std:c++latest -) - -include_directories( - "${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session" -) - -add_compile_options( - /Qspectre - /ZH:SHA_256 - /guard:cf - /W3 - /Zi - /Zc:__cplusplus -) -add_link_options( - /CETCOMPAT - /DEBUG -) - -link_directories("${ONNXRUNTIME_ROOTDIR}/lib") -link_directories("${CMAKE_INSTALL_PREFIX}/lib") - -add_executable(${PROJECT_NAME} main.cpp npu_util.cpp) - -target_link_libraries(${PROJECT_NAME} onnxruntime) - -# -- Copy the RyzenAI runtime DLLs in folder containing the executable - -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/onnxruntime.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/onnxruntime_providers_shared.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/onnxruntime_providers_vitisai.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/onnxruntime_vitisai_ep.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/dyn_dispatch_core.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/ryzenai_onnx_utils.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/zlib.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/zstd.dll") -list(APPEND dll_list "${RYZEN_AI_INSTALLATION_PATH}/deployment/aiecompiler_client.dll") - -foreach(DLL_FILE ${dll_list}) - add_custom_command( - TARGET ${PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $ - ) -endforeach() \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/app/build.bat b/CNN-examples/getting_started_resnet/bf16/app/build.bat deleted file mode 100644 index d28035c4..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/build.bat +++ /dev/null @@ -1,18 +0,0 @@ -@echo off - -if "%CONDA_PREFIX%" == "" echo CONDA_PREFIX not set. 
This script must be executed from within the RyzenAI conda environment. & goto :error -if "%RYZEN_AI_INSTALLATION_PATH%" == "" echo RYZEN_AI_INSTALLATION_PATH not set. This script requires the RYZEN_AI_INSTALLATION_PATH env var to be set to the RyzenAI installation folder. & goto :error - -cmake -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -B build -S . -G "Visual Studio 17 2022" - -cmake --build .\build --config Release --target ALL_BUILD - -echo. -echo Copying ONNX models, compiled models and vitisai_config.json file -xcopy /Y /I /S ..\models build\Release\models > nul -xcopy /Y /I /S ..\my_cache_dir build\Release\my_cache_dir > nul -xcopy /Y /-I ..\vitisai_config.json build\Release > nul -xcopy /Y /E /I test_images build\Release\test_images > nul - -:error -exit /b %errorlevel% \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/app/clean.bat b/CNN-examples/getting_started_resnet/bf16/app/clean.bat deleted file mode 100644 index 7e7de5f7..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/clean.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -cd %~dp0 - -del cpu_inference_summary.json > nul 2>&1 -del summary.csv > nul 2>&1 -del user_events.csv > nul 2>&1 -del xrt.run_summary > nul 2>&1 -del original-info-signature.txt > nul 2>&1 -del original-model-signature.txt > nul 2>&1 -del /Q output.log > nul 2>&1 - -rmdir /S /Q build > nul 2>&1 - diff --git a/CNN-examples/getting_started_resnet/bf16/app/main.cpp b/CNN-examples/getting_started_resnet/bf16/app/main.cpp deleted file mode 100644 index 92c57deb..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/main.cpp +++ /dev/null @@ -1,486 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ************************************************************************************/ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NOMINMAX // Prevent Windows min/max macros from interfering with std::min/max - -#include "npu_util.h" - -// CIFAR-10 class labels -const std::vector CIFAR10_CLASSES = { - "airplane", "automobile", "bird", "cat", "deer", - "dog", "frog", "horse", "ship", "truck" -}; - - -static int get_num_elements(const std::vector& v) { - int total = 1; - for (auto& i : v) - total *= (int)i; - return total; -} - -template -std::ostream& operator<<(std::ostream& os, const std::vector& v) -{ - os << "["; - for (int i = 0; i < v.size(); ++i) - { - os << v[i]; - if (i != v.size() - 1) - { - os << ", "; - } - } - os << "]"; - return os; -} - -// pretty prints a shape dimension vector -static std::string print_shape(const std::vector& v) { - std::stringstream ss(""); - for (size_t i = 0; i < v.size() - 1; i++) - ss << v[i] << "x"; - ss << v[v.size() - 1]; - return ss.str(); -} - -static std::string print_tensor(Ort::Value& tensor) { - auto shape = tensor.GetTensorTypeAndShapeInfo().GetShape(); - auto nelem = get_num_elements(shape); - auto tensor_ptr = tensor.GetTensorMutableData(); - - std::stringstream ss(""); - for (auto i = 0; i < nelem; i++) - ss << tensor_ptr[i] << " "; - return ss.str(); -} - -template -Ort::Value vec_to_tensor(std::vector& data, const std::vector& shape) { - Ort::MemoryInfo mem_info = - Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); - auto tensor = Ort::Value::CreateTensor(mem_info, data.data(), data.size(), shape.data(), shape.size()); - return tensor; -} - -std::string get_program_dir() -{ - char* exe_path; _get_pgmptr(&exe_path); // full path and name of the executable - return std::filesystem::path(exe_path).parent_path().string(); // directory in which the executable is located -} - -// Function to 
load a binary image file (for CIFAR-10: label + 32x32x3 pixels) -std::vector load_cifar_image(const std::string& filename) { - std::ifstream file(filename, std::ios::binary); - if (!file.is_open()) { - std::cerr << "Error: Could not open image file: " << filename << std::endl; - // Return a random image if file doesn't exist - std::vector random_image(3 * 32 * 32); - std::generate(random_image.begin(), random_image.end(), [&] { return (float)(rand() % 256) / 255.0f; }); - return random_image; - } - - // CIFAR-10 binary format: 1 byte label + 3072 bytes image data (32x32x3) - // Skip the label byte if present - uint8_t label; - file.read(reinterpret_cast(&label), 1); - - std::vector buffer(3 * 32 * 32); - file.read(reinterpret_cast(buffer.data()), buffer.size()); - file.close(); - - // Convert to float and normalize [0, 255] -> [0, 1] - // CIFAR-10 format is: all red pixels, then all green pixels, then all blue pixels - std::vector image(3 * 32 * 32); - for (size_t i = 0; i < buffer.size(); ++i) { - image[i] = static_cast(buffer[i]) / 255.0f; - } - - return image; -} - -// Function to get the predicted class from model output -int get_predicted_class(Ort::Value& output_tensor) { - auto shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape(); - auto output_ptr = output_tensor.GetTensorMutableData(); - - // Find the index with maximum probability - int predicted_class = 0; - float max_prob = output_ptr[0]; - - for (int i = 1; i < shape[1]; ++i) { - if (output_ptr[i] > max_prob) { - max_prob = output_ptr[i]; - predicted_class = i; - } - } - - return predicted_class; -} - -// Function to print top-k predictions -void print_top_predictions(Ort::Value& output_tensor, int top_k = 3) { - auto shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape(); - auto output_ptr = output_tensor.GetTensorMutableData(); - - // Apply softmax to output to get probabilities - std::vector logits(shape[1]); - float max_logit = output_ptr[0]; - for (int i = 0; i < shape[1]; ++i) { - 
logits[i] = output_ptr[i]; - if (output_ptr[i] > max_logit) max_logit = output_ptr[i]; - } - // Subtract max for numerical stability - float sum_exp = 0.0f; - std::vector probs(shape[1]); - for (int i = 0; i < shape[1]; ++i) { - probs[i] = std::exp(logits[i] - max_logit); - sum_exp += probs[i]; - } - for (int i = 0; i < shape[1]; ++i) { - probs[i] /= sum_exp; - } - - // Create pairs of (probability, class_index) - std::vector> prob_class_pairs; - for (int i = 0; i < shape[1]; ++i) { - prob_class_pairs.emplace_back(probs[i], i); - } - - // Sort by probability in descending order - std::sort(prob_class_pairs.begin(), prob_class_pairs.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - - // Print top-k predictions - std::cout << "Top " << top_k << " predictions:" << std::endl; - int num_predictions = (top_k < (int)prob_class_pairs.size()) ? top_k : (int)prob_class_pairs.size(); - for (int i = 0; i < num_predictions; ++i) { - int class_idx = prob_class_pairs[i].second; - float prob = prob_class_pairs[i].first; - std::cout << " " << (i + 1) << ". 
" << CIFAR10_CLASSES[class_idx] - << " (probability: " << std::fixed << std::setprecision(4) << prob << ")" << std::endl; - } -} - - -int runtest(std::string& model_name, std::unordered_map vai_ep_options = {}, bool run_classification = false) -{ - int64_t batch_size = 1; - - printf("Creating ORT env\n"); - Ort::Env env(ORT_LOGGING_LEVEL_ERROR, "quicktest"); - - printf("Initializing session options\n"); - auto session_options = Ort::SessionOptions(); - - if (vai_ep_options.empty()==false) // If VAI EP options are provided, initialize the VitisAI EP - { - printf("Configuring VAI EP\n"); - try { - session_options.AppendExecutionProvider_VitisAI(vai_ep_options); - } - catch (const std::exception& e) { - std::cerr << "Exception occurred in appending execution provider: " << e.what() << std::endl; - } - } - - printf("Creating ONNX Session\n"); - - // Check if model file exists first - if (!std::filesystem::exists(model_name)) { - std::cerr << "Error: Model file not found at: " << model_name << std::endl; - std::cerr << "Please ensure you have the model file before running this application." 
<< std::endl; - return -1; - } - - // Create session - this might throw an exception if the model can't be loaded - Ort::Session session(env, std::basic_string(model_name.begin(), model_name.end()).c_str(), session_options); - - try { - // Get names and shapes of model inputs and outputs - Ort::AllocatorWithDefaultOptions allocator; - auto input_count = session.GetInputCount(); - auto input_names = std::vector(); - auto input_names_char = std::vector(); - auto input_shapes = std::vector>(); - auto output_count = session.GetOutputCount(); - auto output_names = std::vector(); - auto output_names_char = std::vector(); - auto output_shapes = std::vector>(); - for (size_t i = 0; i < input_count; i++) - { - auto shape = session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); - std::string name = session.GetInputNameAllocated(i, allocator).get(); - input_names.emplace_back(name); - input_names_char.emplace_back(input_names.at(i).c_str()); - input_shapes.emplace_back(shape); - } - for (size_t i = 0; i < output_count; i++) - { - auto shape = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); - std::string name = session.GetOutputNameAllocated(i, allocator).get(); - output_names.emplace_back(name); - output_names_char.emplace_back(output_names.at(i).c_str()); - output_shapes.emplace_back(shape); - } - - // Display model info - std::cout << "ONNX model : " << model_name << std::endl; - for (size_t i = 0; i < input_count; i++) - std::cout << " " << input_names.at(i) << " " << print_shape(input_shapes.at(i)) << std::endl; - for (size_t i = 0; i < output_count; i++) - std::cout << " " << output_names.at(i) << " " << print_shape(output_shapes.at(i)) << std::endl; - - // The code which follows expects the model to have 1 input node and 1 output node. - if (output_count != 1 || input_count != 1) { - std::cout << "This version of the program only supports models with 1 input node and 1 output node. Exiting." 
<< std::endl; - return -1; - } - - // If input shape has dynamic batch size, set it to a fixed value - auto input_shape = input_shapes[0]; - if (input_shape[0] < 0) { - std::cout << "Dynamic batch size detected. Setting batch size to " << batch_size << "." << std::endl; - input_shape[0] = batch_size; - } - - if (run_classification) { - // Run classification on sample images - std::cout << "Running classification on sample images..." << std::endl; - - std::string exe_dir = get_program_dir(); - - // Check if test_images directory exists - std::string test_images_dir = exe_dir + "\\test_images"; - if (!std::filesystem::exists(test_images_dir)) { - std::cout << "Warning: Test images directory not found at: " << test_images_dir << std::endl; - std::cout << "Creating directory: " << test_images_dir << std::endl; - std::filesystem::create_directory(test_images_dir); - } - - std::vector test_images = { - exe_dir + "\\test_images\\airplane.bin", - exe_dir + "\\test_images\\automobile.bin", - exe_dir + "\\test_images\\cat.bin", - exe_dir + "\\test_images\\ship.bin", - exe_dir + "\\test_images\\dog.bin" - }; - - for (const auto& image_path : test_images) { - std::cout << "\n--- Testing image: " << std::filesystem::path(image_path).filename().string() << " ---" << std::endl; - - // Load image data - std::vector input_tensor_values = load_cifar_image(image_path); - - // Initialize input tensor - std::vector input_tensors; - input_tensors.emplace_back(vec_to_tensor(input_tensor_values, input_shape)); - - // Run inference - try - { - auto output_tensors = session.Run( - Ort::RunOptions(), - input_names_char.data(), input_tensors.data(), input_names_char.size(), - output_names_char.data(), output_names_char.size() - ); - - // Get predicted class - int predicted_class = get_predicted_class(output_tensors[0]); - std::cout << "Predicted class: " << CIFAR10_CLASSES[predicted_class] << std::endl; - - // Print top predictions - print_top_predictions(output_tensors[0]); - } - catch 
(const Ort::Exception& exception) { - std::cout << "ERROR running model inference: " << exception.what() << std::endl; - return -1; - } - } - } else { - // Run performance benchmark - auto n = 100; - std::cout << "Running " << n << " inferences of the model" << std::endl; - // Get the current time before the operation - auto start = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < n; i++) - { - // Initialize input data with random numbers in the range [0, 1] - std::vector input_tensor_values(get_num_elements(input_shape)); - std::generate(input_tensor_values.begin(), input_tensor_values.end(), [&] { return (float)(rand() % 256) / 255.0f; }); - - // Initialize input tensor with input data - std::vector input_tensors; - input_tensors.emplace_back(vec_to_tensor(input_tensor_values, input_shape)); - - // Pass input tensors through model - try - { - auto output_tensors = session.Run( - Ort::RunOptions(), - input_names_char.data(), input_tensors.data(), input_names_char.size(), - output_names_char.data(), output_names_char.size() - ); - // std::cout << i << " : " << print_tensor(output_tensors[0]) << std::endl; - } - catch (const Ort::Exception& exception) { - std::cout << "ERROR running model inference: " << exception.what() << std::endl; - return -1; - } - } - // Get the current time after the operation - auto end = std::chrono::high_resolution_clock::now(); - // Calculate the duration of the operation - std::chrono::duration duration = end - start; - // Print the duration in seconds - std::cout << "Operation took " << duration.count() << " seconds" << std::endl; - } - } - catch (const Ort::Exception& exception) { - std::cerr << "ERROR initializing model: " << exception.what() << std::endl; - return -1; - } - catch (const std::exception& exception) { - std::cerr << "ERROR: " << exception.what() << std::endl; - return -1; - } - - printf("Done\n"); - printf("-------------------------------------------------------\n"); - printf("\n"); - - return 0; -} - 
-int main(int argc, char* argv[]) -{ - std::string exe_dir = get_program_dir(); - - // Default values - std::unordered_map vai_ep_options; - vai_ep_options["config_file"] = exe_dir + "\\vitisai_config.json"; - vai_ep_options["cache_dir"] = exe_dir + "\\my_cache_dir"; - vai_ep_options["cache_key"] = "resnet_trained_for_cifar10"; - - - // Ensure models directory exists - std::string models_dir = exe_dir + "\\models"; - if (!std::filesystem::exists(models_dir)) { - std::cout << "Warning: Models directory not found at: " << models_dir << std::endl; - std::cout << "Creating directory: " << models_dir << std::endl; - std::filesystem::create_directory(models_dir); - } - - std::string model_path = exe_dir + "\\models\\resnet_trained_for_cifar10.onnx"; - - bool run_classification = true; // Default to classification mode - - std::cout << "usage: app.exe [mode]" << std::endl; - std::cout << " mode: 'classification' (default) or 'benchmark'" << std::endl; - - if (argc > 1) { - model_path = std::string(argv[1]); // First argument: model path - } - if (argc > 2) { - vai_ep_options["config_file"] = std::string(argv[2]); // Second argument config file - } - if (argc > 3) { - std::string mode = std::string(argv[3]); // Third argument: mode - if (mode == "benchmark") { - run_classification = false; - } else if (mode == "classification") { - run_classification = true; - } else { - std::cout << "Unknown mode '" << mode << "'. Using classification mode." 
<< std::endl; - } - } - - printf("-------------------------------------------------------\n"); - printf("Performing compatibility check for VitisAI EP 1.5.0 \n"); - printf("-------------------------------------------------------\n"); - auto npu_info = npu_util::checkCompatibility_RAI_1_5(); - - std::cout << " - NPU Device ID : 0x" << std::hex << npu_info.device_id << std::dec << std::endl; - std::cout << " - NPU Device Name : " << npu_info.device_name << std::endl; - std::cout << " - NPU Driver Version: " << npu_info.driver_version_string << std::endl; - switch (npu_info.check) { - case npu_util::Status::OK: - std::cout << "Environment compatible for VitisAI EP" << std::endl; - break; - case npu_util::Status::NPU_UNRECOGNIZED: - std::cout << "NPU type not recognized." << std::endl; - std::cout << "Skipping run with VitisAI EP." << std::endl; - return -1; - break; - case npu_util::Status::DRIVER_TOO_OLD: - std::cout << "Installed drivers are too old." << std::endl; - std::cout << "Skipping run with VitisAI EP." << std::endl; - return -1; - break; - case npu_util::Status::EP_TOO_OLD: - std::cout << "VitisAI EP is too old." << std::endl; - std::cout << "Skipping run with VitisAI EP." << std::endl; - return -1; - break; - default: - std::cout << "Unknown state." << std::endl; - std::cout << "Skipping run with VitisAI EP." << std::endl; - return -1; - break; - } - switch(npu_info.device_id) { - case 0x17F0: // STX/KRK NPU - std::cout << "STX/KRK NPU device detected." << std::endl; - break; - case 0x1502: // PHX/HPT NPU - default: - std::cout << "Unsupported NPU device ID." 
<< std::endl; - return -1; - break; - } - std::cout << std::endl; - - // Set environment variables - _putenv("XLNX_VART_FIRMWARE="); - _putenv("XLNX_TARGET_NAME="); - _putenv("XLNX_ENABLE_CACHE=0"); - - // Run test - printf("-------------------------------------------------------\n"); - printf("Running model on CPU \n"); - printf("-------------------------------------------------------\n"); - runtest(model_path, {}, run_classification); - - printf("-------------------------------------------------------\n"); - printf("Running model on NPU \n"); - printf("-------------------------------------------------------\n"); - runtest(model_path, vai_ep_options, run_classification); - - - std::cout << "Test Done." << std::endl; - - return 0; -} diff --git a/CNN-examples/getting_started_resnet/bf16/app/npu_util.cpp b/CNN-examples/getting_started_resnet/bf16/app/npu_util.cpp deleted file mode 100644 index 9aca47ec..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/npu_util.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, -merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit -persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall -be included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE -FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -************************************************************************************/ - -// compile using: /std:c++latest - -#pragma comment(lib, "setupapi.lib") -#pragma comment(lib, "dxgi") - -#include -#include - -#include -#include -#include - -#include "npu_util.h" - - -namespace npu_util { - - std::string DriverHexToString(DWORDLONG ver) { - std::stringstream string_stream; - string_stream << ((ver >> 48) & 0xffff) << "." << ((ver >> 32) & 0xffff) << "." << ((ver >> 16) & 0xffff) << "." << ((ver >> 0) & 0xffff); - return string_stream.str(); - } - - DWORDLONG DriverNumberToHex(DWORDLONG a, DWORDLONG b, DWORDLONG c, DWORDLONG d) { - DWORDLONG ver = ((a & 0xffff) << 48) | ((b & 0xffff) << 32) | ((c & 0xffff) << 16) | ((d & 0xffff) << 0) ; - return ver; - } - - // Return a vector of DXGI adapter descriptions - std::vector enumerateDXGIAdapters() - { - std::vector adapterDescriptions; - - IDXGIFactory* pFactory = nullptr; - HRESULT hr = CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&pFactory); - if (FAILED(hr)) { - std::cerr << "Failed to create DXGI Factory." 
<< std::endl; - return adapterDescriptions; - } - - UINT i = 0; - IDXGIAdapter* pAdapter = nullptr; - while (pFactory->EnumAdapters(i, &pAdapter) != DXGI_ERROR_NOT_FOUND) { - DXGI_ADAPTER_DESC desc; - pAdapter->GetDesc(&desc); - adapterDescriptions.push_back(desc); - pAdapter->Release(); - ++i; - } - pFactory->Release(); - - return adapterDescriptions; - } - - // Extract NPU information - NPUInfo extractNPUInfo() - { - // Make extractNPUInfo thread-safe - static std::mutex function_mutex; - std::lock_guard guard(function_mutex); - - NPUInfo npu_info; - npu_info.device_id = -1; - npu_info.device_name = ""; - npu_info.driver_version_number = -1; - npu_info.driver_version_string = ""; - npu_info.check = Status::UNKNOWN; - - static const std::vector> PCI_IDS = { - { "PCI\\VEN_1022&DEV_1502", 0x1502 }, // AIE2 - { "PCI\\VEN_1022&DEV_17F0", 0x17F0 } // AIE2P - }; - - static const std::vector DEV_CLASSES = { - &GUID_DEVCLASS_COMPUTEACCELERATOR, - &GUID_DEVCLASS_SYSTEM - }; - - for (const auto& devClass : DEV_CLASSES) { - HDEVINFO deviceInfoSet = SetupDiGetClassDevs(devClass, nullptr, nullptr, DIGCF_PRESENT); - if (deviceInfoSet == INVALID_HANDLE_VALUE) { - continue; - } - - SP_DEVINFO_DATA deviceInfoData = { 0 }; - deviceInfoData.cbSize = sizeof(deviceInfoData); - - DWORD index = 0; - while (npu_info.device_id == -1 && SetupDiEnumDeviceInfo(deviceInfoSet, index, &deviceInfoData)) { - DWORD requiredSize = 0; - - SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_HARDWAREID, nullptr, nullptr, 0, &requiredSize); - - std::vector buffer(requiredSize); - - if (SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_HARDWAREID, nullptr, buffer.data(), requiredSize, nullptr)) { - std::string hardwareId(reinterpret_cast(buffer.data())); - - for (const auto& entry : PCI_IDS) { - if (hardwareId.find(entry.first) != std::string::npos) { - npu_info.device_id = entry.second; - requiredSize = 0; - SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, 
&deviceInfoData, SPDRP_DEVICEDESC, nullptr, nullptr, 0, &requiredSize); - - buffer.resize(requiredSize); - if (SetupDiGetDeviceRegistryPropertyA(deviceInfoSet, &deviceInfoData, SPDRP_DEVICEDESC, nullptr, buffer.data(), requiredSize, nullptr)) { - std::string dev_desc(reinterpret_cast(buffer.data())); - npu_info.device_name = dev_desc; - } - SP_DEVINSTALL_PARAMS DeviceInstallParams; - ZeroMemory(&DeviceInstallParams, sizeof(DeviceInstallParams)); - DeviceInstallParams.cbSize = sizeof(SP_DEVINSTALL_PARAMS); - DeviceInstallParams.FlagsEx |= (DI_FLAGSEX_INSTALLEDDRIVER | DI_FLAGSEX_ALLOWEXCLUDEDDRVS); - if (SetupDiSetDeviceInstallParams(deviceInfoSet, &deviceInfoData, &DeviceInstallParams)) { - if (SetupDiBuildDriverInfoList(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER)) { - SP_DRVINFO_DATA DriverInfoData; - DriverInfoData.cbSize = sizeof(SP_DRVINFO_DATA); - if (SetupDiEnumDriverInfo(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER, 0, &DriverInfoData)) { - npu_info.driver_version_number = DriverInfoData.DriverVersion; - npu_info.driver_version_string = DriverHexToString(DriverInfoData.DriverVersion).c_str(); - } - } - SetupDiDestroyDriverInfoList(deviceInfoSet, &deviceInfoData, SPDIT_COMPATDRIVER); - break; - } - } - } - } - - ++index; - } - - SetupDiDestroyDeviceInfoList(deviceInfoSet); - - if (npu_info.device_id != -1) { - break; - } - } - return npu_info; - } - - NPUInfo checkCompatibility(DWORDLONG min_driver_version, std::chrono::year_month_day max_date) - { - NPUInfo info = extractNPUInfo(); - - // Check if supported NPU is present - if (info.device_id==-1) { - info.check = Status::NPU_UNRECOGNIZED; - return info; - } - - // Check if minimum version of driver is installed - if (info.driver_version_number(std::chrono::system_clock::now())};; - if (current_date>max_date) { - info.check = Status::EP_TOO_OLD; - return info; - } - - info.check = Status::OK; - return info; - } - - NPUInfo checkCompatibility_RAI_1_2() - { - // Min driver: 32.0.201.204 - // Max 
date : 2027-07-30 (3 yrs after the release date of RyzenAI 1.2) - return checkCompatibility(DriverNumberToHex(32,0,201,204), { std::chrono::July / 30 / 2027 }); - } - - NPUInfo checkCompatibility_RAI_1_3() - { - // Min driver: 32.0.203.237 - // Max date : 2027-11-26 (3 yrs after the release date of RyzenAI 1.3) - return checkCompatibility(DriverNumberToHex(32,0,203,237), { std::chrono::November / 26 / 2027 }); - } - - NPUInfo checkCompatibility_RAI_1_3_1() - { - // Min driver: 32.0.203.242 - // Max date : 2028-01-17 (3 yrs after the release date of RyzenAI 1.3) - return checkCompatibility(DriverNumberToHex(32,0,203,242), { std::chrono::January / 15 / 2028 }); - } - - NPUInfo checkCompatibility_RAI_1_4() - { - // Min driver: 32.0.203.257 - // Max date : 2028-03-28 - return checkCompatibility(DriverNumberToHex(32,0,203,257), { std::chrono::March / 28 / 2028 }); - } - - NPUInfo checkCompatibility_RAI_1_4_1() - { - // Min driver: 32.0.203.259 - // Max date : 2028-05-12 - return checkCompatibility(DriverNumberToHex(32,0,203,259), { std::chrono::May / 12 / 2028 }); - } - - NPUInfo checkCompatibility_RAI_1_5() - { - // Min driver: 32.0.203.259 - // Max date : 2028-05-16 - return checkCompatibility(DriverNumberToHex(32,0,203,280), { std::chrono::May / 16 / 2028 }); - } - -} // npu_util diff --git a/CNN-examples/getting_started_resnet/bf16/app/npu_util.h b/CNN-examples/getting_started_resnet/bf16/app/npu_util.h deleted file mode 100644 index 34dd1a94..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/npu_util.h +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, -merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit -persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall -be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE -FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. 
-************************************************************************************/ - -#pragma once - -#include -#include -#include - -#include - -#include - -namespace npu_util { - - enum Status { - OK = 0, - UNKNOWN, - NPU_UNRECOGNIZED, - DRIVER_TOO_OLD, - EP_TOO_OLD - }; - - struct NPUInfo { - int device_id; - std::string device_name; - DWORDLONG driver_version_number; - std::string driver_version_string; - Status check; - }; - - // Return a vector of DXGI adapter descriptions - std::vector enumerateDXGIAdapters(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.2 - NPUInfo checkCompatibility_RAI_1_2(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.3 - NPUInfo checkCompatibility_RAI_1_3(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.3.1 - NPUInfo checkCompatibility_RAI_1_3_1(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.4 - NPUInfo checkCompatibility_RAI_1_4(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.4.1 - NPUInfo checkCompatibility_RAI_1_4_1(); - - // Checks whether the system configuration is compatible for VitisAI EP 1.5 - NPUInfo checkCompatibility_RAI_1_5(); - -} // npu_util - diff --git a/CNN-examples/getting_started_resnet/bf16/app/prepare_test_images.py b/CNN-examples/getting_started_resnet/bf16/app/prepare_test_images.py deleted file mode 100644 index de0de1ed..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/prepare_test_images.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Script to prepare test images for ResNet CIFAR-10 inference. -This script downloads CIFAR-10 dataset and extracts some sample images -for testing with the C++ inference code. 
-""" - -import os -import torch -import torchvision -import torchvision.transforms as transforms - -# Directory to save the images -output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_images") -os.makedirs(output_dir, exist_ok=True) - -# Download CIFAR10 dataset (test split) -testset = torchvision.datasets.CIFAR10(root='./data', train=False, - download=True, transform=transforms.ToTensor()) - -# Extract one image from each class -class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', - 'dog', 'frog', 'horse', 'ship', 'truck'] - -for class_idx in range(10): - # Find the first image of this class - for i, (image, label) in enumerate(testset): - if label == class_idx: - # Save the raw image bytes in CIFAR-10 binary format (with the label byte) - filename = os.path.join(output_dir, f"{class_names[class_idx]}.bin") - - # Convert tensor back to bytes (CIFAR-10 format: R channel, G channel, B channel) - raw_data = bytearray([label]) # First byte is the label - - # Append image data in CIFAR-10 format (all R, then all G, then all B) - # Convert from [0,1] float to [0,255] byte - for c in range(3): # RGB channels - for h in range(32): # Height - for w in range(32): # Width - pixel_value = int(image[c][h][w] * 255) - raw_data.append(pixel_value) - - # Write to binary file - with open(filename, 'wb') as f: - f.write(raw_data) - - print(f"Saved {filename}") - break - -print("Done preparing test images") diff --git a/CNN-examples/getting_started_resnet/bf16/app/run.bat b/CNN-examples/getting_started_resnet/bf16/app/run.bat deleted file mode 100644 index 2cebd5a6..00000000 --- a/CNN-examples/getting_started_resnet/bf16/app/run.bat +++ /dev/null @@ -1,35 +0,0 @@ -@echo off - -powershell -command "build\Release\app.exe | Tee-object output.log" -:: Check the return code to see if the program returned as expected -if %errorlevel% equ 1 goto error - -:: Check the log to see if final expected message was printed -find /c "Test Done." 
output.log >NUL -if %errorlevel% equ 1 goto unsuccessful - -:: Check the log to see if ops were on CPU or NPU (VAIML) -find /c "[Vitis AI EP] No. of Operators : VAIML" output.log >NUL -if %errorlevel% equ 1 goto no_vaiml -goto success - -:success -powershell write-host -fore Green SUCCESS: Model ran on NPU -goto done - -:no_vaiml -powershell write-host -fore Red ERROR: Model did not run on NPU -goto done - -:unsuccessful -echo. -powershell write-host -fore Red ERROR: Test did not generate the final expected message -goto done - -:error -echo. -powershell write-host -fore Red ERROR: Program returned a non-zero value -goto done - -:done - diff --git a/CNN-examples/getting_started_resnet/bf16/clean.bat b/CNN-examples/getting_started_resnet/bf16/clean.bat deleted file mode 100644 index c2bc5c54..00000000 --- a/CNN-examples/getting_started_resnet/bf16/clean.bat +++ /dev/null @@ -1,20 +0,0 @@ -@echo off - -cd %~dp0 - -del cpu_inference_summary.json > nul 2>&1 -del summary.csv > nul 2>&1 -del user_events.csv > nul 2>&1 -del xrt.run_summary > nul 2>&1 -del original-info-signature.txt > nul 2>&1 -del original-model-signature.txt > nul 2>&1 - -rmdir /S /Q models > nul 2>&1 -rmdir /S /Q my_cache_dir > nul 2>&1 - -call %~dp0\app\clean.bat -call %~dp0\app_vaimlUnarchivePath\clean.bat - -cd %~dp0 - - diff --git a/CNN-examples/getting_started_resnet/bf16/compile.py b/CNN-examples/getting_started_resnet/bf16/compile.py deleted file mode 100644 index aec1c401..00000000 --- a/CNN-examples/getting_started_resnet/bf16/compile.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import shutil -import argparse -import sys -import pathlib -import onnxruntime -import numpy as np -import pathlib -from pathlib import Path - -def main(): - parser = argparse.ArgumentParser(description="Compile BF16 CNN model") - parser.add_argument("--model", "-i", help="Path to the ONNX model") - args = parser.parse_args() - - onnx_model = args.model - config_file = 'vitisai_config.json' - # cache_dir = 
'my_cache_dir' - - cache_dir = Path(__file__).parent.resolve() - cache_dir = os.path.join(cache_dir,'my_cache_dir') - - cache_key = pathlib.Path(onnx_model).stem - - provider_options_dict = { - "config_file": config_file, - "cache_dir": cache_dir, - "cache_key": cache_key, - "enable_cache_file_io_in_mem":0, - } - - print(f"Creating ORT inference session for model {onnx_model}") - # Create session options - session_options = onnxruntime.SessionOptions() - session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - session = onnxruntime.InferenceSession( - onnx_model, - sess_options=session_options, - providers=["VitisAIExecutionProvider"], - provider_options=[provider_options_dict] - ) - - print("Done") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/docs/README_C++.md b/CNN-examples/getting_started_resnet/bf16/docs/README_C++.md deleted file mode 100644 index 32c6b359..00000000 --- a/CNN-examples/getting_started_resnet/bf16/docs/README_C++.md +++ /dev/null @@ -1,261 +0,0 @@ -# ResNet CIFAR-10 Inference Example with C++ - -This example demonstrates how to run inference with a ResNet model trained on CIFAR-10 dataset using ONNX Runtime in C++. - -**Note:** Ensure that you are following the instructions for model and dataset setup and compilation from [BF16 setup](../README.md) - -## Requirements - -- Visual Studio 2022 (for building the C++ application) -- CMake -- ResNet18 CIFAR-10 model in ONNX format - -### Setup: Test Images - -The test images are generated from CIFAR-10 dataset, by extracting sample image from each classe. For more details refer to the ``prepare_test_images.py`` script. - -### Step 1: - -```bash -cd \CNN-examples\getting_started_resnet\bf16\app -build.bat -``` - -This batch file will execute below two commands one after another. - -```bash -cmake -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -B build -S . 
-G "Visual Studio 17 2022" -cmake --build .\build --config Release --target ALL_BUILD -``` - -The build step will create a build / folder, and all the system files (such as object files and intermediate builds files) will be generated inside it using the .cpp source file. -The build process will then create the final executable file from the compiled object files. - -The CMakeLists.txt ensures that all required runtime DLLs are copied in the build\Release folder at build time. The compile.bat script takes care of copying the ONNX model, the VAIML compiled model folder and the JSON config file in the build\Release folder. The executable looks for these files in this specific directory. - -## Step 2: - -The application supports two modes: - -1. **Classification Mode (default)**: Runs inference on sample CIFAR-10 images and prints the predicted class labels. -2. **Benchmark Mode**: Runs multiple iterations of inference to measure performance. - -## Usage - -```bash -build\Release\app.exe [model_path] [config_path] [mode] -``` - -- `model_path`: Path to the ONNX model (default: `models/resnet_trained_for_cifar10.onnx`) -- `config_path`: Path to the Vitis AI configuration file (default: `vitisai_config.json`) -- `mode`: Either `classification` (default) or `benchmark` - -### Examples - -Run in classification mode (default): - -```bash -build\Release\app.exe ../models/resnet_trained_for_cifar10.onnx ../vitisai_config.json classification -``` - -In classification mode, the application will: - -1. Load test images from the `test_images` directory -2. Run inference using the model -3. Print the predicted class and top-3 predictions with probabilities - -The output from the run command will look like below. 
- -```bash -usage: app.exe [mode] - mode: 'classification' (default) or 'benchmark' -------------------------------------------------------- -Performing compatibility check for VitisAI EP 1.5.0 -------------------------------------------------------- - - NPU Device ID : 0x17f0 - - NPU Device Name : NPU Compute Accelerator Device - - NPU Driver Version: 32.0.203.280 -Environment compatible for VitisAI EP -STX/KRK NPU device detected. - -------------------------------------------------------- -Running model on CPU -------------------------------------------------------- -Creating ORT env -Initializing session options -Creating ONNX Session -ONNX model : ../models/resnet_quantized_bf16.onnx - input -1x3x32x32 - output -1x10 -Dynamic batch size detected. Setting batch size to 1. -Running classification on sample images... - ---- Testing image: airplane.bin --- -Predicted class: airplane -Top 3 predictions: - 1. airplane (probability: 3.9844) - 2. ship (probability: 3.7344) - 3. automobile (probability: 1.8203) - ---- Testing image: automobile.bin --- -Predicted class: truck -Top 3 predictions: - 1. truck (probability: 6.7812) - 2. automobile (probability: 5.4688) - 3. ship (probability: 0.2930) - ---- Testing image: cat.bin --- -Predicted class: cat -Top 3 predictions: - 1. cat (probability: 7.8750) - 2. frog (probability: 2.8750) - 3. dog (probability: 2.1094) ---- Testing image: ship.bin --- -Predicted class: ship -Top 3 predictions: - 1. ship (probability: 9.0000) - 2. automobile (probability: 2.4688) - 3. airplane (probability: 1.7891) - ---- Testing image: dog.bin --- -Predicted class: dog -Top 3 predictions: - 1. dog (probability: 6.4688) - 2. cat (probability: 5.0000) - 3. 
deer (probability: 2.0312) - -Done -------------------------------------------------------- - -------------------------------------------------------- -Running model on NPU -------------------------------------------------------- -Creating ORT env -Initializing session options -Configuring VAI EP -WARNING: Logging before InitGoogleLogging() is written to STDERR -I20250929 15:57:49.471036 1528 register_ssmlp.cpp:124] Registering Custom Operator: com.amd:SSMLP -I20250929 15:57:49.471036 1528 register_matmulnbits.cpp:110] Registering Custom Operator: com.amd:MatMulNBits -Creating ONNX Session -I20250929 15:57:49.685086 1528 vitisai_compile_model.cpp:1266] Vitis AI EP Load ONNX Model Success -I20250929 15:57:49.685086 1528 vitisai_compile_model.cpp:1267] Graph Input Node Name/Shape (1) -I20250929 15:57:49.685086 1528 vitisai_compile_model.cpp:1271] input : [-1x3x32x32] -I20250929 15:57:49.685086 1528 vitisai_compile_model.cpp:1277] Graph Output Node Name/Shape (1) -I20250929 15:57:49.685086 1528 vitisai_compile_model.cpp:1281] output : [-1x10] -[Vitis AI EP] No. of Operators : CPU 2 VAIML 272 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -ONNX model : ../models/resnet_quantized_bf16.onnx - input -1x3x32x32 - output -1x10 -Dynamic batch size detected. Setting batch size to 1. -Running classification on sample images... - ---- Testing image: airplane.bin --- -Predicted class: airplane -Top 3 predictions: - 1. airplane (probability: 4.0312) - 2. ship (probability: 3.8594) - 3. automobile (probability: 1.8594) - ---- Testing image: automobile.bin --- -Predicted class: truck -Top 3 predictions: - 1. truck (probability: 7.0938) - 2. automobile (probability: 5.6875) - 3. ship (probability: 0.2910) - ---- Testing image: cat.bin --- -Predicted class: cat -Top 3 predictions: - 1. cat (probability: 8.1875) - 2. frog (probability: 2.8906) - 3. dog (probability: 2.1250) ---- Testing image: ship.bin --- -Predicted class: ship -Top 3 predictions: - 1. 
ship (probability: 9.3125) - 2. automobile (probability: 2.6094) - 3. airplane (probability: 1.8281) - ---- Testing image: dog.bin --- -Predicted class: dog -Top 3 predictions: - 1. dog (probability: 6.6250) - 2. cat (probability: 5.1250) - 3. deer (probability: 1.9297) -Done -------------------------------------------------------- - -Test Done. -------------------------------------------------------- -``` - -Run in benchmark mode: - -```bash -build\Release\app.exe ../models/resnet_trained_for_cifar10.onnx ../vitisai_config.json benchmark -``` - -In benchmark mode, the application will: - -1. Run 100 inferences with random input data -2. Measure and report the total execution time - -The output from the run command will look like below. - -```bash - -usage: app.exe [mode] - mode: 'classification' (default) or 'benchmark' -------------------------------------------------------- -Performing compatibility check for VitisAI EP 1.5.0 -------------------------------------------------------- - - NPU Device ID : 0x17f0 - - NPU Device Name : NPU Compute Accelerator Device - - NPU Driver Version: 32.0.203.280 -Environment compatible for VitisAI EP -STX/KRK NPU device detected. - -------------------------------------------------------- -Running model on CPU -------------------------------------------------------- -Creating ORT env -Initializing session options -Creating ONNX Session -ONNX model : ../models/resnet_quantized_bf16.onnx - input -1x3x32x32 - output -1x10 -Dynamic batch size detected. Setting batch size to 1. 
-Running 100 inferences of the model -Operation took 0.290223 seconds -Done -------------------------------------------------------- - -------------------------------------------------------- -Running model on NPU -------------------------------------------------------- -Creating ORT env -Initializing session options -Configuring VAI EP -WARNING: Logging before InitGoogleLogging() is written to STDERR -I20250929 16:02:40.645891 23128 register_ssmlp.cpp:124] Registering Custom Operator: com.amd:SSMLP -I20250929 16:02:40.645891 23128 register_matmulnbits.cpp:110] Registering Custom Operator: com.amd:MatMulNBits -Creating ONNX Session -I20250929 16:02:40.806568 23128 vitisai_compile_model.cpp:1266] Vitis AI EP Load ONNX Model Success -I20250929 16:02:40.814989 23128 vitisai_compile_model.cpp:1267] Graph Input Node Name/Shape (1) -I20250929 16:02:40.814989 23128 vitisai_compile_model.cpp:1271] input : [-1x3x32x32] -I20250929 16:02:40.814989 23128 vitisai_compile_model.cpp:1277] Graph Output Node Name/Shape (1) -I20250929 16:02:40.814989 23128 vitisai_compile_model.cpp:1281] output : [-1x10] -[Vitis AI EP] No. of Operators : CPU 2 VAIML 272 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -ONNX model : ../models/resnet_quantized_bf16.onnx - input -1x3x32x32 - output -1x10 -Dynamic batch size detected. Setting batch size to 1. -Running 100 inferences of the model -Operation took 0.298719 seconds -Done -------------------------------------------------------- - -Test Done. 
-``` \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/export_to_onnx.py b/CNN-examples/getting_started_resnet/bf16/export_to_onnx.py deleted file mode 100644 index 305bb917..00000000 --- a/CNN-examples/getting_started_resnet/bf16/export_to_onnx.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse -import torch -from transformers import AutoModel, AutoTokenizer, AutoFeatureExtractor, AutoImageProcessor -from transformers.modeling_utils import PreTrainedModel -from PIL import Image -import numpy as np -import timm - -def is_timm_model(model_name: str) -> bool: - """ - Check if the model name is a timm model - """ - return model_name.startswith('timm/') - -def is_vision_model(model_config) -> bool: - """ - Determine if the model is a vision model based on its config - """ - # Check for common vision model architectures - model_type = getattr(model_config, 'model_type', '').lower() - vision_architectures = {'vit', 'swin', 'deit', 'beit', 'convnext', 'resnet'} - return any(arch in model_type for arch in vision_architectures) - -def prepare_vision_input(model, processor=None): - """ - Prepare dummy input for vision models - """ - # Create a dummy image (black square) - dummy_image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8)) - - if processor is not None: - # For HF models with processor - inputs = processor(images=dummy_image, return_tensors="pt") - else: - # For timm models - use standard normalization - dummy_input = torch.zeros(1, 3, 224, 224) - inputs = {'pixel_values': dummy_input} - - return inputs - -def prepare_text_input(tokenizer): - """ - Prepare dummy input for text models - """ - return tokenizer("Hello, world!", return_tensors="pt", padding=True, truncation=True, max_length=8) - -def load_model(model_name: str): - """ - Load model based on its type (timm or huggingface) - """ - if is_timm_model(model_name): - # Remove 'timm/' prefix for timm.create_model - timm_model_name = 
model_name.replace('timm/', '', 1) - model = timm.create_model(timm_model_name, pretrained=True) - return model, None, True - else: - model = AutoModel.from_pretrained(model_name) - is_vision = is_vision_model(model.config) - - # Load appropriate processor - if is_vision: - try: - processor = AutoImageProcessor.from_pretrained(model_name) - except Exception: - processor = AutoFeatureExtractor.from_pretrained(model_name) - else: - processor = AutoTokenizer.from_pretrained(model_name) - - return model, processor, is_vision - -def export_model_to_onnx(model_name: str, output_dir: str, opset_version: int = 19) -> str: - """ - Export a Hugging Face PyTorch model to ONNX format. - - Args: - model_name (str): Name or path of the Hugging Face model - output_dir (str): Directory to save the ONNX model - opset_version (int): ONNX opset version to use - - Returns: - str: Path to the saved ONNX model - """ - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - - # Load model - print(f"Loading model: {model_name}") - model, processor, is_vision = load_model(model_name) - - # Prepare input based on model type - dummy_input = prepare_vision_input(model, processor) if is_vision else prepare_text_input(processor) - - # Set model to evaluation mode - model.eval() - - # Prepare output path - model_name_safe = os.path.basename(model_name) - output_path = os.path.join(output_dir, f"{model_name_safe}.onnx") - - # Export the model - print(f"Exporting model to ONNX format (opset version: {opset_version})") - with torch.no_grad(): - if is_vision: - # For vision models, typically only pixel_values is needed - print("Configuring export for vision model") - input_names = ['pixel_values'] - dynamic_axes = { - 'pixel_values': {0: 'batch_size'}, #, 2: 'height', 3: 'width'}, - 'output': {0: 'batch_size'} - } - inputs = (dummy_input['pixel_values'],) - else: - # For text models, we need input_ids and attention_mask - print("Configuring export for text model") - 
input_names = ['input_ids', 'attention_mask'] - dynamic_axes = { - 'input_ids': {0: 'batch_size', 1: 'sequence'}, - 'attention_mask': {0: 'batch_size', 1: 'sequence'}, - 'output': {0: 'batch_size', 1: 'sequence'} - } - inputs = (dummy_input['input_ids'], dummy_input['attention_mask']) - - torch.onnx.export( - model, # PyTorch model - inputs, # model input - output_path, # output path - opset_version=opset_version, - input_names=input_names, # model input names - output_names=['output'], # model output names - dynamic_axes=dynamic_axes - ) - - print(f"Model exported successfully to: {output_path}") - return output_path - -def main(): - parser = argparse.ArgumentParser(description='Export Hugging Face PyTorch model to ONNX') - parser.add_argument('--model', type=str, required=True, - help='Name or path of the Hugging Face model') - parser.add_argument('--output-dir', type=str, required=True, - help='Output directory for the ONNX model') - parser.add_argument('--opset', type=int, default=19, - help='ONNX opset version to use') - - args = parser.parse_args() - - output_path = export_model_to_onnx( - model_name=args.model, - output_dir=args.output_dir, - opset_version=args.opset - ) - - print(f"ONNX model path: {output_path}") - -if __name__ == "__main__": - main() diff --git a/CNN-examples/getting_started_resnet/bf16/go.bat b/CNN-examples/getting_started_resnet/bf16/go.bat deleted file mode 100644 index a47f13e7..00000000 --- a/CNN-examples/getting_started_resnet/bf16/go.bat +++ /dev/null @@ -1,29 +0,0 @@ -:: -:: This script runs the entire example end-to-end: -:: - Exports and compiles the ONNX model -:: - Tests the compiled model using a Python script -:: - Builds the C++ application -:: - Runs the C++ application with the precompiled model -:: - -@echo off - -cd %~dp0 - -if "%CONDA_PREFIX%" == "" echo CONDA_PREFIX not set. This script must be executed from within the RyzenAI conda environment. 
& goto :error -pip install -r requirements.txt - -:: Export and compile the ONNX model -call precompile_model.bat -:: Test the compiled model using a Python script -call test_model.bat - -cd app -:: Build the C++ application -call compile.bat -:: Run the C++ application with the precompiled model -call run.bat - -cd %~dp0 - - diff --git a/CNN-examples/getting_started_resnet/bf16/models/resnet_trained_for_cifar10.pt b/CNN-examples/getting_started_resnet/bf16/models/resnet_trained_for_cifar10.pt deleted file mode 100644 index bf2fa9a1..00000000 --- a/CNN-examples/getting_started_resnet/bf16/models/resnet_trained_for_cifar10.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fbc917577ccf951e7c6039e3d85e95b11162abc06f9a69fe9ef26a5178d00cd -size 94908320 diff --git a/CNN-examples/getting_started_resnet/bf16/predict.py b/CNN-examples/getting_started_resnet/bf16/predict.py deleted file mode 100644 index 6227424a..00000000 --- a/CNN-examples/getting_started_resnet/bf16/predict.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/python3 - -import argparse -import numpy as np -import os -import cv2 -import onnx -import onnxruntime as ort -import numpy as np -from PIL import Image -from pathlib import Path -from resnet_utils import get_npu_info, get_xclbin -import pathlib -import onnxruntime as ort -import onnxruntime -import os -import pickle - - -def unpickle(file): - with open(file,'rb') as fo: - dict = pickle.load(fo, encoding='latin1') - return dict - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--ep', type=str, default ='cpu',choices = ['cpu','npu'], help='EP backend selection') - opt = parser.parse_args() - - onnx_model_path = r'./models/resnet_trained_for_cifar10.onnx' - config_file = 'vitisai_config.json' - datafile = r'./data/cifar-10-batches-py/test_batch' - metafile = r'./data/cifar-10-batches-py/batches.meta' - cache_dir = os.path.abspath('my_cache_dir') - cache_key = pathlib.Path(onnx_model_path).stem - - 
# defualt provider is CPUExecutionProvider - providers = ['CPUExecutionProvider'] - provider_options_dict = {} - - #NPU/CPU Setup - if opt.ep == 'npu': - print("execution started on NPU") - npu_device = get_npu_info() - providers = ['VitisAIExecutionProvider'] - - provider_options_dict = { - "config_file": config_file, - "cache_dir": cache_dir, - "cache_key": cache_key, - "enable_cache_file_io_in_mem":0, - } - elif opt.ep == 'cpu': - print("execution started on CPU") - else: - print("No APU detected. Exiting.") - exit(-1) - - # Create session options - session_options = onnxruntime.SessionOptions() - session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - - # onnxruntime session creation with provided option (either CPU or NPU) - # corresponding provider option will be created - session = onnxruntime.InferenceSession( - onnx_model_path, - sess_options=session_options, - providers=providers, - provider_options=[provider_options_dict] - ) - - data_batch_1 = unpickle(datafile) - metadata = unpickle(metafile) - - images = data_batch_1['data'] - labels = data_batch_1['labels'] - images = np.reshape(images,(10000, 3, 32, 32)) - - dirname = 'images' - if not os.path.exists(dirname): - os.mkdir(dirname) - - #Extract and dump first 10 images - for i in range (0,10): - im = images[i] - im = im.transpose(1,2,0) - im = cv2.cvtColor(im,cv2.COLOR_RGB2BGR) - im_name = f'./images/image_{i}.png' - cv2.imwrite(im_name, im) - - #Pick dumped images and predict - for i in range (0,10): - image_name = f'./images/image_{i}.png' - image = Image.open(image_name).convert('RGB') - # Resize the image to match the input size expected by the model - image = image.resize((32, 32)) - image_array = np.array(image).astype(np.float32) - image_array = image_array/255 - - # Reshape the array to match the input shape expected by the model - image_array = np.transpose(image_array, (2, 0, 1)) - - # Add a batch dimension to the input image - input_data = 
np.expand_dims(image_array, axis=0) - - - # Run the model - outputs = session.run(None, {'input': input_data}) - - - # Process the outputs - output_array = outputs[0] - predicted_class = np.argmax(output_array) - predicted_label = metadata['label_names'][predicted_class] - label = metadata['label_names'][labels[i]] - print(f'Image {i}: Actual Label {label}, Predicted Label {predicted_label}') - - - ################################################################################# - #License - #Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. -if __name__=="__main__": - main() \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/prepare_model_data.py b/CNN-examples/getting_started_resnet/bf16/prepare_model_data.py deleted file mode 100644 index 564606df..00000000 --- a/CNN-examples/getting_started_resnet/bf16/prepare_model_data.py +++ /dev/null @@ -1,155 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -import argparse -import random -import tarfile -import urllib.request - -import torch -import torch.nn as nn -import torchvision -import torchvision.transforms as transforms -from resnet_utils import get_directories -from torchvision.models import ResNet50_Weights, resnet50 - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--num_epochs", type=int, default=0) - parser.add_argument("--train", action='store_true') - args = parser.parse_args() - return args - - -def load_resnet_model(): - weights = ResNet50_Weights.DEFAULT - resnet = resnet50(weights=weights) - resnet.fc = torch.nn.Sequential(torch.nn.Linear(2048, 64), torch.nn.ReLU(inplace=True), torch.nn.Linear(64, 10)) - return resnet - - -# For updating learning rate -def update_lr(optimizer, lr): - for param_group in optimizer.param_groups: - param_group["lr"] = lr - - -def prepare_model(num_epochs=0, models_dir="models", data_dir="data"): - # seed everything to 0 - random.seed(0) - torch.manual_seed(0) - torch.cuda.manual_seed(0) - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Hyper-parameters - num_epochs = num_epochs - learning_rate = 0.001 - - # Image preprocessing modules - transform = transforms.Compose( - [transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()] - ) - - # CIFAR-10 dataset - train_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=True, transform=transform, download=False) - test_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=False, transform=transforms.ToTensor()) - - # Data loader - train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True) - test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False) - - model = load_resnet_model().to(device) - - # Loss and optimizer - criterion = nn.CrossEntropyLoss() - 
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) - - # Train the model - total_step = len(train_loader) - curr_lr = learning_rate - for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = images.to(device) - labels = labels.to(device) - # Forward pass - outputs = model(images) - loss = criterion(outputs, labels) - # Backward and optimize - optimizer.zero_grad() - loss.backward() - optimizer.step() - if (i + 1) % 100 == 0: - print( - "Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format( - epoch + 1, num_epochs, i + 1, total_step, loss.item() - ) - ) - # Decay learning rate - if (epoch + 1) % 20 == 0: - curr_lr /= 3 - update_lr(optimizer, curr_lr) - - # Test the model - model.eval() - if num_epochs: - with torch.no_grad(): - correct = 0 - total = 0 - for images, labels in test_loader: - images = images.to(device) - labels = labels.to(device) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() - - print("Accuracy of the model on the test images: {} %".format(100 * correct / total)) - - # Save the model - model.to("cpu") - torch.save(model, str(models_dir / "resnet_trained_for_cifar10.pt")) - -def export_to_onnx(model, models_dir): - model.to("cpu") - dummy_inputs = torch.randn(1, 3, 32, 32) - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} - tmp_model_path = str(models_dir / "resnet_trained_for_cifar10.onnx") - torch.onnx.export( - model, - dummy_inputs, - tmp_model_path, - export_params=True, - opset_version=17, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - - -def main(): - _, models_dir, data_dir, _ = get_directories() - args = get_args() - - data_download_path_python = data_dir / "cifar-10-python.tar.gz" - data_download_path_bin = data_dir / "cifar-10-binary.tar.gz" - 
urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", data_download_path_python) - urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", data_download_path_bin) - file_python = tarfile.open(data_download_path_python) - file_python.extractall(data_dir) - file_python.close() - file_bin = tarfile.open(data_download_path_bin) - file_bin.extractall(data_dir) - file_bin.close() - if args.train: - prepare_model(args.num_epochs, models_dir, data_dir) - model = torch.load(str(models_dir / "resnet_trained_for_cifar10.pt"), weights_only=False) - export_to_onnx(model, models_dir) - - -if __name__ == "__main__": - main() diff --git a/CNN-examples/getting_started_resnet/bf16/requirements.txt b/CNN-examples/getting_started_resnet/bf16/requirements.txt deleted file mode 100644 index f4c495e7..00000000 --- a/CNN-examples/getting_started_resnet/bf16/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -timm==1.0.20 -torch==2.8.0 -torchvision==0.23.0 -opencv-python==4.11.0.86 -numpy==1.26.4 \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/bf16/resnet_utils.py b/CNN-examples/getting_started_resnet/bf16/resnet_utils.py deleted file mode 100644 index 7e629387..00000000 --- a/CNN-examples/getting_started_resnet/bf16/resnet_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -import os -import subprocess -from pathlib import Path - -def get_directories(): - current_dir = Path(__file__).resolve().parent - - # models directory for resnet sample - models_dir = current_dir / "models" - models_dir.mkdir(parents=True, exist_ok=True) - - # data directory for resnet sample - data_dir = current_dir / "data" - data_dir.mkdir(parents=True, exist_ok=True) - - # cache directory for resnet sample - cache_dir = current_dir / "cache" - cache_dir.mkdir(parents=True, exist_ok=True) - - return current_dir, models_dir, data_dir, cache_dir - -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - apu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): apu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): apu_type = 'KRK' - return apu_type - -def get_xclbin(npu_device): - xclbin_file = '' - if npu_device == 'STX' or npu_device=='KRK': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - if npu_device == 'PHX/HPT': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - return xclbin_file diff --git a/CNN-examples/getting_started_resnet/bf16/vitisai_config.json b/CNN-examples/getting_started_resnet/bf16/vitisai_config.json deleted file mode 100644 index 75ae09db..00000000 --- 
a/CNN-examples/getting_started_resnet/bf16/vitisai_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "enable_f32_to_bf16_conversion": true, - "preferred_data_storage": "auto" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/int8/Readme.md b/CNN-examples/getting_started_resnet/int8/Readme.md deleted file mode 100644 index b39b3c83..00000000 --- a/CNN-examples/getting_started_resnet/int8/Readme.md +++ /dev/null @@ -1,12 +0,0 @@ - - - - -

Ryzen™ AI ResNet Tutorial

-
- -# Getting Started Example - -This tutorial uses a fine-tuned version of the ResNet model (using the CIFAR-10 dataset) to demonstrate the process of preparing, quantizing, and deploying a model using Ryzen AI Software. The tutorial features deployment using both Python and C++ ONNX runtime code. - -For a walkthrough of this tutorial please follow the [tutorial documentation](https://ryzenai.docs.amd.com/en/latest/getstartex.html) diff --git a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/CMakeLists.txt b/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/CMakeLists.txt deleted file mode 100644 index 7d7bd715..00000000 --- a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -cmake_minimum_required(VERSION 3.5) -project( - resnet_cifar - VERSION 1.0.0 - LANGUAGES C CXX) -set(CMAKE_CXX_STANDARD 17) -find_package(Python 3.10 EXACT COMPONENTS Interpreter Development) -set (RYZEN_AI_INSTALLATION_PATH $ENV{RYZEN_AI_INSTALLATION_PATH}) - - -string(APPEND CMAKE_CXX_FLAGS " /Zc:__cplusplus") -# -# unreferenced formal parameter -string(APPEND CMAKE_CXX_FLAGS " /wd4100") -# std::codecvt_utf8': warning STL4017: std::wbuffer_convert, std::wstring_convert, and the header (containing std::codecvt_mode, std::codecvt_utf8, std::codecvt_utf16, and std::codecvt_utf8_utf16) are deprecated in C++17. (The std::codecvt class template is NOT deprecated.) The C++ Standard doesn't provide equivalent non-deprecated functionality; consider using MultiByteToWideChar() and WideCharToMultiByte() from instead. You can define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING or _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS to acknowledge that you have received this warning. 
-# -string(APPEND CMAKE_CXX_FLAGS " /wd4996") -string(APPEND CMAKE_CXX_FLAGS " /WX") - -if(NOT ONNXRUNTIME_ROOTDIR) - if(WIN32) - set(ONNXRUNTIME_ROOTDIR "${RYZEN_AI_INSTALLATION_PATH}/onnxruntime") -else() - set(ONNXRUNTIME_ROOTDIR "/usr/local") - endif() -endif() - -# The ORT package has a different include directory structure to a local install via cmake. -# We added the path for the pre-built package above. Add the path for a local install to support either usage. -# TODO: If we want to support additional EPs being loadable from a local install we also need to add EP specific -# directories under /include/onnxruntime/core/providers -include_directories("${ONNXRUNTIME_ROOTDIR}/include" # Pre-built package - "${ONNXRUNTIME_ROOTDIR}/include/onnxruntime" # Linux local install to /usr/local - "${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session" # Windows local install - ) - -link_directories("${ONNXRUNTIME_ROOTDIR}/lib") - -link_directories("${CMAKE_INSTALL_PREFIX}/lib") - - - -find_package(OpenCV COMPONENTS core highgui imgproc REQUIRED) -#find_package(Eigen3) # bug in opencv.cmake. 
- - -add_executable(resnet_cifar resnet_cifar.cpp util/getopt.c) -target_include_directories(resnet_cifar - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/utils -) -target_link_libraries(resnet_cifar ${ORT_LIBRARY} ${OpenCV_LIBS} onnxruntime) -install(TARGETS resnet_cifar RUNTIME DESTINATION bin) \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/cifar_word_list.inc b/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/cifar_word_list.inc deleted file mode 100644 index dbd792ec..00000000 --- a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/cifar_word_list.inc +++ /dev/null @@ -1,10 +0,0 @@ -"airplane", -"automobile", -"bird", -"cat", -"deer", -"dog", -"frog", -"horse", -"ship", -"truck", \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/resnet_cifar.cpp b/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/resnet_cifar.cpp deleted file mode 100644 index bf7226a1..00000000 --- a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/resnet_cifar.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ************************************************************************************/ -#include -#include - -#include // std::generate -#include -#include -#include -#include -#include -#include -#if _WIN32 -extern "C" { -#include "util/getopt.h" -} -#include -#include -using convert_t = std::codecvt_utf8; -std::wstring_convert strconverter; -#endif -#include -#include -#include -#include -#include -#include - -using namespace std; - -static cv::Mat read_image(const std::string files); -static cv::Mat croppedImage(const cv::Mat& image, int height, int width); -static cv::Mat preprocess_image(const cv::Mat& image, cv::Size size); -static void set_input_image(const cv::Mat& image, float* data); -static std::vector softmax(float* data, int64_t size); -static std::vector> topk(const std::vector& score, - int K); -static void print_topk(const std::vector>& topk); -static const char* lookup(int index); - -const int CIFAR_IMAGE_DEPTH = 3; -const int CIFAR_IMAGE_WIDTH = 32; -const int CIFAR_IMAGE_HEIGHT = 32; -const int CIFAR_IMAGE_AREA = CIFAR_IMAGE_WIDTH * CIFAR_IMAGE_HEIGHT; -const int CIFAR_LABEL_SIZE = 1; -const int CIFAR_IMAGE_SIZE = CIFAR_IMAGE_DEPTH * CIFAR_IMAGE_AREA; // 3072 = 3 * 32 * 32 - -vector> ReadFirstTenCIFAR10Images(const std::string& filename) -{ - vector> labeled_images; - vector images; - ifstream file(filename, std::ios::binary); - int count = 0; - vector labels; - if (file.is_open()) { - while (!file.eof() && count < 10) { - unsigned char label; - unsigned char data[CIFAR_IMAGE_SIZE]; - if 
(!file.read(reinterpret_cast(&label), CIFAR_LABEL_SIZE)) { - break; - } - labels.push_back(label); - if (!file.read(reinterpret_cast(data), CIFAR_IMAGE_SIZE)) { - std::cerr << "Error reading image data." << std::endl; - break; - } - cv::Mat channels[3]; - for (int i = 0; i < 3; ++i) { - channels[i] = cv::Mat(CIFAR_IMAGE_HEIGHT, CIFAR_IMAGE_WIDTH, CV_8UC1, &data[i * CIFAR_IMAGE_AREA]); - } - - // Merge the separate channels into a single BGR image - cv::Mat img; - cv::merge(channels, 3, img); - cv::cvtColor(img, img, cv::COLOR_RGB2BGR); - - labeled_images.emplace_back(img, static_cast(label)); - count += 1; - } - file.close(); - } - else - { - std::cerr << "Unable to open the file: " << filename << std::endl; - } - return labeled_images; -} -// preprocess -static void preprocess_resnet(const string file, - std::vector& input_tensor_values, - std::vector& input_shape) { - auto channel = input_shape[1]; - auto height = input_shape[2]; - auto width = input_shape[3]; - auto size = cv::Size((int)width, (int)height); - auto image = read_image(file); - set_input_image(image, input_tensor_values.data()); - -} - -// postprocess -static string postprocess_resnet(const string file, - Ort::Value& output_tensor) { - auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape(); - auto channel = output_shape[1]; - auto output_tensor_ptr = output_tensor.GetTensorMutableData(); - auto softmax_output = softmax(output_tensor_ptr, channel); - auto tb_top5 = topk(softmax_output, 5); - //print_topk(tb_top5); - auto top1 = tb_top5[0]; - auto cls = std::string("") + lookup(top1.first) + " prob. 
" + - std::to_string(top1.second); - return lookup(top1.first); -} - -#define CHECK_STATUS_OK(expr) \ - do { \ - Status _tmp_status = (expr); \ - CHECK(_tmp_status.IsOK()) << _tmp_status; \ - } while (0) - - -// pretty prints a shape dimension vector -static std::string print_shape(const std::vector& v) { - std::stringstream ss(""); - for (size_t i = 0; i < v.size() - 1; i++) - ss << v[i] << "x"; - ss << v[v.size() - 1]; - return ss.str(); -} - -static int calculate_product(const std::vector& v) { - int total = 1; - for (auto& i : v) - total *= (int)i; - return total; -} - -static void usage() { - std::cout << "usage: resnet_cifar " - " [img_url]... \n" - << std::endl; -} - -bool isValidEP(const std::string& option) { - const std::vector validOptions = { "npu", "cpu"}; - for (const auto& validOption : validOptions) { - if (option == validOption) { - return true; - } - } - return false; -} - -int main(int argc, char* argv[]) { - - const char* env_val = getenv("CONDA_PREFIX"); - const char* env_name = "PYTHONHOME"; - _putenv_s(env_name, env_val); - - const string data_dir = "./data/cifar-10-batches-bin/test_batch.bin"; - const string output_folder = "images/"; - std::filesystem::create_directory(output_folder); - auto labeled_images = ReadFirstTenCIFAR10Images(data_dir); - for (int i = 0; i < labeled_images.size(); ++i) { - string output_path = output_folder + "cifar_image_" + std::to_string(i) + ".png"; - cv::imwrite(output_path, labeled_images[i].first); - } - vector> results; - int opt = 0; - int64_t batch_number = 1; - auto model_name = strconverter.from_bytes(std::string(argv[optind])); - cout << "model name:" << std::string(argv[optind]) << endl; - auto ep = std::string(argv[optind + 1]); - if (!isValidEP(ep)) { - std::cerr << "Error: Choose from one of the available EP options: cpu, npu.\n"; - return 0; - } - cout << "ep:" << ep << endl; - Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "resnet_cifar"); - auto session_options = Ort::SessionOptions(); - - auto 
cache_dir = std::filesystem::current_path().string(); - - if (ep == "npu") - { - auto options = - std::unordered_map{ {"cacheDir", cache_dir}, {"cacheKey", "modelcachekey_c++"},{"log_level", "info"},{"enable_cache_file_io_in_mem", "0"} }; - try { - session_options.AppendExecutionProvider_VitisAI(options); - } - catch (const std::exception& e) { - std::cerr << "Exception occurred in appending execution provider: " << e.what() << std::endl; - } - } - - auto session = Ort::Session(env, model_name.data(), session_options); - // print name/shape of inputs and outputs - Ort::AllocatorWithDefaultOptions allocator; - auto input_count = session.GetInputCount(); - auto input_names = std::vector(); - auto input_names_ptr = std::vector(); - auto input_shapes = std::vector>(); - input_shapes.reserve(input_count); - input_names_ptr.reserve(input_count); - input_names.reserve(input_count); - std::cout << "Input Node Name/Shape (" << input_count << "):" << std::endl; - for (size_t i = 0; i < input_count; i++) - { - input_shapes.push_back(session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); - auto name = session.GetInputNameAllocated(i, allocator); - input_names.push_back(name.get()); - input_names_ptr.push_back(std::move(name)); - std::cout << "\t" << input_names[i] << " : " << print_shape(input_shapes[i]) << std::endl; - } - - auto output_count = session.GetOutputCount(); - auto output_shapes = std::vector>(); - auto output_names_ptr = std::vector(); - auto output_names = std::vector(); - output_shapes.reserve(output_count); - output_names_ptr.reserve(output_count); - output_names.reserve(output_count); - - for (size_t i = 0; i < output_count; i++) - { - auto shape = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); - output_shapes.push_back(shape); - auto name = session.GetOutputNameAllocated(i, allocator); - output_names.push_back(name.get()); - output_names_ptr.push_back(std::move(name)); - std::cout << "\t" << output_names[i] << " : " << 
print_shape(output_shapes[i]) << std::endl; - } - // Assume model has 1 input node and 1 output node. - //assert(input_names.size() == 1 && output_names.size() == 1); - for (int i = 0; i < 10; i++) - { - const std::string curr_file = "./images/cifar_image_" + std::to_string(i) + ".png"; - //cout << "curr file: " << curr_file << endl; - auto input_shape = input_shapes[0]; - if (input_shape[0] == -1) { - input_shape[0] = batch_number; - } - int total_number_elements = calculate_product(input_shape); - std::vector input_tensor_values(total_number_elements); - preprocess_resnet(curr_file, input_tensor_values, input_shape); - - std::vector input_tensors; - Ort::MemoryInfo info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - input_tensors.push_back(Ort::Value::CreateTensor( - info, input_tensor_values.data(), input_tensor_values.size(), - input_shape.data(), input_shape.size())); - - // double-check the dimensions of the input tensor - assert(input_tensors[0].IsTensor() && - input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == - input_shape); - /*cout << "\ninput_tensor shape: " - << print_shape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) - << endl;*/ - - - // pass data through model - //cout << "Running model..."; - try { - auto output_tensors = session.Run(Ort::RunOptions(), input_names.data(), input_tensors.data(), input_count, output_names.data(), output_count); - //cout << "done" << endl; - - // double-check the dimensions of the output tensors - // NOTE: the number of output tensors is equal to the number of output nodes - // specifed in the Run() call - assert(output_tensors.size() == session.GetOutputNames().size() && - output_tensors[0].IsTensor()); - auto output_shape = - output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); - //cout << "output_tensor_shape: " << print_shape(output_shape) << endl; - string predicted = postprocess_resnet(curr_file, output_tensors[0]); - int lab = labeled_images[i].second; - 
results.push_back(std::make_pair(predicted, lookup(lab))); - } - catch (const Ort::Exception& exception) { - cout << "ERROR running model inference: " << exception.what() << endl; - exit(-1); - } - } - cout << "Final results:" << endl; - for (auto n = 0; n < results.size(); n++) - { - cout << "Predicted label is " << results[n].first << " and actual label is " << results[n].second << endl; - } - - const char* temp = ""; - _putenv_s(env_name, temp); - - return 0; -} - -static cv::Mat read_image(const string file) { - cv::Mat image; - image = cv::imread(file); - return image; -} - -static cv::Mat croppedImage(const cv::Mat& image, int height, int width) { - cv::Mat cropped_img; - int offset_h = (image.rows - height) / 2; - int offset_w = (image.cols - width) / 2; - cv::Rect box(offset_w, offset_h, width, height); - cropped_img = image(box).clone(); - return cropped_img; -} - -static cv::Mat preprocess_image(const cv::Mat& image, cv::Size size) { - float smallest_side = 256; - float scale = smallest_side / ((image.rows > image.cols) ? 
(float)image.cols - : (float)image.rows); - cv::Mat resized_image; - cv::resize(image, resized_image, - cv::Size(image.cols * (int)scale, image.rows * (int)scale)); - return croppedImage(resized_image, size.height, size.width); -} - -//(image_data - mean) * scale, BRG2RGB and hwc2chw -static void set_input_image(const cv::Mat& image, float* data) { - float mean[3] = { 0.0f, 0.0f, 0.0f }; - float scales[3] = { 1.0f, 1.0f, 1.0f }; - for (int c = 0; c < 3; c++) { - for (int h = 0; h < image.rows; h++) { - for (int w = 0; w < image.cols; w++) { - auto c_t = abs(c - 2); // BRG to RGB - auto image_data = - ((image.at(h, w)[c_t] - mean[c_t]) * scales[c_t]) / 255; - data[c * image.rows * image.cols + h * image.cols + w] = - (float)image_data; - } - } - } -} - -static std::vector softmax(float* data, int64_t size) { - auto output = std::vector(size); - std::transform(data, data + size, output.begin(), expf); - auto sum = - std::accumulate(output.begin(), output.end(), 0.0f, std::plus()); - std::transform(output.begin(), output.end(), output.begin(), - [sum](float v) { return v / sum; }); - return output; -} - -static std::vector> topk(const std::vector& score, - int K) { - auto indices = std::vector(score.size()); - std::iota(indices.begin(), indices.end(), 0); - std::partial_sort(indices.begin(), indices.begin() + K, indices.end(), - [&score](int a, int b) { return score[a] > score[b]; }); - auto ret = std::vector>(K); - std::transform( - indices.begin(), indices.begin() + K, ret.begin(), - [&score](int index) { return std::make_pair(index, score[index]); }); - return ret; -} - -static void print_topk(const std::vector>& topk) { - for (const auto& v : topk) { - std::cout << std::setiosflags(std::ios::left) << std::setw(11) - << "score[" + std::to_string(v.first) + "]" - << " = " << std::setw(12) << v.second - << " text: " << lookup(v.first) - << std::resetiosflags(std::ios::left) << std::endl; - } -} - -static const char* lookup(int index) { - static const char* table[] = 
{ - #include "cifar_word_list.inc" - }; - - if (index < 0) { - return ""; - } - else { - return table[index]; - } -} \ No newline at end of file diff --git a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.c b/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.c deleted file mode 100644 index a4ac2bfb..00000000 --- a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.c +++ /dev/null @@ -1,72 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ************************************************************************************/ -#include "getopt.h" - -#include -#include -int opterr = 1, /* if error message should be printed */ - optind = 1, /* index into parent argv vector */ - optopt, /* character checked for validity */ - optreset; /* reset getopt */ -char *optarg; /* argument associated with option */ -#define BADCH (int)'?' -#define BADARG (int)':' -#define EMSG "" - -/* - * getopt -- - * Parse argc/argv argument vector. - */ -int getopt(int nargc, char *const nargv[], const char *ostr) { - static char *place = EMSG; /* option letter processing */ - const char *oli; /* option letter list index */ - - if (optreset || !*place) { /* update scanning pointer */ - optreset = 0; - if (optind >= nargc || *(place = nargv[optind]) != '-') { - place = EMSG; - return (-1); - } - if (place[1] && *++place == '-') { /* found "--" */ - ++optind; - place = EMSG; - return (-1); - } - } /* option letter okay? */ - if ((optopt = (int)*place++) == (int)':' || !(oli = strchr(ostr, optopt))) { - /* - * if the user didn't specify '-' as an option, - * assume it means -1. 
- */ - if (optopt == (int)'-') return (-1); - if (!*place) ++optind; - if (opterr && *ostr != ':') (void)printf("illegal option -- %c\n", optopt); - return (BADCH); - } - if (*++oli != ':') { /* don't need argument */ - optarg = NULL; - if (!*place) ++optind; - } else { /* need an argument */ - if (*place) /* no white space */ - optarg = place; - else if (nargc <= ++optind) { /* no arg */ - place = EMSG; - if (*ostr == ':') return (BADARG); - if (opterr) (void)printf("option requires an argument -- %c\n", optopt); - return (BADCH); - } else /* white space */ - optarg = nargv[optind]; - place = EMSG; - ++optind; - } - return (optopt); /* dump back option letter */ -} diff --git a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.h b/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.h deleted file mode 100644 index 5163a113..00000000 --- a/CNN-examples/getting_started_resnet/int8/cpp/resnet_cifar/util/getopt.h +++ /dev/null @@ -1,17 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ************************************************************************************/ -#ifndef GETOPT_H -#define GETOPT_H -extern int opterr, optind, optopt, optreset; -extern char* optarg; -int getopt(int nargc, char* const nargv[], const char* ostr); -#endif diff --git a/CNN-examples/getting_started_resnet/int8/models/resnet_trained_for_cifar10.pt b/CNN-examples/getting_started_resnet/int8/models/resnet_trained_for_cifar10.pt deleted file mode 100644 index bf2fa9a1..00000000 --- a/CNN-examples/getting_started_resnet/int8/models/resnet_trained_for_cifar10.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fbc917577ccf951e7c6039e3d85e95b11162abc06f9a69fe9ef26a5178d00cd -size 94908320 diff --git a/CNN-examples/getting_started_resnet/int8/predict.py b/CNN-examples/getting_started_resnet/int8/predict.py deleted file mode 100644 index 71d5d39d..00000000 --- a/CNN-examples/getting_started_resnet/int8/predict.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/python3 - -import argparse -import numpy as np -import os -import cv2 -import onnx -import onnxruntime as ort -import numpy as np -from PIL import Image -from pathlib import Path -from resnet_utils import get_npu_info, get_xclbin - - -quantized_model_path = r'./models/resnet_quantized.onnx' -model = onnx.load(quantized_model_path) - - -parser = argparse.ArgumentParser() -parser.add_argument('--ep', type=str, default ='cpu',choices = ['cpu','npu'], help='EP backend selection') -opt = parser.parse_args() - - -providers = ['CPUExecutionProvider'] -provider_options = [{}] - -#NPU Setup -if opt.ep == 'npu': - npu_device = get_npu_info() - providers = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - 
provider_options = [{ - 'cache_dir': str(cache_dir), - 'log_level':'info', - 'cache_key': 'modelcachekey', - 'enable_cache_file_io_in_mem':'0' - }] - # For PHX/HPT, xclbin is mandatory - if npu_device == 'PHX/HPT': - provider_options[0]['target'] = 'X1' - provider_options[0]['xclbin'] = get_xclbin(npu_device) - -# Create session options -session_options = ort.SessionOptions() -session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - -session = ort.InferenceSession(model.SerializeToString(), - sess_options=session_options, - providers=providers, - provider_options=provider_options) - - -def unpickle(file): - import pickle - with open(file,'rb') as fo: - dict = pickle.load(fo, encoding='latin1') - return dict - - -datafile = r'./data/cifar-10-batches-py/test_batch' -metafile = r'./data/cifar-10-batches-py/batches.meta' - -data_batch_1 = unpickle(datafile) -metadata = unpickle(metafile) - -images = data_batch_1['data'] -labels = data_batch_1['labels'] -images = np.reshape(images,(10000, 3, 32, 32)) - -import os -dirname = 'images' -if not os.path.exists(dirname): - os.mkdir(dirname) - - -#Extract and dump first 10 images -for i in range (0,10): - im = images[i] - im = im.transpose(1,2,0) - im = cv2.cvtColor(im,cv2.COLOR_RGB2BGR) - im_name = f'./images/image_{i}.png' - cv2.imwrite(im_name, im) - -#Pick dumped images and predict -for i in range (0,10): - image_name = f'./images/image_{i}.png' - image = Image.open(image_name).convert('RGB') - # Resize the image to match the input size expected by the model - image = image.resize((32, 32)) - image_array = np.array(image).astype(np.float32) - image_array = image_array/255 - - # Reshape the array to match the input shape expected by the model - image_array = np.transpose(image_array, (2, 0, 1)) - - # Add a batch dimension to the input image - input_data = np.expand_dims(image_array, axis=0) - - - # Run the model - outputs = session.run(None, {'input': input_data}) - - - # Process the outputs 
- output_array = outputs[0] - predicted_class = np.argmax(output_array) - predicted_label = metadata['label_names'][predicted_class] - label = metadata['label_names'][labels[i]] - print(f'Image {i}: Actual Label {label}, Predicted Label {predicted_label}') - - -################################################################################# -#License -#Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/CNN-examples/getting_started_resnet/int8/prepare_model_data.py b/CNN-examples/getting_started_resnet/int8/prepare_model_data.py deleted file mode 100644 index 564606df..00000000 --- a/CNN-examples/getting_started_resnet/int8/prepare_model_data.py +++ /dev/null @@ -1,155 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import argparse -import random -import tarfile -import urllib.request - -import torch -import torch.nn as nn -import torchvision -import torchvision.transforms as transforms -from resnet_utils import get_directories -from torchvision.models import ResNet50_Weights, resnet50 - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--num_epochs", type=int, default=0) - parser.add_argument("--train", action='store_true') - args = parser.parse_args() - return args - - -def load_resnet_model(): - weights = ResNet50_Weights.DEFAULT - resnet = resnet50(weights=weights) - resnet.fc = torch.nn.Sequential(torch.nn.Linear(2048, 64), torch.nn.ReLU(inplace=True), torch.nn.Linear(64, 10)) - return resnet - - -# For updating learning rate -def update_lr(optimizer, lr): - for param_group in optimizer.param_groups: - param_group["lr"] = lr - - -def prepare_model(num_epochs=0, models_dir="models", data_dir="data"): - # seed everything to 0 - random.seed(0) 
- torch.manual_seed(0) - torch.cuda.manual_seed(0) - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Hyper-parameters - num_epochs = num_epochs - learning_rate = 0.001 - - # Image preprocessing modules - transform = transforms.Compose( - [transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()] - ) - - # CIFAR-10 dataset - train_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=True, transform=transform, download=False) - test_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=False, transform=transforms.ToTensor()) - - # Data loader - train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True) - test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False) - - model = load_resnet_model().to(device) - - # Loss and optimizer - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) - - # Train the model - total_step = len(train_loader) - curr_lr = learning_rate - for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = images.to(device) - labels = labels.to(device) - # Forward pass - outputs = model(images) - loss = criterion(outputs, labels) - # Backward and optimize - optimizer.zero_grad() - loss.backward() - optimizer.step() - if (i + 1) % 100 == 0: - print( - "Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format( - epoch + 1, num_epochs, i + 1, total_step, loss.item() - ) - ) - # Decay learning rate - if (epoch + 1) % 20 == 0: - curr_lr /= 3 - update_lr(optimizer, curr_lr) - - # Test the model - model.eval() - if num_epochs: - with torch.no_grad(): - correct = 0 - total = 0 - for images, labels in test_loader: - images = images.to(device) - labels = labels.to(device) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() - - 
print("Accuracy of the model on the test images: {} %".format(100 * correct / total)) - - # Save the model - model.to("cpu") - torch.save(model, str(models_dir / "resnet_trained_for_cifar10.pt")) - -def export_to_onnx(model, models_dir): - model.to("cpu") - dummy_inputs = torch.randn(1, 3, 32, 32) - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} - tmp_model_path = str(models_dir / "resnet_trained_for_cifar10.onnx") - torch.onnx.export( - model, - dummy_inputs, - tmp_model_path, - export_params=True, - opset_version=17, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - - -def main(): - _, models_dir, data_dir, _ = get_directories() - args = get_args() - - data_download_path_python = data_dir / "cifar-10-python.tar.gz" - data_download_path_bin = data_dir / "cifar-10-binary.tar.gz" - urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", data_download_path_python) - urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", data_download_path_bin) - file_python = tarfile.open(data_download_path_python) - file_python.extractall(data_dir) - file_python.close() - file_bin = tarfile.open(data_download_path_bin) - file_bin.extractall(data_dir) - file_bin.close() - if args.train: - prepare_model(args.num_epochs, models_dir, data_dir) - model = torch.load(str(models_dir / "resnet_trained_for_cifar10.pt"), weights_only=False) - export_to_onnx(model, models_dir) - - -if __name__ == "__main__": - main() diff --git a/CNN-examples/getting_started_resnet/int8/requirements.txt b/CNN-examples/getting_started_resnet/int8/requirements.txt deleted file mode 100644 index 3156e5ff..00000000 --- a/CNN-examples/getting_started_resnet/int8/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -torchvision==0.23.0 -opencv-python==4.11.0.86 -numpy==1.26.4 diff --git 
a/CNN-examples/getting_started_resnet/int8/resnet_quantize.py b/CNN-examples/getting_started_resnet/int8/resnet_quantize.py deleted file mode 100644 index 3b451283..00000000 --- a/CNN-examples/getting_started_resnet/int8/resnet_quantize.py +++ /dev/null @@ -1,110 +0,0 @@ -import torch -from onnxruntime.quantization.calibrate import CalibrationDataReader -from torch.utils.data import DataLoader, Dataset -from torchvision import transforms -from torchvision.datasets import CIFAR10 - - -import onnx -import onnxruntime -from onnxruntime.quantization import CalibrationDataReader, QuantType, QuantFormat, CalibrationMethod, quantize_static - -from quark.onnx.quantization.config import (Config, get_default_config) -from quark.onnx import ModelQuantizer - - -class CIFAR10DataSet: - def __init__( - self, - data_dir, - **kwargs, - ): - super().__init__() - self.train_path = data_dir - self.vld_path = data_dir - self.setup("fit") - - def setup(self, stage: str): - transform = transforms.Compose( - [transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()] - ) - self.train_dataset = CIFAR10(root=self.train_path, train=True, transform=transform, download=False) - self.val_dataset = CIFAR10(root=self.vld_path, train=True, transform=transform, download=False) - - -class PytorchResNetDataset(Dataset): - def __init__(self, dataset): - self.dataset = dataset - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - sample = self.dataset[index] - input_data = sample[0] - label = sample[1] - return input_data, label - - -def create_dataloader(data_dir, batch_size): - cifar10_dataset = CIFAR10DataSet(data_dir) - _, val_set = torch.utils.data.random_split(cifar10_dataset.val_dataset, [49000, 1000]) - benchmark_dataloader = DataLoader(PytorchResNetDataset(val_set), batch_size=batch_size, drop_last=True) - return benchmark_dataloader - - -class ResnetCalibrationDataReader(CalibrationDataReader): - def __init__(self, 
data_dir: str, batch_size: int = 16): - super().__init__() - self.iterator = iter(create_dataloader(data_dir, batch_size)) - - def get_next(self) -> dict: - try: - images, labels = next(self.iterator) - return {"input": images.numpy()} - except Exception: - return None - - -def resnet_calibration_reader(data_dir, batch_size=16): - return ResnetCalibrationDataReader(data_dir, batch_size=batch_size) - - - -def main(): - # `input_model_path` is the path to the original, unquantized ONNX model. - input_model_path = "models/resnet_trained_for_cifar10.onnx" - - # `output_model_path` is the path where the quantized model will be saved. - output_model_path = "models/resnet_quantized.onnx" - - # `calibration_dataset_path` is the path to the dataset used for calibration during quantization. - calibration_dataset_path = "data/" - - # `dr` (Data Reader) is an instance of ResNetDataReader, which is a utility class that - # reads the calibration dataset and prepares it for the quantization process. - dr = resnet_calibration_reader(calibration_dataset_path) - - - #Quantization with Quark - - # Get quantization configuration - quant_config = get_default_config("XINT8") - config = Config(global_quant_config=quant_config) - print(f"The configuration for quantization is {config}") - - # Create an ONNX quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model - quantizer.quantize_model(input_model_path, output_model_path, dr) - - -if __name__ == '__main__': - main() - - - -################################################################################# -#License -#Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
diff --git a/CNN-examples/getting_started_resnet/int8/resnet_utils.py b/CNN-examples/getting_started_resnet/int8/resnet_utils.py deleted file mode 100644 index 7e629387..00000000 --- a/CNN-examples/getting_started_resnet/int8/resnet_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import os -import subprocess -from pathlib import Path - -def get_directories(): - current_dir = Path(__file__).resolve().parent - - # models directory for resnet sample - models_dir = current_dir / "models" - models_dir.mkdir(parents=True, exist_ok=True) - - # data directory for resnet sample - data_dir = current_dir / "data" - data_dir.mkdir(parents=True, exist_ok=True) - - # cache directory for resnet sample - cache_dir = current_dir / "cache" - cache_dir.mkdir(parents=True, exist_ok=True) - - return current_dir, models_dir, data_dir, cache_dir - -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - apu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): apu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): apu_type = 'KRK' - return apu_type - -def get_xclbin(npu_device): - xclbin_file = '' - if npu_device == 'STX' or npu_device=='KRK': - xclbin_file = 
'{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - if npu_device == 'PHX/HPT': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - return xclbin_file diff --git a/CNN-examples/hello_world/hello_world.ipynb b/CNN-examples/hello_world/hello_world.ipynb deleted file mode 100644 index 1fd61cea..00000000 --- a/CNN-examples/hello_world/hello_world.ipynb +++ /dev/null @@ -1,616 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hello World Example\n", - "\n", - "This is a simple Jupyter Notebook that walks through the 4 steps of compiling and running a PyTorch model on the embedded Neural Processing Unit (NPU) in your AMD Ryzen AI enabled PC. The steps are as follows:\n", - "\n", - "1. Get model - download or create a PyTorch model that we will run on the NPU\n", - "2. Export to ONNX - convert the PyTorch model to ONNX format.\n", - "3. Quantize - optimize the model for faster inference on the NPU by reducing its precision to INT8.\n", - "4. Run Model on CPU and NPU - compare performance between running the model on the CPU and on the NPU." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: torch in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from -r requirements.txt (line 1)) (2.4.0)\n", - "Requirement already satisfied: ipykernel in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from -r requirements.txt (line 2)) (6.29.5)\n", - "Requirement already satisfied: filelock in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (3.15.4)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (4.12.2)\n", - "Requirement already satisfied: sympy in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (1.13.2)\n", - "Requirement already satisfied: networkx in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (3.3)\n", - "Requirement already satisfied: jinja2 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (3.1.4)\n", - "Requirement already satisfied: fsspec in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from torch->-r requirements.txt (line 1)) (2024.6.1)\n", - "Requirement already satisfied: comm>=0.1.1 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (0.2.2)\n", - "Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (1.8.5)\n", - "Requirement already satisfied: ipython>=7.23.1 in 
c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (8.26.0)\n", - "Requirement already satisfied: jupyter-client>=6.1.12 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (8.6.2)\n", - "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (5.7.2)\n", - "Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (0.1.7)\n", - "Requirement already satisfied: nest-asyncio in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (1.6.0)\n", - "Requirement already satisfied: packaging in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (24.1)\n", - "Requirement already satisfied: psutil in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (6.0.0)\n", - "Requirement already satisfied: pyzmq>=24 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (26.1.0)\n", - "Requirement already satisfied: tornado>=6.1 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (6.4.1)\n", - "Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipykernel->-r requirements.txt (line 2)) (5.14.3)\n", - "Requirement already satisfied: decorator in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (5.1.1)\n", - "Requirement already 
satisfied: jedi>=0.16 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.19.1)\n", - "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (3.0.47)\n", - "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (2.18.0)\n", - "Requirement already satisfied: stack-data in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.6.2)\n", - "Requirement already satisfied: exceptiongroup in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (1.2.2)\n", - "Requirement already satisfied: colorama in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.4.6)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from jupyter-client>=6.1.12->ipykernel->-r requirements.txt (line 2)) (2.9.0)\n", - "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->-r requirements.txt (line 2)) (4.2.2)\n", - "Requirement already satisfied: pywin32>=300 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->-r requirements.txt (line 2)) (306)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from jinja2->torch->-r requirements.txt (line 1)) (2.1.5)\n", - 
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from sympy->torch->-r requirements.txt (line 1)) (1.3.0)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.3 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.8.4)\n", - "Requirement already satisfied: wcwidth in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.2.13)\n", - "Requirement already satisfied: six>=1.5 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel->-r requirements.txt (line 2)) (1.16.0)\n", - "Requirement already satisfied: executing>=1.2.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (2.0.1)\n", - "Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (2.4.1)\n", - "Requirement already satisfied: pure-eval in c:\\users\\vgods\\miniconda3\\envs\\ryzen-ai-1.2.0\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel->-r requirements.txt (line 2)) (0.2.3)\n" - ] - } - ], - "source": [ - "# Before starting, be sure you've installed the requirements listed in the requirements.txt file:\n", - "!python -m pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 0. Imports & Environment Variables\n", - "\n", - "We'll use the following imports in our example. `torch` and `torch_nn` are used for building and running ML models. We'll use them to define a small neural network and to generate the model weights. 
`os` is used for interacting with the operating system and is used to manage our environment variables, file paths, and directories. `subprocess` allows us to retrieve the hardware information. `onnx` and `onnxruntime` are used to work with our model in the ONNX format and for running our inference. `vai_q_onnx` is part of the Vitis AI Quantizer for ONNX models. We use it to perform quantization, converting the model into an INT8 format that is optimized for the NPU." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import os\n", - "import subprocess\n", - "import onnxruntime\n", - "import numpy as np\n", - "import onnx\n", - "import shutil\n", - "from timeit import default_timer as timer" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As well, we want to set the environment variables based on the NPU device we have in our PC. For more information about NPU configurations, see: For more information about NPU configurations, refer to the official [AMD Ryzen AI Documentation](https://ryzenai.docs.amd.com/en/latest/runtime_setup.html)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "APU Type: PHX/HPT\n" - ] - } - ], - "source": [ - "# This function detects the APU (NPU) type in your system to configure environment variables for hardware-specific optimization.\n", - "def get_npu_info():\n", - " # Run pnputil as a subprocess to enumerate PCI devices\n", - " command = r'pnputil /enum-devices /bus PCI /deviceids '\n", - " process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n", - " stdout, stderr = process.communicate()\n", - " # Check for supported Hardware IDs\n", - " npu_type = ''\n", - " if 'PCI\\\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT'\n", - " if 'PCI\\\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX'\n", - " if 'PCI\\\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX'\n", - " if 'PCI\\\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX'\n", - " return npu_type\n", - "\n", - "npu_type = get_npu_info()\n", - "print(f\"NPU Type: {npu_type}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Get Model\n", - "Here, we'll use the PyTorch library to define and instantiate a simple neural network model called `SmallModel` as a starting point. You can swap this model with any custom model, but make sure the input/output shapes remain compatible." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SmallModel(\n", - " (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (relu): ReLU()\n", - ")\n" - ] - } - ], - "source": [ - "torch.manual_seed(0)\n", - "\n", - "class SmallModel(nn.Module):\n", - " def __init__(self):\n", - " super(SmallModel, self).__init__()\n", - " self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)\n", - " self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)\n", - " self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)\n", - " self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)\n", - " self.relu = nn.ReLU()\n", - "\n", - " def forward(self, x):\n", - " x = self.conv1(x)\n", - " x = self.relu(x)\n", - " \n", - " x = self.conv2(x)\n", - " x = self.relu(x) \n", - " \n", - " x = self.conv3(x)\n", - " x = self.relu(x) \n", - " \n", - " x = self.conv4(x)\n", - " x = self.relu(x) \n", - " \n", - " x = torch.add(x, 1)\n", - " \n", - " return x\n", - "\n", - "# Instantiate the model\n", - "pytorch_model = SmallModel()\n", - "\n", - "pytorch_model.eval()\n", - "\n", - "# Print the model architecture\n", - "print(pytorch_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Export to ONNX\n", - "\n", - "The following code is used for exporting a PyTorch model (pytorch_model) to the ONNX (Open Neural Network Exchange) format. ONNX is an open format that facilitates interoperability between different AI frameworks. Ryzen AI uses ONNX as the input format for quantization using the Vitis AI Quantizer. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Generate dummy input data\n", - "batch_size = 1\n", - "input_channels = 3\n", - "input_size = 224\n", - "dummy_input = torch.rand(batch_size, input_channels, input_size, input_size)\n", - "\n", - "# Prep for ONNX export\n", - "inputs = {\"x\": dummy_input}\n", - "dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}\n", - "tmp_model_path = \"models/helloworld.onnx\"\n", - "\n", - "# Call export function\n", - "torch.onnx.export(\n", - " pytorch_model,\n", - " inputs,\n", - " tmp_model_path,\n", - " export_params=True,\n", - " opset_version=17, # Recommended opset\n", - " input_names=['input'],\n", - " output_names=['output'],\n", - " dynamic_axes=dynamic_axes,\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Quantize Model\n", - "\n", - "Using the static quantization method provided by the AMD Quark Quantizer and providing the newly exported ONNX model, we'll quantize the model to INT8. Quantization reduces the precision of model weights and activations from 32-bit floating point (FP32) to 8-bit integers (INT8). This compression allows the model to run faster on hardware accelerators like NPUs, while maintaining nearly the same accuracy. For more information on this quantization method, see [AMD Quark Quantization](https://ryzenai.docs.amd.com/en/latest/modelport.html)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:vai_q_onnx.quantize:calibration_data_reader is None, using random data for calibration\n", - "INFO:vai_q_onnx.quant_utils:The input ONNX model models/helloworld.onnx can create InferenceSession successfully\n", - "INFO:vai_q_onnx.quant_utils:Random input name input shape [1, 3, 224, 224] type \n", - "INFO:vai_q_onnx.quant_utils:Obtained calibration data with 1 iters\n", - "INFO:vai_q_onnx.quantize:Removed initializers from input\n", - "INFO:vai_q_onnx.quantize:Simplified model sucessfully\n", - "INFO:vai_q_onnx.quantize:Loading model...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[VAI_Q_ONNX_INFO]: Time information:\n", - "2024-08-23 10:12:35.362481\n", - "[VAI_Q_ONNX_INFO]: OS and CPU information:\n", - " system --- Windows\n", - " node --- vgodsoe-ryzen\n", - " release --- 10\n", - " version --- 10.0.26100\n", - " machine --- AMD64\n", - " processor --- AMD64 Family 25 Model 116 Stepping 1, AuthenticAMD\n", - "[VAI_Q_ONNX_INFO]: Tools version information:\n", - " python --- 3.10.14\n", - " onnx --- 1.16.2\n", - " onnxruntime --- 1.17.0\n", - " vai_q_onnx --- 1.17.0+511d6f4\n", - "[VAI_Q_ONNX_INFO]: Quantized Configuration information:\n", - " model_input --- models/helloworld.onnx\n", - " model_output --- models/helloworld_quantized.onnx\n", - " calibration_data_reader --- None\n", - " calibration_data_path --- None\n", - " quant_format --- QDQ\n", - " input_nodes --- []\n", - " output_nodes --- []\n", - " op_types_to_quantize --- []\n", - " random_data_reader_input_shape --- []\n", - " per_channel --- False\n", - " reduce_range --- False\n", - " activation_type --- QUInt8\n", - " weight_type --- QInt8\n", - " nodes_to_quantize --- []\n", - " nodes_to_exclude --- []\n", - " optimize_model --- True\n", - " use_external_data_format --- False\n", - " calibrate_method 
--- PowerOfTwoMethod.MinMSE\n", - " execution_providers --- ['CPUExecutionProvider']\n", - " enable_ipu_cnn --- True\n", - " enable_ipu_transformer --- False\n", - " specific_tensor_precision --- False\n", - " debug_mode --- False\n", - " convert_fp16_to_fp32 --- False\n", - " convert_nchw_to_nhwc --- False\n", - " include_cle --- False\n", - " include_sq --- False\n", - " include_fast_ft --- False\n", - " extra_options --- {'ActivationSymmetric': True}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:vai_q_onnx.quant_utils:The input ONNX model C:/Users/vgods/AppData/Local/Temp/vai.simp.kpf9kmm3/model_simp.onnx can run inference successfully\n", - "INFO:vai_q_onnx.quantize:optimize the model for better hardware compatibility.\n", - "INFO:vai_q_onnx.quantize:Start calibration...\n", - "INFO:vai_q_onnx.quantize:Start collecting data, runtime depends on your model size and the number of calibration dataset.\n", - "INFO:vai_q_onnx.calibrate:Finding optimal threshold for each tensor using PowerOfTwoMethod.MinMSE algorithm ...\n", - "INFO:vai_q_onnx.calibrate:Use all calibration data to calculate min mse\n", - "Computing range: 100%|██████████| 10/10 [00:04<00:00, 2.30tensor/s]\n", - "INFO:vai_q_onnx.quantize:Finished the calibration of PowerOfTwoMethod.MinMSE which costs 4.6s\n", - "INFO:vai_q_onnx.qdq_quantizer:Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).\n", - "INFO:vai_q_onnx.refine:Adjust the quantize info to meet the compiler constraints\n" - ] - }, - { - "data": { - "text/html": [ - "
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-       "┃ Op Type               Float Model                      ┃\n",
-       "┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-       "│ Conv                 │ 4                                │\n",
-       "│ Relu                 │ 4                                │\n",
-       "│ Constant             │ 1                                │\n",
-       "│ Add                  │ 1                                │\n",
-       "├──────────────────────┼──────────────────────────────────┤\n",
-       "│ Quantized model path │ models/helloworld_quantized.onnx │\n",
-       "└──────────────────────┴──────────────────────────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mOp Type \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mFloat Model \u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│ Conv │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m4 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Relu │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m4 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Constant │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Add │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "├──────────────────────┼──────────────────────────────────┤\n", - "│ Quantized model path │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mmodels/helloworld_quantized.onnx\u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "└──────────────────────┴──────────────────────────────────┘\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Calibrated and quantized model saved at: models/helloworld_quantized.onnx\n" - ] - } - ], - "source": [ - "from quark.onnx.quantization.config import Config, get_default_config\n", - "from quark.onnx import ModelQuantizer\n", - "\n", - "# `input_model_path` is the path to the original, unquantized ONNX model.\n", - "input_model_path = \"models/helloworld.onnx\"\n", - "\n", - "# `output_model_path` is the path where the quantized model will be saved.\n", - "output_model_path = \"models/helloworld_quantized.onnx\"\n", - "\n", - "# Use default quantization configuration\n", - "quant_config = get_default_config(\"XINT8\")\n", - "quant_config.extra_options[\"UseRandomData\"] = True\n", - "# Defines the quantization configuration for the whole model\n", - "config = Config(global_quant_config=quant_config)\n", - "print(\"The configuration of the quantization is 
{}\".format(config))\n", - "\n", - "# Create an ONNX Quantizer\n", - "quantizer = ModelQuantizer(config)\n", - "\n", - "# Quantize the ONNX model\n", - "quant_model = quantizer.quantize_model(model_input = input_model_path,\n", - " model_output = output_model_path,\n", - " calibration_data_path = None)\n", - "\n", - "print('Calibrated and quantized model saved at:', output_model_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Run Model\n", - "\n", - "#### CPU Run\n", - "\n", - "Before runnning the model on the NPU, we'll run the model on the CPU and get the execution time for comparison with the NPU." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Specify the path to the quantized ONNZ Model\n", - "quantized_model_path = r'./models/helloworld_quantized.onnx'\n", - "model = onnx.load(quantized_model_path)\n", - "\n", - "# Create some random input data for testing\n", - "input_data = np.random.uniform(low=-1, high=1, size=(batch_size, input_channels, input_size, input_size)).astype(np.float32)\n", - "\n", - "cpu_options = onnxruntime.SessionOptions()\n", - "\n", - "# Create Inference Session to run the quantized model on the CPU\n", - "cpu_session = onnxruntime.InferenceSession(\n", - " model.SerializeToString(),\n", - " providers = ['CPUExecutionProvider'],\n", - " sess_options=cpu_options,\n", - ")\n", - "\n", - "# Run Inference\n", - "start = timer()\n", - "cpu_results = cpu_session.run(None, {'input': input_data})\n", - "cpu_total = timer() - start" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### NPU Run\n", - "\n", - "Now, we'll run it on the NPU and time the execution so that we can compare the results with the CPU.\n", - "If the model has already been compiled, it won't recompile unless you delete the generated cache folder using the following cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Directory deleted successfully. Starting Fresh.\n" - ] - } - ], - "source": [ - "# We want to make sure we compile everytime, otherwise the tools will use the cached version\n", - "# Get the current working directory\n", - "current_directory = os.getcwd()\n", - "directory_path = os.path.join(current_directory, r'cache\\hello_cache')\n", - "cache_directory = os.path.join(current_directory, r'cache')\n", - "\n", - "# Check if the directory exists and delete it if it does.\n", - "if os.path.exists(directory_path):\n", - " shutil.rmtree(directory_path)\n", - " print(f\"Directory deleted successfully. Starting Fresh.\")\n", - "else:\n", - " print(f\"Directory '{directory_path}' does not exist.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Compile and run\n", - "\n", - "On the first run, the model will compile for the NPU before executing the inference. It's best to run the following cell again if you want to see better inference times." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH']\n", - "config_file_path = os.path.join(install_dir, 'voe-4.0-win_amd64', 'vaip_config.json') # Path to the NPU config file\n", - "xclbin_file = ''\n", - "provider_options = []\n", - "match npu_type:\n", - " case 'PHX/HPT':\n", - " print(\"Setting xclbin file for PHX/HPT\")\n", - " xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'phoenix', '4x4.xclbin')\n", - " provider_options = [{\n", - " 'target': 'X1',\n", - " 'xclbin': xclbin_file\n", - " }]\n", - " case _:\n", - " print(\"Unrecognized NPU type. 
Exiting.\")\n", - " exit()\n", - "# Create session options\n", - "aie_options = onnxruntime.SessionOptions()\n", - "aie_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal\n", - "\n", - "aie_session = onnxruntime.InferenceSession(\n", - " model.SerializeToString(),\n", - " providers=['VitisAIExecutionProvider'],\n", - " sess_options=aie_options,\n", - " provider_options = provider_options\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# Run Inference\n", - "start = timer()\n", - "npu_results = aie_session.run(None, {'input': input_data})\n", - "npu_total = timer() - start" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's gather our results and see what we have" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU Execution Time: 0.11257850000004055\n", - "NPU Execution Time: 0.08555689999997185\n" - ] - } - ], - "source": [ - "print(f\"CPU Execution Time: {cpu_total}\")\n", - "print(f\"NPU Execution Time: {npu_total}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note:** For a model this small in size, you likely won't see much of a performance gain when using the NPU versus the CPU. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a look at running the model on the NPU lots of times so that we can see the NPU being utilized.\n", - "To do this, make sure to have Task Manager opened in a window you can see when you run the next cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "iterations = 50 # edit this for more or less\n", - "\n", - "for i in range(iterations):\n", - " npu_results = aie_session.run(None, {'input': input_data})\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And there you have it. Your first model running on the NPU. We recommend trying a more complex model like ResNet50 or a custom model to compare performance and accuracy on the NPU.\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/CNN-examples/hello_world/hello_world.py b/CNN-examples/hello_world/hello_world.py deleted file mode 100644 index 98152f8c..00000000 --- a/CNN-examples/hello_world/hello_world.py +++ /dev/null @@ -1,192 +0,0 @@ -import torch -import torch.nn as nn -import os -import subprocess -import onnxruntime -import numpy as np -import onnx -import shutil -from timeit import default_timer as timer - -torch.manual_seed(0) - -# Create a simple model -class SmallModel(nn.Module): - def __init__(self): - super(SmallModel, self).__init__() - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1) - self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1) - self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) - self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) - self.relu = nn.ReLU() - - def forward(self, x): - x = self.conv1(x) - x = self.relu(x) - - x = self.conv2(x) - x = self.relu(x) - - x = self.conv3(x) - x = self.relu(x) - - x = self.conv4(x) - x = 
self.relu(x) - - x = torch.add(x, 1) - - return x - -# Instantiate the model -pytorch_model = SmallModel() -pytorch_model.eval() - -# Print the model architecture -print(pytorch_model) - -# Generate dummy input data -batch_size = 1 -input_channels = 3 -input_size = 224 -dummy_input = torch.rand(batch_size, input_channels, input_size, input_size) - -# Prep for ONNX export -inputs = {"x": dummy_input} -dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} -tmp_model_path = "models/helloworld.onnx" - -# Call export function -torch.onnx.export( - pytorch_model, - inputs, - tmp_model_path, - export_params=True, - opset_version=17, # Recommended opset - input_names=['input'], - output_names=['output'], - dynamic_axes=dynamic_axes, - ) - -# Quantize Model -from quark.onnx.quantization.config import Config, get_default_config -from quark.onnx import ModelQuantizer - -# `input_model_path` is the path to the original, unquantized ONNX model. -input_model_path = "models/helloworld.onnx" - -# `output_model_path` is the path where the quantized model will be saved. 
-output_model_path = "models/helloworld_quantized.onnx" - -# Use default quantization configuration -quant_config = get_default_config("XINT8") -quant_config.extra_options["UseRandomData"] = True -# Defines the quantization configuration for the whole model -config = Config(global_quant_config=quant_config) -print("The configuration of the quantization is {}".format(config)) - -# Create an ONNX Quantizer -quantizer = ModelQuantizer(config) - -# Quantize the ONNX model -quant_model = quantizer.quantize_model(model_input = input_model_path, - model_output = output_model_path, - calibration_data_path = None) - -print('Calibrated and quantized model saved at:', output_model_path) - -# Run Model on CPU Run - -# Specify the path to the quantized ONNZ Model -quantized_model_path = r'./models/helloworld_quantized.onnx' -model = onnx.load(quantized_model_path) - -# Create some random input data for testing -input_data = np.random.uniform(low=-1, high=1, size=(batch_size, input_channels, input_size, input_size)).astype(np.float32) - -cpu_options = onnxruntime.SessionOptions() - -# Create Inference Session to run the quantized model on the CPU -cpu_session = onnxruntime.InferenceSession( - model.SerializeToString(), - providers = ['CPUExecutionProvider'], - sess_options=cpu_options, -) - -# Run Inference -start = timer() -cpu_results = cpu_session.run(None, {'input': input_data}) -cpu_total = timer() - start - -# Run Model on NPU - -# Before running, we need to set the ENV variable for the specific NPU we have -# Run pnputil as a subprocess to enumerate PCI devices -command = r'pnputil /enum-devices /bus PCI /deviceids ' -process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -stdout, stderr = process.communicate() -# Check for supported Hardware IDs -npu_type = '' -if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT' -if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX' -if 
'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX' -if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX' - -print(f"APU Type: {npu_type}") - -install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH'] -xclbin_file = '' -match npu_type: - case 'PHX/HPT': - print("Setting xclbin file for PHX/HPT") - xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'phoenix', '4x4.xclbin') - case 'STX': - print("xclbin file is auto generated for STX") - case _: - print("Unrecognized APU type. Exiting.") - exit() - -# We want to make sure we compile everytime, otherwise the tools will use the cached version -# Get the current working directory -current_directory = os.getcwd() -directory_path = os.path.join(current_directory, r'cache\hello_cache') -cache_directory = os.path.join(current_directory, r'cache') - -# Check if the directory exists and delete it if it does. -if os.path.exists(directory_path): - shutil.rmtree(directory_path) - print(f"Directory '{directory_path}' deleted successfully.") -else: - print(f"Directory '{directory_path}' does not exist.") - - -# Compile and run - -# Point to the config file path used for the VitisAI Execution Provider -install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH'] -config_file_path = os.path.join(install_dir, 'voe-4.0-win_amd64', 'vaip_config.json') # Path to the NPU config file -# Create session options -aie_options = onnxruntime.SessionOptions() -aie_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal -provider_options = [{}] -if npu_type == 'PHX/HPT': - # For PHX/HPT devices, xclbin is requireds - provider_options = [{ - 'target': 'X1', - 'xclbin': xclbin_file - }] - -aie_session = onnxruntime.InferenceSession( - model.SerializeToString(), - providers=['VitisAIExecutionProvider'], - sess_options=aie_options, - provider_options = provider_options -) - -# Run Inference -start = timer() -npu_results = aie_session.run(None, {'input': input_data}) -npu_total = 
timer() - start - - -print(f"CPU Execution Time: {cpu_total}") -print(f"NPU Execution Time: {npu_total}") diff --git a/CNN-examples/hello_world/models/readme.md b/CNN-examples/hello_world/models/readme.md deleted file mode 100644 index bbe2a0b7..00000000 --- a/CNN-examples/hello_world/models/readme.md +++ /dev/null @@ -1 +0,0 @@ -This folder is used for saving generated ONNX files in the Hello World tutorial diff --git a/CNN-examples/hello_world/requirements.txt b/CNN-examples/hello_world/requirements.txt deleted file mode 100644 index 8c972ce3..00000000 --- a/CNN-examples/hello_world/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch==2.8.0 -ipykernel==7.1.0 \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/cat.jpg b/CNN-examples/iGPU/getting_started/cat.jpg deleted file mode 100644 index 40e7ad0d..00000000 Binary files a/CNN-examples/iGPU/getting_started/cat.jpg and /dev/null differ diff --git a/CNN-examples/iGPU/getting_started/cpp/compile.bat b/CNN-examples/iGPU/getting_started/cpp/compile.bat deleted file mode 100644 index abf37f3b..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/compile.bat +++ /dev/null @@ -1,20 +0,0 @@ -@echo off - -if "%RYZEN_AI_INSTALLATION_PATH%" == "" echo RYZEN_AI_INSTALLATION_PATH not set. This script requires the RYZEN_AI_INSTALLATION_PATH env var to be set to the RyzenAI 1.2 installation folder. & goto :error - -REM Check if the first argument is provided -if "%1"=="" ( - echo Usage: %0 [OpenCV_DIR] - exit /b 1 -) - -set "OpenCV_DIR=%~1" - -echo OpenCV_DIR is set to: %OpenCV_DIR% - -cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -DCMAKE_INSTALL_PREFIX=. -DCMAKE_PREFIX_PATH=. 
-B build -S resnet50 -DOpenCV_DIR="%OpenCV_DIR%" -G "Visual Studio 17 2022" - -cmake --build .\build --config Release --target ALL_BUILD - -:error -exit /b %errorlevel% \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/cpp/resnet50/CMakeLists.txt b/CNN-examples/iGPU/getting_started/cpp/resnet50/CMakeLists.txt deleted file mode 100644 index 27acf7c6..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/resnet50/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -cmake_minimum_required(VERSION 3.5) - -project(resnet50 VERSION 1.0.0 LANGUAGES C CXX) -set(CMAKE_CXX_STANDARD 17) -find_package(Python 3.10 EXACT COMPONENTS Interpreter Development) - -set (RYZEN_AI_INSTALLATION_PATH $ENV{RYZEN_AI_INSTALLATION_PATH}) - - -set(ONNXRUNTIME_ROOTDIR "${RYZEN_AI_INSTALLATION_PATH}/onnxruntime") - -include_directories("${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session") - -link_directories("${ONNXRUNTIME_ROOTDIR}/lib") - -link_directories("${CMAKE_INSTALL_PREFIX}/lib") - - - -find_package(OpenCV COMPONENTS core highgui imgproc REQUIRED) - -add_executable(resnet50 resnet50.cpp util/getopt.c) -target_include_directories(resnet50 - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/util -) -target_link_libraries(resnet50 ${ORT_LIBRARY} ${OpenCV_LIBS} onnxruntime) -install(TARGETS resnet50 RUNTIME DESTINATION bin) - -add_custom_command( - TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${RYZEN_AI_INSTALLATION_PATH}/onnxruntime/bin/DirectML.dll - ${CMAKE_CURRENT_BINARY_DIR}/$/DirectML.dll) - -add_custom_command( - TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${RYZEN_AI_INSTALLATION_PATH}/onnxruntime/bin/onnxruntime.dll - ${CMAKE_CURRENT_BINARY_DIR}/$/onnxruntime.dll) \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/cpp/resnet50/imagenet_labels.inc b/CNN-examples/iGPU/getting_started/cpp/resnet50/imagenet_labels.inc deleted file mode 100644 index 0b6880d6..00000000 --- 
a/CNN-examples/iGPU/getting_started/cpp/resnet50/imagenet_labels.inc +++ /dev/null @@ -1,1001 +0,0 @@ -"background", -"tench, Tinca tinca", -"goldfish, Carassius auratus", -"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", -"tiger shark, Galeocerdo cuvieri", -"hammerhead, hammerhead shark", -"electric ray, crampfish, numbfish, torpedo", -"stingray", -"cock", -"hen", -"ostrich, Struthio camelus", -"brambling, Fringilla montifringilla", -"goldfinch, Carduelis carduelis", -"house finch, linnet, Carpodacus mexicanus", -"junco, snowbird", -"indigo bunting, indigo finch, indigo bird, Passerina cyanea", -"robin, American robin, Turdus migratorius", -"bulbul", -"jay", -"magpie", -"chickadee", -"water ouzel, dipper", -"kite", -"bald eagle, American eagle, Haliaeetus leucocephalus", -"vulture", -"great grey owl, great gray owl, Strix nebulosa", -"European fire salamander, Salamandra salamandra", -"common newt, Triturus vulgaris", -"eft", -"spotted salamander, Ambystoma maculatum", -"axolotl, mud puppy, Ambystoma mexicanum", -"bullfrog, Rana catesbeiana", -"tree frog, tree-frog", -"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", -"loggerhead, loggerhead turtle, Caretta caretta", -"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", -"mud turtle", -"terrapin", -"box turtle, box tortoise", -"banded gecko", -"common iguana, iguana, Iguana iguana", -"American chameleon, anole, Anolis carolinensis", -"whiptail, whiptail lizard", -"agama", -"frilled lizard, Chlamydosaurus kingi", -"alligator lizard", -"Gila monster, Heloderma suspectum", -"green lizard, Lacerta viridis", -"African chameleon, Chamaeleo chamaeleon", -"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", -"African crocodile, Nile crocodile, Crocodylus niloticus", -"American alligator, Alligator mississipiensis", -"triceratops", -"thunder snake, worm snake, Carphophis amoenus", -"ringneck snake, ring-necked snake, 
ring snake", -"hognose snake, puff adder, sand viper", -"green snake, grass snake", -"king snake, kingsnake", -"garter snake, grass snake", -"water snake", -"vine snake", -"night snake, Hypsiglena torquata", -"boa constrictor, Constrictor constrictor", -"rock python, rock snake, Python sebae", -"Indian cobra, Naja naja", -"green mamba", -"sea snake", -"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", -"diamondback, diamondback rattlesnake, Crotalus adamanteus", -"sidewinder, horned rattlesnake, Crotalus cerastes", -"trilobite", -"harvestman, daddy longlegs, Phalangium opilio", -"scorpion", -"black and gold garden spider, Argiope aurantia", -"barn spider, Araneus cavaticus", -"garden spider, Aranea diademata", -"black widow, Latrodectus mactans", -"tarantula", -"wolf spider, hunting spider", -"tick", -"centipede", -"black grouse", -"ptarmigan", -"ruffed grouse, partridge, Bonasa umbellus", -"prairie chicken, prairie grouse, prairie fowl", -"peacock", -"quail", -"partridge", -"African grey, African gray, Psittacus erithacus", -"macaw", -"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", -"lorikeet", -"coucal", -"bee eater", -"hornbill", -"hummingbird", -"jacamar", -"toucan", -"drake", -"red-breasted merganser, Mergus serrator", -"goose", -"black swan, Cygnus atratus", -"tusker", -"echidna, spiny anteater, anteater", -"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", -"wallaby, brush kangaroo", -"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", -"wombat", -"jellyfish", -"sea anemone, anemone", -"brain coral", -"flatworm, platyhelminth", -"nematode, nematode worm, roundworm", -"conch", -"snail", -"slug", -"sea slug, nudibranch", -"chiton, coat-of-mail shell, sea cradle, polyplacophore", -"chambered nautilus, pearly nautilus, nautilus", -"Dungeness crab, Cancer magister", -"rock crab, Cancer irroratus", -"fiddler crab", -"king crab, Alaska crab, Alaskan king crab, Alaska king 
crab, Paralithodes camtschatica", -"American lobster, Northern lobster, Maine lobster, Homarus americanus", -"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", -"crayfish, crawfish, crawdad, crawdaddy", -"hermit crab", -"isopod", -"white stork, Ciconia ciconia", -"black stork, Ciconia nigra", -"spoonbill", -"flamingo", -"little blue heron, Egretta caerulea", -"American egret, great white heron, Egretta albus", -"bittern", -"crane", -"limpkin, Aramus pictus", -"European gallinule, Porphyrio porphyrio", -"American coot, marsh hen, mud hen, water hen, Fulica americana", -"bustard", -"ruddy turnstone, Arenaria interpres", -"red-backed sandpiper, dunlin, Erolia alpina", -"redshank, Tringa totanus", -"dowitcher", -"oystercatcher, oyster catcher", -"pelican", -"king penguin, Aptenodytes patagonica", -"albatross, mollymawk", -"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", -"killer whale, killer, orca, grampus, sea wolf, Orcinus orca", -"dugong, Dugong dugon", -"sea lion", -"Chihuahua", -"Japanese spaniel", -"Maltese dog, Maltese terrier, Maltese", -"Pekinese, Pekingese, Peke", -"Shih-Tzu", -"Blenheim spaniel", -"papillon", -"toy terrier", -"Rhodesian ridgeback", -"Afghan hound, Afghan", -"basset, basset hound", -"beagle", -"bloodhound, sleuthhound", -"bluetick", -"black-and-tan coonhound", -"Walker hound, Walker foxhound", -"English foxhound", -"redbone", -"borzoi, Russian wolfhound", -"Irish wolfhound", -"Italian greyhound", -"whippet", -"Ibizan hound, Ibizan Podenco", -"Norwegian elkhound, elkhound", -"otterhound, otter hound", -"Saluki, gazelle hound", -"Scottish deerhound, deerhound", -"Weimaraner", -"Staffordshire bullterrier, Staffordshire bull terrier", -"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", -"Bedlington terrier", -"Border terrier", -"Kerry blue terrier", -"Irish terrier", -"Norfolk terrier", -"Norwich terrier", -"Yorkshire terrier", 
-"wire-haired fox terrier", -"Lakeland terrier", -"Sealyham terrier, Sealyham", -"Airedale, Airedale terrier", -"cairn, cairn terrier", -"Australian terrier", -"Dandie Dinmont, Dandie Dinmont terrier", -"Boston bull, Boston terrier", -"miniature schnauzer", -"giant schnauzer", -"standard schnauzer", -"Scotch terrier, Scottish terrier, Scottie", -"Tibetan terrier, chrysanthemum dog", -"silky terrier, Sydney silky", -"soft-coated wheaten terrier", -"West Highland white terrier", -"Lhasa, Lhasa apso", -"flat-coated retriever", -"curly-coated retriever", -"golden retriever", -"Labrador retriever", -"Chesapeake Bay retriever", -"German short-haired pointer", -"vizsla, Hungarian pointer", -"English setter", -"Irish setter, red setter", -"Gordon setter", -"Brittany spaniel", -"clumber, clumber spaniel", -"English springer, English springer spaniel", -"Welsh springer spaniel", -"cocker spaniel, English cocker spaniel, cocker", -"Sussex spaniel", -"Irish water spaniel", -"kuvasz", -"schipperke", -"groenendael", -"malinois", -"briard", -"kelpie", -"komondor", -"Old English sheepdog, bobtail", -"Shetland sheepdog, Shetland sheep dog, Shetland", -"collie", -"Border collie", -"Bouvier des Flandres, Bouviers des Flandres", -"Rottweiler", -"German shepherd, German shepherd dog, German police dog, alsatian", -"Doberman, Doberman pinscher", -"miniature pinscher", -"Greater Swiss Mountain dog", -"Bernese mountain dog", -"Appenzeller", -"EntleBucher", -"boxer", -"bull mastiff", -"Tibetan mastiff", -"French bulldog", -"Great Dane", -"Saint Bernard, St Bernard", -"Eskimo dog, husky", -"malamute, malemute, Alaskan malamute", -"Siberian husky", -"dalmatian, coach dog, carriage dog", -"affenpinscher, monkey pinscher, monkey dog", -"basenji", -"pug, pug-dog", -"Leonberg", -"Newfoundland, Newfoundland dog", -"Great Pyrenees", -"Samoyed, Samoyede", -"Pomeranian", -"chow, chow chow", -"keeshond", -"Brabancon griffon", -"Pembroke, Pembroke Welsh corgi", -"Cardigan, Cardigan Welsh corgi", -"toy 
poodle", -"miniature poodle", -"standard poodle", -"Mexican hairless", -"timber wolf, grey wolf, gray wolf, Canis lupus", -"white wolf, Arctic wolf, Canis lupus tundrarum", -"red wolf, maned wolf, Canis rufus, Canis niger", -"coyote, prairie wolf, brush wolf, Canis latrans", -"dingo, warrigal, warragal, Canis dingo", -"dhole, Cuon alpinus", -"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", -"hyena, hyaena", -"red fox, Vulpes vulpes", -"kit fox, Vulpes macrotis", -"Arctic fox, white fox, Alopex lagopus", -"grey fox, gray fox, Urocyon cinereoargenteus", -"tabby, tabby cat", -"tiger cat", -"Persian cat", -"Siamese cat, Siamese", -"Egyptian cat", -"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", -"lynx, catamount", -"leopard, Panthera pardus", -"snow leopard, ounce, Panthera uncia", -"jaguar, panther, Panthera onca, Felis onca", -"lion, king of beasts, Panthera leo", -"tiger, Panthera tigris", -"cheetah, chetah, Acinonyx jubatus", -"brown bear, bruin, Ursus arctos", -"American black bear, black bear, Ursus americanus, Euarctos americanus", -"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", -"sloth bear, Melursus ursinus, Ursus ursinus", -"mongoose", -"meerkat, mierkat", -"tiger beetle", -"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", -"ground beetle, carabid beetle", -"long-horned beetle, longicorn, longicorn beetle", -"leaf beetle, chrysomelid", -"dung beetle", -"rhinoceros beetle", -"weevil", -"fly", -"bee", -"ant, emmet, pismire", -"grasshopper, hopper", -"cricket", -"walking stick, walkingstick, stick insect", -"cockroach, roach", -"mantis, mantid", -"cicada, cicala", -"leafhopper", -"lacewing, lacewing fly", -"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", -"damselfly", -"admiral", -"ringlet, ringlet butterfly", -"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", -"cabbage butterfly", -"sulphur 
butterfly, sulfur butterfly", -"lycaenid, lycaenid butterfly", -"starfish, sea star", -"sea urchin", -"sea cucumber, holothurian", -"wood rabbit, cottontail, cottontail rabbit", -"hare", -"Angora, Angora rabbit", -"hamster", -"porcupine, hedgehog", -"fox squirrel, eastern fox squirrel, Sciurus niger", -"marmot", -"beaver", -"guinea pig, Cavia cobaya", -"sorrel", -"zebra", -"hog, pig, grunter, squealer, Sus scrofa", -"wild boar, boar, Sus scrofa", -"warthog", -"hippopotamus, hippo, river horse, Hippopotamus amphibius", -"ox", -"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", -"bison", -"ram, tup", -"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", -"ibex, Capra ibex", -"hartebeest", -"impala, Aepyceros melampus", -"gazelle", -"Arabian camel, dromedary, Camelus dromedarius", -"llama", -"weasel", -"mink", -"polecat, fitch, foulmart, foumart, Mustela putorius", -"black-footed ferret, ferret, Mustela nigripes", -"otter", -"skunk, polecat, wood pussy", -"badger", -"armadillo", -"three-toed sloth, ai, Bradypus tridactylus", -"orangutan, orang, orangutang, Pongo pygmaeus", -"gorilla, Gorilla gorilla", -"chimpanzee, chimp, Pan troglodytes", -"gibbon, Hylobates lar", -"siamang, Hylobates syndactylus, Symphalangus syndactylus", -"guenon, guenon monkey", -"patas, hussar monkey, Erythrocebus patas", -"baboon", -"macaque", -"langur", -"colobus, colobus monkey", -"proboscis monkey, Nasalis larvatus", -"marmoset", -"capuchin, ringtail, Cebus capucinus", -"howler monkey, howler", -"titi, titi monkey", -"spider monkey, Ateles geoffroyi", -"squirrel monkey, Saimiri sciureus", -"Madagascar cat, ring-tailed lemur, Lemur catta", -"indri, indris, Indri indri, Indri brevicaudatus", -"Indian elephant, Elephas maximus", -"African elephant, Loxodonta africana", -"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", -"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", -"barracouta, snoek", -"eel", 
-"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", -"rock beauty, Holocanthus tricolor", -"anemone fish", -"sturgeon", -"gar, garfish, garpike, billfish, Lepisosteus osseus", -"lionfish", -"puffer, pufferfish, blowfish, globefish", -"abacus", -"abaya", -"academic gown, academic robe, judge's robe", -"accordion, piano accordion, squeeze box", -"acoustic guitar", -"aircraft carrier, carrier, flattop, attack aircraft carrier", -"airliner", -"airship, dirigible", -"altar", -"ambulance", -"amphibian, amphibious vehicle", -"analog clock", -"apiary, bee house", -"apron", -"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", -"assault rifle, assault gun", -"backpack, back pack, knapsack, packsack, rucksack, haversack", -"bakery, bakeshop, bakehouse", -"balance beam, beam", -"balloon", -"ballpoint, ballpoint pen, ballpen, Biro", -"Band Aid", -"banjo", -"bannister, banister, balustrade, balusters, handrail", -"barbell", -"barber chair", -"barbershop", -"barn", -"barometer", -"barrel, cask", -"barrow, garden cart, lawn cart, wheelbarrow", -"baseball", -"basketball", -"bassinet", -"bassoon", -"bathing cap, swimming cap", -"bath towel", -"bathtub, bathing tub, bath, tub", -"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", -"beacon, lighthouse, beacon light, pharos", -"beaker", -"bearskin, busby, shako", -"beer bottle", -"beer glass", -"bell cote, bell cot", -"bib", -"bicycle-built-for-two, tandem bicycle, tandem", -"bikini, two-piece", -"binder, ring-binder", -"binoculars, field glasses, opera glasses", -"birdhouse", -"boathouse", -"bobsled, bobsleigh, bob", -"bolo tie, bolo, bola tie, bola", -"bonnet, poke bonnet", -"bookcase", -"bookshop, bookstore, bookstall", -"bottlecap", -"bow", -"bow tie, bow-tie, bowtie", -"brass, memorial tablet, plaque", -"brassiere, bra, bandeau", -"breakwater, groin, groyne, mole, bulwark, seawall, jetty", -"breastplate, aegis, egis", 
-"broom", -"bucket, pail", -"buckle", -"bulletproof vest", -"bullet train, bullet", -"butcher shop, meat market", -"cab, hack, taxi, taxicab", -"caldron, cauldron", -"candle, taper, wax light", -"cannon", -"canoe", -"can opener, tin opener", -"cardigan", -"car mirror", -"carousel, carrousel, merry-go-round, roundabout, whirligig", -"carpenter's kit, tool kit", -"carton", -"car wheel", -"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM", -"cassette", -"cassette player", -"castle", -"catamaran", -"CD player", -"cello, violoncello", -"cellular telephone, cellular phone, cellphone, cell, mobile phone", -"chain", -"chainlink fence", -"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", -"chain saw, chainsaw", -"chest", -"chiffonier, commode", -"chime, bell, gong", -"china cabinet, china closet", -"Christmas stocking", -"church, church building", -"cinema, movie theater, movie theatre, movie house, picture palace", -"cleaver, meat cleaver, chopper", -"cliff dwelling", -"cloak", -"clog, geta, patten, sabot", -"cocktail shaker", -"coffee mug", -"coffeepot", -"coil, spiral, volute, whorl, helix", -"combination lock", -"computer keyboard, keypad", -"confectionery, confectionary, candy store", -"container ship, containership, container vessel", -"convertible", -"corkscrew, bottle screw", -"cornet, horn, trumpet, trump", -"cowboy boot", -"cowboy hat, ten-gallon hat", -"cradle", -"crane", -"crash helmet", -"crate", -"crib, cot", -"Crock Pot", -"croquet ball", -"crutch", -"cuirass", -"dam, dike, dyke", -"desk", -"desktop computer", -"dial telephone, dial phone", -"diaper, nappy, napkin", -"digital clock", -"digital watch", -"dining table, board", -"dishrag, dishcloth", -"dishwasher, dish washer, dishwashing machine", -"disk brake, disc brake", -"dock, dockage, docking facility", -"dogsled, dog sled, dog sleigh", -"dome", -"doormat, welcome mat", -"drilling platform, offshore 
rig", -"drum, membranophone, tympan", -"drumstick", -"dumbbell", -"Dutch oven", -"electric fan, blower", -"electric guitar", -"electric locomotive", -"entertainment center", -"envelope", -"espresso maker", -"face powder", -"feather boa, boa", -"file, file cabinet, filing cabinet", -"fireboat", -"fire engine, fire truck", -"fire screen, fireguard", -"flagpole, flagstaff", -"flute, transverse flute", -"folding chair", -"football helmet", -"forklift", -"fountain", -"fountain pen", -"four-poster", -"freight car", -"French horn, horn", -"frying pan, frypan, skillet", -"fur coat", -"garbage truck, dustcart", -"gasmask, respirator, gas helmet", -"gas pump, gasoline pump, petrol pump, island dispenser", -"goblet", -"go-kart", -"golf ball", -"golfcart, golf cart", -"gondola", -"gong, tam-tam", -"gown", -"grand piano, grand", -"greenhouse, nursery, glasshouse", -"grille, radiator grille", -"grocery store, grocery, food market, market", -"guillotine", -"hair slide", -"hair spray", -"half track", -"hammer", -"hamper", -"hand blower, blow dryer, blow drier, hair dryer, hair drier", -"hand-held computer, hand-held microcomputer", -"handkerchief, hankie, hanky, hankey", -"hard disc, hard disk, fixed disk", -"harmonica, mouth organ, harp, mouth harp", -"harp", -"harvester, reaper", -"hatchet", -"holster", -"home theater, home theatre", -"honeycomb", -"hook, claw", -"hoopskirt, crinoline", -"horizontal bar, high bar", -"horse cart, horse-cart", -"hourglass", -"iPod", -"iron, smoothing iron", -"jack-o'-lantern", -"jean, blue jean, denim", -"jeep, landrover", -"jersey, T-shirt, tee shirt", -"jigsaw puzzle", -"jinrikisha, ricksha, rickshaw", -"joystick", -"kimono", -"knee pad", -"knot", -"lab coat, laboratory coat", -"ladle", -"lampshade, lamp shade", -"laptop, laptop computer", -"lawn mower, mower", -"lens cap, lens cover", -"letter opener, paper knife, paperknife", -"library", -"lifeboat", -"lighter, light, igniter, ignitor", -"limousine, limo", -"liner, ocean liner", -"lipstick, 
lip rouge", -"Loafer", -"lotion", -"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", -"loupe, jeweler's loupe", -"lumbermill, sawmill", -"magnetic compass", -"mailbag, postbag", -"mailbox, letter box", -"maillot", -"maillot, tank suit", -"manhole cover", -"maraca", -"marimba, xylophone", -"mask", -"matchstick", -"maypole", -"maze, labyrinth", -"measuring cup", -"medicine chest, medicine cabinet", -"megalith, megalithic structure", -"microphone, mike", -"microwave, microwave oven", -"military uniform", -"milk can", -"minibus", -"miniskirt, mini", -"minivan", -"missile", -"mitten", -"mixing bowl", -"mobile home, manufactured home", -"Model T", -"modem", -"monastery", -"monitor", -"moped", -"mortar", -"mortarboard", -"mosque", -"mosquito net", -"motor scooter, scooter", -"mountain bike, all-terrain bike, off-roader", -"mountain tent", -"mouse, computer mouse", -"mousetrap", -"moving van", -"muzzle", -"nail", -"neck brace", -"necklace", -"nipple", -"notebook, notebook computer", -"obelisk", -"oboe, hautboy, hautbois", -"ocarina, sweet potato", -"odometer, hodometer, mileometer, milometer", -"oil filter", -"organ, pipe organ", -"oscilloscope, scope, cathode-ray oscilloscope, CRO", -"overskirt", -"oxcart", -"oxygen mask", -"packet", -"paddle, boat paddle", -"paddlewheel, paddle wheel", -"padlock", -"paintbrush", -"pajama, pyjama, pj's, jammies", -"palace", -"panpipe, pandean pipe, syrinx", -"paper towel", -"parachute, chute", -"parallel bars, bars", -"park bench", -"parking meter", -"passenger car, coach, carriage", -"patio, terrace", -"pay-phone, pay-station", -"pedestal, plinth, footstall", -"pencil box, pencil case", -"pencil sharpener", -"perfume, essence", -"Petri dish", -"photocopier", -"pick, plectrum, plectron", -"pickelhaube", -"picket fence, paling", -"pickup, pickup truck", -"pier", -"piggy bank, penny bank", -"pill bottle", -"pillow", -"ping-pong ball", -"pinwheel", -"pirate, pirate ship", -"pitcher, ewer", -"plane, carpenter's plane, 
woodworking plane", -"planetarium", -"plastic bag", -"plate rack", -"plow, plough", -"plunger, plumber's helper", -"Polaroid camera, Polaroid Land camera", -"pole", -"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", -"poncho", -"pool table, billiard table, snooker table", -"pop bottle, soda bottle", -"pot, flowerpot", -"potter's wheel", -"power drill", -"prayer rug, prayer mat", -"printer", -"prison, prison house", -"projectile, missile", -"projector", -"puck, hockey puck", -"punching bag, punch bag, punching ball, punchball", -"purse", -"quill, quill pen", -"quilt, comforter, comfort, puff", -"racer, race car, racing car", -"racket, racquet", -"radiator", -"radio, wireless", -"radio telescope, radio reflector", -"rain barrel", -"recreational vehicle, RV, R.V.", -"reel", -"reflex camera", -"refrigerator, icebox", -"remote control, remote", -"restaurant, eating house, eating place, eatery", -"revolver, six-gun, six-shooter", -"rifle", -"rocking chair, rocker", -"rotisserie", -"rubber eraser, rubber, pencil eraser", -"rugby ball", -"rule, ruler", -"running shoe", -"safe", -"safety pin", -"saltshaker, salt shaker", -"sandal", -"sarong", -"sax, saxophone", -"scabbard", -"scale, weighing machine", -"school bus", -"schooner", -"scoreboard", -"screen, CRT screen", -"screw", -"screwdriver", -"seat belt, seatbelt", -"sewing machine", -"shield, buckler", -"shoe shop, shoe-shop, shoe store", -"shoji", -"shopping basket", -"shopping cart", -"shovel", -"shower cap", -"shower curtain", -"ski", -"ski mask", -"sleeping bag", -"slide rule, slipstick", -"sliding door", -"slot, one-armed bandit", -"snorkel", -"snowmobile", -"snowplow, snowplough", -"soap dispenser", -"soccer ball", -"sock", -"solar dish, solar collector, solar furnace", -"sombrero", -"soup bowl", -"space bar", -"space heater", -"space shuttle", -"spatula", -"speedboat", -"spider web, spider's web", -"spindle", -"sports car, sport car", -"spotlight, spot", -"stage", -"steam locomotive", 
-"steel arch bridge", -"steel drum", -"stethoscope", -"stole", -"stone wall", -"stopwatch, stop watch", -"stove", -"strainer", -"streetcar, tram, tramcar, trolley, trolley car", -"stretcher", -"studio couch, day bed", -"stupa, tope", -"submarine, pigboat, sub, U-boat", -"suit, suit of clothes", -"sundial", -"sunglass", -"sunglasses, dark glasses, shades", -"sunscreen, sunblock, sun blocker", -"suspension bridge", -"swab, swob, mop", -"sweatshirt", -"swimming trunks, bathing trunks", -"swing", -"switch, electric switch, electrical switch", -"syringe", -"table lamp", -"tank, army tank, armored combat vehicle, armoured combat vehicle", -"tape player", -"teapot", -"teddy, teddy bear", -"television, television system", -"tennis ball", -"thatch, thatched roof", -"theater curtain, theatre curtain", -"thimble", -"thresher, thrasher, threshing machine", -"throne", -"tile roof", -"toaster", -"tobacco shop, tobacconist shop, tobacconist", -"toilet seat", -"torch", -"totem pole", -"tow truck, tow car, wrecker", -"toyshop", -"tractor", -"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", -"tray", -"trench coat", -"tricycle, trike, velocipede", -"trimaran", -"tripod", -"triumphal arch", -"trolleybus, trolley coach, trackless trolley", -"trombone", -"tub, vat", -"turnstile", -"typewriter keyboard", -"umbrella", -"unicycle, monocycle", -"upright, upright piano", -"vacuum, vacuum cleaner", -"vase", -"vault", -"velvet", -"vending machine", -"vestment", -"viaduct", -"violin, fiddle", -"volleyball", -"waffle iron", -"wall clock", -"wallet, billfold, notecase, pocketbook", -"wardrobe, closet, press", -"warplane, military plane", -"washbasin, handbasin, washbowl, lavabo, wash-hand basin", -"washer, automatic washer, washing machine", -"water bottle", -"water jug", -"water tower", -"whiskey jug", -"whistle", -"wig", -"window screen", -"window shade", -"Windsor tie", -"wine bottle", -"wing", -"wok", -"wooden spoon", -"wool, woolen, woollen", -"worm fence, snake 
fence, snake-rail fence, Virginia fence", -"wreck", -"yawl", -"yurt", -"web site, website, internet site, site", -"comic book", -"crossword puzzle, crossword", -"street sign", -"traffic light, traffic signal, stoplight", -"book jacket, dust cover, dust jacket, dust wrapper", -"menu", -"plate", -"guacamole", -"consomme", -"hot pot, hotpot", -"trifle", -"ice cream, icecream", -"ice lolly, lolly, lollipop, popsicle", -"French loaf", -"bagel, beigel", -"pretzel", -"cheeseburger", -"hotdog, hot dog, red hot", -"mashed potato", -"head cabbage", -"broccoli", -"cauliflower", -"zucchini, courgette", -"spaghetti squash", -"acorn squash", -"butternut squash", -"cucumber, cuke", -"artichoke, globe artichoke", -"bell pepper", -"cardoon", -"mushroom", -"Granny Smith", -"strawberry", -"orange", -"lemon", -"fig", -"pineapple, ananas", -"banana", -"jackfruit, jak, jack", -"custard apple", -"pomegranate", -"hay", -"carbonara", -"chocolate sauce, chocolate syrup", -"dough", -"meat loaf, meatloaf", -"pizza, pizza pie", -"potpie", -"burrito", -"red wine", -"espresso", -"cup", -"eggnog", -"alp", -"bubble", -"cliff, drop, drop-off", -"coral reef", -"geyser", -"lakeside, lakeshore", -"promontory, headland, head, foreland", -"sandbar, sand bar", -"seashore, coast, seacoast, sea-coast", -"valley, vale", -"volcano", -"ballplayer, baseball player", -"groom, bridegroom", -"scuba diver", -"rapeseed", -"daisy", -"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", -"corn", -"acorn", -"hip, rose hip, rosehip", -"buckeye, horse chestnut, conker", -"coral fungus", -"agaric", -"gyromitra", -"stinkhorn, carrion fungus", -"earthstar", -"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", -"bolete", -"ear, spike, capitulum", -"toilet tissue, toilet paper, bathroom tissue", diff --git a/CNN-examples/iGPU/getting_started/cpp/resnet50/resnet50.cpp b/CNN-examples/iGPU/getting_started/cpp/resnet50/resnet50.cpp deleted file mode 100644 index 
11eb63a3..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/resnet50/resnet50.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ************************************************************************************/ -#include -#include -#include -#include // std::generate -#include -#include -#include -#include -#include -#include -#if _WIN32 -extern "C" { -#include "util/getopt.h" -} -#include -#include -using convert_t = std::codecvt_utf8; -std::wstring_convert strconverter; -#endif -#include -#include -#include -#include -#include -#include - -using namespace std; - - -static cv::Mat read_image(const std::string files); -static cv::Mat croppedImage(const cv::Mat& image, int height, int width); -static cv::Mat preprocess_image(const cv::Mat& image, cv::Size size); -static void set_input_image(const cv::Mat& image, Ort::Float16_t* data); -static std::vector softmax(float* data, int64_t size); -static std::vector> topk(const std::vector& score, - int K); -static void print_topk(const std::vector>& topk); -static const char* lookup(int index); - -std::vector convertToFloat32(const Ort::Float16_t* input, size_t size) { - std::vector output(size); - for (size_t i = 0; i < size; ++i) { - output[i] = static_cast(input[i]); - } - return output; -} - -// preprocess -static void preprocess_resnet(const string file, - std::vector& input_tensor_values, - std::vector& input_shape) { - auto channel = input_shape[1]; - auto height = input_shape[2]; - auto width = input_shape[3]; - auto size = cv::Size((int)width, (int)height); - auto image = read_image(file); - cv::Mat resized_image; - cv::resize(image, image, - cv::Size(224, 224)); - try - { - set_input_image(image, input_tensor_values.data()); - } - catch (const std::exception& exception) { - cout << "ERROR running set_input_image: " << exception.what() << endl; - exit(-1); - } - -} - -// postprocess -static string postprocess_resnet(const string file, - Ort::Value& output_tensor) { - auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape(); - auto channel = output_shape[1]; - auto output_tensor_ptr = 
output_tensor.GetTensorMutableData(); - std::vector float_output_tensor = convertToFloat32(output_tensor_ptr, channel); - auto softmax_output = softmax(float_output_tensor.data(), channel); - auto tb_top5 = topk(softmax_output, 5); - //print_topk(tb_top5); - auto top1 = tb_top5[0]; - auto cls = std::string("") + lookup(top1.first) + " prob. " + - std::to_string(top1.second); - return lookup(top1.first); -} - -#define CHECK_STATUS_OK(expr) \ - do { \ - Status _tmp_status = (expr); \ - CHECK(_tmp_status.IsOK()) << _tmp_status; \ - } while (0) - - -// pretty prints a shape dimension vector -static std::string print_shape(const std::vector& v) { - std::stringstream ss(""); - for (size_t i = 0; i < v.size() - 1; i++) - ss << v[i] << "x"; - ss << v[v.size() - 1]; - return ss.str(); -} - -static int calculate_product(const std::vector& v) { - int total = 1; - for (auto& i : v) - total *= (int)i; - return total; -} - -static void usage() { - std::cout << "usage: resnet50 "<< std::endl; -} - -int main(int argc, char* argv[]) { - - vector> results; - int opt = 0; - int64_t batch_number = 1; - auto model_name = strconverter.from_bytes(std::string(argv[optind])); - cout << "model name:" << std::string(argv[optind]) << endl; - auto ep = std::string(argv[optind + 1]); - auto curr_file = std::string(argv[optind + 2]); - cout << "ep:" << ep << endl; - Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "resnet50"); - auto session_options = Ort::SessionOptions(); - - auto config_key = std::string{ "config_file" }; - auto cache_dir = std::filesystem::current_path().string(); - - if (ep == "dml") - { - OrtApi const& ortApi = Ort::GetApi(); - OrtDmlApi const* ortDmlApi = nullptr; - ortApi.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast(&ortDmlApi)); - - session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); // For DML EP - session_options.DisableMemPattern(); // For DML EP - - int deviceIndex = 0; - - ortDmlApi->SessionOptionsAppendExecutionProvider_DML(session_options, 
deviceIndex); - cout << "Finished appending DML session options" << endl; - - } - auto session = Ort::Session(env, model_name.data(), session_options); - // print name/shape of inputs and outputs - Ort::AllocatorWithDefaultOptions allocator; - auto input_count = session.GetInputCount(); - auto input_names = std::vector(); - auto input_names_ptr = std::vector(); - auto input_shapes = std::vector>(); - input_shapes.reserve(input_count); - input_names_ptr.reserve(input_count); - input_names.reserve(input_count); - std::cout << "Input Node Name/Shape (" << input_count << "):" << std::endl; - for (size_t i = 0; i < input_count; i++) - { - input_shapes.push_back(session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); - auto name = session.GetInputNameAllocated(i, allocator); - input_names.push_back(name.get()); - input_names_ptr.push_back(std::move(name)); - std::cout << "\t" << input_names[i] << " : " << print_shape(input_shapes[i]) << std::endl; - } - - auto output_count = session.GetOutputCount(); - auto output_shapes = std::vector>(); - auto output_names_ptr = std::vector(); - auto output_names = std::vector(); - output_shapes.reserve(output_count); - output_names_ptr.reserve(output_count); - output_names.reserve(output_count); - - for (size_t i = 0; i < output_count; i++) - { - auto shape = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); - output_shapes.push_back(shape); - auto name = session.GetOutputNameAllocated(i, allocator); - output_names.push_back(name.get()); - output_names_ptr.push_back(std::move(name)); - std::cout << "\t" << output_names[i] << " : " << print_shape(output_shapes[i]) << std::endl; - } - for (int i = 0; i < 1; i++) - { - auto input_shape = input_shapes[0]; - if (input_shape[0] == -1) { - input_shape[0] = batch_number; - } - int total_number_elements = calculate_product(input_shape); - std::vector input_tensor_values(total_number_elements); - try - { - preprocess_resnet(curr_file, input_tensor_values, 
input_shape); - } - catch (const Ort::Exception& exception) { - cout << "ERROR running preprocess: " << exception.what() << endl; - exit(-1); - } - std::vector input_tensors; - Ort::MemoryInfo info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - input_tensors.push_back(Ort::Value::CreateTensor( - info, input_tensor_values.data(), input_tensor_values.size(), - input_shape.data(), input_shape.size())); - - // double-check the dimensions of the input tensor - assert(input_tensors[0].IsTensor() && - input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == - input_shape); - try { - auto output_tensors = session.Run(Ort::RunOptions(), input_names.data(), input_tensors.data(), input_count, output_names.data(), output_count); - assert(output_tensors.size() == session.GetOutputNames().size() && - output_tensors[0].IsTensor()); - auto output_shape = - output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); - string predicted = postprocess_resnet(curr_file, output_tensors[0]); - cout << "Finished inference. output label: " << predicted << endl; - } - catch (const Ort::Exception& exception) { - cout << "ERROR running model inference: " << exception.what() << endl; - exit(-1); - } - } - - return 0; -} - -static cv::Mat read_image(const string file) { - cv::Mat image; - image = cv::imread(file); - return image; -} - -static cv::Mat croppedImage(const cv::Mat& image, int height, int width) { - cv::Mat cropped_img; - int offset_h = (image.rows - height) / 2; - int offset_w = (image.cols - width) / 2; - cv::Rect box(offset_w, offset_h, width, height); - cropped_img = image(box).clone(); - return cropped_img; -} - -static cv::Mat preprocess_image(const cv::Mat& image, cv::Size size) { - float smallest_side = 256; - float scale = smallest_side / ((image.rows > image.cols) ? 
(float)image.cols - : (float)image.rows); - cv::Mat resized_image; - cv::resize(image, resized_image, - cv::Size(image.cols * (int)scale, image.rows * (int)scale)); - return croppedImage(resized_image, size.height, size.width); -} - -//(image_data - mean) * scale, BRG2RGB and hwc2chw -static void set_input_image(const cv::Mat& image, Ort::Float16_t* data) { - float mean[3] = { 0.0f, 0.0f, 0.0f }; - float scales[3] = { 1.0f, 1.0f, 1.0f }; - for (int c = 0; c < 3; c++) { - for (int h = 0; h < image.rows; h++) { - for (int w = 0; w < image.cols; w++) { - auto c_t = abs(c - 2); // BRG to RGB - auto image_data = - ((image.at(h, w)[c_t] - mean[c_t]) * scales[c_t]) / 255; - data[c * image.rows * image.cols + h * image.cols + w] = - (Ort::Float16_t)image_data; - } - } - } -} - -static std::vector softmax(float* data, int64_t size) { - auto output = std::vector(size); - std::transform(data, data + size, output.begin(), expf); - auto sum = - std::accumulate(output.begin(), output.end(), 0.0f, std::plus()); - std::transform(output.begin(), output.end(), output.begin(), - [sum](float v) { return v / sum; }); - return output; -} - -static std::vector> topk(const std::vector& score, - int K) { - auto indices = std::vector(score.size()); - std::iota(indices.begin(), indices.end(), 0); - std::partial_sort(indices.begin(), indices.begin() + K, indices.end(), - [&score](int a, int b) { return score[a] > score[b]; }); - auto ret = std::vector>(K); - std::transform( - indices.begin(), indices.begin() + K, ret.begin(), - [&score](int index) { return std::make_pair(index, score[index]); }); - return ret; -} - -static void print_topk(const std::vector>& topk) { - for (const auto& v : topk) { - std::cout << std::setiosflags(std::ios::left) << std::setw(11) - << "score[" + std::to_string(v.first) + "]" - << " = " << std::setw(12) << v.second - << " text: " << lookup(v.first) - << std::resetiosflags(std::ios::left) << std::endl; - } -} - -static const char* lookup(int index) { - static 
const char* table[] = { - #include "imagenet_labels.inc" - }; - - if (index < 0) { - return ""; - } - else { - return table[index]; - } -} \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.c b/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.c deleted file mode 100644 index a4ac2bfb..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.c +++ /dev/null @@ -1,72 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ************************************************************************************/ -#include "getopt.h" - -#include -#include -int opterr = 1, /* if error message should be printed */ - optind = 1, /* index into parent argv vector */ - optopt, /* character checked for validity */ - optreset; /* reset getopt */ -char *optarg; /* argument associated with option */ -#define BADCH (int)'?' -#define BADARG (int)':' -#define EMSG "" - -/* - * getopt -- - * Parse argc/argv argument vector. - */ -int getopt(int nargc, char *const nargv[], const char *ostr) { - static char *place = EMSG; /* option letter processing */ - const char *oli; /* option letter list index */ - - if (optreset || !*place) { /* update scanning pointer */ - optreset = 0; - if (optind >= nargc || *(place = nargv[optind]) != '-') { - place = EMSG; - return (-1); - } - if (place[1] && *++place == '-') { /* found "--" */ - ++optind; - place = EMSG; - return (-1); - } - } /* option letter okay? */ - if ((optopt = (int)*place++) == (int)':' || !(oli = strchr(ostr, optopt))) { - /* - * if the user didn't specify '-' as an option, - * assume it means -1. 
- */ - if (optopt == (int)'-') return (-1); - if (!*place) ++optind; - if (opterr && *ostr != ':') (void)printf("illegal option -- %c\n", optopt); - return (BADCH); - } - if (*++oli != ':') { /* don't need argument */ - optarg = NULL; - if (!*place) ++optind; - } else { /* need an argument */ - if (*place) /* no white space */ - optarg = place; - else if (nargc <= ++optind) { /* no arg */ - place = EMSG; - if (*ostr == ':') return (BADARG); - if (opterr) (void)printf("option requires an argument -- %c\n", optopt); - return (BADCH); - } else /* white space */ - optarg = nargv[optind]; - place = EMSG; - ++optind; - } - return (optopt); /* dump back option letter */ -} diff --git a/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.h b/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.h deleted file mode 100644 index 5163a113..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/resnet50/util/getopt.h +++ /dev/null @@ -1,17 +0,0 @@ -/*********************************************************************************** -MIT License - -Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ************************************************************************************/ -#ifndef GETOPT_H -#define GETOPT_H -extern int opterr, optind, optopt, optreset; -extern char* optarg; -int getopt(int nargc, char* const nargv[], const char* ostr); -#endif diff --git a/CNN-examples/iGPU/getting_started/cpp/run.bat b/CNN-examples/iGPU/getting_started/cpp/run.bat deleted file mode 100644 index 493ade78..00000000 --- a/CNN-examples/iGPU/getting_started/cpp/run.bat +++ /dev/null @@ -1,3 +0,0 @@ -@echo off - -build\Release\resnet50.exe ..\torch_to_onnx-float16_conversion-perf_tuning\gpu-dml_model.onnx dml ..\cat.jpg \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/description.json b/CNN-examples/iGPU/getting_started/description.json deleted file mode 100644 index d8843fe6..00000000 --- a/CNN-examples/iGPU/getting_started/description.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "run_dir": ".", - "owner": "TBD", - "setup": [ - "conda activate ryzen-ai-1.2.0", - "python -m pip install -r requirements.txt", - "python -m olive.workflows.run --config resnet50_config.json --setup", - "python -m olive.workflows.run --config resnet50_config.json", - "git clone https://github.com/opencv/opencv.git -b 4.6.0", - "cd opencv", - "cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -G \"Visual Studio 17 2022\" \"-DCMAKE_INSTALL_PREFIX=C:\\opencv\" \"-DCMAKE_PREFIX_PATH=C:\\opencv\" -DCMAKE_BUILD_TYPE=Release -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_WITH_STATIC_CRT=OFF -B build", - "cmake --build build --config Release", - "cmake --install build --config Release" - ], - 
"test_steps": [ - { - "name": "igpu-getting-started-python", - "command": "python predict.py", - "run_type": [ - "pr", - "daily" - ], - "devices": [ - "phoenix", - "hpt", - "strix" - ] - }, - { - "name": "igpu-getting-started-cpp", - "command": "cd cpp && compile.bat opencv\\build && run.bat", - "run_type": [ - "pr", - "daily" - ], - "devices": [ - "phoenix", - "hpt", - "strix" - ] - } - ], - "cleanup": [] - } \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/imagenet_labels.txt b/CNN-examples/iGPU/getting_started/imagenet_labels.txt deleted file mode 100644 index a9987082..00000000 --- a/CNN-examples/iGPU/getting_started/imagenet_labels.txt +++ /dev/null @@ -1,1001 +0,0 @@ - 0 background - 1 tench, Tinca tinca - 2 goldfish, Carassius auratus - 3 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias - 4 tiger shark, Galeocerdo cuvieri - 5 hammerhead, hammerhead shark - 6 electric ray, crampfish, numbfish, torpedo - 7 stingray - 8 cock - 9 hen - 10 ostrich, Struthio camelus - 11 brambling, Fringilla montifringilla - 12 goldfinch, Carduelis carduelis - 13 house finch, linnet, Carpodacus mexicanus - 14 junco, snowbird - 15 indigo bunting, indigo finch, indigo bird, Passerina cyanea - 16 robin, American robin, Turdus migratorius - 17 bulbul - 18 jay - 19 magpie - 20 chickadee - 21 water ouzel, dipper - 22 kite - 23 bald eagle, American eagle, Haliaeetus leucocephalus - 24 vulture - 25 great grey owl, great gray owl, Strix nebulosa - 26 European fire salamander, Salamandra salamandra - 27 common newt, Triturus vulgaris - 28 eft - 29 spotted salamander, Ambystoma maculatum - 30 axolotl, mud puppy, Ambystoma mexicanum - 31 bullfrog, Rana catesbeiana - 32 tree frog, tree-frog - 33 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui - 34 loggerhead, loggerhead turtle, Caretta caretta - 35 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea - 36 mud turtle - 37 terrapin - 38 box turtle, box 
tortoise - 39 banded gecko - 40 common iguana, iguana, Iguana iguana - 41 American chameleon, anole, Anolis carolinensis - 42 whiptail, whiptail lizard - 43 agama - 44 frilled lizard, Chlamydosaurus kingi - 45 alligator lizard - 46 Gila monster, Heloderma suspectum - 47 green lizard, Lacerta viridis - 48 African chameleon, Chamaeleo chamaeleon - 49 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis - 50 African crocodile, Nile crocodile, Crocodylus niloticus - 51 American alligator, Alligator mississipiensis - 52 triceratops - 53 thunder snake, worm snake, Carphophis amoenus - 54 ringneck snake, ring-necked snake, ring snake - 55 hognose snake, puff adder, sand viper - 56 green snake, grass snake - 57 king snake, kingsnake - 58 garter snake, grass snake - 59 water snake - 60 vine snake - 61 night snake, Hypsiglena torquata - 62 boa constrictor, Constrictor constrictor - 63 rock python, rock snake, Python sebae - 64 Indian cobra, Naja naja - 65 green mamba - 66 sea snake - 67 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus - 68 diamondback, diamondback rattlesnake, Crotalus adamanteus - 69 sidewinder, horned rattlesnake, Crotalus cerastes - 70 trilobite - 71 harvestman, daddy longlegs, Phalangium opilio - 72 scorpion - 73 black and gold garden spider, Argiope aurantia - 74 barn spider, Araneus cavaticus - 75 garden spider, Aranea diademata - 76 black widow, Latrodectus mactans - 77 tarantula - 78 wolf spider, hunting spider - 79 tick - 80 centipede - 81 black grouse - 82 ptarmigan - 83 ruffed grouse, partridge, Bonasa umbellus - 84 prairie chicken, prairie grouse, prairie fowl - 85 peacock - 86 quail - 87 partridge - 88 African grey, African gray, Psittacus erithacus - 89 macaw - 90 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita - 91 lorikeet - 92 coucal - 93 bee eater - 94 hornbill - 95 hummingbird - 96 jacamar - 97 toucan - 98 drake - 99 red-breasted merganser, Mergus serrator - 100 goose - 101 black 
swan, Cygnus atratus - 102 tusker - 103 echidna, spiny anteater, anteater - 104 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus - 105 wallaby, brush kangaroo - 106 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus - 107 wombat - 108 jellyfish - 109 sea anemone, anemone - 110 brain coral - 111 flatworm, platyhelminth - 112 nematode, nematode worm, roundworm - 113 conch - 114 snail - 115 slug - 116 sea slug, nudibranch - 117 chiton, coat-of-mail shell, sea cradle, polyplacophore - 118 chambered nautilus, pearly nautilus, nautilus - 119 Dungeness crab, Cancer magister - 120 rock crab, Cancer irroratus - 121 fiddler crab - 122 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica - 123 American lobster, Northern lobster, Maine lobster, Homarus americanus - 124 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish - 125 crayfish, crawfish, crawdad, crawdaddy - 126 hermit crab - 127 isopod - 128 white stork, Ciconia ciconia - 129 black stork, Ciconia nigra - 130 spoonbill - 131 flamingo - 132 little blue heron, Egretta caerulea - 133 American egret, great white heron, Egretta albus - 134 bittern - 135 crane - 136 limpkin, Aramus pictus - 137 European gallinule, Porphyrio porphyrio - 138 American coot, marsh hen, mud hen, water hen, Fulica americana - 139 bustard - 140 ruddy turnstone, Arenaria interpres - 141 red-backed sandpiper, dunlin, Erolia alpina - 142 redshank, Tringa totanus - 143 dowitcher - 144 oystercatcher, oyster catcher - 145 pelican - 146 king penguin, Aptenodytes patagonica - 147 albatross, mollymawk - 148 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus - 149 killer whale, killer, orca, grampus, sea wolf, Orcinus orca - 150 dugong, Dugong dugon - 151 sea lion - 152 Chihuahua - 153 Japanese spaniel - 154 Maltese dog, Maltese terrier, Maltese - 155 Pekinese, Pekingese, Peke - 156 Shih-Tzu - 157 Blenheim spaniel - 
158 papillon - 159 toy terrier - 160 Rhodesian ridgeback - 161 Afghan hound, Afghan - 162 basset, basset hound - 163 beagle - 164 bloodhound, sleuthhound - 165 bluetick - 166 black-and-tan coonhound - 167 Walker hound, Walker foxhound - 168 English foxhound - 169 redbone - 170 borzoi, Russian wolfhound - 171 Irish wolfhound - 172 Italian greyhound - 173 whippet - 174 Ibizan hound, Ibizan Podenco - 175 Norwegian elkhound, elkhound - 176 otterhound, otter hound - 177 Saluki, gazelle hound - 178 Scottish deerhound, deerhound - 179 Weimaraner - 180 Staffordshire bullterrier, Staffordshire bull terrier - 181 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier - 182 Bedlington terrier - 183 Border terrier - 184 Kerry blue terrier - 185 Irish terrier - 186 Norfolk terrier - 187 Norwich terrier - 188 Yorkshire terrier - 189 wire-haired fox terrier - 190 Lakeland terrier - 191 Sealyham terrier, Sealyham - 192 Airedale, Airedale terrier - 193 cairn, cairn terrier - 194 Australian terrier - 195 Dandie Dinmont, Dandie Dinmont terrier - 196 Boston bull, Boston terrier - 197 miniature schnauzer - 198 giant schnauzer - 199 standard schnauzer - 200 Scotch terrier, Scottish terrier, Scottie - 201 Tibetan terrier, chrysanthemum dog - 202 silky terrier, Sydney silky - 203 soft-coated wheaten terrier - 204 West Highland white terrier - 205 Lhasa, Lhasa apso - 206 flat-coated retriever - 207 curly-coated retriever - 208 golden retriever - 209 Labrador retriever - 210 Chesapeake Bay retriever - 211 German short-haired pointer - 212 vizsla, Hungarian pointer - 213 English setter - 214 Irish setter, red setter - 215 Gordon setter - 216 Brittany spaniel - 217 clumber, clumber spaniel - 218 English springer, English springer spaniel - 219 Welsh springer spaniel - 220 cocker spaniel, English cocker spaniel, cocker - 221 Sussex spaniel - 222 Irish water spaniel - 223 kuvasz - 224 schipperke - 225 groenendael - 226 malinois - 227 briard - 228 
kelpie - 229 komondor - 230 Old English sheepdog, bobtail - 231 Shetland sheepdog, Shetland sheep dog, Shetland - 232 collie - 233 Border collie - 234 Bouvier des Flandres, Bouviers des Flandres - 235 Rottweiler - 236 German shepherd, German shepherd dog, German police dog, alsatian - 237 Doberman, Doberman pinscher - 238 miniature pinscher - 239 Greater Swiss Mountain dog - 240 Bernese mountain dog - 241 Appenzeller - 242 EntleBucher - 243 boxer - 244 bull mastiff - 245 Tibetan mastiff - 246 French bulldog - 247 Great Dane - 248 Saint Bernard, St Bernard - 249 Eskimo dog, husky - 250 malamute, malemute, Alaskan malamute - 251 Siberian husky - 252 dalmatian, coach dog, carriage dog - 253 affenpinscher, monkey pinscher, monkey dog - 254 basenji - 255 pug, pug-dog - 256 Leonberg - 257 Newfoundland, Newfoundland dog - 258 Great Pyrenees - 259 Samoyed, Samoyede - 260 Pomeranian - 261 chow, chow chow - 262 keeshond - 263 Brabancon griffon - 264 Pembroke, Pembroke Welsh corgi - 265 Cardigan, Cardigan Welsh corgi - 266 toy poodle - 267 miniature poodle - 268 standard poodle - 269 Mexican hairless - 270 timber wolf, grey wolf, gray wolf, Canis lupus - 271 white wolf, Arctic wolf, Canis lupus tundrarum - 272 red wolf, maned wolf, Canis rufus, Canis niger - 273 coyote, prairie wolf, brush wolf, Canis latrans - 274 dingo, warrigal, warragal, Canis dingo - 275 dhole, Cuon alpinus - 276 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus - 277 hyena, hyaena - 278 red fox, Vulpes vulpes - 279 kit fox, Vulpes macrotis - 280 Arctic fox, white fox, Alopex lagopus - 281 grey fox, gray fox, Urocyon cinereoargenteus - 282 tabby, tabby cat - 283 tiger cat - 284 Persian cat - 285 Siamese cat, Siamese - 286 Egyptian cat - 287 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor - 288 lynx, catamount - 289 leopard, Panthera pardus - 290 snow leopard, ounce, Panthera uncia - 291 jaguar, panther, Panthera onca, Felis onca - 292 lion, king of beasts, Panthera 
leo - 293 tiger, Panthera tigris - 294 cheetah, chetah, Acinonyx jubatus - 295 brown bear, bruin, Ursus arctos - 296 American black bear, black bear, Ursus americanus, Euarctos americanus - 297 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus - 298 sloth bear, Melursus ursinus, Ursus ursinus - 299 mongoose - 300 meerkat, mierkat - 301 tiger beetle - 302 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle - 303 ground beetle, carabid beetle - 304 long-horned beetle, longicorn, longicorn beetle - 305 leaf beetle, chrysomelid - 306 dung beetle - 307 rhinoceros beetle - 308 weevil - 309 fly - 310 bee - 311 ant, emmet, pismire - 312 grasshopper, hopper - 313 cricket - 314 walking stick, walkingstick, stick insect - 315 cockroach, roach - 316 mantis, mantid - 317 cicada, cicala - 318 leafhopper - 319 lacewing, lacewing fly - 320 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk - 321 damselfly - 322 admiral - 323 ringlet, ringlet butterfly - 324 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus - 325 cabbage butterfly - 326 sulphur butterfly, sulfur butterfly - 327 lycaenid, lycaenid butterfly - 328 starfish, sea star - 329 sea urchin - 330 sea cucumber, holothurian - 331 wood rabbit, cottontail, cottontail rabbit - 332 hare - 333 Angora, Angora rabbit - 334 hamster - 335 porcupine, hedgehog - 336 fox squirrel, eastern fox squirrel, Sciurus niger - 337 marmot - 338 beaver - 339 guinea pig, Cavia cobaya - 340 sorrel - 341 zebra - 342 hog, pig, grunter, squealer, Sus scrofa - 343 wild boar, boar, Sus scrofa - 344 warthog - 345 hippopotamus, hippo, river horse, Hippopotamus amphibius - 346 ox - 347 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis - 348 bison - 349 ram, tup - 350 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis - 351 ibex, Capra ibex - 352 hartebeest - 353 impala, Aepyceros melampus - 354 gazelle - 
355 Arabian camel, dromedary, Camelus dromedarius - 356 llama - 357 weasel - 358 mink - 359 polecat, fitch, foulmart, foumart, Mustela putorius - 360 black-footed ferret, ferret, Mustela nigripes - 361 otter - 362 skunk, polecat, wood pussy - 363 badger - 364 armadillo - 365 three-toed sloth, ai, Bradypus tridactylus - 366 orangutan, orang, orangutang, Pongo pygmaeus - 367 gorilla, Gorilla gorilla - 368 chimpanzee, chimp, Pan troglodytes - 369 gibbon, Hylobates lar - 370 siamang, Hylobates syndactylus, Symphalangus syndactylus - 371 guenon, guenon monkey - 372 patas, hussar monkey, Erythrocebus patas - 373 baboon - 374 macaque - 375 langur - 376 colobus, colobus monkey - 377 proboscis monkey, Nasalis larvatus - 378 marmoset - 379 capuchin, ringtail, Cebus capucinus - 380 howler monkey, howler - 381 titi, titi monkey - 382 spider monkey, Ateles geoffroyi - 383 squirrel monkey, Saimiri sciureus - 384 Madagascar cat, ring-tailed lemur, Lemur catta - 385 indri, indris, Indri indri, Indri brevicaudatus - 386 Indian elephant, Elephas maximus - 387 African elephant, Loxodonta africana - 388 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens - 389 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca - 390 barracouta, snoek - 391 eel - 392 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch - 393 rock beauty, Holocanthus tricolor - 394 anemone fish - 395 sturgeon - 396 gar, garfish, garpike, billfish, Lepisosteus osseus - 397 lionfish - 398 puffer, pufferfish, blowfish, globefish - 399 abacus - 400 abaya - 401 academic gown, academic robe, judge's robe - 402 accordion, piano accordion, squeeze box - 403 acoustic guitar - 404 aircraft carrier, carrier, flattop, attack aircraft carrier - 405 airliner - 406 airship, dirigible - 407 altar - 408 ambulance - 409 amphibian, amphibious vehicle - 410 analog clock - 411 apiary, bee house - 412 apron - 413 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, 
trash barrel, trash bin - 414 assault rifle, assault gun - 415 backpack, back pack, knapsack, packsack, rucksack, haversack - 416 bakery, bakeshop, bakehouse - 417 balance beam, beam - 418 balloon - 419 ballpoint, ballpoint pen, ballpen, Biro - 420 Band Aid - 421 banjo - 422 bannister, banister, balustrade, balusters, handrail - 423 barbell - 424 barber chair - 425 barbershop - 426 barn - 427 barometer - 428 barrel, cask - 429 barrow, garden cart, lawn cart, wheelbarrow - 430 baseball - 431 basketball - 432 bassinet - 433 bassoon - 434 bathing cap, swimming cap - 435 bath towel - 436 bathtub, bathing tub, bath, tub - 437 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon - 438 beacon, lighthouse, beacon light, pharos - 439 beaker - 440 bearskin, busby, shako - 441 beer bottle - 442 beer glass - 443 bell cote, bell cot - 444 bib - 445 bicycle-built-for-two, tandem bicycle, tandem - 446 bikini, two-piece - 447 binder, ring-binder - 448 binoculars, field glasses, opera glasses - 449 birdhouse - 450 boathouse - 451 bobsled, bobsleigh, bob - 452 bolo tie, bolo, bola tie, bola - 453 bonnet, poke bonnet - 454 bookcase - 455 bookshop, bookstore, bookstall - 456 bottlecap - 457 bow - 458 bow tie, bow-tie, bowtie - 459 brass, memorial tablet, plaque - 460 brassiere, bra, bandeau - 461 breakwater, groin, groyne, mole, bulwark, seawall, jetty - 462 breastplate, aegis, egis - 463 broom - 464 bucket, pail - 465 buckle - 466 bulletproof vest - 467 bullet train, bullet - 468 butcher shop, meat market - 469 cab, hack, taxi, taxicab - 470 caldron, cauldron - 471 candle, taper, wax light - 472 cannon - 473 canoe - 474 can opener, tin opener - 475 cardigan - 476 car mirror - 477 carousel, carrousel, merry-go-round, roundabout, whirligig - 478 carpenter's kit, tool kit - 479 carton - 480 car wheel - 481 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM - 482 cassette - 483 cassette 
player - 484 castle - 485 catamaran - 486 CD player - 487 cello, violoncello - 488 cellular telephone, cellular phone, cellphone, cell, mobile phone - 489 chain - 490 chainlink fence - 491 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour - 492 chain saw, chainsaw - 493 chest - 494 chiffonier, commode - 495 chime, bell, gong - 496 china cabinet, china closet - 497 Christmas stocking - 498 church, church building - 499 cinema, movie theater, movie theatre, movie house, picture palace - 500 cleaver, meat cleaver, chopper - 501 cliff dwelling - 502 cloak - 503 clog, geta, patten, sabot - 504 cocktail shaker - 505 coffee mug - 506 coffeepot - 507 coil, spiral, volute, whorl, helix - 508 combination lock - 509 computer keyboard, keypad - 510 confectionery, confectionary, candy store - 511 container ship, containership, container vessel - 512 convertible - 513 corkscrew, bottle screw - 514 cornet, horn, trumpet, trump - 515 cowboy boot - 516 cowboy hat, ten-gallon hat - 517 cradle - 518 crane - 519 crash helmet - 520 crate - 521 crib, cot - 522 Crock Pot - 523 croquet ball - 524 crutch - 525 cuirass - 526 dam, dike, dyke - 527 desk - 528 desktop computer - 529 dial telephone, dial phone - 530 diaper, nappy, napkin - 531 digital clock - 532 digital watch - 533 dining table, board - 534 dishrag, dishcloth - 535 dishwasher, dish washer, dishwashing machine - 536 disk brake, disc brake - 537 dock, dockage, docking facility - 538 dogsled, dog sled, dog sleigh - 539 dome - 540 doormat, welcome mat - 541 drilling platform, offshore rig - 542 drum, membranophone, tympan - 543 drumstick - 544 dumbbell - 545 Dutch oven - 546 electric fan, blower - 547 electric guitar - 548 electric locomotive - 549 entertainment center - 550 envelope - 551 espresso maker - 552 face powder - 553 feather boa, boa - 554 file, file cabinet, filing cabinet - 555 fireboat - 556 fire engine, fire truck - 557 fire screen, fireguard - 558 flagpole, flagstaff - 559 flute, 
transverse flute - 560 folding chair - 561 football helmet - 562 forklift - 563 fountain - 564 fountain pen - 565 four-poster - 566 freight car - 567 French horn, horn - 568 frying pan, frypan, skillet - 569 fur coat - 570 garbage truck, dustcart - 571 gasmask, respirator, gas helmet - 572 gas pump, gasoline pump, petrol pump, island dispenser - 573 goblet - 574 go-kart - 575 golf ball - 576 golfcart, golf cart - 577 gondola - 578 gong, tam-tam - 579 gown - 580 grand piano, grand - 581 greenhouse, nursery, glasshouse - 582 grille, radiator grille - 583 grocery store, grocery, food market, market - 584 guillotine - 585 hair slide - 586 hair spray - 587 half track - 588 hammer - 589 hamper - 590 hand blower, blow dryer, blow drier, hair dryer, hair drier - 591 hand-held computer, hand-held microcomputer - 592 handkerchief, hankie, hanky, hankey - 593 hard disc, hard disk, fixed disk - 594 harmonica, mouth organ, harp, mouth harp - 595 harp - 596 harvester, reaper - 597 hatchet - 598 holster - 599 home theater, home theatre - 600 honeycomb - 601 hook, claw - 602 hoopskirt, crinoline - 603 horizontal bar, high bar - 604 horse cart, horse-cart - 605 hourglass - 606 iPod - 607 iron, smoothing iron - 608 jack-o'-lantern - 609 jean, blue jean, denim - 610 jeep, landrover - 611 jersey, T-shirt, tee shirt - 612 jigsaw puzzle - 613 jinrikisha, ricksha, rickshaw - 614 joystick - 615 kimono - 616 knee pad - 617 knot - 618 lab coat, laboratory coat - 619 ladle - 620 lampshade, lamp shade - 621 laptop, laptop computer - 622 lawn mower, mower - 623 lens cap, lens cover - 624 letter opener, paper knife, paperknife - 625 library - 626 lifeboat - 627 lighter, light, igniter, ignitor - 628 limousine, limo - 629 liner, ocean liner - 630 lipstick, lip rouge - 631 Loafer - 632 lotion - 633 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system - 634 loupe, jeweler's loupe - 635 lumbermill, sawmill - 636 magnetic compass - 637 mailbag, postbag - 638 mailbox, letter box - 
639 maillot - 640 maillot, tank suit - 641 manhole cover - 642 maraca - 643 marimba, xylophone - 644 mask - 645 matchstick - 646 maypole - 647 maze, labyrinth - 648 measuring cup - 649 medicine chest, medicine cabinet - 650 megalith, megalithic structure - 651 microphone, mike - 652 microwave, microwave oven - 653 military uniform - 654 milk can - 655 minibus - 656 miniskirt, mini - 657 minivan - 658 missile - 659 mitten - 660 mixing bowl - 661 mobile home, manufactured home - 662 Model T - 663 modem - 664 monastery - 665 monitor - 666 moped - 667 mortar - 668 mortarboard - 669 mosque - 670 mosquito net - 671 motor scooter, scooter - 672 mountain bike, all-terrain bike, off-roader - 673 mountain tent - 674 mouse, computer mouse - 675 mousetrap - 676 moving van - 677 muzzle - 678 nail - 679 neck brace - 680 necklace - 681 nipple - 682 notebook, notebook computer - 683 obelisk - 684 oboe, hautboy, hautbois - 685 ocarina, sweet potato - 686 odometer, hodometer, mileometer, milometer - 687 oil filter - 688 organ, pipe organ - 689 oscilloscope, scope, cathode-ray oscilloscope, CRO - 690 overskirt - 691 oxcart - 692 oxygen mask - 693 packet - 694 paddle, boat paddle - 695 paddlewheel, paddle wheel - 696 padlock - 697 paintbrush - 698 pajama, pyjama, pj's, jammies - 699 palace - 700 panpipe, pandean pipe, syrinx - 701 paper towel - 702 parachute, chute - 703 parallel bars, bars - 704 park bench - 705 parking meter - 706 passenger car, coach, carriage - 707 patio, terrace - 708 pay-phone, pay-station - 709 pedestal, plinth, footstall - 710 pencil box, pencil case - 711 pencil sharpener - 712 perfume, essence - 713 Petri dish - 714 photocopier - 715 pick, plectrum, plectron - 716 pickelhaube - 717 picket fence, paling - 718 pickup, pickup truck - 719 pier - 720 piggy bank, penny bank - 721 pill bottle - 722 pillow - 723 ping-pong ball - 724 pinwheel - 725 pirate, pirate ship - 726 pitcher, ewer - 727 plane, carpenter's plane, woodworking plane - 728 planetarium - 729 
plastic bag - 730 plate rack - 731 plow, plough - 732 plunger, plumber's helper - 733 Polaroid camera, Polaroid Land camera - 734 pole - 735 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria - 736 poncho - 737 pool table, billiard table, snooker table - 738 pop bottle, soda bottle - 739 pot, flowerpot - 740 potter's wheel - 741 power drill - 742 prayer rug, prayer mat - 743 printer - 744 prison, prison house - 745 projectile, missile - 746 projector - 747 puck, hockey puck - 748 punching bag, punch bag, punching ball, punchball - 749 purse - 750 quill, quill pen - 751 quilt, comforter, comfort, puff - 752 racer, race car, racing car - 753 racket, racquet - 754 radiator - 755 radio, wireless - 756 radio telescope, radio reflector - 757 rain barrel - 758 recreational vehicle, RV, R.V. - 759 reel - 760 reflex camera - 761 refrigerator, icebox - 762 remote control, remote - 763 restaurant, eating house, eating place, eatery - 764 revolver, six-gun, six-shooter - 765 rifle - 766 rocking chair, rocker - 767 rotisserie - 768 rubber eraser, rubber, pencil eraser - 769 rugby ball - 770 rule, ruler - 771 running shoe - 772 safe - 773 safety pin - 774 saltshaker, salt shaker - 775 sandal - 776 sarong - 777 sax, saxophone - 778 scabbard - 779 scale, weighing machine - 780 school bus - 781 schooner - 782 scoreboard - 783 screen, CRT screen - 784 screw - 785 screwdriver - 786 seat belt, seatbelt - 787 sewing machine - 788 shield, buckler - 789 shoe shop, shoe-shop, shoe store - 790 shoji - 791 shopping basket - 792 shopping cart - 793 shovel - 794 shower cap - 795 shower curtain - 796 ski - 797 ski mask - 798 sleeping bag - 799 slide rule, slipstick - 800 sliding door - 801 slot, one-armed bandit - 802 snorkel - 803 snowmobile - 804 snowplow, snowplough - 805 soap dispenser - 806 soccer ball - 807 sock - 808 solar dish, solar collector, solar furnace - 809 sombrero - 810 soup bowl - 811 space bar - 812 space heater - 813 space shuttle - 814 spatula - 815 
speedboat - 816 spider web, spider's web - 817 spindle - 818 sports car, sport car - 819 spotlight, spot - 820 stage - 821 steam locomotive - 822 steel arch bridge - 823 steel drum - 824 stethoscope - 825 stole - 826 stone wall - 827 stopwatch, stop watch - 828 stove - 829 strainer - 830 streetcar, tram, tramcar, trolley, trolley car - 831 stretcher - 832 studio couch, day bed - 833 stupa, tope - 834 submarine, pigboat, sub, U-boat - 835 suit, suit of clothes - 836 sundial - 837 sunglass - 838 sunglasses, dark glasses, shades - 839 sunscreen, sunblock, sun blocker - 840 suspension bridge - 841 swab, swob, mop - 842 sweatshirt - 843 swimming trunks, bathing trunks - 844 swing - 845 switch, electric switch, electrical switch - 846 syringe - 847 table lamp - 848 tank, army tank, armored combat vehicle, armoured combat vehicle - 849 tape player - 850 teapot - 851 teddy, teddy bear - 852 television, television system - 853 tennis ball - 854 thatch, thatched roof - 855 theater curtain, theatre curtain - 856 thimble - 857 thresher, thrasher, threshing machine - 858 throne - 859 tile roof - 860 toaster - 861 tobacco shop, tobacconist shop, tobacconist - 862 toilet seat - 863 torch - 864 totem pole - 865 tow truck, tow car, wrecker - 866 toyshop - 867 tractor - 868 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi - 869 tray - 870 trench coat - 871 tricycle, trike, velocipede - 872 trimaran - 873 tripod - 874 triumphal arch - 875 trolleybus, trolley coach, trackless trolley - 876 trombone - 877 tub, vat - 878 turnstile - 879 typewriter keyboard - 880 umbrella - 881 unicycle, monocycle - 882 upright, upright piano - 883 vacuum, vacuum cleaner - 884 vase - 885 vault - 886 velvet - 887 vending machine - 888 vestment - 889 viaduct - 890 violin, fiddle - 891 volleyball - 892 waffle iron - 893 wall clock - 894 wallet, billfold, notecase, pocketbook - 895 wardrobe, closet, press - 896 warplane, military plane - 897 washbasin, handbasin, washbowl, lavabo, 
wash-hand basin - 898 washer, automatic washer, washing machine - 899 water bottle - 900 water jug - 901 water tower - 902 whiskey jug - 903 whistle - 904 wig - 905 window screen - 906 window shade - 907 Windsor tie - 908 wine bottle - 909 wing - 910 wok - 911 wooden spoon - 912 wool, woolen, woollen - 913 worm fence, snake fence, snake-rail fence, Virginia fence - 914 wreck - 915 yawl - 916 yurt - 917 web site, website, internet site, site - 918 comic book - 919 crossword puzzle, crossword - 920 street sign - 921 traffic light, traffic signal, stoplight - 922 book jacket, dust cover, dust jacket, dust wrapper - 923 menu - 924 plate - 925 guacamole - 926 consomme - 927 hot pot, hotpot - 928 trifle - 929 ice cream, icecream - 930 ice lolly, lolly, lollipop, popsicle - 931 French loaf - 932 bagel, beigel - 933 pretzel - 934 cheeseburger - 935 hotdog, hot dog, red hot - 936 mashed potato - 937 head cabbage - 938 broccoli - 939 cauliflower - 940 zucchini, courgette - 941 spaghetti squash - 942 acorn squash - 943 butternut squash - 944 cucumber, cuke - 945 artichoke, globe artichoke - 946 bell pepper - 947 cardoon - 948 mushroom - 949 Granny Smith - 950 strawberry - 951 orange - 952 lemon - 953 fig - 954 pineapple, ananas - 955 banana - 956 jackfruit, jak, jack - 957 custard apple - 958 pomegranate - 959 hay - 960 carbonara - 961 chocolate sauce, chocolate syrup - 962 dough - 963 meat loaf, meatloaf - 964 pizza, pizza pie - 965 potpie - 966 burrito - 967 red wine - 968 espresso - 969 cup - 970 eggnog - 971 alp - 972 bubble - 973 cliff, drop, drop-off - 974 coral reef - 975 geyser - 976 lakeside, lakeshore - 977 promontory, headland, head, foreland - 978 sandbar, sand bar - 979 seashore, coast, seacoast, sea-coast - 980 valley, vale - 981 volcano - 982 ballplayer, baseball player - 983 groom, bridegroom - 984 scuba diver - 985 rapeseed - 986 daisy - 987 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum - 988 corn - 989 acorn - 990 
hip, rose hip, rosehip - 991 buckeye, horse chestnut, conker - 992 coral fungus - 993 agaric - 994 gyromitra - 995 stinkhorn, carrion fungus - 996 earthstar - 997 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa - 998 bolete - 999 ear, spike, capitulum -1000 toilet tissue, toilet paper, bathroom tissue \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/predict.py b/CNN-examples/iGPU/getting_started/predict.py deleted file mode 100644 index 962a3717..00000000 --- a/CNN-examples/iGPU/getting_started/predict.py +++ /dev/null @@ -1,57 +0,0 @@ -from PIL import Image -import argparse -import numpy as np -import onnx -import onnxruntime as ort -import numpy as np -from PIL import Image -from pathlib import Path - -model_path = r'./torch_to_onnx-float16_conversion-perf_tuning/gpu-dml_model.onnx' -model = onnx.load(model_path) - - -providers = ['DmlExecutionProvider'] -provider_options = [{"device_id": "0"}] - -available_providers = ort.get_available_providers() - -# Create session options -session_options = ort.SessionOptions() -session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - -session = ort.InferenceSession(model.SerializeToString(), - sess_options = session_options, - providers=providers, - provider_options=provider_options) - - -image_path = 'cat.jpg' -image = Image.open(image_path) - -image = image.resize((224, 224)) - -image_array = np.array(image).astype(np.float16) - -mean = np.array([0.485, 0.456, 0.406]) -std = np.array([0.229, 0.224, 0.225]) -image_array = (image_array / 255.0 - mean) / std - -image_array = np.transpose(image_array, (2, 0, 1)) -input_data = np.expand_dims(image_array, axis=0) - -input_data = input_data.astype(np.float16) - -# Run the model -for i in range(1000): - outputs = session.run(None, {'input_image': input_data}) - -# Process the outputs -output_array = outputs[0] -predicted_class_index = np.argmax(output_array) - -with open('imagenet_labels.txt') as 
f: - labels = [line.strip() for line in f.readlines()] - - -print(labels[predicted_class_index]) \ No newline at end of file diff --git a/CNN-examples/iGPU/getting_started/requirements.txt b/CNN-examples/iGPU/getting_started/requirements.txt deleted file mode 100644 index e91de831..00000000 --- a/CNN-examples/iGPU/getting_started/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -olive-ai==0.5.0 -pillow==12.0.0 -torch==2.8.0 diff --git a/CNN-examples/iGPU/getting_started/resnet50_config.json b/CNN-examples/iGPU/getting_started/resnet50_config.json deleted file mode 100644 index 4d884e6d..00000000 --- a/CNN-examples/iGPU/getting_started/resnet50_config.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - "config": { - "model_loader": "load_pytorch_origin_model", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "input_image" ], - "input_shapes": [ [ 1, 3, 224, 224 ] ], - "output_names": [ "output" ] - } - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [ - {"name": "avg", "priority": 1}, - {"name": "max"}, - {"name": "min"} - ], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "create_dataloader", - "batch_size": 1 - } - } - ] - } - }, - "passes": { - "torch_to_onnx": { - "type": "OnnxConversion", - "config": { - "target_opset": 17 - } - }, - "float16_conversion": { - "type": "OnnxFloatToFloat16" - }, - "perf_tuning": { - "type": "OrtPerfTuning", - "config": { - "user_script": "user_script.py", - "dataloader_func": "create_dataloader", - "device": "gpu", - "batch_size": 1, - "execution_mode_list": [ "ORT_SEQUENTIAL" ], - "providers_list": [ "DmlExecutionProvider" ] - } - } - }, - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": 
"local_system", - "target": "local_system", - "clean_cache": true, - "cache_dir": "cache" - } -} diff --git a/CNN-examples/iGPU/getting_started/user_script.py b/CNN-examples/iGPU/getting_started/user_script.py deleted file mode 100644 index 35c84514..00000000 --- a/CNN-examples/iGPU/getting_started/user_script.py +++ /dev/null @@ -1,23 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import torch - - -def load_pytorch_origin_model(torch_hub_model_path): - return torch.hub.load("pytorch/vision:v0.10.0", "resnet50", pretrained=True) - - -class DataLoader: - def __init__(self, batchsize): - self.batchsize = batchsize - - def __getitem__(self, idx): - input_data = torch.rand((self.batchsize, 3, 224, 224), dtype=torch.float16) - label = None - return input_data, label - - -def create_dataloader(data_dir, batchsize, *args, **kwargs): - return DataLoader(batchsize) diff --git a/CNN-examples/image_classification/image_classification.py b/CNN-examples/image_classification/image_classification.py deleted file mode 100644 index 2325c2ca..00000000 --- a/CNN-examples/image_classification/image_classification.py +++ /dev/null @@ -1,77 +0,0 @@ -import os -import argparse -import onnx -import time -import numpy as np -from PIL import Image -from pathlib import Path -import onnxruntime as ort -from onnxruntime.quantization.calibrate import CalibrationMethod -from onnxruntime.quantization.quant_utils import QuantType -from quark.onnx import ModelQuantizer, PowerOfTwoMethod -from quark.onnx.quantization.config import Config, get_default_config -from utils import ImageDataReader, evaluate_onnx_model - -def preprocess_image(image_path): - image = Image.open(image_path) - image = image.resize((224, 224)) - image_array = np.array(image).astype(np.float32)/255 - image_array = 
np.transpose(image_array, (2, 0, 1)) - input_data = np.expand_dims(image_array, axis=0) - return input_data - -def benchmark_model(session, runs=100): - input_shape = session.get_inputs()[0].shape - input_shape = tuple(1 if isinstance(dim, str) else dim for dim in input_shape) - input_data = np.random.rand(*input_shape).astype(np.float32) - start_time = time.time() - for _ in range(runs): - outputs = session.run(None, {session.get_inputs()[0].name: input_data}) - end_time = time.time() - avg_time = (end_time - start_time) / runs - print('Average inference time over {} runs: {} ms'.format(runs, avg_time * 1000)) - -def main(args): - # Setup the Input model - input_model_path = args.model_input - calibration_dataset_path = args.calib_data - - # Benchmark the float/quantized models on CPU/NPU - model = onnx.load(input_model_path) - provider = ['CPUExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{ - 'config_file': 'vaiml_config.json', - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey' - }] - if args.device == 'cpu': - # Run the float model on CPU - session = ort.InferenceSession(model.SerializeToString(), providers=provider) - print('Benchmarking model on CPU:') - benchmark_model(session) - - elif args.device == 'npu': - # Run quantized model on NPU - quant_model = onnx.load(input_model_path) - provider = ['VitisAIExecutionProvider'] - session = ort.InferenceSession(model.SerializeToString(), providers=provider, - provider_options=provider_options) - print('Benchmarking model on NPU:') - benchmark_model(session) - - # Evaluate the model if the flag is set - if args.evaluate: - print("Model Accuracy:") - top1_acc, top5_acc = evaluate_onnx_model(input_model_path, imagenet_data_path=calibration_dataset_path, device=args.device) - print("{} model accuracy on {}: Top1 {:.3f}, Top5 {:.3f} ".format(args.model_input, args.device, top1_acc, top5_acc)) - -if __name__ == "__main__": - parser = 
argparse.ArgumentParser(description="Quantize and evaluate ONNX models.") - parser.add_argument('--model_input', type=str, default='models/resnet50_bf16.onnx', help='Path to the input ONNX model.') - parser.add_argument('--calib_data', type=str, default='calib_data', help='Path to the calibration dataset.') - parser.add_argument('--device', type=str, choices=['cpu', 'npu'], required=False, help='device to run the model.') - parser.add_argument('--evaluate', action='store_true', help='Flag to evaluate the model.') - - args = parser.parse_args() - main(args) diff --git a/CNN-examples/image_classification/models/download_ResNet.py b/CNN-examples/image_classification/models/download_ResNet.py deleted file mode 100644 index ea34111e..00000000 --- a/CNN-examples/image_classification/models/download_ResNet.py +++ /dev/null @@ -1,29 +0,0 @@ -import torch -import torchvision.models as models -import torch.onnx -from torchvision.models import resnet50, ResNet50_Weights -import onnx - -# Load a pre-trained ResNet model -model = models.resnet50(weights=ResNet50_Weights.DEFAULT) -model.eval() # Set the model to evaluation mode - -# Create a dummy input tensor with the same size as the model's input -dummy_input = torch.randn(1, 3, 224, 224) - -# Define the path where the ONNX model will be saved -onnx_model_path = "resnet50.onnx" - -# Export the model to ONNX format -torch.onnx.export( - model, - dummy_input, - onnx_model_path, - opset_version=17, - export_params=True, - input_names=['input'], - output_names=['output'], - dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} -) - -print(f"Model has been successfully exported to {onnx_model_path}") \ No newline at end of file diff --git a/CNN-examples/image_classification/prepare_data.py b/CNN-examples/image_classification/prepare_data.py deleted file mode 100644 index 65366bb4..00000000 --- a/CNN-examples/image_classification/prepare_data.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (C) 2023, Advanced 
Micro Devices, Inc. All rights reserved. -# SPDX-License-Identifier: MIT -# -import os -import shutil -import sys - -if len(sys.argv) < 3: - print("Usage: python prepare_val_data.py ") - sys.exit(1) - -source_folder = sys.argv[1] -calib_data_path = sys.argv[2] - -if not os.path.exists(source_folder): - print("The provided data path does not exist.") - sys.exit(1) - -files = os.listdir(source_folder) - -for filename in files: - if not filename.startswith('ILSVRC2012_val_') or not filename.endswith( - '.JPEG'): - continue - - n_identifier = filename.split('_')[-1].split('.')[0] - folder_name = n_identifier - folder_path = os.path.join(source_folder, folder_name) - if not os.path.exists(folder_path): - os.makedirs(folder_path) - file_path = os.path.join(source_folder, filename) - destination = os.path.join(folder_path, filename) - shutil.move(file_path, destination) - -print("File organization complete.") - -if not os.path.exists(calib_data_path): - os.makedirs(calib_data_path) - -destination_folder = calib_data_path - -subfolders = os.listdir(source_folder) - -for subfolder in subfolders: - source_subfolder = os.path.join(source_folder, subfolder) - destination_subfolder = os.path.join(destination_folder, subfolder) - os.makedirs(destination_subfolder, exist_ok=True) - - files = os.listdir(source_subfolder) - - if files: - file_to_copy = files[0] - source_file = os.path.join(source_subfolder, file_to_copy) - destination_file = os.path.join(destination_subfolder, file_to_copy) - - shutil.copy(source_file, destination_file) - -print("Creating calibration dataset complete.") diff --git a/CNN-examples/image_classification/requirements.txt b/CNN-examples/image_classification/requirements.txt deleted file mode 100644 index 0aea30c7..00000000 --- a/CNN-examples/image_classification/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -timm==1.0.20 -torchvision==0.23.0 -numpy==1.26.4 \ No newline at end of file diff --git a/CNN-examples/image_classification/utils.py 
b/CNN-examples/image_classification/utils.py deleted file mode 100644 index 56c889c5..00000000 --- a/CNN-examples/image_classification/utils.py +++ /dev/null @@ -1,241 +0,0 @@ -import os -import shutil - -import onnxruntime as ort -import numpy as np -from torchvision import datasets, transforms -from torch.utils.data import DataLoader -from tqdm import tqdm -from pathlib import Path - -def reorganize_imagenet_val(val_dir, mapping_file, output_dir): - # Read the mapping file - with open(mapping_file, 'r') as f: - lines = f.readlines() - - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - - # Process each line in the mapping file - for line in lines: - image_name, class_label = line.strip().split() - - # Create class directory if it doesn't exist - class_dir = os.path.join(output_dir, class_label) - os.makedirs(class_dir, exist_ok=True) - - # Move the image to the class directory - src = os.path.join(val_dir, image_name) - dst = os.path.join(class_dir, image_name) - shutil.move(src, dst) - -# Example usage -# reorganize_imagenet_val('path/to/val_images', 'path/to/mapping_file.txt', 'path/to/output_dir') - -def evaluate_onnx_model(onnx_model_path, imagenet_data_path, batch_size=1, device='cpu'): - # Load the ONNX model - if device == 'npu': - provider = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - print(cache_dir) - provider_options = [{ - 'config_file': 'vaiml_config.json', - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey' - }] - # Setting up Session options - from quark.onnx import get_library_path - sess_options = onnxruntime.SessionOptions() - # sess_options.register_custom_ops_library(get_library_path(device)) - session = ort.InferenceSession(onnx_model_path, sess_options, providers=provider, - provider_options=provider_options) - else: - providers = ['CPUExecutionProvider'] - # Setting up Session options - from quark.onnx import get_library_path - sess_options = 
onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device)) - session = ort.InferenceSession(onnx_model_path, sess_options, providers=providers) - - input_name = session.get_inputs()[0].name - - # Define the preprocessing transformations - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - - # Load the ImageNet validation dataset - imagenet_data = datasets.ImageFolder(root=imagenet_data_path, transform=preprocess) - data_loader = DataLoader(imagenet_data, batch_size=batch_size, shuffle=False) - - top1_correct = 0 - top5_correct = 0 - total = 0 - - # Evaluate the model - for images, labels in tqdm(data_loader, desc="Evaluating"): - # Run inference - outputs = session.run(None, {input_name: images.numpy()}) - outputs = outputs[0] - - # Calculate top-1 and top-5 predictions - top1_predictions = np.argmax(outputs, axis=1) - top5_predictions = np.argsort(outputs, axis=1)[:, -5:] - - # Update top-1 accuracy - top1_correct += (top1_predictions == labels.numpy()).sum() - - # Update top-5 accuracy - for i, label in enumerate(labels.numpy()): - if label in top5_predictions[i]: - top5_correct += 1 - - total += labels.size(0) - - top1_accuracy = top1_correct / total - top5_accuracy = top5_correct / total - - return top1_accuracy, top5_accuracy - -# print(f"Accuracy: {accuracy * 100:.2f}%") -import numpy -from PIL import Image -import onnxruntime -from onnxruntime.quantization.calibrate import CalibrationDataReader - -def _preprocess_images(images_folder: str, - height: int, - width: int, - size_limit=0, - batch_size=100): - """ - Loads a batch of images and preprocess them - parameter images_folder: path to folder storing images - parameter height: image height in pixels - parameter width: image width in pixels - parameter size_limit: number of images to load. 
Default is 0 which means all images are picked. - return: list of matrices characterizing multiple images - """ - image_path = os.listdir(images_folder) - image_names = [] - for image_dir in image_path: - image_name = os.listdir(os.path.join(images_folder, image_dir)) - image_names.append(os.path.join(image_dir, image_name[0])) - if size_limit > 0 and len(image_names) >= size_limit: - batch_filenames = [image_names[i] for i in range(size_limit)] - else: - batch_filenames = image_names - unconcatenated_batch_data = [] - - batch_data = [] - for index, image_name in enumerate(batch_filenames): - image_filepath = images_folder + "/" + image_name - pillow_img = Image.new("RGB", (width, height)) - pillow_img.paste(Image.open(image_filepath).resize((width, height))) - image_array = numpy.array(pillow_img) / 255.0 - mean = numpy.array([0.485, 0.456, 0.406]) - image_array = (image_array - mean) - std = numpy.array([0.229, 0.224, 0.225]) - nchw_data = image_array / std - nchw_data = nchw_data.transpose((2, 0, 1)) - nchw_data = numpy.expand_dims(nchw_data, axis=0) - nchw_data = nchw_data.astype(numpy.float32) - unconcatenated_batch_data.append(nchw_data) - - if (index + 1) % batch_size == 0: - one_batch_data = numpy.concatenate(unconcatenated_batch_data, - axis=0) - unconcatenated_batch_data.clear() - batch_data.append(one_batch_data) - - return batch_data - -class ImageDataReader(CalibrationDataReader): - - def __init__(self, calibration_image_folder: str, model_path: str, data_size: int, batch_size: int): - self.enum_data = None - - # Use inference session to get input shape. 
- session = onnxruntime.InferenceSession( - model_path, providers=['CPUExecutionProvider']) - (_, _, height, width) = session.get_inputs()[0].shape - - # Convert image to input data - self.nhwc_data_list = _preprocess_images(calibration_image_folder, - height, width, data_size, batch_size) - self.input_name = session.get_inputs()[0].name - self.datasize = len(self.nhwc_data_list) - - def get_next(self): - if self.enum_data is None: - self.enum_data = iter([{ - self.input_name: nhwc_data - } for nhwc_data in self.nhwc_data_list]) - return next(self.enum_data, None) - - def rewind(self): - self.enum_data = None - - def reset(self): - self.enum_data = None - -import torch -from timm.data import create_loader, resolve_data_config, create_dataset -from typing import List, Any, Union -from timm.models import create_model - -def post_process_top1(output: torch.tensor) -> float: - _, preds_top1 = torch.max(output, 1) - return preds_top1 - -def getAccuracy_top1(preds: Union[torch.tensor, list], targets: Union[torch.tensor, list]) -> float: - assert len(preds) == len(targets) - assert len(preds) > 0 - count = 0 - for i in range(len(preds)): - pred = preds[i] - target = targets[i] - if pred == target: - count += 1 - return count / len(preds) - -global model_name -model_name = "resnet50" - -global calibration_dataset_path -calibration_dataset_path = "calib_data" - -def top1_accu(results: List[Union[torch.tensor, List[Any]]]) -> float: - """ - Calculate the top1 accuracy of the model. 
- :param results: the result of the model - :return: the top1 accuracy - """ - timm_model_name = model_name - calib_data_path = calibration_dataset_path - - timm_model = create_model( - timm_model_name, - pretrained=False, - ) - - data_config = resolve_data_config(model=timm_model, use_test_size=True) - - loader = create_loader(create_dataset('', calib_data_path), - input_size=data_config['input_size'], - batch_size=20, - use_prefetcher=False, - interpolation=data_config['interpolation'], - mean=data_config['mean'], - std=data_config['std'], - num_workers=2, - crop_pct=data_config['crop_pct']) - target = [] - for _, labels in loader: - target.extend(labels.data.tolist()) - outputs_top1 = post_process_top1(torch.tensor(numpy.squeeze(numpy.array(results)))) - top1_acc = getAccuracy_top1(outputs_top1, target) - return round(top1_acc, 2) \ No newline at end of file diff --git a/CNN-examples/image_classification/vaiml_config.json b/CNN-examples/image_classification/vaiml_config.json deleted file mode 100644 index 8d7f6408..00000000 --- a/CNN-examples/image_classification/vaiml_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "optimize_level": 1, - "preferred_data_storage": "auto" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/CNN-examples/object_detection/yolov8m/calib_images/drone.jpg b/CNN-examples/object_detection/yolov8m/calib_images/drone.jpg deleted file mode 100644 index e55500e3..00000000 Binary files a/CNN-examples/object_detection/yolov8m/calib_images/drone.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/calib_images/football.jpg b/CNN-examples/object_detection/yolov8m/calib_images/football.jpg deleted file mode 100644 index fa29edc6..00000000 Binary 
files a/CNN-examples/object_detection/yolov8m/calib_images/football.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/calib_images/parking.jpg b/CNN-examples/object_detection/yolov8m/calib_images/parking.jpg deleted file mode 100644 index 1d886d74..00000000 Binary files a/CNN-examples/object_detection/yolov8m/calib_images/parking.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/models/export_to_onnx.py b/CNN-examples/object_detection/yolov8m/models/export_to_onnx.py deleted file mode 100644 index 1c86a7de..00000000 --- a/CNN-examples/object_detection/yolov8m/models/export_to_onnx.py +++ /dev/null @@ -1,63 +0,0 @@ -import torch -import onnx -from onnxsim import simplify - -def export_yolov8m_to_onnx(): - # Load the pre-trained YOLOv8m model - model_path = "yolov8m.pt" - # Load the checkpoint - checkpoint = torch.load(model_path, map_location='cpu', weights_only=False) - # Extract the model from checkpoint - if 'model' in checkpoint: - model = checkpoint['model'] - elif 'ema' in checkpoint: - model = checkpoint['ema'] - else: - model = checkpoint - # Set model to evaluation mode - if hasattr(model, 'float'): - model = model.float() - if hasattr(model, 'eval'): - model.eval() - - # Get number of classes - if hasattr(model, 'nc'): - num_classes = model.nc - print(f"Number of classes: {num_classes}") - - # Define input shape (batch_size, channels, height, width) - batch_size = 1 - img_size = 640 - dummy_input = torch.randn(batch_size, 3, img_size, img_size) - - # Export to ONNX - output_path = "yolov8m.onnx" - - torch.onnx.export( - model, - dummy_input, - output_path, - export_params=True, - opset_version=17, - do_constant_folding=True, - input_names=['images'], - output_names=['output0'] - ) - - print(f"YOLOv8m exported to {output_path}") - - # Optional: Simplify the ONNX model - try: - print("Simplifying ONNX model...") - onnx_model = onnx.load(output_path) - model_simp, check = simplify(onnx_model) - if check: - 
onnx.save(model_simp, output_path) - print("ONNX model simplified successfully") - else: - print("Simplification failed, keeping original model") - except Exception as e: - print(f"Simplification skipped: {e}") - -if __name__ == "__main__": - export_yolov8m_to_onnx() diff --git a/CNN-examples/object_detection/yolov8m/models/modify_onnx_model.py b/CNN-examples/object_detection/yolov8m/models/modify_onnx_model.py deleted file mode 100644 index 3bee3ce4..00000000 --- a/CNN-examples/object_detection/yolov8m/models/modify_onnx_model.py +++ /dev/null @@ -1,36 +0,0 @@ -import onnx -from onnx import helper, ModelProto - -def set_node_as_output(onnx_model_path, node_name, modified_model_path): - # Load the ONNX model - model = onnx.load(onnx_model_path) - graph = model.graph - - # Find the desired node and create a new output for it - new_outputs = [] - node_found = False - for node in graph.node: - if node.name == node_name: - for output in node.output: - # Create a new output ValueInfoProto based on the node's output - output_value_info = helper.make_tensor_value_info(output, onnx.TensorProto.FLOAT, (1,80,8400)) - new_outputs.append(output_value_info) - node_found = True - break - - if not node_found: - print(f"Node '{node_name}' not found in the graph.") - else: - # Set new outputs to the graph - graph.output.extend(new_outputs) - - # Save the modified model - onnx.save(model, modified_model_path) - print(f"Model saved with modified outputs to {modified_model_path}") - -# Example usage -onnx_model_path = 'yolov8m.onnx' -node_name = '/model.22/Sigmoid' -modified_model_path = 'yolov8m_modified.onnx' - -set_node_as_output(onnx_model_path, node_name, modified_model_path) diff --git a/CNN-examples/object_detection/yolov8m/prepare_data.py b/CNN-examples/object_detection/yolov8m/prepare_data.py deleted file mode 100644 index 439d47c6..00000000 --- a/CNN-examples/object_detection/yolov8m/prepare_data.py +++ /dev/null @@ -1,251 +0,0 @@ -""" -This module prepares the COCO dataset 
by downloading and converting annotations. -""" - -import os -import json -import shutil -import zipfile -from collections import defaultdict -from pathlib import Path -import wget -import numpy as np -from tqdm import tqdm - - -def download_and_extract(url, destination): - print(f"Downloading from {url}...") - filename = wget.download(url, out=destination) - print(f"\nExtracting {filename}...") - with zipfile.ZipFile(filename, "r") as zip_ref: - zip_ref.extractall(destination) - os.remove(filename) - print(f"Extraction complete: {destination}") - - -def make_dirs(directory="./datasets/coco"): - dir_path = Path(directory) - (dir_path / "labels").mkdir(parents=True, exist_ok=True) - return dir_path - - -def coco91_to_coco80_class(): - """ - Maps COCO's 91-class IDs to 80-class IDs. - - Returns: - list: A mapping list from 91-class to 80-class. - """ - return [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - None, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - None, - 24, - 25, - None, - None, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - None, - 40, - 41, - 42, - 43, - 44, - 45, - 46, - 47, - 48, - 49, - 50, - 51, - 52, - 53, - 54, - 55, - 56, - 57, - 58, - 59, - None, - 60, - None, - None, - 61, - None, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - None, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - None, - ] - - -def convert_coco_json( - json_dir="./datasets/coco/annotations/", use_segments=False, cls91to80=False -): - """ - Converts COCO JSON annotations to YOLO format. - - Args: - json_dir (str): Directory containing COCO JSON files. - use_segments (bool): Whether to use segmentation data. - cls91to80 (bool): Whether to convert 91-class to 80-class. 
- """ - save_dir = make_dirs() - coco80 = coco91_to_coco80_class() - - for json_file in sorted(Path(json_dir).resolve().glob("*.json")): - if not str(json_file).endswith("instances_val2017.json"): - continue - - fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "") - fn.mkdir(parents=True, exist_ok=True) - with open(json_file, "r", encoding="utf-8") as file: - data = json.load(file) - - images = {"%g" % x["id"]: x for x in data["images"]} - img_to_anns = defaultdict(list) - for ann in data["annotations"]: - img_to_anns[ann["image_id"]].append(ann) - - with open( - Path(save_dir / "val2017").with_suffix(".txt"), "a", encoding="utf-8" - ) as txt_file: - for img_id, anns in tqdm( - img_to_anns.items(), desc=f"Annotations {json_file}" - ): - img = images["%g" % img_id] - h, w, f = img["height"], img["width"], img["file_name"] - bboxes = [] - segments = [] - - txt_file.write( - f"./images/{'/'.join(img['coco_url'].split('/')[-2:])}\n" - ) - for ann in anns: - if ann["iscrowd"]: - continue - box = np.array(ann["bbox"], dtype=np.float64) - box[:2] += box[2:] / 2 # xy top-left corner to center - box[[0, 2]] /= w # normalize x - box[[1, 3]] /= h # normalize y - if box[2] <= 0 or box[3] <= 0: # if w <= 0 and h <= 0 - continue - - cls = ( - coco80[ann["category_id"] - 1] - if cls91to80 - else ann["category_id"] - 1 - ) - box = [cls] + box.tolist() - if box not in bboxes: - bboxes.append(box) - - if use_segments and ann.get("segmentation"): - seg = ann["segmentation"] - if isinstance(seg, list) and len(seg) > 0: - s = np.array(seg[0]).reshape(-1, 2) - s[:, 0] /= w # normalize x - s[:, 1] /= h # normalize y - s = [cls] + s.reshape(-1).tolist() - if s not in segments: - segments.append(s) - - with open((fn / f).with_suffix(".txt"), "a", encoding="utf-8") as file: - for i, bbox in enumerate(bboxes): - line = ( - segments[i] if use_segments and i < len(segments) else bbox - ) - file.write(("%g " * len(line)).rstrip() % tuple(line) + "\n") - - -def main(): - """ 
- Main function to download COCO dataset and convert annotations. - """ - base_dir = "./datasets/coco" - os.makedirs(base_dir, exist_ok=True) - - # Download and extract images - images_dir = os.path.join(base_dir, "images") - os.makedirs(images_dir, exist_ok=True) - download_and_extract("http://images.cocodataset.org/zips/val2017.zip", images_dir) - - # Download and extract annotations - annotations_dir = os.path.join(base_dir, "annotations") - os.makedirs(annotations_dir, exist_ok=True) - download_and_extract( - "http://images.cocodataset.org/annotations/annotations_trainval2017.zip", - annotations_dir, - ) - - # Copy instances_val2017.json to parent folder - shutil.copy( - os.path.join(annotations_dir, "annotations", "instances_val2017.json"), - os.path.join(annotations_dir, "instances_val2017.json"), - ) - - # Convert annotations - print("Converting COCO JSON annotations to YOLO format...") - convert_coco_json( - json_dir=os.path.join(annotations_dir, "annotations"), - use_segments=True, - cls91to80=True, - ) - - print("COCO dataset preparation completed.") - - -if __name__ == "__main__": - main() diff --git a/CNN-examples/object_detection/yolov8m/quantize_quark.py b/CNN-examples/object_detection/yolov8m/quantize_quark.py deleted file mode 100644 index 6d1daff1..00000000 --- a/CNN-examples/object_detection/yolov8m/quantize_quark.py +++ /dev/null @@ -1,160 +0,0 @@ -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
-# SPDX-License-Identifier: MIT -# -import re -import os -import cv2 -import onnx -import copy -import numpy as np -from typing import List, Tuple -from argparse import ArgumentParser, Namespace -from quark.onnx.quantization.config.config import Config -from quark.onnx.quantization.config.custom_config import get_default_config -from onnxruntime.quantization import CalibrationDataReader -from quark.onnx import ModelQuantizer - -DEFAULT_ADAROUND_PARAMS = { - 'DataSize': 1000, - 'FixedSeed': 1705472343, - 'BatchSize': 2, - 'NumIterations': 1000, - 'LearningRate': 0.1, - 'OptimAlgorithm': 'adaround', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - 'EarlyStop': True, -} - -DEFAULT_ADAQUANT_PARAMS = { - 'DataSize': 1000, - 'FixedSeed': 1705472343, - 'BatchSize': 2, - 'NumIterations': 1000, - 'LearningRate': 0.00001, - 'OptimAlgorithm': 'adaquant', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - 'EarlyStop': True, -} - -def parse_subgraphs_list(exclude_subgraphs: str) -> List[Tuple[List[str]]]: - subgraphs_list = [] - tuples = exclude_subgraphs.split(";") - for tup in tuples: - tup = tup.strip() - pattern = r'\[.*?\]' - matches = re.findall(pattern, tup) - assert len(matches) == 2 - start_nodes = matches[0].strip("[").strip("]").split(",") - start_nodes = [node.strip() for node in start_nodes] - end_nodes = matches[1].strip("[").strip("]").split(",") - end_nodes = [node.strip() for node in end_nodes] - subgraphs_list.append((start_nodes, end_nodes)) - return subgraphs_list - -def get_model_input_name(input_model_path: str) -> str: - model = onnx.load(input_model_path) - model_input_name = model.graph.input[0].name - return model_input_name - -class ImageDataReader(CalibrationDataReader): - - def __init__(self, calibration_image_folder: str, input_name: str): - self.enum_data = None - - self.input_name = input_name - - self.data_list = self._preprocess_images( - calibration_image_folder) - - def _preprocess_images(self, image_folder: str): - data_list = [] - img_names 
= [f for f in os.listdir(image_folder) if f.endswith('.png') or f.endswith('.jpg')] - for name in img_names: - input_image = cv2.imread(os.path.join(image_folder, name)) - # Resize the input image. Because the size of yolov8n is 640. - input_image = cv2.resize(input_image, (640, 640)) - input_data = np.array(input_image).astype(np.float32) - # Customer Pre-Process - input_data = input_data.transpose(2, 0, 1) - input_size = input_data.shape - if input_size[1] > input_size[2]: - input_data = input_data.transpose(0, 2, 1) - input_data = np.expand_dims(input_data, axis=0) - input_data = input_data / 255.0 - data_list.append(input_data) - - return data_list - - def get_next(self): - if self.enum_data is None: - self.enum_data = iter([{self.input_name: data} for data in self.data_list]) - return next(self.enum_data, None) - - def rewind(self): - self.enum_data = None - -def parse_args() -> Namespace: - parser = ArgumentParser() - parser.add_argument("--input_model_path", help="Specify the input model to be quantized", required=True) - parser.add_argument("--calib_data_path", help="Specify the calibration data path for quantization", required=True) - parser.add_argument("--output_model_path", - help="Specify the path to save the quantized model", - type=str, - default='quantized.onnx', - required=False) - parser.add_argument("--config", help="The configuration for quantization", type=str, default="XINT8", required=False) - parser.add_argument('--cle', action='store_true') - parser.add_argument('--adaround', action='store_true') - parser.add_argument('--adaquant', action='store_true') - parser.add_argument("--learning_rate", help="The learing_rate for fastfinetune", type=float, default=0.1, required=False) - parser.add_argument("--num_iters", help="The number of iterations for fastfinetune", type=int, default=1000, required=False) - parser.add_argument("--exclude_nodes", help="The names of excluding nodes", type=str, default='', required=False) - 
parser.add_argument("--exclude_subgraphs", help="The lists of excluding subgraphs", type=str, default='', required=False) - parser.add_argument('--save_as_external_data', action='store_true') - args, _ = parser.parse_known_args() - return args - -def main(args: Namespace) -> None: - quant_config = get_default_config(args.config) - quant_config.extra_options["BF16QDQToCast"] = True - config_copy = copy.deepcopy(quant_config) - config_copy.use_external_data_format = args.save_as_external_data - if args.exclude_nodes: - exclude_nodes = args.exclude_nodes.split(";") - exclude_nodes = [node_name.strip() for node_name in exclude_nodes] - config_copy.nodes_to_exclude = exclude_nodes - if args.exclude_subgraphs: - exclude_subgraphs = parse_subgraphs_list(args.exclude_subgraphs) - config_copy.subgraphs_to_exclude = exclude_subgraphs - if args.cle: - config_copy.include_cle = True - if args.adaround or args.adaquant: - config_copy.include_fast_ft = True - if args.adaround: - config_copy.extra_options['FastFinetune'] = DEFAULT_ADAROUND_PARAMS - if args.adaquant: - config_copy.extra_options['FastFinetune'] = DEFAULT_ADAQUANT_PARAMS - if args.learning_rate: - config_copy.extra_options['FastFinetune']['LearningRate'] = args.learning_rate - if args.num_iters: - config_copy.extra_options['FastFinetune']['NumIterations'] = args.num_iters - # quant config - # config_copy.nodes_to_exclude = ["/model.22/Concat_3", "/model.22/Split", "/model.22/dfl/Reshape", - # "/model.22/dfl/Transpose", "/model.22/dfl/Softmax", "/model.22/dfl/conv/Conv", - # "/model.22/dfl/Reshape_1", "/model.22/Shape", "/model.22/Gather", "/model.22/Add", - # "/model.22/Div", "/model.22/Mul", "/model.22/Mul_1", - # "/model.22/Slice", "/model.22/Slice_1", - # "/model.22/Sub", "/model.22/Add_1", "/model.22/Sub_1", "/model.22/Add_2", - # "/model.22/Div_1", "/model.22/Concat_4", "/model.22/Mul_2", "/model.22/Sigmoid", "/model.22/Concat_5"] - model_input_name = get_model_input_name(args.input_model_path) - 
calib_datareader = ImageDataReader(args.calib_data_path, model_input_name) - quant_config = Config(global_quant_config=config_copy) - quantizer = ModelQuantizer(quant_config) - quantizer.quantize_model(args.input_model_path, args.output_model_path, calib_datareader) - -if __name__ == '__main__': - args = parse_args() - main(args) diff --git a/CNN-examples/object_detection/yolov8m/requirements.txt b/CNN-examples/object_detection/yolov8m/requirements.txt deleted file mode 100644 index 6938f2da..00000000 --- a/CNN-examples/object_detection/yolov8m/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -opencv-python==4.11.0.86 -pycocotools==2.0.10 -wget==3.2 -torch==2.8.0 -ultralytics==8.3.155 \ No newline at end of file diff --git a/CNN-examples/object_detection/yolov8m/results/test_output_bf16.png b/CNN-examples/object_detection/yolov8m/results/test_output_bf16.png deleted file mode 100644 index 0820e9b3..00000000 Binary files a/CNN-examples/object_detection/yolov8m/results/test_output_bf16.png and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/results/test_output_int8.jpg b/CNN-examples/object_detection/yolov8m/results/test_output_int8.jpg deleted file mode 100644 index 36510b5a..00000000 Binary files a/CNN-examples/object_detection/yolov8m/results/test_output_int8.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/results/test_output_int8_skip_nodes.jpg b/CNN-examples/object_detection/yolov8m/results/test_output_int8_skip_nodes.jpg deleted file mode 100644 index d4d77454..00000000 Binary files a/CNN-examples/object_detection/yolov8m/results/test_output_int8_skip_nodes.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/results/yolov8m_quantized_concat_node.png b/CNN-examples/object_detection/yolov8m/results/yolov8m_quantized_concat_node.png deleted file mode 100644 index 70017677..00000000 Binary files a/CNN-examples/object_detection/yolov8m/results/yolov8m_quantized_concat_node.png and /dev/null differ 
diff --git a/CNN-examples/object_detection/yolov8m/results/yolov8m_skip_nodes.png b/CNN-examples/object_detection/yolov8m/results/yolov8m_skip_nodes.png deleted file mode 100644 index 73da5726..00000000 Binary files a/CNN-examples/object_detection/yolov8m/results/yolov8m_skip_nodes.png and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/run_inference.py b/CNN-examples/object_detection/yolov8m/run_inference.py deleted file mode 100644 index 67a91cba..00000000 --- a/CNN-examples/object_detection/yolov8m/run_inference.py +++ /dev/null @@ -1,175 +0,0 @@ -import onnxruntime as ort -import numpy as np -import cv2 -import argparse -import sys -from pathlib import Path -from utils import evaluate_on_coco, get_npu_info, get_xclbin -import os -import time - -# Load COCO class labels (optional) -COCO_CLASSES = [ # 80 classes - "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", - "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", - "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", - "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", - "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", - "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", - "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", - "hair drier", "toothbrush" -] - -# Preprocessing: resize, normalize, convert to CHW -def preprocess_image(image_path, img_size=640): - img = cv2.imread(image_path) - orig = img.copy() - img = cv2.resize(img, (img_size, img_size)) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = 
img.astype(np.float32) / 255.0 - img = np.transpose(img, (2, 0, 1)) # HWC to CHW - img = np.expand_dims(img, axis=0) - return img, orig - -# Postprocessing: extract detections, draw boxes -def postprocess(outputs, img, conf_thres=0.4): - predictions = np.transpose(np.squeeze(outputs[0])) - - boxes = [] - confidences = [] - class_ids = [] - - for pred in predictions: - x_center, y_center, width, height = pred[0:4] - class_scores = pred[4:] # 80 class scores - - class_id = np.argmax(class_scores) - confidence = class_scores[class_id] - - if confidence < conf_thres: - continue - - # Convert xywh (center) to xyxy (corners) - x1 = int((x_center - width / 2) * img.shape[1] / 640) - y1 = int((y_center - height / 2) * img.shape[0] / 640) - x2 = int((x_center + width / 2) * img.shape[1] / 640) - y2 = int((y_center + height / 2) * img.shape[0] / 640) - - # boxes.append((x1, y1, x2, y2, confidence, class_id)) - boxes.append([x1, y1, x2, y2]) - confidences.append(float(confidence)) - class_ids.append(class_id) - - # Apply Non-Maximum Suppression (NMS) - indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thres, nms_threshold=0.5) - - # Draw boxes - # for (x1, y1, x2, y2, conf, class_id) in boxes: - for i in indices: - x1, y1, x2, y2 = boxes[i] - conf = confidences[i] - class_id = class_ids[i] - label = f"Class {COCO_CLASSES[class_id]}: {conf:.2f}" - cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.putText(img, label, (x1, y1 - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2) - return img - -def benchmark(session,input_name,input_img,num_inference=100): - - # Perform warmup and 100 inference run of onnx model - - # warmup for 10 runs - for _ in range(10): - session.run(None, {input_name: input_img}) - - # Running for 100 inference: - time_list = [] - for _ in range(num_inference): - start = time.time() - output = session.run(None, {input_name: input_img}) - end = time.time() - time_list.append(end-start) - - avg_time_per_inference = sum(time_list)/num_inference 
- print("Avg time for each inference run:{:.3f} seconds".format(avg_time_per_inference)) - print("Model performance:{:.1f} FPS".format(1/avg_time_per_inference)) - - -def main(args): - image_path = args.input_image - onnx_path = args.model_input - # Preprocess the input image - input_img, original = preprocess_image(image_path) - session_options = ort.SessionOptions() - session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - if args.device == 'cpu': - print('Running Model on CPU') - ort_session = ort.InferenceSession(onnx_path, sess_options=session_options providers=['CPUExecutionProvider']) - elif args.device == 'npu-int8': - print('Running INT8 Model on NPU') - npu_device = get_npu_info() - if npu_device == 'PHX/HPT': - provider_options = [{ - 'cache_dir': str(Path(__file__).parent.resolve()), - 'cache_key': 'modelcachekey', - 'enable_cache_file_io_in_mem':'0', - 'target': 'X1', - 'xclbin': get_xclbin(npu_device) - }] - elif npu_device == 'STX' or 'KRK': - provider_options = [{ - 'cache_dir': str(Path(__file__).parent.resolve()), - 'cache_key': 'modelcachekey', - 'enable_cache_file_io_in_mem':'0' - }] - ort_session = ort.InferenceSession(onnx_path, sess_options=session_options,providers=['VitisAIExecutionProvider'], provider_options=provider_options) - - elif args.device == 'npu-bf16': - print('Running BF16 Model on NPU') - provider_options = [{ - 'config_file': 'vaiml_config.json', - 'cache_dir': str(Path(__file__).parent.resolve()), - 'cache_key': 'modelcachekey' - }] - ort_session = ort.InferenceSession(onnx_path, sess_options=session_options, providers=['VitisAIExecutionProvider'], provider_options=provider_options) - else: - print("Unsupported device. 
Please use 'cpu' or 'npu'.") - sys.exit(1) - - # Get the input name from the ONNX model - input_name = ort_session.get_inputs()[0].name - - outputs = ort_session.run(None, {input_name: input_img}) - # post process the outputs - result_img = postprocess(outputs, original) - - # Save or display the result image - cv2.imwrite(args.output_image, result_img) - - # Evaluate the model if the flag is set - if args.evaluate: - print("Model Accuracy:") - mAP, mAP50, mAP75 = evaluate_on_coco(args.model_input, ort_session, coco_dataset=args.coco_dataset, device=args.device) - print("{} model accuracy on {}: mAP {:.3f}, mAP50 {:.3f}, mAP75 {:.3f}".format(args.model_input, args.device, mAP, mAP50, mAP75)) - - if args.benchmark: - print("Model Performance:") - benchmark(ort_session,input_name,input_img) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Quantize and evaluate ONNX models.") - parser.add_argument('--model_input', type=str, default='models/resnet50_bf16.onnx', help='Path to the input ONNX model.') - parser.add_argument('--input_image', type=str, default='test_image.jpg', help='Path to the input image for inference.') - parser.add_argument('--output_image', type=str, default='test_output.jpg', help='Path to the output image for inference.') - parser.add_argument('--device', type=str, default='cpu', choices=['cpu', 'npu-int8', 'npu-bf16'], required=False, help='device to run the model.') - parser.add_argument('--int', action='store_true', help='Flag to set xclbin if model is INT8 type') - parser.add_argument('--evaluate', action='store_true', help='Flag to evaluate the model.') - parser.add_argument('--coco_dataset', type=str, default='datasets/coco', help='Path to the validation dataset.') - parser.add_argument('--benchmark', action='store_true', help='Flag to benchmark the model.') - - args = parser.parse_args() - main(args) diff --git a/CNN-examples/object_detection/yolov8m/test_image.jpg 
b/CNN-examples/object_detection/yolov8m/test_image.jpg deleted file mode 100644 index 19023f71..00000000 Binary files a/CNN-examples/object_detection/yolov8m/test_image.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8m/utils.py b/CNN-examples/object_detection/yolov8m/utils.py deleted file mode 100644 index fb1aa202..00000000 --- a/CNN-examples/object_detection/yolov8m/utils.py +++ /dev/null @@ -1,433 +0,0 @@ -import json -from pathlib import Path -import sys -import cv2 -import os -import subprocess -import numpy as np -import onnxruntime as ort -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from tqdm import tqdm - -PROJECT_DIR = Path(__file__).parent - -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - apu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): apu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): apu_type = 'KRK' - return apu_type - -def get_xclbin(npu_device): - xclbin_file = '' - if npu_device == 'STX' or npu_device=='KRK': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - if npu_device == 'PHX/HPT': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - return xclbin_file - -# Default mapping from YOLOv8 class IDs to COCO class names -id_to_cls_map_default = {'0': 'person', '1': 'bicycle', '2': 'car', '3': 'motorcycle', '4': 
'airplane', '5': 'bus', '6': 'train', '7': 'truck', '8': 'boat', '9': 'traffic light', - '10': 'fire hydrant', '11': 'stop sign', '12': 'parking meter', '13': 'bench', '14': 'bird', '15': 'cat', '16': 'dog', '17': 'horse', '18': 'sheep', '19': 'cow', - '20': 'elephant', '21': 'bear', '22': 'zebra', '23': 'giraffe', '24': 'backpack', '25': 'umbrella', '26': 'handbag', '27': 'tie', '28': 'suitcase', '29': 'frisbee', - '30': 'skis', '31': 'snowboard', '32': 'sports ball', '33': 'kite', '34': 'baseball bat', '35': 'baseball glove', '36': 'skateboard', '37': 'surfboard', '38': 'tennis racket', '39': 'bottle', - '40': 'wine glass', '41': 'cup', '42': 'fork', '43': 'knife', '44': 'spoon', '45': 'bowl', '46': 'banana', '47': 'apple', '48': 'sandwich', '49': 'orange', - '50': 'broccoli', '51': 'carrot', '52': 'hot dog', '53': 'pizza', '54': 'donut', '55': 'cake', '56': 'chair', '57': 'couch', '58': 'potted plant', '59': 'bed', - '60': 'dining table', '61': 'toilet', '62': 'tv', '63': 'laptop', '64': 'mouse', '65': 'remote', '66': 'keyboard', '67': 'cell phone', '68': 'microwave', '69': 'oven', - '70': 'toaster', '71': 'sink', '72': 'refrigerator', '73': 'book', '74': 'clock', '75': 'vase', '76': 'scissors', '77': 'teddy bear', '78': 'hair drier', '79': 'toothbrush'} - -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - apu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): apu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): apu_type = 'KRK' - return apu_type 
- -def get_xclbin(npu_device): - xclbin_file = '' - if npu_device == 'STX' or npu_device=='KRK': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - if npu_device == 'PHX/HPT': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - return xclbin_file - -def load_onnx_model(session, device: str): - # if device=='npu': - # providers = ["CUDAExecutionProvider"] - # else: - # providers = ["CPUExecutionProvider"] - # session = ort.InferenceSession(model_path, providers=providers) - - input_name = session.get_inputs()[0].name - custom_meta_map: dict = session.get_modelmeta().custom_metadata_map - id_to_cls_json_str = custom_meta_map.get("id_to_cls", None) - if id_to_cls_json_str is not None: - id_to_cls_map = json.loads(id_to_cls_json_str) - else: - id_to_cls_map = id_to_cls_map_default - - assert id_to_cls_map is not None - - return session, input_name, id_to_cls_map - - -def load_coco_dataset(annotations_path): - coco = COCO(annotations_path) - img_ids = coco.getImgIds() - - cats = coco.loadCats(coco.getCatIds()) - id_to_name = {cat["id"]: cat["name"] for cat in cats} - - return coco, img_ids, id_to_name - - -def preprocess_image(img: np.ndarray, input_size, bgr2rgb=False): - img_height, img_width = img.shape[:2] - scale = min(input_size[0] / img_width, input_size[1] / img_height) - new_size = int(img_width * scale), int(img_height * scale) - img_resized = cv2.resize(img, new_size) - - top = (input_size[1] - new_size[1]) // 2 - bottom = (input_size[1] - new_size[1]) - top - left = (input_size[0] - new_size[0]) // 2 - right = (input_size[0] - new_size[0]) - left - - img_resized = cv2.copyMakeBorder( - img_resized, - top, - bottom, - left, - right, - borderType=cv2.BORDER_CONSTANT, - value=(0, 0, 0), - ) - - # cv2.imwrite("runs/resized_and_padded.png", img_resized) - - if bgr2rgb: - img_resized = cv2.cvtColor(img_resized, 
cv2.COLOR_BGR2RGB) - - img_resized = np.float32(img_resized) / 255.0 - img_resized = img_resized.transpose(2, 0, 1) # hwc --> chw - img_resized = np.expand_dims(img_resized, axis=0) # chw --> 1chw - - # print(f"pad top {top}, left {left}, scale {scale}") - - return img_resized, (top, left), scale - - -def postprocess_output( - output: np.ndarray, - pad_top_left: tuple, - scale: float, - yolo_id_to_coco_id_map: dict, - min_score_thres: float, - nms_iou_thres: float, - img_width: int, - img_height: int, -): - # output shape: (xyxy + num-cls, num-boxes) - - # shape: (num-boxes, cxcywh + num-cls) - output = np.transpose(output, (1, 0)) - - cxcywh_nx4 = output[:, :4] # shape: (num-boxes, 4) - # restore boxes - cxcywh_nx4[:, 0] -= pad_top_left[1] # minus pad left from cx - cxcywh_nx4[:, 1] -= pad_top_left[0] # minus pad top from cy - cxcywh_nx4 /= scale # restore to original image scale - cx, cy, w, h = ( - cxcywh_nx4[:, 0], - cxcywh_nx4[:, 1], - cxcywh_nx4[:, 2], - cxcywh_nx4[:, 3], - ) - x0 = cx - w / 2.0 - y0 = cy - h / 2.0 - - class_scores_nxc = output[:, 4:] # shape: (num-boxes, num-cls) - scores = np.amax(class_scores_nxc, axis=1) # shape: (num-boxes,) - class_indices = np.argmax(class_scores_nxc, axis=1) - - # Stack boxes into list of [left, top, width, height] - boxes_xywh_nx4: np.ndarray = np.stack(arrays=[x0, y0, w, h], axis=1) - indices = cv2.dnn.NMSBoxes(boxes_xywh_nx4, scores, min_score_thres, nms_iou_thres) - - detections = [] - for i in indices: - cls_id = class_indices[i] - score = class_scores_nxc[i, cls_id] - if score >= min_score_thres: - detections.append( - { - "category_id": yolo_id_to_coco_id_map[int(cls_id)], - "bbox": tuple(round(float(x), 4) for x in boxes_xywh_nx4[i]), - "score": round(float(score), 4), - } - ) - - # keep map 100 - detections = sorted(detections, key=lambda d: d["score"], reverse=True) - if len(detections) > 100: - detections = detections[:100] - - return detections - - -def draw_detections( - canvas: np.ndarray, - 
img_detections: list, - id_to_name: dict = None, - save_path: Path = None, -): - for pred in img_detections: - score = pred["score"] - if score < 0.25: - continue - cls_id = pred["category_id"] - x, y, w, h = np.asarray(pred["bbox"], int) - cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), thickness=1) - - if id_to_name is not None: - text = f"{id_to_name[cls_id]}: {score * 100:.1f}%" - else: - text = f"{cls_id}: {score * 100:.1f}%" - - cv2.putText( - canvas, - text=text, - org=(x, max(y - 10, 0)), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=0.75, - color=(255, 0, 255), - thickness=1, - lineType=cv2.LINE_AA, - ) - - save_path = str(save_path or "runs/debug_postprocess.png") - cv2.imwrite(save_path, canvas) - - -def evaluate_model( - session: ort.InferenceSession, - input_name: str, - coco: COCO, - images_folder: Path, - img_ids: list, - yolo_id_to_coco_id_map: dict, - coco_id_to_cls_map: dict, - input_size=(640, 640), - min_score_thres=0.001, - nms_iou_thresh=0.5, - num_max_images=None, - output_root: Path = None, -): - images_folder = Path(images_folder) - assert images_folder.is_dir() - - if num_max_images is not None: - img_ids = img_ids[:num_max_images] - - detections = [] - demo_saved = False - for img_id in tqdm(img_ids): - img_info = coco.loadImgs(img_id)[0] - img_path = images_folder / img_info["file_name"] - img: np.ndarray = cv2.imread(img_path, cv2.IMREAD_COLOR) - img_height, img_width = img.shape[:2] - - img_resized, pad_top_left, scale = preprocess_image( - img, input_size, bgr2rgb=True - ) - - # outputs shape: (bs=1, xyxy + num-cls, num-boxes) - # outputs = session.run( - # output_names=["bbox_output", "cls_output"], - # input_feed={input_name: img_resized}, - # ) - # outputs = np.concat(outputs, axis=1) - outputs = session.run(output_names=None, input_feed={input_name: img_resized}) - outputs = outputs[0] - - img_detections = postprocess_output( - outputs[0], - pad_top_left, - scale, - yolo_id_to_coco_id_map, - min_score_thres, - 
nms_iou_thresh, - img_width, - img_height, - ) - - if not demo_saved: - save_path = None - if output_root is not None: - output_root = Path(output_root) - output_root.mkdir(parents=True, exist_ok=True) - save_path = output_root / f"predict_of_{img_id}.png" - - draw_detections(img.copy(), img_detections, coco_id_to_cls_map, save_path) - demo_saved = True - - for det in img_detections: - det["image_id"] = img_id - - detections.extend(img_detections) - - return detections - - -def save_detections(detections, output_path="detections.json"): - # Sort detections by image_id first and then by category_id - sorted_detections = sorted(detections, key=lambda x: (x['image_id'], x['category_id'])) - with open(output_path, "w") as f: - json.dump(sorted_detections, f, indent=2) - - -def save_coco_eval_results( - coco_eval: COCOeval, save_path: str = "coco_eval_results.json" -): - # Extract summary metrics (12 values: mAP, AR, etc.) - summary = { - "mAP": coco_eval.stats[0], - "mAP50": coco_eval.stats[1], - "mAP75": coco_eval.stats[2], - "mAP_small": coco_eval.stats[3], - "mAP_medium": coco_eval.stats[4], - "mAP_large": coco_eval.stats[5], - "AR@1": coco_eval.stats[6], - "AR@10": coco_eval.stats[7], - "AR@100": coco_eval.stats[8], - "AR_small": coco_eval.stats[9], - "AR_medium": coco_eval.stats[10], - "AR_large": coco_eval.stats[11], - } - - with open(save_path, "w") as f: - json.dump(summary, f, indent=2) - - print(f"COCO evaluation results saved to: {save_path}") - - -def evaluate_coco(coco_gt: COCO, detections_path: str, results_save_path: str): - coco_dt = coco_gt.loadRes(str(detections_path)) - coco_eval = COCOeval(coco_gt, coco_dt, "bbox") - - coco_eval.evaluate() - coco_eval.accumulate() - - # Print overall evaluation summary - coco_eval.summarize() - - # Extract per-category AP (IoU=0.5:0.95, area=all) - cat_ids = coco_gt.getCatIds() - cat_id_to_name = {cat["id"]: cat["name"] for cat in coco_gt.loadCats(cat_ids)} - - # precision shape: [IoU thresholds, Recall thresholds, 
Categories, Area range, MaxDets] - prec = coco_eval.eval["precision"] - - print("\nPer-category AP (IoU=0.5:0.95, area=all):") - for idx, cat_id in enumerate(cat_ids): - # Select metrics: IoU=0.5:0.95, area=all (index 0), maxDet=100 (index 2) - precision = prec[:, :, idx, 0, 2] - precision = precision[precision > -1] - ap = np.mean(precision) if precision.size else float("nan") - print(f"{cat_id_to_name[cat_id]:<20} AP: {ap:.3f}") - - mAP = coco_eval.stats[0] * 100 - mAP50 = coco_eval.stats[1] * 100 - mAP75 = coco_eval.stats[2] * 100 - - # print("\nMain COCO Metrics:") - # print(f"mAP (AP@[IoU=0.50:0.95]): {mAP:.1f}") - # print(f"mAP50 (AP@IoU=0.50) : {mAP50:.1f}") - # print(f"mAP75 (AP@IoU=0.75) : {mAP75:.1f}") - - save_coco_eval_results(coco_eval, results_save_path) - return mAP, mAP50, mAP75 - - -def calc_yolo_id_to_coco_map(yolo_id_to_cls_map: dict, coco_id_to_cls_map: dict): - yolo_cls_names = sorted(yolo_id_to_cls_map.values()) - coco_cls_names = sorted(coco_id_to_cls_map.values()) - - assert yolo_cls_names == coco_cls_names - - coco_cls_to_id_map = {v: k for k, v in coco_id_to_cls_map.items()} - - yolo_id_to_coco_id_map = { - int(k): coco_cls_to_id_map[v] for k, v in yolo_id_to_cls_map.items() - } - - return yolo_id_to_coco_id_map - - -def evaluate_on_coco(onnx_model: str, session, coco_dataset: str, device: str = "cpu"): - onnx_model_path = Path(onnx_model) - print(f"Evaluating model: {onnx_model_path}") - coco_dataset_path = Path(coco_dataset) - anno_file_path = coco_dataset_path / "annotations/instances_val2017.json" - coco, img_ids, coco_id_to_cls_map = load_coco_dataset(anno_file_path) - - session, input_name, yolo_id_to_cls_map = load_onnx_model(session, device) - - yolo_id_to_coco_id_map = calc_yolo_id_to_coco_map( - yolo_id_to_cls_map, coco_id_to_cls_map - ) - - coco_val2017_images_folder = coco_dataset_path / "images/val2017" - - nms_iou_thresh = 0.5 - - output_root = ( - PROJECT_DIR - / 
f"runs/onnx-predict/{onnx_model_path.stem}-{anno_file_path.stem}-iou={nms_iou_thresh:.2f}" - ) - output_root.mkdir(exist_ok=True, parents=True) - - _, _, img_w, img_h = session.get_inputs()[0].shape - - detections = evaluate_model( - session, - input_name, - coco, - coco_val2017_images_folder, - img_ids, - yolo_id_to_coco_id_map, - coco_id_to_cls_map, - input_size=(img_w, img_h), - min_score_thres=0.25, - nms_iou_thresh=nms_iou_thresh, - output_root=output_root, - # num_max_images=100, - ) - - if not detections: - print('Model did not generate any predictions. Unable to evaluate Model accuracy on COCO dataset') - sys.exit(1) - - pred_json_save_path = output_root / "pred.json" - save_detections(detections, pred_json_save_path) - print(f"detections saved to: {pred_json_save_path}") - - coco_eval_save_path = output_root / "coco-metrics.json" - mAP, mAP50, mAP75 = evaluate_coco(coco, pred_json_save_path, PROJECT_DIR / coco_eval_save_path) - - return mAP, mAP50, mAP75 - diff --git a/CNN-examples/object_detection/yolov8m/vaiml_config.json b/CNN-examples/object_detection/yolov8m/vaiml_config.json deleted file mode 100644 index 06759f0a..00000000 --- a/CNN-examples/object_detection/yolov8m/vaiml_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "optimize_level": 1, - "logging_level": "info" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} diff --git a/CNN-examples/object_detection/yolov8m/yolov8m_XINT8_full_dataset_log.txt b/CNN-examples/object_detection/yolov8m/yolov8m_XINT8_full_dataset_log.txt deleted file mode 100644 index 6fc8ed48..00000000 --- a/CNN-examples/object_detection/yolov8m/yolov8m_XINT8_full_dataset_log.txt +++ /dev/null @@ -1,116 +0,0 @@ -Evaluating model: 
C:\Users\dwchenna\github\dwchenna\RyzenAI-SW\tutorial\object_detection\models\yolov8m_XINT8.onnx -loading annotations into memory... -Done (t=0.53s) -creating index... -index created! -{'stride': '32', 'onnx.infer': 'onnxruntime.quant', 'date': '2025-05-21T23:12:53.672766', 'description': 'Ultralytics YOLOv8m model trained on coco.yaml', 'author': 'Ultralytics', 'version': '8.3.115', 'license': 'AGPL-3.0 License (https://ultralytics.com/license)', 'docs': 'https://docs.ultralytics.com', 'task': 'detect', 'batch': '1', 'imgsz': '[640, 640]', 'names': "{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}", 'args': "{'batch': 1, 'half': False, 'dynamic': False, 'simplify': True, 'opset': 17, 'nms': False}", 'channels': '3'} -detectoins saved to: 
C:\Users\dwchenna\github\dwchenna\RyzenAI-SW\tutorial\object_detection\runs\onnx-predict\yolov8m_XINT8-instances_val2017-iou=0.50\pred.json -Loading and preparing results... -DONE (t=0.28s) -creating index... -index created! -Running per image evaluation... -Evaluate annotation type *bbox* -DONE (t=8.97s). -Accumulating evaluation results... -DONE (t=1.63s). - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.382 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.523 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.418 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.223 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.435 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.499 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.308 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.447 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.455 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.262 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.512 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.596 - -Per-category AP (IoU=0.5:0.95, area=all): -person AP: 0.532 -bicycle AP: 0.313 -car AP: 0.406 -motorcycle AP: 0.410 -airplane AP: 0.665 -bus AP: 0.629 -train AP: 0.639 -truck AP: 0.314 -boat AP: 0.211 -traffic light AP: 0.234 -fire hydrant AP: 0.663 -stop sign AP: 0.627 -parking meter AP: 0.413 -bench AP: 0.252 -bird AP: 0.324 -cat AP: 0.605 -dog AP: 0.557 -horse AP: 0.597 -sheep AP: 0.506 -cow AP: 0.530 -elephant AP: 0.604 -bear AP: 0.560 -zebra AP: 0.653 -giraffe AP: 0.720 -backpack AP: 0.112 -umbrella AP: 0.387 -handbag AP: 0.125 -tie AP: 0.308 -suitcase AP: 0.325 -frisbee AP: 0.613 -skis AP: 0.206 -snowboard AP: 0.326 -sports ball AP: 0.397 -kite AP: 0.361 -baseball bat AP: 0.321 -baseball glove AP: 
0.345 -skateboard AP: 0.542 -surfboard AP: 0.333 -tennis racket AP: 0.518 -bottle AP: 0.342 -wine glass AP: 0.316 -cup AP: 0.395 -fork AP: 0.329 -knife AP: 0.170 -spoon AP: 0.145 -bowl AP: 0.366 -banana AP: 0.200 -apple AP: 0.126 -sandwich AP: 0.247 -orange AP: 0.244 -broccoli AP: 0.151 -carrot AP: 0.156 -hot dog AP: 0.276 -pizza AP: 0.518 -donut AP: 0.394 -cake AP: 0.319 -chair AP: 0.282 -couch AP: 0.351 -potted plant AP: 0.255 -bed AP: 0.309 -dining table AP: 0.224 -toilet AP: 0.570 -tv AP: 0.508 -laptop AP: 0.609 -mouse AP: 0.592 -remote AP: 0.255 -keyboard AP: 0.492 -cell phone AP: 0.287 -microwave AP: 0.559 -oven AP: 0.334 -toaster AP: 0.331 -sink AP: 0.362 -refrigerator AP: 0.539 -book AP: 0.093 -clock AP: 0.481 -vase AP: 0.329 -scissors AP: 0.242 -teddy bear AP: 0.410 -hair drier AP: 0.040 -toothbrush AP: 0.259 - -Main COCO Metrics: -mAP (AP@[IoU=0.50:0.95]): 38.2 -mAP50 (AP@IoU=0.50) : 52.3 -mAP75 (AP@IoU=0.75) : 41.8 -COCO evaluation results saved to: C:\Users\dwchenna\github\dwchenna\RyzenAI-SW\tutorial\object_detection\runs\onnx-predict\yolov8m_XINT8-instances_val2017-iou=0.50\coco-metrics.json diff --git a/CNN-examples/object_detection/yolov8s-worldv2/download.bat b/CNN-examples/object_detection/yolov8s-worldv2/download.bat deleted file mode 100644 index 1e3570cc..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/download.bat +++ /dev/null @@ -1 +0,0 @@ -curl -L -o models/yolov8s-worldv2.pt https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8s-worldv2.pt \ No newline at end of file diff --git a/CNN-examples/object_detection/yolov8s-worldv2/envs.py b/CNN-examples/object_detection/yolov8s-worldv2/envs.py deleted file mode 100644 index d22ab272..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/envs.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. 
-import sys -from pathlib import Path - -PROJECT_DIR = Path(__file__).parent -MODELS_DIR = PROJECT_DIR / "models" -DATA_DIR = PROJECT_DIR / "images" - -COCO_DATA_ROOT = Path("C:\\Users\\Administrator\\Desktop\\max\\datasets\\COCO") - -sys.path.append(PROJECT_DIR.as_posix()) diff --git a/CNN-examples/object_detection/yolov8s-worldv2/eval_on_coco.py b/CNN-examples/object_detection/yolov8s-worldv2/eval_on_coco.py deleted file mode 100644 index dc25b109..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/eval_on_coco.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. - -import json -from pathlib import Path -import argparse - -import sys -import cv2 -import numpy as np -import onnxruntime as ort -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from tqdm import tqdm - -import envs as ENVS -import json - -def load_onnx_model(model_path: str, providers): - session = ort.InferenceSession(model_path, providers=providers) - - input_tensor = session.get_inputs()[0] - input_name = input_tensor.name - input_shape = input_tensor.shape # BCHW - in_h, in_w = int(input_shape[2]), int(input_shape[3]) - - custom_meta_map: dict = session.get_modelmeta().custom_metadata_map - - id_to_cls_json_str = custom_meta_map.get("id_to_cls", None) - if id_to_cls_json_str is not None: - id_to_cls_map = json.loads(id_to_cls_json_str) - else: - id_to_cls_map = None - - assert id_to_cls_map is not None - - return session, input_name, (in_w,in_h), id_to_cls_map - - -def load_coco_dataset(annotations_path): - coco = COCO(annotations_path) - img_ids = coco.getImgIds() - - cats = coco.loadCats(coco.getCatIds()) - id_to_name = {cat["id"]: cat["name"] for cat in cats} - - return coco, img_ids, id_to_name - - -def preprocess_image(img: np.ndarray, input_size_wh, bgr2rgb=False): - # cv2.imwrite("runs/raw_img.png", img) - - img_height, img_width = img.shape[:2] - scale = 
min(input_size_wh[0] / img_width, input_size_wh[1] / img_height) - new_size = int(img_width * scale), int(img_height * scale) - img_resized = cv2.resize(img, new_size) - - top = (input_size_wh[1] - new_size[1]) // 2 - bottom = (input_size_wh[1] - new_size[1]) - top - left = (input_size_wh[0] - new_size[0]) // 2 - right = (input_size_wh[0] - new_size[0]) - left - - img_resized = cv2.copyMakeBorder( - img_resized, - top, - bottom, - left, - right, - borderType=cv2.BORDER_CONSTANT, - value=(0, 0, 0), - ) - - # cv2.imwrite("runs/resized_and_padded.png", img_resized) - - if bgr2rgb: - img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB) - - img_resized = np.float32(img_resized) / 255.0 - img_resized = img_resized.transpose(2, 0, 1) # hwc --> chw - img_resized = np.expand_dims(img_resized, axis=0) # chw --> 1chw - - # print(f"pad top {top}, left {left}, scale {scale}") - - return img_resized, (top, left), scale - - -def postprocess_output( - output: np.ndarray, - pad_top_left: tuple, - scale: float, - yolo_id_to_coco_id_map: dict, - min_score_thres: float, - nms_iou_thres: float, - img_width: int, - img_height: int, -): - # output shape: (xyxy + num-cls, num-boxes) - - # shape: (num-boxes, cxcywh + num-cls) - output = np.transpose(output, (1, 0)) - - cxcywh_nx4 = output[:, :4] # shape: (num-boxes, 4) - # restore boxes - cxcywh_nx4[:, 0] -= pad_top_left[1] # minus pad left from cx - cxcywh_nx4[:, 1] -= pad_top_left[0] # minus pad top from cy - cxcywh_nx4 /= scale # restore to original image scale - cx, cy, w, h = ( - cxcywh_nx4[:, 0], - cxcywh_nx4[:, 1], - cxcywh_nx4[:, 2], - cxcywh_nx4[:, 3], - ) - x0 = cx - w / 2.0 - y0 = cy - h / 2.0 - - class_scores_nxc = output[:, 4:] # shape: (num-boxes, num-cls) - scores = np.amax(class_scores_nxc, axis=1) # shape: (num-boxes,) - class_indices = np.argmax(class_scores_nxc, axis=1) - - # Stack boxes into list of [left, top, width, height] - boxes_xywh_nx4: np.ndarray = np.stack(arrays=[x0, y0, w, h], axis=1) - indices = 
cv2.dnn.NMSBoxes(boxes_xywh_nx4, scores, min_score_thres, nms_iou_thres) - - detections = [] - for i in indices: - cls_id = class_indices[i] - score = class_scores_nxc[i, cls_id] - if score >= min_score_thres: - detections.append( - { - "category_id": yolo_id_to_coco_id_map[int(cls_id)], - "bbox": tuple(round(float(x), 4) for x in boxes_xywh_nx4[i]), - "score": round(float(score), 4), - } - ) - - # keep map 100 - detections = sorted(detections, key=lambda d: d["score"], reverse=True) - if len(detections) > 100: - detections = detections[:100] - - return detections - - -def draw_detections( - canvas: np.ndarray, - img_detections: list, - id_to_name: dict = None, - save_path: Path = None, -): - for pred in img_detections: - score = pred["score"] - if score < 0.25: - continue - cls_id = pred["category_id"] - x, y, w, h = np.asarray(pred["bbox"], int) - cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), thickness=1) - - if id_to_name is not None: - text = f"{id_to_name[cls_id]}: {score * 100:.1f}%" - else: - text = f"{cls_id}: {score * 100:.1f}%" - - cv2.putText( - canvas, - text=text, - org=(x, max(y - 10, 0)), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=0.75, - color=(255, 0, 255), - thickness=1, - lineType=cv2.LINE_AA, - ) - - save_path = str(save_path or "runs/debug_postprocess.png") - cv2.imwrite(save_path, canvas) - - -def evaluate_model( - session: ort.InferenceSession, - input_name: str, - coco: COCO, - images_folder: Path, - img_ids: list, - yolo_id_to_coco_id_map: dict, - coco_id_to_cls_map: dict, - input_size_wh, - min_score_thres=0.001, - nms_iou_thresh=0.5, - num_max_images=None, - output_root: Path = None, -): - images_folder = Path(images_folder) - assert images_folder.is_dir() - - if num_max_images is not None: - img_ids = img_ids[:num_max_images] - - detections = [] - demo_saved = False - for img_id in tqdm(img_ids): - img_info = coco.loadImgs(img_id)[0] - img_path = images_folder / img_info["file_name"] - img: np.ndarray = 
cv2.imread(img_path, cv2.IMREAD_COLOR) - img_height, img_width = img.shape[:2] - - img_resized, pad_top_left, scale = preprocess_image( - img, input_size_wh, bgr2rgb=True - ) - - # outputs shape: (bs=1, xyxy + num-cls, num-boxes) - # outputs = session.run( - # output_names=["bbox_output", "cls_output"], - # input_feed={input_name: img_resized}, - # ) - # outputs = np.concat(outputs, axis=1) - outputs = session.run(output_names=None, input_feed={input_name: img_resized}) - outputs = outputs[0] - - img_detections = postprocess_output( - outputs[0], - pad_top_left, - scale, - yolo_id_to_coco_id_map, - min_score_thres, - nms_iou_thresh, - img_width, - img_height, - ) - - if not demo_saved: - save_path = None - if output_root is not None: - output_root = Path(output_root) - output_root.mkdir(parents=True, exist_ok=True) - save_path = output_root / f"predict_of_{img_id}.png" - - draw_detections(img.copy(), img_detections, coco_id_to_cls_map, save_path) - demo_saved = True - - for det in img_detections: - det["image_id"] = img_id - - detections.extend(img_detections) - - return detections - - -def save_detections(detections, output_path="detections.json"): - with open(output_path, "w") as f: - json.dump(detections, f, indent=2) - - -def save_coco_eval_results( - coco_eval: COCOeval, save_path: str = "coco_eval_results.json" -): - # Extract summary metrics (12 values: mAP, AR, etc.) 
- summary = { - "mAP": coco_eval.stats[0], - "mAP50": coco_eval.stats[1], - "mAP75": coco_eval.stats[2], - "mAP_small": coco_eval.stats[3], - "mAP_medium": coco_eval.stats[4], - "mAP_large": coco_eval.stats[5], - "AR@1": coco_eval.stats[6], - "AR@10": coco_eval.stats[7], - "AR@100": coco_eval.stats[8], - "AR_small": coco_eval.stats[9], - "AR_medium": coco_eval.stats[10], - "AR_large": coco_eval.stats[11], - } - - with open(save_path, "w") as f: - json.dump(summary, f, indent=2) - - print(f"COCO evaluation results saved to: {save_path}") - - -def evaluate_coco(coco_gt: COCO, detections_path: str, results_save_path: str): - coco_dt = coco_gt.loadRes(str(detections_path)) - coco_eval = COCOeval(coco_gt, coco_dt, "bbox") - - coco_eval.evaluate() - coco_eval.accumulate() - - # Print overall evaluation summary - coco_eval.summarize() - - # Extract per-category AP (IoU=0.5:0.95, area=all) - cat_ids = coco_gt.getCatIds() - cat_id_to_name = {cat["id"]: cat["name"] for cat in coco_gt.loadCats(cat_ids)} - - # precision shape: [IoU thresholds, Recall thresholds, Categories, Area range, MaxDets] - prec = coco_eval.eval["precision"] - - print("\nPer-category AP (IoU=0.5:0.95, area=all):") - for idx, cat_id in enumerate(cat_ids): - # Select metrics: IoU=0.5:0.95, area=all (index 0), maxDet=100 (index 2) - precision = prec[:, :, idx, 0, 2] - precision = precision[precision > -1] - ap = np.mean(precision) if precision.size else float("nan") - print(f"{cat_id_to_name[cat_id]:<20} AP: {ap:.3f}") - - mAP = coco_eval.stats[0] * 100 - mAP50 = coco_eval.stats[1] * 100 - mAP75 = coco_eval.stats[2] * 100 - - print("\nMain COCO Metrics:") - print(f"mAP (AP@[IoU=0.50:0.95]): {mAP:.1f}") - print(f"mAP50 (AP@IoU=0.50) : {mAP50:.1f}") - print(f"mAP75 (AP@IoU=0.75) : {mAP75:.1f}") - - save_coco_eval_results(coco_eval, results_save_path) - - -def calc_yolo_id_to_coco_map(yolo_id_to_cls_map: dict, coco_id_to_cls_map: dict): - yolo_cls_names = sorted(yolo_id_to_cls_map.values()) - coco_cls_names = 
sorted(coco_id_to_cls_map.values()) - - assert yolo_cls_names == coco_cls_names - - coco_cls_to_id_map = {v: k for k, v in coco_id_to_cls_map.items()} - - yolo_id_to_coco_id_map = { - int(k): coco_cls_to_id_map[v] for k, v in yolo_id_to_cls_map.items() - } - - return yolo_id_to_coco_id_map - -def eval_by_coco_tools(args): - if args.model is not None: - onnx_model_path = Path(args.model) - assert onnx_model_path.is_file(), f"Model not found: {onnx_model_path}" - else: - onnx_model_path = ENVS.MODELS_DIR / "yolov8s-world.onnx" - - if args.device == "npu": - providers = ["VitisAIExecutionProvider"] - elif args.device == "gpu": - providers = ["CUDAExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - - print(f"Evaluating model: {onnx_model_path}") - print(f"Using device: {args.device} -> providers={providers}") - - anno_file_path = ENVS.COCO_DATA_ROOT / "annotations/instances_val2017.json" - coco, img_ids, coco_id_to_cls_map = load_coco_dataset(anno_file_path) - - session, input_name, input_size_wh, yolo_id_to_cls_map = load_onnx_model( - onnx_model_path, providers - ) - - yolo_id_to_coco_id_map = calc_yolo_id_to_coco_map( - yolo_id_to_cls_map, - coco_id_to_cls_map - ) - - coco_val2017_images_folder = ENVS.COCO_DATA_ROOT / "val2017" - nms_iou_thresh = 0.75 - - output_root = ( - ENVS.PROJECT_DIR - / f"runs/onnx-predict/{onnx_model_path.stem}-{anno_file_path.stem}-iou={nms_iou_thresh:.2f}" - ) - output_root.mkdir(exist_ok=True, parents=True) - - detections = evaluate_model( - session, - input_name, - coco, - coco_val2017_images_folder, - img_ids, - yolo_id_to_coco_id_map, - coco_id_to_cls_map, - input_size_wh=input_size_wh, - min_score_thres=0.001, - nms_iou_thresh=nms_iou_thresh, - output_root=output_root, - # num_max_images=10, - ) - - pred_json_save_path = output_root / "pred.json" - save_detections(detections, pred_json_save_path) - print(f"detectoins saved to: {pred_json_save_path}") - - coco_eval_save_path = output_root / "coco-metrics.json" - 
evaluate_coco(coco, pred_json_save_path, ENVS.PROJECT_DIR / coco_eval_save_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model", - type=str, - default=None, - help="Path to ONNX model (default: yolov8s-world.onnx in ENVS.MODELS_DIR)" - ) - - parser.add_argument( - "--device", - type=str, - default="cpu", - choices=["cpu", "gpu", "npu"], - help="Inference device backend" - ) - - args = parser.parse_args() - eval_by_coco_tools(args) - diff --git a/CNN-examples/object_detection/yolov8s-worldv2/images/test.jpg b/CNN-examples/object_detection/yolov8s-worldv2/images/test.jpg deleted file mode 100644 index 19023f71..00000000 Binary files a/CNN-examples/object_detection/yolov8s-worldv2/images/test.jpg and /dev/null differ diff --git a/CNN-examples/object_detection/yolov8s-worldv2/infer_single.py b/CNN-examples/object_detection/yolov8s-worldv2/infer_single.py deleted file mode 100644 index 6c028818..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/infer_single.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. 
-import argparse -import time -from pathlib import Path -import cv2 -import numpy as np -import onnxruntime as ort - -from pycocotools.coco import COCO -import envs as ENVS - -from eval_on_coco import ( - load_onnx_model, - preprocess_image, - postprocess_output, -) - -def infer_single_image( - model_path: str, - image_path: str, - providers, - runtime_seconds: int = 0, -): - print("providers ",providers) - session, input_name, input_size_wh, yolo_id_to_cls_map = load_onnx_model( - model_path, providers - ) - - anno_file_path = ENVS.COCO_DATA_ROOT / "annotations/instances_val2017.json" - coco = COCO(anno_file_path) - cats = coco.loadCats(coco.getCatIds()) - coco_id_to_cls_map = {cat["id"]: cat["name"] for cat in cats} - - yolo_id_to_coco_id_map = { - int(k): coco_id for k, coco_id in enumerate(sorted(coco_id_to_cls_map.keys())) - } - - img_path = Path(image_path) - assert img_path.is_file(), f"Image not found: {img_path}" - - img: np.ndarray = cv2.imread(str(img_path), cv2.IMREAD_COLOR) - img_h, img_w = img.shape[:2] - - img_resized, pad_top_left, scale = preprocess_image( - img, input_size_wh, bgr2rgb=True - ) - - latencies = [] - if runtime_seconds > 0: - print(f"\n=== Running {runtime_seconds}-second performance benchmark ===") - end_time = time.time() + runtime_seconds - while time.time() < end_time: - t0 = time.time() - _ = session.run(None, {input_name: img_resized}) - t1 = time.time() - latencies.append(t1 - t0) - - outputs = session.run(None, {input_name: img_resized}) - outputs = outputs[0] - - detections = postprocess_output( - outputs[0], - pad_top_left, - scale, - yolo_id_to_coco_id_map, - min_score_thres=0.25, - nms_iou_thres=0.5, - img_width=img_w, - img_height=img_h, - ) - - print("\n=== Detection Results ===") - if len(detections) == 0: - print("No objects detected.") - else: - for d in detections: - coco_id = d["category_id"] - cls_name = coco_id_to_cls_map.get(coco_id, "unknown") - print( - f"[{cls_name}] score={d['score']:.3f}, bbox={d['bbox']}" 
- ) - - avg_latency, fps = None, None - if latencies: - latencies = np.array(latencies) - avg_latency = latencies.mean() * 1000 - fps = 1.0 / latencies.mean() - print("\n=== Performance Results (E2E) ===") - print(f"AVG Latency per inference: {avg_latency:.2f} ms") - print(f"Throughput (FPS): {fps:.2f}") - - return detections, avg_latency, fps - -def parse_args(): - parser = argparse.ArgumentParser( - description="YOLOWorld single-image inference + optional performance benchmark" - ) - parser.add_argument( - "--model", - type=str, - required=True, - help="Path to yoloworld ONNX model", - ) - parser.add_argument( - "--image", - type=str, - required=True, - help="Path to image to test", - ) - parser.add_argument( - "--device", - type=str, - default="cpu", - help="Device to run inference: cpu/gpu/npu", - ) - parser.add_argument( - "--runtime-seconds", - type=int, - default=0, - help="Time (seconds) for E2E performance benchmark, 0 to skip", - ) - return parser.parse_args() - -if __name__ == "__main__": - args = parse_args() - if args.device == "npu": - providers = ["VitisAIExecutionProvider"] - elif args.device == "gpu": - providers = ["CUDAExecutionProvider"] - else: - providers = ["CPUExecutionProvider"] - - #print(f"[INFO] Using device={args.device}, providers={providers}") - - detections, latency, fps = infer_single_image( - model_path=args.model, - image_path=args.image, - providers=providers, - runtime_seconds=args.runtime_seconds, - ) - - print("\n=== Finished ===") \ No newline at end of file diff --git a/CNN-examples/object_detection/yolov8s-worldv2/quark_quant.py b/CNN-examples/object_detection/yolov8s-worldv2/quark_quant.py deleted file mode 100644 index 4b7ba55f..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/quark_quant.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. 
-# refer https://quark.docs.amd.com/latest/onnx/basic_usage_onnx.html - -import argparse -import copy -import logging -from pathlib import Path - -import cv2 -import onnx -from onnxruntime.quantization import CalibrationDataReader -from quark.onnx import ModelQuantizer -from quark.onnx.quantization.config import custom_config as qcc -from quark.onnx.quantization.config.config import Config - -import envs as ENVS -from eval_on_coco import preprocess_image -from utils import vis_check_onnx - -COCO_TEST_IMAGES_DIR = ENVS.COCO_DATA_ROOT / "test2017" - - -def setup_logging(): - import sys - - logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s] [%(levelname)s] [%(name)s]: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - handlers=[ - logging.StreamHandler(sys.stdout), - logging.FileHandler("quark_quant.log", mode="a", encoding="utf-8"), - ], - ) - - -setup_logging() - - -def find_exclude_subgraphs_and_nodes(model_path): - model = onnx.load(model_path) - - concat_nodes = [n for n in model.graph.node if n.op_type == "Concat"] - reshape_nodes = [n for n in model.graph.node if n.op_type == "Reshape"] - - def get_successor_nodes(target_node: onnx.NodeProto): - target_outputs = set(target_node.output) - - successor_nodes = [] - for node in model.graph.node: - for input_name in node.input: - if input_name in target_outputs: - successor_nodes.append(node.name) - - return successor_nodes - - def get_predecessor_nodes(target_node: onnx.NodeProto): - target_inputs = set(target_node.input) - - predecessor_nodes = [] - for node in model.graph.node: - for output_name in node.output: - if output_name in target_inputs: - predecessor_nodes.append(node.name) - - return predecessor_nodes - - # get bbox post-process subgraph start-nodes and end-nodes - # It started from the last Reshape node, ended before second-to-last Concat node - last_reshape = reshape_nodes[-1] # before start - bbox_postprocess_start_nodes = get_successor_nodes(last_reshape) - - second_to_last_concat = 
concat_nodes[-2] # after end - bbox_postprocess_end_nodes = get_predecessor_nodes(second_to_last_concat) - - exclude_subgraphs = [(bbox_postprocess_start_nodes, bbox_postprocess_end_nodes)] - - last_concat = concat_nodes[-1] - exclude_nodes = [last_concat.name] - - return exclude_subgraphs, exclude_nodes - - -def find_postprocess_subgraph(model_path): - """The post-process subgraph started at 3rd-to-last Concat node, ended at output node""" - model = onnx.load(model_path) - - concat_nodes = [n for n in model.graph.node if n.op_type == "Concat"] - - post_process_start_node_names = [concat_nodes[-3].name] - post_process_end_node_names = [concat_nodes[-1].name] - - exclude_subgraphs = [ - (post_process_start_node_names, post_process_end_node_names), - ] - - return exclude_subgraphs - - -def find_last_concat(model_path): - model = onnx.load(model_path) - - concat_nodes = [n for n in model.graph.node if n.op_type == "Concat"] - - return [concat_nodes[-1].name] - - -def get_model_input(input_model_path: str) -> str: - model = onnx.load(input_model_path) - - input_tensor = model.graph.input[0] - - input_name = input_tensor.name - shape_dims = input_tensor.type.tensor_type.shape.dim - - height, width = None, None - if len(shape_dims) >= 3: - # NCHW - height_dim = shape_dims[-2] - width_dim = shape_dims[-1] - - height = height_dim.dim_value if height_dim.dim_value > 0 else None - width = width_dim.dim_value if width_dim.dim_value > 0 else None - - return input_name, (height, width) - - -class ImageDataReader(CalibrationDataReader): - def __init__(self, images_paths: list, input_name: str, input_size_hw: int): - self._enum_data = None - self._input_name = input_name - self._image_1chw_list = self._prepare_images(images_paths, input_size_hw) - - def _prepare_images(self, images_paths: list, input_size_hw): - in_h, in_w = input_size_hw - images_1chw_list = [] - for img_path in images_paths: - img = cv2.imread(str(img_path), cv2.IMREAD_COLOR) - - if img is None: - continue - - 
img_1chw, *_ = preprocess_image( - img, input_size_wh=(in_w, in_h), bgr2rgb=True - ) - assert img_1chw.shape == (1, 3, in_h, in_w) - - images_1chw_list.append(img_1chw) - - print(f"Load {len(images_1chw_list)} calibration images!") - - return images_1chw_list - - def get_next(self): - if self._enum_data is None: - self._enum_data = iter( - [{self._input_name: data} for data in self._image_1chw_list] - ) - return next(self._enum_data, None) - - def rewind(self): - self._enum_data = None - - -def get_calib_images_path(num_calib_images: int): - """Sample images from coco-test2017 as calibration images.""" - - all_images_path = sorted(COCO_TEST_IMAGES_DIR.glob("*.jpg")) - - sampled_images_path = all_images_path[:num_calib_images] - - return sampled_images_path - - -def quant( - input_onnx_model_path: Path, - quant_cfg_name: str, - num_calib_images: int, - exclude_post: bool, - lr: float = 0.1, - iters: int = 3000, -): - print( - f"Quanting model: {input_onnx_model_path} with " - f"cfg: {quant_cfg_name}, {num_calib_images} calib images " - f"lr: {lr}, iters: {iters}" - ) - - if exclude_post: - exclude_last_concat = None - exclude_post_subgraph = find_postprocess_subgraph(input_onnx_model_path) - print(f"Excluding {len(exclude_post_subgraph)} subgraphs!") - for i, subgraph in enumerate(exclude_post_subgraph): - print(f"ignore subgraph {i}: {subgraph}") - else: - exclude_post_subgraph = None - exclude_last_concat = find_last_concat(input_onnx_model_path) - print(f"Excluding last concat node: {exclude_last_concat}") - - model_input_name, model_input_shape_hw = get_model_input(input_onnx_model_path) - calib_images_paths = get_calib_images_path(num_calib_images) - - calib_data_reader = ImageDataReader( - calib_images_paths, model_input_name, model_input_shape_hw - ) - - quant_config: qcc.QuantizationConfig = copy.deepcopy( - qcc.get_default_config(quant_cfg_name) - ) - - if exclude_post_subgraph is not None: - quant_config.subgraphs_to_exclude = exclude_post_subgraph - - if 
exclude_last_concat is not None: - quant_config.nodes_to_exclude = exclude_last_concat - - quant_config.execution_providers = ["CPUExecutionProvider"] - quant_config.extra_op_types_to_quantize = ["Einsum", "ReduceMax"] - - extra_params = None - if "ADAROUND" in quant_cfg_name: - extra_params = { - "LearningRate": lr, - "NumIterations": iters, - "OptimDevice": "cuda:0", - "InferDevice": "cuda:0", - "BatchSize": 4, - } - - if "ADAQUANT" in quant_cfg_name: - extra_params = { - "LearningRate": lr, - "NumIterations": iters, - "OptimDevice": "cuda:0", - "InferDevice": "cuda:0", - "BatchSize": 4, - } - - if extra_params is not None: - print(f"Set special params for {quant_cfg_name}, params: {extra_params}") - ft_dict: dict = quant_config.extra_options["FastFinetune"] - ft_dict.update(extra_params) - - print(f"Quantizing using params: {quant_config}") - - quantization_config = Config(global_quant_config=quant_config) - quantization_config.global_quant_config.log_severity_level = 0 - - post_suffix = "exclude-post" if exclude_post else "exclude-last-concat" - in_h, in_w = model_input_shape_hw - quant_onnx_model_path = input_onnx_model_path.with_stem( - f"{input_onnx_model_path.stem}-{quant_cfg_name}-{in_h}x{in_w}-{post_suffix}" - ) - - quantizer = ModelQuantizer(quantization_config) - quantizer.quantize_model( - input_onnx_model_path.as_posix(), - quant_onnx_model_path.as_posix(), - calib_data_reader, - ) - - print(f"quantize success, saved to: {quant_onnx_model_path}") - - vis_check_onnx(quant_onnx_model_path, in_h) - - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument("--onnx", type=str, required=True) - parser.add_argument( - "--quant", - type=str, - choices=list(qcc.DefaultConfigMapping.keys()), - required=True, - ) - #print(qcc.DefaultConfigMapping.keys()) - parser.add_argument("-exclude-post", action="store_true", default=False) - #parser.add_argument("-adaround", action="store_true", default=False) - - parser.add_argument("--num-calib-images", 
type=int, default=512) - parser.add_argument("--lr", type=float, default=0.1) - parser.add_argument("--iters", type=int, default=3000) - - args = parser.parse_args() - - print(f"Quant input args: {args}") - - quant( - input_onnx_model_path=Path(args.onnx), - quant_cfg_name=args.quant, - num_calib_images=args.num_calib_images, - exclude_post=args.exclude_post, - lr=args.lr, - iters=args.iters, - ) - - -if __name__ == "__main__": - main() diff --git a/CNN-examples/object_detection/yolov8s-worldv2/ultra_yolo_to_onnx.py b/CNN-examples/object_detection/yolov8s-worldv2/ultra_yolo_to_onnx.py deleted file mode 100644 index c5e042e1..00000000 --- a/CNN-examples/object_detection/yolov8s-worldv2/ultra_yolo_to_onnx.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. -import json - -import onnx - -from utils import vis_check_onnx - - -def split_concat_output(model: onnx.ModelProto): - import onnx_graphsurgeon as gs - - # build graph - graph = gs.import_onnx(model) - - # Find the last concat node (assuming it's the final output) - concat_nodes = [n for n in graph.nodes if n.op == "Concat"] - concat_node = concat_nodes[-1] - inputs = concat_node.inputs # Expecting [bbox_pred, class_pred] - - # Remove the concat node - concat_node.outputs[0].outputs.clear() # Remove downstream connection - graph.nodes.remove(concat_node) - - # Set its inputs as new outputs - graph.outputs = inputs - - # Rename outputs for clarity - inputs[0].name = "bbox_output" - inputs[1].name = "cls_output" - - # Save updated model - return gs.export_onnx(graph) - - -def pt_to_onnx(pt_model_name: str, input_size): - import onnx - from ultralytics import YOLOWorld - - yolo_model = YOLOWorld(pt_model_name) - - onnx_model_path = yolo_model.export( - format="onnx", - nms=False, - dynamic=False, - simplify=True, - # default is 640, but will raise an exception if use 640 - imgsz=input_size, - opset=20, - ) - - # append 
id-to-cls map to model - onnx_model = onnx.load(onnx_model_path) - onnx.checker.check_model(onnx_model) - - # onnx_model = split_concat_output(onnx_model) - - id_to_cls_meta = onnx_model.metadata_props.add() - id_to_cls_meta.key = "id_to_cls" - id_to_cls_meta.value = json.dumps(yolo_model.names) - - onnx.save_model(onnx_model, onnx_model_path) - - return onnx_model_path - - -def check_onnx_model(onnx_model_path: str): - import onnx - - model = onnx.load(onnx_model_path) - onnx.checker.check_model(model) - - print("=" * 88) - print("Model inputs:") - for input_tensor in model.graph.input: - name = input_tensor.name - type_info = input_tensor.type.tensor_type - shape = [d.dim_value if (d.dim_value > 0) else "?" for d in type_info.shape.dim] - print(f" - name: {name}, shape: {shape}") - - print("Model outputs:") - for output_tensor in model.graph.output: - name = output_tensor.name - type_info = output_tensor.type.tensor_type - shape = [d.dim_value if (d.dim_value > 0) else "?" for d in type_info.shape.dim] - print(f" - name: {name}, shape: {shape}") - print("=" * 88) - - -def export_pt_to_onnx(pt_model: str, input_size: int): - onnx_model_path = pt_to_onnx(pt_model, input_size) - - check_onnx_model(onnx_model_path) - - vis_check_onnx(onnx_model_path, input_size) - - -def main(): - import argparse - - parser = argparse.ArgumentParser() - - parser.add_argument( - "--pt-model", - required=True, - type=str, - help="Local or remote path to pytorch pt weight", - ) - parser.add_argument( - "--input-size", - required=True, - type=int, - help="Input size of model, eg 320, 640", - ) - - args = parser.parse_args() - - print(f"Exporting model: {args.pt_model} with input size: {args.input_size}") - - export_pt_to_onnx(args.pt_model, args.input_size) - - -if __name__ == "__main__": - main() diff --git a/CNN-examples/object_detection/yolov8s-worldv2/utils.py b/CNN-examples/object_detection/yolov8s-worldv2/utils.py deleted file mode 100644 index 6177506f..00000000 --- 
a/CNN-examples/object_detection/yolov8s-worldv2/utils.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (C) 2023 - 2026 Advanced Micro Devices, Inc. All rights reserved. -# Licensed under the MIT License. -from pathlib import Path - -import cv2 -import numpy as np -from onnxruntime import InferenceSession - -import envs as ENVS - -__all__ = [ - "ONNXDetect", - "vis_check_onnx", -] - - -# from https://github.com/jahongir7174/YOLOv8-onnx/blob/master/main.py -class ONNXDetect: - def __init__( - self, input_size: int, onnx_path, confidence_threshold=0.25, iou_threshold=0.75 - ): - self.session = InferenceSession(onnx_path) - - self.inputs = self.session.get_inputs()[0].name - self.confidence_threshold = confidence_threshold - self.iou_threshold = iou_threshold - self.input_size = input_size - - def __call__(self, image: np.ndarray): - x, pad, gain = self.resize(image, image.shape) - x = x.transpose((2, 0, 1))[::-1] - x = x.astype("float32") / 255 - x = x[np.newaxis, ...] - - # outputs_list = self.session.run( - # output_names=["bbox_output", "cls_output"], input_feed={self.inputs: x} - # ) - # outputs = np.concat(outputs_list, axis=1) - - outputs = self.session.run(output_names=None, input_feed={self.inputs: x})[0] - - outputs = outputs[0].transpose(1, 0) - - outputs[:, 0] -= pad[1] - outputs[:, 1] -= pad[0] - - # Extract class scores (all rows, columns 4 onwards) - class_scores = outputs[:, 4:] # Shape: (8400, num_classes) - - # Find maximum score and corresponding class ID for each detection - max_scores = np.amax(class_scores, axis=1) # Shape: (8400,) - class_indices = np.argmax(class_scores, axis=1) # Shape: (8400,) - - # Filter detections based on confidence threshold - mask = max_scores >= self.confidence_threshold - if not np.any(mask): - return [] - - # Apply mask to filter valid detections - outputs = outputs[mask] # Shape: (N, 4 + num_classes) - scores = max_scores[mask] # Shape: (N,) - class_indices = class_indices[mask] # Shape: (N,) - - # Extract bounding 
box coordinates (cx, cy, w, h) - cx, cy, w, h = outputs[:, 0], outputs[:, 1], outputs[:, 2], outputs[:, 3] - - # Calculate scaled bounding box coordinates - left = ((cx - w / 2) / gain).astype(int) - top = ((cy - h / 2) / gain).astype(int) - width = (w / gain).astype(int) - height = (h / gain).astype(int) - - # Stack boxes into list of [left, top, width, height] - boxes = np.stack(arrays=[left, top, width, height], axis=1).tolist() - scores = scores.tolist() - class_indices = class_indices.tolist() - - # Apply non-maximum suppression to filter out overlapping bounding boxes - indices = cv2.dnn.NMSBoxes( - boxes, scores, self.confidence_threshold, self.iou_threshold - ) - - # Iterate over the selected indices after non-maximum suppression - nms_outputs = [] - for i in indices: - # Get the box, score, and class ID corresponding to the index - box = boxes[i] - score = scores[i] - class_id = class_indices[i] - nms_outputs.append([*box, score, class_id]) - return nms_outputs - - def resize(self, image: np.ndarray, shape): - r = min(self.input_size / shape[0], self.input_size / shape[1]) - - # Compute padding - pad = int(round(shape[1] * r)), int(round(shape[0] * r)) - w = (self.input_size - pad[0]) / 2 # w padding - h = (self.input_size - pad[1]) / 2 # h padding - - if shape[::-1] != pad: - image = cv2.resize(image, pad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(h - 0.1)), int(round(h + 0.1)) - left, right = int(round(w - 0.1)), int(round(w + 0.1)) - image: np.ndarray = cv2.copyMakeBorder( - image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0) - ) - return ( - image, - (top, left), - min(self.input_size / shape[0], self.input_size / shape[1]), - ) - - -def vis_check_onnx(onnx_model_path: str, input_size: int): - image_path = ENVS.DATA_DIR / "test.jpg" - - image = cv2.imread(image_path.as_posix(), cv2.IMREAD_COLOR) - canvas = image.copy() - model = ONNXDetect(input_size=input_size, onnx_path=onnx_model_path) - outputs = model(image) - for 
output in outputs: - x, y, w, h, score, index = output - cv2.rectangle( - canvas, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2 - ) - - cv2.putText( - canvas, - text=f"{index}: {score * 100:.1f}%", - org=(x, max(y - 10, 0)), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=1.25, - color=(255, 0, 255), - thickness=2, - lineType=cv2.LINE_AA, - ) - - output_dir = ENVS.PROJECT_DIR / "runs/onnx-predict" - output_dir.mkdir(parents=True, exist_ok=True) - - model_name = Path(onnx_model_path).stem - img_path = output_dir / f"{model_name}_output.png" - cv2.imwrite(img_path.as_posix(), canvas) - print(f"visualize output write to {img_path}") diff --git a/CNN-examples/quark_quantization/advanced_quark_quantize.py b/CNN-examples/quark_quantization/advanced_quark_quantize.py deleted file mode 100644 index 60630c75..00000000 --- a/CNN-examples/quark_quantization/advanced_quark_quantize.py +++ /dev/null @@ -1,90 +0,0 @@ -import os -import argparse -import onnxruntime -from quark.onnx import ModelQuantizer -from quark.onnx.quantization.config import Config, get_default_config -from quark.onnx.quantization.config.config import QuantizationConfig -from onnxruntime.quantization.calibrate import CalibrationMethod -from onnxruntime.quantization.quant_utils import QuantType, QuantFormat -from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType -from quark.onnx.quant_utils import PowerOfTwoMethod, VitisQuantType, VitisQuantFormat -from utils import top1_accu, ImageDataReader, evaluate_onnx_model - -def main(args): - # Setup the Input model - input_model_path = args.model_input - output_model_path = args.model_output - calibration_dataset_path = args.calib_data - - # Select quantization configuration based on arguments - if args.adaround: - quant_config = get_default_config('XINT8_ADAROUND') - elif args.adaquant: - quant_config = get_default_config('XINT8_ADAQUANT') - elif args.fast_finetune: - quant_type = 'XINT8' - quant_config = 
QuantizationConfig(calibrate_method=PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_fast_ft=True, - extra_options={'ActivationSymmetric': True}) - elif args.cross_layer_equalization: - quant_type = 'XINT8' - quant_config = QuantizationConfig(calibrate_method=PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_cle=True, - extra_options={ - 'ActivationSymmetric': True}) - else: - quant_type = 'XINT8' - quant_config = get_default_config("XINT8") - - # Defines the quantization configuration for the whole model - config = Config(global_quant_config=quant_config) - print("The configuration of the quantization is {}".format(config)) - - # Define the calibration data reader - num_calib_data = 100 - calibration_dataset = ImageDataReader(calibration_dataset_path, input_model_path, data_size=num_calib_data, batch_size=1) - - # Create an ONNX Quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model - quant_model = quantizer.quantize_model(model_input=input_model_path, - model_output=output_model_path, - calibration_data_reader=calibration_dataset) - - print("Model Size:") - print("Float32 model size: {:.2f} MB".format(os.path.getsize(input_model_path)/(1024 * 1024))) - print("{} quantized model size: {:.2f} MB".format(quant_type, os.path.getsize(output_model_path)/(1024 * 1024))) - - # Evaluate the model - print("Model Accuracy:") - top1_acc, top5_acc = evaluate_onnx_model(input_model_path, imagenet_data_path=calibration_dataset_path) - print("Float32 model accuracy: Top1 {:.3f}, Top5 {:.3f} ".format(top1_acc, top5_acc)) - top1_acc, top5_acc = evaluate_onnx_model(output_model_path, imagenet_data_path=calibration_dataset_path) - print("{} quantized model accuracy: Top1 {:.3f}, Top5 {:.3f} ".format(quant_type, top1_acc, top5_acc)) - top1_acc, top5_acc = evaluate_onnx_model(output_model_path, 
imagenet_data_path=calibration_dataset_path, device='npu') - print("{} quantized model accuracy (NPU): Top1 {:.3f}, Top5 {:.3f} ".format(quant_type, top1_acc, top5_acc)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Quantize and evaluate ONNX models.") - parser.add_argument('--model_input', type=str, default='models/mobilenetv2.onnx', help='Path to the input ONNX model.') - parser.add_argument('--model_output', type=str, default='models/mobilenetv2_quant.onnx', help='Path to save the quantized ONNX model.') - parser.add_argument('--calib_data', type=str, default='calib_data', help='Path to the calibration dataset.') - parser.add_argument('--fast_finetune', action='store_true', help='Use fast fine-tuning configuration.') - parser.add_argument('--cross_layer_equalization', action='store_true', help='Use cross-layer equalization configuration.') - parser.add_argument('--adaround', action='store_true', help='Use adaround quantization') - parser.add_argument('--adaquant', action='store_true', help='Use adaquant quantization') - - args = parser.parse_args() - main(args) - - - - - diff --git a/CNN-examples/quark_quantization/docs/advanced_quant_readme.md b/CNN-examples/quark_quantization/docs/advanced_quant_readme.md deleted file mode 100644 index 0cce94ea..00000000 --- a/CNN-examples/quark_quantization/docs/advanced_quant_readme.md +++ /dev/null @@ -1,159 +0,0 @@ - - - - -

Ryzen™ AI Advanced Quantization Tools

-
- -## Advanced Quantization Tools - -In this section, we explore the advanced quantization capabilities of the Quark quantizer, designed to recover the lost accuracy in quantized models. While basic quantization configurations are effective for many models, advanced and optimized models often require sophisticated techniques to enhance the accuracy of the quantized versions. This guide will walk you through these advanced methods, ensuring your models maintain high performance even after quantization. - -This tutorials takes [MobileNetV2](https://github.com/onnx/models/blob/main/validated/vision/classification/mobilenet/model/mobilenetv2-12.onnx) onnx model as an example, which can be challenging to quantize with minimal accuracy loss, using the advanced quark quantization tools. - -ImageNet Dataset ----------------- - -Please ensure to setup the validation and calibration datase using the instruction from [AMD Quark Quantization Tutorial](quark_quant_readme.md) - - -Model Evaluation ----------------- - -MobileNet: Using ``XINT8`` configuration, we see a drop of ~3% in the Top-1 accuracy. Optimized models like MobileNetV2 tend to be more difficult to quantize. To bridge the gap between float and quantized accuracy of the model, we can use some advanced quantization configurations or techniques. - -```python -cd models -python download_MobileNetV2.py -``` - -```python -python advanced_quark_quantize.py --model_input models/mobilenetv2.onnx --model_output models/mobilenetv2_quant.onnx -``` - -
- -| MobileNetV2 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 13.34 MB | 71.3% | 90.6% | -| INT8 (CPU) | 3.44 MB | 64.0% | 86.5% | -| INT8 (NPU) | 3.44 MB | 63.7% | 87.0% | - -
- -ResNet50: Using ``XINT8`` configuration - -```python -python advanced_quark_quantize.py --model_input models/resnet50.onnx --model_output models/resnet50_quant.onnx -``` - -
- -| ResNet50 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 97.41 MB | 80.0% | 96.1% | -| INT8 (CPU) | 24.46 MB | 77.3% | 94.9% | -| INT8 (NPU) | 24.46 MB | 77.4% | 95.2% | - -
- - -### Fast Fine Tuning - -Fast fine-tuning involves adjusting a pre-trained model to enhance its accuracy after quantization. This approach helps recover accuracy lost during quantization, making the model more suitable for deployment. - -```python -INT8_CNN_ACCURATE_CONFIG = QuantizationConfig(calibrate_method=CalibrationMethod.Percentile, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - include_fast_ft=True, - extra_options={ - 'Percentile': 99.9999, - 'FastFinetune': DEFAULT_ADAROUND_PARAMS - }) -config = Config(global_quant_config=INT8_CNN_ACCURATE_CONFIG) -``` - -MobileNet: Using ``INT8_CNN_ACCURATE`` configuration, which improve the accuracy of the model through ``Fast Fine-tuning`` and ``Histogram Percentile`` based techniques. - -```python -python advanced_quark_quantize.py --model_input models/mobilenetv2.onnx --model_output models/mobilenetv2_quant.onnx --fast_finetune -``` - -
- -| MobileNetV2 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 13.32 | 71.3% | 90.6% | -| INT8 (CPU) | 3.43 | 70.5% | 90.3% | -| INT8 (NPU) | 3.43 | 69.6% | 89.4% | - -
- -ResNet50: Using ```Fast Fine-tuning`` configuration - -```python -python advanced_quark_quantize.py --model_input models/resnet50.onnx --model_output models/resnet50_quant.onnx --fast_finetune -``` - -
- -| ResNet50 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 97.41 MB | 80.0% | 96.1% | -| INT8 (CPU) | 24.46 MB | 79.3% | 96.2% | -| INT8 (NPU) | 24.46 MB | 77.4% | 95.2% | - -
- -### Cross Layer Equalization (CLE) - -Cross-Layer Equalization (CLE) optimizes neural networks for quantization by balancing weight distributions across layers, reducing quantization errors. This technique helps maintain model accuracy while enabling efficient quantization. - -```python -INT8_CLE_CONFIG = QuantizationConfig(calibrate_method=PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_cle=True, - extra_options={'ActivationSymmetric': True}) - -config = Config(global_quant_config=INT8_CLE_CONFIG) -``` - -MobileNet: Using ``Cross Layer Equalization`` configuration - -```python -python advanced_quark_quantize.py --model_input models/mobilenetv2.onnx --model_output models/mobilenetv2_quant.onnx --cross_layer_equalization -``` - -
- -| MobileNetV2 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 13.32 | 71.3% | 90.6% | -| INT8 (CPU) | 3.43 | 62.7% | 85.4% | -| INT8 (NPU) | 3.43 | 63.7% | 86.4% | - -
- -ResNet50: Using ``Cross Layer Equalization`` configuration - -```python -python advanced_quark_quantize.py --model_input models/resnet50.onnx --model_output models/resnet50_quant.onnx --cross_layer_equalization -``` - -
- -| ResNet50 | Model Size | Top-1 Accuracy | Top-5 Accuracy | -|---------------|------------|----------------|----------------| -| Float 32 | 97.41 MB | 80.0% | 96.1% | -| INT8 (CPU) | 24.46 MB | 77.7% | 95.7% | -| INT8 (NPU) | 24.46 MB | 78.2% | 95.6% | - -
- -Reference ---------- - -For more details on the Quark API features in the [Quark Documentation](https://quark.docs.amd.com/latest/index.html) diff --git a/CNN-examples/quark_quantization/models/densenet121_A16W8.log b/CNN-examples/quark_quantization/models/densenet121_A16W8.log deleted file mode 100644 index 2de49d6d..00000000 --- a/CNN-examples/quark_quantization/models/densenet121_A16W8.log +++ /dev/null @@ -1,77 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:54:24.980510 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\densenet121.onnx - model_output --- models\densenet121_A16W8.onnx - calibration_data_reader --- 
- calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 120 | -| Relu | 121 | -| MaxPool | 1 | -| Concat | 62 | -| BatchNormalization | 62 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------| -| Quantized model path | models\densenet121_A16W8.onnx | -+------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT16(182) | INT8(182) | INT32(121) | -| MaxPool | INT16(1) | | | -| Concat | INT16(58) | | | -| AveragePool | INT16(3) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 30.81 MB -A16W8 quantized model size: 8.26 MB -Model 
Accuracy: diff --git a/CNN-examples/quark_quantization/models/densenet121_A8W8.log b/CNN-examples/quark_quantization/models/densenet121_A8W8.log deleted file mode 100644 index 221f1239..00000000 --- a/CNN-examples/quark_quantization/models/densenet121_A8W8.log +++ /dev/null @@ -1,77 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:33:32.168122 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\densenet121.onnx - model_output --- models\densenet121_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - 
per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+-----------------------------------------------------+ -| Op Type | Float Model | -|----------------------+------------------------------| -| Conv | 120 | -| Relu | 121 | -| MaxPool | 1 | -| Concat | 62 | -| BatchNormalization | 62 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+------------------------------| -| Quantized model path | models\densenet121_A8W8.onnx | -+-----------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT8(182) | INT8(182) | INT32(121) | -| MaxPool | INT8(1) | | | -| Concat | INT8(58) | | | -| AveragePool | INT8(3) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 30.81 MB -A8W8 quantized model size: 8.26 MB -Model Accuracy: diff --git a/CNN-examples/quark_quantization/models/densenet121_BF16.log b/CNN-examples/quark_quantization/models/densenet121_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git 
a/CNN-examples/quark_quantization/models/densenet121_XINT8.log b/CNN-examples/quark_quantization/models/densenet121_XINT8.log deleted file mode 100644 index 579bb826..00000000 --- a/CNN-examples/quark_quantization/models/densenet121_XINT8.log +++ /dev/null @@ -1,83 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:52:09.768165 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\densenet121.onnx - model_output --- models\densenet121_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - 
weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 120 | -| Relu | 121 | -| MaxPool | 1 | -| Concat | 62 | -| BatchNormalization | 62 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------| -| Quantized model path | models\densenet121_XINT8.onnx | -+------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(182) | INT8(182) | INT8(121) | -| MaxPool | UINT8(1) | | | -| Concat | UINT8(58) | | | -| AveragePool | UINT8(3) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+--------------------------------------------------------+ -Model Size: -Float32 model size: 30.81 MB -Int8 quantized model size: 8.20 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 356 NPU 42 VITIS_EP_CPU 1307 -[Vitis AI EP] No. 
of Subgraphs : NPU 63 Actually running on NPU 5 -Float32 model accuracy: Top1 0.733, Top5 0.909 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.703, Top5 0.898 diff --git a/CNN-examples/quark_quantization/models/densenet169_A16W8.log b/CNN-examples/quark_quantization/models/densenet169_A16W8.log deleted file mode 100644 index 9443496f..00000000 --- a/CNN-examples/quark_quantization/models/densenet169_A16W8.log +++ /dev/null @@ -1,77 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:56:54.012759 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- 
perf_models\densenet169.onnx - model_output --- models\densenet169_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 168 | -| Relu | 169 | -| MaxPool | 1 | -| Concat | 86 | -| BatchNormalization | 86 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------| -| Quantized model path | models\densenet169_A16W8.onnx | -+------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT16(254) | INT8(254) | INT32(169) | -| MaxPool | INT16(1) | | | -| Concat | INT16(82) | | | -| AveragePool | INT16(3) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | 
-+---------------------------------------------------------+ -Model Size: -Float32 model size: 54.68 MB -A16W8 quantized model size: 14.49 MB -Model Accuracy: diff --git a/CNN-examples/quark_quantization/models/densenet169_A8W8.log b/CNN-examples/quark_quantization/models/densenet169_A8W8.log deleted file mode 100644 index 26b17d9c..00000000 --- a/CNN-examples/quark_quantization/models/densenet169_A8W8.log +++ /dev/null @@ -1,77 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:35:31.919383 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\densenet169.onnx - model_output --- models\densenet169_A8W8.onnx - calibration_data_reader --- - 
calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+-----------------------------------------------------+ -| Op Type | Float Model | -|----------------------+------------------------------| -| Conv | 168 | -| Relu | 169 | -| MaxPool | 1 | -| Concat | 86 | -| BatchNormalization | 86 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+------------------------------| -| Quantized model path | models\densenet169_A8W8.onnx | -+-----------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT8(254) | INT8(254) | INT32(169) | -| MaxPool | INT8(1) | | | -| Concat | INT8(82) | | | -| AveragePool | INT8(3) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 54.68 MB -A8W8 quantized model size: 14.49 MB -Model Accuracy: diff --git 
a/CNN-examples/quark_quantization/models/densenet169_BF16.log b/CNN-examples/quark_quantization/models/densenet169_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/densenet169_XINT8.log b/CNN-examples/quark_quantization/models/densenet169_XINT8.log deleted file mode 100644 index fd313f2e..00000000 --- a/CNN-examples/quark_quantization/models/densenet169_XINT8.log +++ /dev/null @@ -1,83 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:00:53.430579 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\densenet169.onnx - model_output --- models\densenet169_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - 
input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 168 | -| Relu | 169 | -| MaxPool | 1 | -| Concat | 86 | -| BatchNormalization | 86 | -| AveragePool | 3 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------| -| Quantized model path | models\densenet169_XINT8.onnx | -+------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(254) | INT8(254) | INT8(169) | -| MaxPool | UINT8(1) | | | -| Concat | UINT8(82) | | | -| AveragePool | UINT8(3) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+--------------------------------------------------------+ -Model Size: -Float32 model size: 54.68 MB -Int8 quantized model size: 14.35 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 500 NPU 42 VITIS_EP_CPU 2239 -[Vitis AI EP] No. 
of Subgraphs : NPU 87 Actually running on NPU 5 -Float32 model accuracy: Top1 0.746, Top5 0.918 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.704, Top5 0.901 diff --git a/CNN-examples/quark_quantization/models/densenext201_A16W8.log b/CNN-examples/quark_quantization/models/densenext201_A16W8.log deleted file mode 100644 index b3b14a81..00000000 --- a/CNN-examples/quark_quantization/models/densenext201_A16W8.log +++ /dev/null @@ -1 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) diff --git a/CNN-examples/quark_quantization/models/densenext201_A8W8.log b/CNN-examples/quark_quantization/models/densenext201_A8W8.log deleted file mode 100644 index d82bf594..00000000 --- a/CNN-examples/quark_quantization/models/densenext201_A8W8.log +++ /dev/null @@ -1 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], 
extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) diff --git a/CNN-examples/quark_quantization/models/densenext201_BF16.log b/CNN-examples/quark_quantization/models/densenext201_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/densenext201_XINT8.log b/CNN-examples/quark_quantization/models/densenext201_XINT8.log deleted file mode 100644 index 04c6110e..00000000 --- a/CNN-examples/quark_quantization/models/densenext201_XINT8.log +++ /dev/null @@ -1 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, 
extra_options={'ActivationSymmetric': True})) diff --git a/CNN-examples/quark_quantization/models/download_MobileNetV2.py b/CNN-examples/quark_quantization/models/download_MobileNetV2.py deleted file mode 100644 index 62abc82d..00000000 --- a/CNN-examples/quark_quantization/models/download_MobileNetV2.py +++ /dev/null @@ -1,27 +0,0 @@ -import torch -import torchvision.models as models -import torch.onnx -from torchvision.models import mobilenet_v2, MobileNet_V2_Weights - -# Load a pre-trained MobileNetV2 model -model = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT) -model.eval() # Set the model to evaluation mode - -# Create a dummy input tensor with the same size as the model's input -dummy_input = torch.randn(1, 3, 224, 224) - -# Define the path where the ONNX model will be saved -onnx_model_path = "mobilenetv2.onnx" - -# Export the model to ONNX format -torch.onnx.export( - model, - dummy_input, - onnx_model_path, - opset_version=17, - input_names=['input'], - output_names=['output'], - dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} -) - -print(f"Model has been successfully exported to {onnx_model_path}") diff --git a/CNN-examples/quark_quantization/models/download_ResNet.py b/CNN-examples/quark_quantization/models/download_ResNet.py deleted file mode 100644 index 7e6a7ace..00000000 --- a/CNN-examples/quark_quantization/models/download_ResNet.py +++ /dev/null @@ -1,27 +0,0 @@ -import torch -import torchvision.models as models -import torch.onnx -from torchvision.models import resnet50, ResNet50_Weights - -# Load a pre-trained ResNet model -model = models.resnet50(weights=ResNet50_Weights.DEFAULT) -model.eval() # Set the model to evaluation mode - -# Create a dummy input tensor with the same size as the model's input -dummy_input = torch.randn(1, 3, 224, 224) - -# Define the path where the ONNX model will be saved -onnx_model_path = "resnet50.onnx" - -# Export the model to ONNX format -torch.onnx.export( - model, - dummy_input, - 
onnx_model_path, - opset_version=17, - input_names=['input'], - output_names=['output'], - dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} -) - -print(f"Model has been successfully exported to {onnx_model_path}") diff --git a/CNN-examples/quark_quantization/models/efficientnet_b0_A16W8.log b/CNN-examples/quark_quantization/models/efficientnet_b0_A16W8.log deleted file mode 100644 index 5df190ed..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b0_A16W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:49:07.874128 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- 
perf_models\efficientnet_b0.onnx - model_output --- models\efficientnet_b0_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 81 | -| Sigmoid | 65 | -| Mul | 65 | -| GlobalAveragePool | 17 | -| Add | 9 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b0_A16W8.onnx | -+----------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(81) | INT8(81) | INT32(81) | -| Sigmoid | INT16(65) | | | -| Mul | INT16(65) | | | -| AveragePool | INT16(5) | | | -| GlobalAveragePool | INT16(17) | | | -| Add | INT16(9) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | 
-+-------------------------------------------------------+ -Model Size: -Float32 model size: 20.16 MB -A16W8 quantized model size: 5.52 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 970 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.771, Top5 0.929 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.005, Top5 0.014 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b0_A8W8.log b/CNN-examples/quark_quantization/models/efficientnet_b0_A8W8.log deleted file mode 100644 index 6eb7701d..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b0_A8W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:29:19.943710 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: 
Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b0.onnx - model_output --- models\efficientnet_b0_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+---------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------------| -| Conv | 81 | -| Sigmoid | 65 | -| Mul | 65 | -| GlobalAveragePool | 17 | -| Add | 9 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------| -| Quantized model path | models\efficientnet_b0_A8W8.onnx | -+---------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(81) | INT8(81) | INT32(81) | -| Sigmoid | INT8(65) | | | -| Mul | INT8(65) | | | -| AveragePool | INT8(5) | | | -| GlobalAveragePool 
| INT8(17) | | | -| Add | INT8(9) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 20.16 MB -A8W8 quantized model size: 5.52 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 970 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.771, Top5 0.929 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.001, Top5 0.007 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b0_BF16.log b/CNN-examples/quark_quantization/models/efficientnet_b0_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8.log b/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8.log deleted file mode 100644 index 7a573914..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 
18:31:08.929151 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b0.onnx - model_output --- models\efficientnet_b0_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 81 | -| Sigmoid | 65 | -| Mul | 65 | -| GlobalAveragePool | 17 | -| Add | 9 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b0_XINT8.onnx | -+----------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | 
-|-------------------+------------+----------+----------| -| Conv | UINT8(81) | INT8(81) | INT8(81) | -| Mul | UINT8(65) | | | -| AveragePool | UINT8(5) | | | -| GlobalAveragePool | UINT8(17) | | | -| Add | UINT8(9) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(65) | | | -+------------------------------------------------------+ -Model Size: -Float32 model size: 20.16 MB -Int8 quantized model size: 5.55 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 130 NPU 792 VITIS_EP_CPU 132 -[Vitis AI EP] No. of Subgraphs : NPU 66 Actually running on NPU 66 -Float32 model accuracy: Top1 0.771, Top5 0.929 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.001, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8_Quant.log b/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8_Quant.log deleted file mode 100644 index dc422201..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b0_XINT8_Quant.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) 
-[QUARK_INFO]: Time information: -2025-10-14 16:52:06.525867 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b0.onnx - model_output --- perf_models\efficientnet_b0_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+---------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------------------| -| Conv | 81 | -| Sigmoid | 65 | -| Mul | 65 | -| GlobalAveragePool | 17 | -| Add | 9 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------------| -| Quantized model path | perf_models\efficientnet_b0_XINT8.onnx | -+---------------------------------------------------------------+ 
-+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(81) | INT8(81) | INT8(81) | -| Mul | UINT8(65) | | | -| AveragePool | UINT8(5) | | | -| GlobalAveragePool | UINT8(17) | | | -| Add | UINT8(9) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(65) | | | -+------------------------------------------------------+ -Model Size: -Float32 model size: 20.16 MB -Int8 quantized model size: 5.55 MB -Model Accuracy: -Float32 model accuracy: Top1 0.771, Top5 0.929 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 130 NPU 792 VITIS_EP_CPU 132 -[Vitis AI EP] No. of Subgraphs : NPU 66 Actually running on NPU 66 -Int8 quantized model accuracy: Top1 0.000, Top5 0.007 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/efficientnet_b1_A16W8.log b/CNN-examples/quark_quantization/models/efficientnet_b1_A16W8.log deleted file mode 100644 index 34af5418..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b1_A16W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, 
ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:50:24.553944 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b1.onnx - model_output --- models\efficientnet_b1_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 
| -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b1_A16W8.onnx | -+----------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT16(115) | INT8(115) | INT32(115) | -| Sigmoid | INT16(92) | | | -| Mul | INT16(92) | | | -| AveragePool | INT16(8) | | | -| GlobalAveragePool | INT16(24) | | | -| Add | INT16(16) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 29.72 MB -A16W8 quantized model size: 8.12 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 1387 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.774, Top5 0.932 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.295, Top5 0.525 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b1_A8W8.log b/CNN-examples/quark_quantization/models/efficientnet_b1_A8W8.log deleted file mode 100644 index 065883a0..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b1_A8W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, 
include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:30:26.799687 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b1.onnx - model_output --- models\efficientnet_b1_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+---------------------------------------------------------+ -| Op Type | Float Model | 
-|----------------------+----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------| -| Quantized model path | models\efficientnet_b1_A8W8.onnx | -+---------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT8(115) | INT8(115) | INT32(115) | -| Sigmoid | INT8(92) | | | -| Mul | INT8(92) | | | -| AveragePool | INT8(8) | | | -| GlobalAveragePool | INT8(24) | | | -| Add | INT8(16) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 29.72 MB -A8W8 quantized model size: 8.12 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 1387 VITIS_EP_CPU 2 -[Vitis AI EP] No. 
of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.774, Top5 0.932 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.083, Top5 0.206 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b1_BF16.log b/CNN-examples/quark_quantization/models/efficientnet_b1_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8.log b/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8.log deleted file mode 100644 index cfc38d3e..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:36:17.226188 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 
0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b1.onnx - model_output --- models\efficientnet_b1_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b1_XINT8.onnx | -+----------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(115) | INT8(115) | INT8(115) | -| Mul | UINT8(92) | | | -| AveragePool | UINT8(8) | | | -| GlobalAveragePool | UINT8(24) | | | -| Add | UINT8(16) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(92) | | | 
-+--------------------------------------------------------+ -Model Size: -Float32 model size: 29.72 MB -Int8 quantized model size: 8.15 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 184 NPU 1135 VITIS_EP_CPU 186 -[Vitis AI EP] No. of Subgraphs : NPU 93 Actually running on NPU 93 -Float32 model accuracy: Top1 0.774, Top5 0.932 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.001, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8_Quant.log b/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8_Quant.log deleted file mode 100644 index bac777dc..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b1_XINT8_Quant.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 16:57:20.520264 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - 
python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b1.onnx - model_output --- perf_models\efficientnet_b1_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+---------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------------| -| Quantized model path | perf_models\efficientnet_b1_XINT8.onnx | -+---------------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(115) | INT8(115) | INT8(115) | -| Mul | UINT8(92) | | | -| AveragePool | UINT8(8) | | | -| GlobalAveragePool | UINT8(24) | | | -| Add | UINT8(16) | | | -| Flatten | UINT8(1) | | | -| 
Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(92) | | | -+--------------------------------------------------------+ -Model Size: -Float32 model size: 29.72 MB -Int8 quantized model size: 8.15 MB -Model Accuracy: -Float32 model accuracy: Top1 0.774, Top5 0.932 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 184 NPU 1135 VITIS_EP_CPU 186 -[Vitis AI EP] No. of Subgraphs : NPU 93 Actually running on NPU 93 -Int8 quantized model accuracy: Top1 0.087, Top5 0.192 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/efficientnet_b2_A16W8.log b/CNN-examples/quark_quantization/models/efficientnet_b2_A16W8.log deleted file mode 100644 index 1c877a1e..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b2_A16W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:52:21.696586 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 
10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b2.onnx - model_output --- models\efficientnet_b2_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b2_A16W8.onnx | -+----------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | 
-|-------------------+------------+-----------+------------| -| Conv | INT16(115) | INT8(115) | INT32(115) | -| Sigmoid | INT16(92) | | | -| Mul | INT16(92) | | | -| AveragePool | INT16(8) | | | -| GlobalAveragePool | INT16(24) | | | -| Add | INT16(16) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 34.72 MB -A16W8 quantized model size: 9.38 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 1387 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.779, Top5 0.940 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.045, Top5 0.100 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b2_A8W8.log b/CNN-examples/quark_quantization/models/efficientnet_b2_A8W8.log deleted file mode 100644 index faf83164..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b2_A8W8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 
'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:32:00.620712 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b2.onnx - model_output --- models\efficientnet_b2_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+---------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------| -| Quantized model path | models\efficientnet_b2_A8W8.onnx | 
-+---------------------------------------------------------+ -+---------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+------------| -| Conv | INT8(115) | INT8(115) | INT32(115) | -| Sigmoid | INT8(92) | | | -| Mul | INT8(92) | | | -| AveragePool | INT8(8) | | | -| GlobalAveragePool | INT8(24) | | | -| Add | INT8(16) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+---------------------------------------------------------+ -Model Size: -Float32 model size: 34.72 MB -A8W8 quantized model size: 9.38 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 1387 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.779, Top5 0.940 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.002, Top5 0.013 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b2_BF16.log b/CNN-examples/quark_quantization/models/efficientnet_b2_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8.log b/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8.log deleted file mode 100644 index 0a3a1e5c..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, 
convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:44:22.410149 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b2.onnx - model_output --- models\efficientnet_b2_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+----------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-----------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 
| -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-----------------------------------| -| Quantized model path | models\efficientnet_b2_XINT8.onnx | -+----------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(115) | INT8(115) | INT8(115) | -| Mul | UINT8(92) | | | -| AveragePool | UINT8(8) | | | -| GlobalAveragePool | UINT8(24) | | | -| Add | UINT8(16) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(92) | | | -+--------------------------------------------------------+ -Model Size: -Float32 model size: 34.72 MB -Int8 quantized model size: 9.40 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 184 NPU 1135 VITIS_EP_CPU 186 -[Vitis AI EP] No. of Subgraphs : NPU 93 Actually running on NPU 93 -Float32 model accuracy: Top1 0.779, Top5 0.940 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.002, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8_Quant.log b/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8_Quant.log deleted file mode 100644 index 2a3878bf..00000000 --- a/CNN-examples/quark_quantization/models/efficientnet_b2_XINT8_Quant.log +++ /dev/null @@ -1,82 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, 
use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 17:04:10.060790 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\efficientnet_b2.onnx - model_output --- perf_models\efficientnet_b2_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+---------------------------------------------------------------+ -| Op Type | Float Model | 
-|----------------------+----------------------------------------| -| Conv | 115 | -| Sigmoid | 92 | -| Mul | 92 | -| GlobalAveragePool | 24 | -| Add | 16 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------------------| -| Quantized model path | perf_models\efficientnet_b2_XINT8.onnx | -+---------------------------------------------------------------+ -+--------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+-----------+-----------| -| Conv | UINT8(115) | INT8(115) | INT8(115) | -| Mul | UINT8(92) | | | -| AveragePool | UINT8(8) | | | -| GlobalAveragePool | UINT8(24) | | | -| Add | UINT8(16) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -| HardSigmoid | UINT8(92) | | | -+--------------------------------------------------------+ -Model Size: -Float32 model size: 34.72 MB -Int8 quantized model size: 9.40 MB -Model Accuracy: -Float32 model accuracy: Top1 0.779, Top5 0.940 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 184 NPU 1135 VITIS_EP_CPU 186 -[Vitis AI EP] No. 
of Subgraphs : NPU 93 Actually running on NPU 93 -Int8 quantized model accuracy: Top1 0.001, Top5 0.010 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/mobilenet_v2_A16W8.log b/CNN-examples/quark_quantization/models/mobilenet_v2_A16W8.log deleted file mode 100644 index 0cd3f336..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v2_A16W8.log +++ /dev/null @@ -1,79 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:47:32.346775 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v2.onnx - model_output --- 
models\mobilenet_v2_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 52 | -| Constant | 70 | -| Clip | 35 | -| Add | 10 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+--------------------------------| -| Quantized model path | models\mobilenet_v2_A16W8.onnx | -+-------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(52) | INT8(52) | INT32(52) | -| Add | INT16(10) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 13.34 MB -A16W8 quantized model size: 3.50 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. 
of Operators : NPU 346 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.707, Top5 0.897 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.001, Top5 0.007 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v2_A8W8.log b/CNN-examples/quark_quantization/models/mobilenet_v2_A8W8.log deleted file mode 100644 index ef1c2700..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v2_A8W8.log +++ /dev/null @@ -1,79 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:27:54.477173 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input 
--- perf_models\mobilenet_v2.onnx - model_output --- models\mobilenet_v2_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 52 | -| Constant | 70 | -| Clip | 35 | -| Add | 10 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------| -| Quantized model path | models\mobilenet_v2_A8W8.onnx | -+------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(52) | INT8(52) | INT32(52) | -| Add | INT8(10) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 13.34 MB -A8W8 quantized model size: 3.50 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. 
of Operators : CPU 36 NPU 238 VITIS_EP_CPU 74 -[Vitis AI EP] No. of Subgraphs : NPU 18 Actually running on NPU 18 -Float32 model accuracy: Top1 0.707, Top5 0.897 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.001, Top5 0.006 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v2_BF16.log b/CNN-examples/quark_quantization/models/mobilenet_v2_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8.log b/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8.log deleted file mode 100644 index 9fd39318..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8.log +++ /dev/null @@ -1,79 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:25:57.646219 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - 
onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v2.onnx - model_output --- models\mobilenet_v2_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 52 | -| Constant | 70 | -| Clip | 35 | -| Add | 10 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+--------------------------------| -| Quantized model path | models\mobilenet_v2_XINT8.onnx | -+-------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(52) | INT8(52) | INT8(52) | -| Add | UINT8(10) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 13.34 MB -Int8 quantized 
model size: 3.46 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 36 NPU 239 VITIS_EP_CPU 74 -[Vitis AI EP] No. of Subgraphs : NPU 18 Actually running on NPU 18 -Float32 model accuracy: Top1 0.707, Top5 0.897 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.001, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8_Quant.log b/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8_Quant.log deleted file mode 100644 index 7a0cb311..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v2_XINT8_Quant.log +++ /dev/null @@ -1,79 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 16:47:47.896491 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: 
Quantized Configuration information: - model_input --- perf_models\mobilenet_v2.onnx - model_output --- perf_models\mobilenet_v2_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------------| -| Conv | 52 | -| Constant | 70 | -| Clip | 35 | -| Add | 10 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+-------------------------------------| -| Quantized model path | perf_models\mobilenet_v2_XINT8.onnx | -+------------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(52) | INT8(52) | INT8(52) | -| Add | UINT8(10) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 13.34 MB -Int8 quantized model size: 3.46 MB -Model Accuracy: -Float32 model 
accuracy: Top1 0.707, Top5 0.897 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 36 NPU 239 VITIS_EP_CPU 74 -[Vitis AI EP] No. of Subgraphs : NPU 18 Actually running on NPU 18 -Int8 quantized model accuracy: Top1 0.618, Top5 0.833 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/mobilenet_v3_A16W8.log b/CNN-examples/quark_quantization/models/mobilenet_v3_A16W8.log deleted file mode 100644 index e10bfe8f..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v3_A16W8.log +++ /dev/null @@ -1,83 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:48:17.305600 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 
-[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v3.onnx - model_output --- models\mobilenet_v3_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 52 | -| HardSigmoid | 28 | -| Mul | 28 | -| Relu | 14 | -| GlobalAveragePool | 10 | -| Add | 6 | -| Flatten | 1 | -| Gemm | 2 | -|----------------------+--------------------------------| -| Quantized model path | models\mobilenet_v3_A16W8.onnx | -+-------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(52) | INT8(52) | INT32(52) | -| HardSigmoid | INT16(28) | | | -| Mul | INT16(28) | | | -| AveragePool | INT16(1) | | | -| GlobalAveragePool | INT16(10) | | | -| Add | INT16(6) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(2) | 
INT8(2) | INT32(2) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 9.71 MB -A16W8 quantized model size: 2.65 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 540 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.674, Top5 0.868 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.003, Top5 0.013 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v3_A8W8.log b/CNN-examples/quark_quantization/models/mobilenet_v3_A8W8.log deleted file mode 100644 index f97e02fe..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v3_A8W8.log +++ /dev/null @@ -1,83 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:28:39.821882 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD 
-[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v3.onnx - model_output --- models\mobilenet_v3_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------| -| Conv | 52 | -| HardSigmoid | 28 | -| Mul | 28 | -| Relu | 14 | -| GlobalAveragePool | 10 | -| Add | 6 | -| Flatten | 1 | -| Gemm | 2 | -|----------------------+-------------------------------| -| Quantized model path | models\mobilenet_v3_A8W8.onnx | -+------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(52) | INT8(52) | INT32(52) | -| HardSigmoid | INT8(28) | | | -| Mul | INT8(28) | | | -| AveragePool | INT8(1) | | | -| 
GlobalAveragePool | INT8(10) | | | -| Add | INT8(6) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(2) | INT8(2) | INT32(2) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 9.71 MB -A8W8 quantized model size: 2.65 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 540 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.674, Top5 0.868 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.004, Top5 0.014 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v3_BF16.log b/CNN-examples/quark_quantization/models/mobilenet_v3_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8.log b/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8.log deleted file mode 100644 index 0a58f6de..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8.log +++ /dev/null @@ -1,83 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 
18:28:54.293867 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v3.onnx - model_output --- models\mobilenet_v3_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 52 | -| HardSigmoid | 28 | -| Mul | 28 | -| Relu | 14 | -| GlobalAveragePool | 10 | -| Add | 6 | -| Flatten | 1 | -| Gemm | 2 | -|----------------------+--------------------------------| -| Quantized model path | models\mobilenet_v3_XINT8.onnx | -+-------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | 
-|-------------------+------------+----------+----------| -| Conv | UINT8(52) | INT8(52) | INT8(52) | -| HardSigmoid | UINT8(28) | | | -| Mul | UINT8(28) | | | -| AveragePool | UINT8(1) | | | -| GlobalAveragePool | UINT8(10) | | | -| Add | UINT8(6) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(2) | INT8(2) | INT8(2) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 9.71 MB -Int8 quantized model size: 2.67 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 62 NPU 448 VITIS_EP_CPU 70 -[Vitis AI EP] No. of Subgraphs : NPU 32 Actually running on NPU 32 -Float32 model accuracy: Top1 0.674, Top5 0.868 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.001, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8_Quant.log b/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8_Quant.log deleted file mode 100644 index bc93d6a0..00000000 --- a/CNN-examples/quark_quantization/models/mobilenet_v3_XINT8_Quant.log +++ /dev/null @@ -1,84 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) 
-[QUARK_INFO]: Time information: -2025-10-14 16:50:23.009755 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\mobilenet_v3.onnx - model_output --- perf_models\mobilenet_v3_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------------| -| Conv | 52 | -| HardSwish | 19 | -| Relu | 14 | -| GlobalAveragePool | 10 | -| HardSigmoid | 9 | -| Mul | 9 | -| Add | 6 | -| Flatten | 1 | -| Gemm | 2 | -|----------------------+-------------------------------------| -| Quantized model path | perf_models\mobilenet_v3_XINT8.onnx | -+------------------------------------------------------------+ 
-+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(52) | INT8(52) | INT8(52) | -| HardSigmoid | UINT8(28) | | | -| Mul | UINT8(28) | | | -| AveragePool | UINT8(1) | | | -| GlobalAveragePool | UINT8(10) | | | -| Add | UINT8(6) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(2) | INT8(2) | INT8(2) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 9.71 MB -Int8 quantized model size: 2.66 MB -Model Accuracy: -Float32 model accuracy: Top1 0.674, Top5 0.868 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 62 NPU 448 VITIS_EP_CPU 70 -[Vitis AI EP] No. of Subgraphs : NPU 32 Actually running on NPU 32 -Int8 quantized model accuracy: Top1 0.002, Top5 0.007 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/resnet18_A16W8.log b/CNN-examples/quark_quantization/models/resnet18_A16W8.log deleted file mode 100644 index 7e538b1e..00000000 --- a/CNN-examples/quark_quantization/models/resnet18_A16W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, 
log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:45:20.847310 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet18.onnx - model_output --- models\resnet18_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+---------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------| -| Conv | 20 | -| Relu | 17 | -| MaxPool | 1 | -| Add | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | 
-|----------------------+----------------------------| -| Quantized model path | models\resnet18_A16W8.onnx | -+---------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(20) | INT8(20) | INT32(20) | -| MaxPool | INT16(1) | | | -| Add | INT16(8) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 44.58 MB -A16W8 quantized model size: 11.21 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 163 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.695, Top5 0.879 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.698, Top5 0.877 diff --git a/CNN-examples/quark_quantization/models/resnet18_A8W8.log b/CNN-examples/quark_quantization/models/resnet18_A8W8.log deleted file mode 100644 index 2a53e490..00000000 --- a/CNN-examples/quark_quantization/models/resnet18_A8W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, 
enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:26:13.582824 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet18.onnx - model_output --- models\resnet18_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+--------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------| -| Conv | 20 | -| Relu | 17 | -| MaxPool | 1 | -| Add | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm 
| 1 | -|----------------------+---------------------------| -| Quantized model path | models\resnet18_A8W8.onnx | -+--------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(20) | INT8(20) | INT32(20) | -| MaxPool | INT8(1) | | | -| Add | INT8(8) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 44.58 MB -A8W8 quantized model size: 11.21 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 163 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.695, Top5 0.879 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.687, Top5 0.878 diff --git a/CNN-examples/quark_quantization/models/resnet18_BF16.log b/CNN-examples/quark_quantization/models/resnet18_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/resnet18_XINT8.log b/CNN-examples/quark_quantization/models/resnet18_XINT8.log deleted file mode 100644 index d33153f2..00000000 --- a/CNN-examples/quark_quantization/models/resnet18_XINT8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, 
convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:20:54.161003 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet18.onnx - model_output --- models\resnet18_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+---------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------| -| Conv | 20 | -| Relu | 17 | -| MaxPool | 1 | 
-| Add | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------| -| Quantized model path | models\resnet18_XINT8.onnx | -+---------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(20) | INT8(20) | INT8(20) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(8) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 44.58 MB -Int8 quantized model size: 11.20 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 164 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.695, Top5 0.879 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.683, Top5 0.879 diff --git a/CNN-examples/quark_quantization/models/resnet18_XINT8_Quant.log b/CNN-examples/quark_quantization/models/resnet18_XINT8_Quant.log deleted file mode 100644 index 83cda243..00000000 --- a/CNN-examples/quark_quantization/models/resnet18_XINT8_Quant.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, 
include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 16:43:06.530873 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet18.onnx - model_output --- perf_models\resnet18_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 20 | -| Relu | 17 | -| MaxPool | 1 | -| Add | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | 
-|----------------------+---------------------------------| -| Quantized model path | perf_models\resnet18_XINT8.onnx | -+--------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(20) | INT8(20) | INT8(20) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(8) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 44.58 MB -Int8 quantized model size: 11.20 MB -Model Accuracy: -Float32 model accuracy: Top1 0.695, Top5 0.879 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 164 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Int8 quantized model accuracy: Top1 0.680, Top5 0.873 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/resnet34_A16W8.log b/CNN-examples/quark_quantization/models/resnet34_A16W8.log deleted file mode 100644 index 38878ee3..00000000 --- a/CNN-examples/quark_quantization/models/resnet34_A16W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, 
enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:46:14.156934 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet34.onnx - model_output --- models\resnet34_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+---------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------| -| Conv | 36 | -| Relu | 33 | -| MaxPool | 1 
| -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------| -| Quantized model path | models\resnet34_A16W8.onnx | -+---------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(36) | INT8(36) | INT32(36) | -| MaxPool | INT16(1) | | | -| Add | INT16(16) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 83.14 MB -A16W8 quantized model size: 20.89 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 291 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.723, Top5 0.909 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.720, Top5 0.906 diff --git a/CNN-examples/quark_quantization/models/resnet34_A8W8.log b/CNN-examples/quark_quantization/models/resnet34_A8W8.log deleted file mode 100644 index ff0d475b..00000000 --- a/CNN-examples/quark_quantization/models/resnet34_A8W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, 
include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:26:54.595305 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet34.onnx - model_output --- models\resnet34_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+--------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------| -| Conv | 36 | -| Relu | 33 | -| MaxPool | 
1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+---------------------------| -| Quantized model path | models\resnet34_A8W8.onnx | -+--------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(36) | INT8(36) | INT32(36) | -| MaxPool | INT8(1) | | | -| Add | INT8(16) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 83.14 MB -A8W8 quantized model size: 20.89 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 291 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.723, Top5 0.909 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.720, Top5 0.903 diff --git a/CNN-examples/quark_quantization/models/resnet34_BF16.log b/CNN-examples/quark_quantization/models/resnet34_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/resnet34_XINT8.log b/CNN-examples/quark_quantization/models/resnet34_XINT8.log deleted file mode 100644 index b8f34ff6..00000000 --- a/CNN-examples/quark_quantization/models/resnet34_XINT8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, 
use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:22:58.064575 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet34.onnx - model_output --- models\resnet34_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+---------------------------------------------------+ -| Op Type | Float Model | 
-|----------------------+----------------------------| -| Conv | 36 | -| Relu | 33 | -| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------| -| Quantized model path | models\resnet34_XINT8.onnx | -+---------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(36) | INT8(36) | INT8(36) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(16) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 83.14 MB -Int8 quantized model size: 20.87 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 292 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.723, Top5 0.909 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.704, Top5 0.906 diff --git a/CNN-examples/quark_quantization/models/resnet34_XINT8_Quant.log b/CNN-examples/quark_quantization/models/resnet34_XINT8_Quant.log deleted file mode 100644 index 2c4b6a45..00000000 --- a/CNN-examples/quark_quantization/models/resnet34_XINT8_Quant.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, 
convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 16:45:03.090738 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet34.onnx - model_output --- perf_models\resnet34_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 36 | -| Relu | 33 | 
-| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+---------------------------------| -| Quantized model path | perf_models\resnet34_XINT8.onnx | -+--------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(36) | INT8(36) | INT8(36) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(16) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 83.14 MB -Int8 quantized model size: 20.87 MB -Model Accuracy: -Float32 model accuracy: Top1 0.723, Top5 0.909 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 292 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Int8 quantized model accuracy: Top1 0.705, Top5 0.906 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization diff --git a/CNN-examples/quark_quantization/models/resnet50_A16W8.log b/CNN-examples/quark_quantization/models/resnet50_A16W8.log deleted file mode 100644 index 817ca96b..00000000 --- a/CNN-examples/quark_quantization/models/resnet50_A16W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, 
include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:43:50.924806 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet50.onnx - model_output --- models\resnet50_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+---------------------------------------------------+ -| Op Type | Float Model | 
-|----------------------+----------------------------| -| Conv | 53 | -| Relu | 49 | -| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------| -| Quantized model path | models\resnet50_A16W8.onnx | -+---------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(53) | INT8(53) | INT32(53) | -| MaxPool | INT16(1) | | | -| Add | INT16(16) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 97.41 MB -A16W8 quantized model size: 24.54 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 392 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.757, Top5 0.928 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.755, Top5 0.929 diff --git a/CNN-examples/quark_quantization/models/resnet50_A8W8.log b/CNN-examples/quark_quantization/models/resnet50_A8W8.log deleted file mode 100644 index 0e3a33d3..00000000 --- a/CNN-examples/quark_quantization/models/resnet50_A8W8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, 
convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:25:02.432674 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet50.onnx - model_output --- models\resnet50_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+--------------------------------------------------+ -| Op Type | Float Model | 
-|----------------------+---------------------------| -| Conv | 53 | -| Relu | 49 | -| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+---------------------------| -| Quantized model path | models\resnet50_A8W8.onnx | -+--------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(53) | INT8(53) | INT32(53) | -| MaxPool | INT8(1) | | | -| Add | INT8(16) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 97.41 MB -A8W8 quantized model size: 24.54 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 392 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.757, Top5 0.928 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.754, Top5 0.921 diff --git a/CNN-examples/quark_quantization/models/resnet50_BF16.log b/CNN-examples/quark_quantization/models/resnet50_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/resnet50_XINT8.log b/CNN-examples/quark_quantization/models/resnet50_XINT8.log deleted file mode 100644 index 169fc2c7..00000000 --- a/CNN-examples/quark_quantization/models/resnet50_XINT8.log +++ /dev/null @@ -1,80 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, 
execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 18:15:48.301033 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet50.onnx - model_output --- models\resnet50_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} 
-+---------------------------------------------------+ -| Op Type | Float Model | -|----------------------+----------------------------| -| Conv | 53 | -| Relu | 49 | -| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+----------------------------| -| Quantized model path | models\resnet50_XINT8.onnx | -+---------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(53) | INT8(53) | INT8(53) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(16) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 97.41 MB -Int8 quantized model size: 24.47 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 393 VITIS_EP_CPU 2 -[Vitis AI EP] No. 
of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.757, Top5 0.928 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.758, Top5 0.924 diff --git a/CNN-examples/quark_quantization/models/resnet50_XINT8_Quant.log b/CNN-examples/quark_quantization/models/resnet50_XINT8_Quant.log deleted file mode 100644 index 1d4f0787..00000000 --- a/CNN-examples/quark_quantization/models/resnet50_XINT8_Quant.log +++ /dev/null @@ -1,89 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 16:36:59.446949 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\resnet50.onnx - model_output --- perf_models\resnet50_XINT8.onnx - calibration_data_reader --- 
- calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 53 | -| Relu | 49 | -| MaxPool | 1 | -| Add | 16 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -| Gemm | 1 | -|----------------------+---------------------------------| -| Quantized model path | perf_models\resnet50_XINT8.onnx | -+--------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(53) | INT8(53) | INT8(53) | -| MaxPool | UINT8(1) | | | -| Add | UINT8(16) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+------------------------------------------------------+ -Model Size: -Float32 model size: 97.41 MB -Int8 quantized model size: 24.47 MB -Model Accuracy: -Float32 model accuracy: Top1 0.757, Top5 0.928 -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : NPU 393 VITIS_EP_CPU 2 -[Vitis AI EP] No. 
of Subgraphs : NPU 1 Actually running on NPU 1 -Int8 quantized model accuracy: Top1 0.759, Top5 0.925 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -[Vitis AI EP] No. of Operators : NPU 393 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Int8 quantized model accuracy (NPU): Top1 0.758, Top5 0.924 -Benchmarking CPU float model: -Average inference time over 100 runs: 22.182538509368896 ms -Benchmarking CPU quantized model: -Average inference time over 100 runs: 48.507442474365234 ms -Benchmarking NPU quantized model: -Average inference time over 100 runs: 5.243556499481201 ms diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A16W8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A16W8.log deleted file mode 100644 index 3fe33543..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A16W8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 20:03:13.652233 -[QUARK_INFO]: OS and CPU 
information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x0_5.onnx - model_output --- models\shufflenet_v2_x0_5_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+-------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| Constant | 110 | -| Reshape | 32 | -| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+--------------------------------------| -| Quantized model path | 
models\shufflenet_v2_x0_5_A16W8.onnx | -+-------------------------------------------------------------+ -+------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+-----------| -| Conv | INT16(56) | INT8(56) | INT32(56) | -| MaxPool | INT16(1) | | | -| Concat | INT16(16) | | | -| Reshape | INT16(32) | | | -| Transpose | INT16(16) | | | -| Slice | INT16(26) | | | -| ReduceMean | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+------------------------------------------------+ -Model Size: -Float32 model size: 5.26 MB -A16W8 quantized model size: 1.50 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 48 NPU 470 VITIS_EP_CPU 98 -[Vitis AI EP] No. of Subgraphs : NPU 17 Actually running on NPU 17 -Float32 model accuracy: Top1 0.604, Top5 0.815 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.575, Top5 0.791 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A8W8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A8W8.log deleted file mode 100644 index ae5c5f28..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_A8W8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, 
enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:42:10.421102 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x0_5.onnx - model_output --- models\shufflenet_v2_x0_5_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| 
Constant | 110 | -| Reshape | 32 | -| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+-------------------------------------| -| Quantized model path | models\shufflenet_v2_x0_5_A8W8.onnx | -+------------------------------------------------------------+ -+------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+-----------| -| Conv | INT8(56) | INT8(56) | INT32(56) | -| MaxPool | INT8(1) | | | -| Concat | INT8(16) | | | -| Reshape | INT8(32) | | | -| Transpose | INT8(16) | | | -| Slice | INT8(26) | | | -| ReduceMean | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+------------------------------------------------+ -Model Size: -Float32 model size: 5.26 MB -A8W8 quantized model size: 1.50 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 48 NPU 470 VITIS_EP_CPU 98 -[Vitis AI EP] No. 
of Subgraphs : NPU 17 Actually running on NPU 17 -Float32 model accuracy: Top1 0.604, Top5 0.815 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.567, Top5 0.769 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_BF16.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_XINT8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_XINT8.log deleted file mode 100644 index 93598d38..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x0_5_XINT8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:14:10.610645 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - 
quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x0_5.onnx - model_output --- models\shufflenet_v2_x0_5_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+-------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| Constant | 110 | -| Reshape | 32 | -| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+--------------------------------------| -| Quantized model path | models\shufflenet_v2_x0_5_XINT8.onnx | -+-------------------------------------------------------------+ -+-----------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+----------| -| Conv | UINT8(56) | INT8(56) | INT8(56) | -| MaxPool | UINT8(1) | | | -| Concat | UINT8(16) | | | -| Reshape | UINT8(32) | | | -| Transpose | UINT8(16) | | | -| Slice | 
UINT8(26) | | | -| ReduceMean | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+-----------------------------------------------+ -Model Size: -Float32 model size: 5.26 MB -Int8 quantized model size: 1.51 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 50 NPU 467 VITIS_EP_CPU 100 -[Vitis AI EP] No. of Subgraphs : NPU 18 Actually running on NPU 18 -Float32 model accuracy: Top1 0.604, Top5 0.815 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.001, Top5 0.005 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A16W8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A16W8.log deleted file mode 100644 index 8fdbbb74..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A16W8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 20:04:04.114187 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 
11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x1_0.onnx - model_output --- models\shufflenet_v2_x1_0_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+-------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| Constant | 110 | -| Reshape | 32 | -| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+--------------------------------------| -| Quantized model path | models\shufflenet_v2_x1_0_A16W8.onnx | 
-+-------------------------------------------------------------+ -+------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+-----------| -| Conv | INT16(56) | INT8(56) | INT32(56) | -| MaxPool | INT16(1) | | | -| Concat | INT16(16) | | | -| Reshape | INT16(32) | | | -| Transpose | INT16(16) | | | -| Slice | INT16(26) | | | -| ReduceMean | INT16(1) | | | -| Gemm | INT16(1) | INT8(1) | INT32(1) | -+------------------------------------------------+ -Model Size: -Float32 model size: 8.72 MB -A16W8 quantized model size: 2.37 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 48 NPU 470 VITIS_EP_CPU 98 -[Vitis AI EP] No. of Subgraphs : NPU 17 Actually running on NPU 17 -Float32 model accuracy: Top1 0.686, Top5 0.881 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.680, Top5 0.878 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A8W8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A8W8.log deleted file mode 100644 index 4728d617..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_A8W8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, 
debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:43:00.113577 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x1_0.onnx - model_output --- models\shufflenet_v2_x1_0_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+-------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| Constant | 110 | -| Reshape | 32 | 
-| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+-------------------------------------| -| Quantized model path | models\shufflenet_v2_x1_0_A8W8.onnx | -+------------------------------------------------------------+ -+------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+-----------| -| Conv | INT8(56) | INT8(56) | INT32(56) | -| MaxPool | INT8(1) | | | -| Concat | INT8(16) | | | -| Reshape | INT8(32) | | | -| Transpose | INT8(16) | | | -| Slice | INT8(26) | | | -| ReduceMean | INT8(1) | | | -| Gemm | INT8(1) | INT8(1) | INT32(1) | -+------------------------------------------------+ -Model Size: -Float32 model size: 8.72 MB -A8W8 quantized model size: 2.37 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 48 NPU 470 VITIS_EP_CPU 98 -[Vitis AI EP] No. of Subgraphs : NPU 17 Actually running on NPU 17 -Float32 model accuracy: Top1 0.686, Top5 0.881 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.673, Top5 0.869 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_BF16.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_XINT8.log b/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_XINT8.log deleted file mode 100644 index 9d061e19..00000000 --- a/CNN-examples/quark_quantization/models/shufflenet_v2_x1_0_XINT8.log +++ /dev/null @@ -1,90 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], 
nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:15:08.617672 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\shufflenet_v2_x1_0.onnx - model_output --- models\shufflenet_v2_x1_0_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False 
- include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+-------------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------------| -| Conv | 56 | -| Relu | 37 | -| MaxPool | 1 | -| Concat | 16 | -| Constant | 110 | -| Reshape | 32 | -| Transpose | 16 | -| Shape | 13 | -| Gather | 13 | -| Add | 13 | -| Div | 13 | -| Mul | 26 | -| Slice | 26 | -| ReduceMean | 1 | -| Gemm | 1 | -|----------------------+--------------------------------------| -| Quantized model path | models\shufflenet_v2_x1_0_XINT8.onnx | -+-------------------------------------------------------------+ -+-----------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|------------+------------+----------+----------| -| Conv | UINT8(56) | INT8(56) | INT8(56) | -| MaxPool | UINT8(1) | | | -| Concat | UINT8(16) | | | -| Reshape | UINT8(32) | | | -| Transpose | UINT8(16) | | | -| Slice | UINT8(26) | | | -| ReduceMean | UINT8(1) | | | -| Gemm | UINT8(1) | INT8(1) | INT8(1) | -+-----------------------------------------------+ -Model Size: -Float32 model size: 8.72 MB -Int8 quantized model size: 2.38 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 50 NPU 467 VITIS_EP_CPU 100 -[Vitis AI EP] No. 
of Subgraphs : NPU 18 Actually running on NPU 18 -Float32 model accuracy: Top1 0.686, Top5 0.881 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.003, Top5 0.003 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_0_A16W8.log b/CNN-examples/quark_quantization/models/squeezenet1_0_A16W8.log deleted file mode 100644 index e5ac2e42..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_0_A16W8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 20:02:03.157295 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- 
perf_models\squeezenet1_0.onnx - model_output --- models\squeezenet1_0_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+---------------------------------| -| Quantized model path | models\squeezenet1_0_A16W8.onnx | -+--------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(26) | INT8(26) | INT32(26) | -| MaxPool | INT16(3) | | | -| Concat | INT16(8) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 4.78 MB -A16W8 quantized model size: 1.27 MB -Model Accuracy: -Using TXN FORMAT 0.1 
-[Vitis AI EP] No. of Operators : CPU 1 NPU 200 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.586, Top5 0.798 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.575, Top5 0.793 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_0_A8W8.log b/CNN-examples/quark_quantization/models/squeezenet1_0_A8W8.log deleted file mode 100644 index 23924b76..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_0_A8W8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:40:59.779375 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration 
information: - model_input --- perf_models\squeezenet1_0.onnx - model_output --- models\squeezenet1_0_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+--------------------------------| -| Quantized model path | models\squeezenet1_0_A8W8.onnx | -+-------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(26) | INT8(26) | INT32(26) | -| MaxPool | INT8(3) | | | -| Concat | INT8(8) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | -+-------------------------------------------------------+ -Model Size: -Float32 model size: 4.78 MB -A8W8 quantized model size: 1.27 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] 
No. of Operators : CPU 1 NPU 200 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.586, Top5 0.798 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.572, Top5 0.793 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_0_BF16.log b/CNN-examples/quark_quantization/models/squeezenet1_0_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_0_XINT8.log b/CNN-examples/quark_quantization/models/squeezenet1_0_XINT8.log deleted file mode 100644 index 5d77a4af..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_0_XINT8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:11:27.808133 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 
1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\squeezenet1_0.onnx - model_output --- models\squeezenet1_0_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+---------------------------------| -| Quantized model path | models\squeezenet1_0_XINT8.onnx | -+--------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(26) | INT8(26) | INT8(26) | -| MaxPool | UINT8(3) | | | -| Concat | UINT8(8) | | | -| GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -+------------------------------------------------------+ -Model Size: -Float32 model size: 4.78 MB -Int8 quantized model 
size: 1.27 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 1 NPU 201 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.586, Top5 0.798 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.527, Top5 0.768 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_1_A16W8.log b/CNN-examples/quark_quantization/models/squeezenet1_1_A16W8.log deleted file mode 100644 index 5e029cb5..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_1_A16W8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True})) -[QUARK_INFO]: Time information: -2025-10-14 20:02:40.168543 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 
1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\squeezenet1_1.onnx - model_output --- models\squeezenet1_1_A16W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt16 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True, 'AlignEltwiseQuantType': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+---------------------------------| -| Quantized model path | models\squeezenet1_1_A16W8.onnx | -+--------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT16(26) | INT8(26) | INT32(26) | -| MaxPool | INT16(3) | | | -| Concat | INT16(8) | | | -| GlobalAveragePool | INT16(1) | | | -| Flatten | INT16(1) | | | 
-+-------------------------------------------------------+ -Model Size: -Float32 model size: 4.73 MB -A16W8 quantized model size: 1.26 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 1 NPU 200 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.595, Top5 0.804 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A16W8 quantized model accuracy (NPU): Top1 0.590, Top5 0.804 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_1_A8W8.log b/CNN-examples/quark_quantization/models/squeezenet1_1_A8W8.log deleted file mode 100644 index 7bd98cc9..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_1_A8W8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=False, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:41:37.247602 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 - version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: 
Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\squeezenet1_1.onnx - model_output --- models\squeezenet1_1_A8W8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- CalibrationMethod.MinMax - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- False - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True, 'AlignSlice': False, 'FoldRelu': True, 'AlignConcat': True} -+-------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+--------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+--------------------------------| -| Quantized model path | models\squeezenet1_1_A8W8.onnx | -+-------------------------------------------------------+ -+-------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+-----------| -| Conv | INT8(26) | INT8(26) | INT32(26) | -| MaxPool | INT8(3) | | | -| Concat | INT8(8) | | | -| GlobalAveragePool | INT8(1) | | | -| Flatten | INT8(1) | | | 
-+-------------------------------------------------------+ -Model Size: -Float32 model size: 4.73 MB -A8W8 quantized model size: 1.26 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 1 NPU 200 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.595, Top5 0.804 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -A8W8 quantized model accuracy (NPU): Top1 0.581, Top5 0.803 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_1_BF16.log b/CNN-examples/quark_quantization/models/squeezenet1_1_BF16.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/models/squeezenet1_1_XINT8.log b/CNN-examples/quark_quantization/models/squeezenet1_1_XINT8.log deleted file mode 100644 index 113d7a05..00000000 --- a/CNN-examples/quark_quantization/models/squeezenet1_1_XINT8.log +++ /dev/null @@ -1,78 +0,0 @@ -The configuration of the quantization is Config(global_quant_config=QuantizationConfig(calibrate_method=, quant_format=, activation_type=, weight_type=, input_nodes=[], output_nodes=[], op_types_to_quantize=[], nodes_to_quantize=[], extra_op_types_to_quantize=[], nodes_to_exclude=[], subgraphs_to_exclude=[], specific_tensor_precision=False, execution_providers=['CPUExecutionProvider'], per_channel=False, reduce_range=False, optimize_model=True, use_dynamic_quant=False, use_external_data_format=False, convert_fp16_to_fp32=False, convert_nchw_to_nhwc=False, include_sq=False, include_rotation=False, include_cle=True, include_auto_mp=False, include_fast_ft=False, enable_npu_cnn=True, enable_npu_transformer=False, debug_mode=False, crypto_mode=False, print_summary=True, ignore_warnings=True, log_severity_level=1, extra_options={'ActivationSymmetric': True})) -[QUARK_INFO]: Time information: -2025-10-14 19:13:01.170111 -[QUARK_INFO]: OS and CPU information: - system --- Windows - node --- XSJSTRXHPOMNI01 - release --- 11 
- version --- 10.0.26100 - machine --- AMD64 - processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD -[QUARK_INFO]: Tools version information: - python --- 3.12.11 - onnx --- 1.18.0 - onnxruntime --- 1.23.0.dev20250928 - quark.onnx --- 0.10+db671e3+db671e3 -[QUARK_INFO]: Quantized Configuration information: - model_input --- perf_models\squeezenet1_1.onnx - model_output --- models\squeezenet1_1_XINT8.onnx - calibration_data_reader --- - calibration_data_path --- None - quant_format --- QDQ - input_nodes --- [] - output_nodes --- [] - op_types_to_quantize --- [] - extra_op_types_to_quantize --- [] - per_channel --- False - reduce_range --- False - activation_type --- QUInt8 - weight_type --- QInt8 - nodes_to_quantize --- [] - nodes_to_exclude --- [] - subgraphs_to_exclude --- [] - optimize_model --- True - use_external_data_format --- False - calibrate_method --- PowerOfTwoMethod.MinMSE - execution_providers --- ['CPUExecutionProvider'] - enable_npu_cnn --- True - enable_npu_transformer --- False - specific_tensor_precision --- False - debug_mode --- False - convert_fp16_to_fp32 --- False - convert_nchw_to_nhwc --- False - include_cle --- True - include_sq --- False - include_rotation --- False - include_fast_ft --- False - extra_options --- {'ActivationSymmetric': True} -+--------------------------------------------------------+ -| Op Type | Float Model | -|----------------------+---------------------------------| -| Conv | 26 | -| Relu | 26 | -| MaxPool | 3 | -| Concat | 8 | -| GlobalAveragePool | 1 | -| Flatten | 1 | -|----------------------+---------------------------------| -| Quantized model path | models\squeezenet1_1_XINT8.onnx | -+--------------------------------------------------------+ -+------------------------------------------------------+ -| Op Type | Activation | Weights | Bias | -|-------------------+------------+----------+----------| -| Conv | UINT8(26) | INT8(26) | INT8(26) | -| MaxPool | UINT8(3) | | | -| Concat | UINT8(8) | | | -| 
GlobalAveragePool | UINT8(1) | | | -| Flatten | UINT8(1) | | | -+------------------------------------------------------+ -Model Size: -Float32 model size: 4.73 MB -Int8 quantized model size: 1.26 MB -Model Accuracy: -Using TXN FORMAT 0.1 -[Vitis AI EP] No. of Operators : CPU 1 NPU 201 VITIS_EP_CPU 2 -[Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 -Float32 model accuracy: Top1 0.595, Top5 0.804 -C:\Users\dwchenna\github\amd_repo\RyzenAI-SW\tutorial\quark_quantization -Int8 quantized model accuracy (NPU): Top1 0.546, Top5 0.778 diff --git a/CNN-examples/quark_quantization/models/yolov8m_XINT8_Quant.log b/CNN-examples/quark_quantization/models/yolov8m_XINT8_Quant.log deleted file mode 100644 index e69de29b..00000000 diff --git a/CNN-examples/quark_quantization/prepare_data.py b/CNN-examples/quark_quantization/prepare_data.py deleted file mode 100644 index 65366bb4..00000000 --- a/CNN-examples/quark_quantization/prepare_data.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. 
-# SPDX-License-Identifier: MIT -# -import os -import shutil -import sys - -if len(sys.argv) < 3: - print("Usage: python prepare_val_data.py ") - sys.exit(1) - -source_folder = sys.argv[1] -calib_data_path = sys.argv[2] - -if not os.path.exists(source_folder): - print("The provided data path does not exist.") - sys.exit(1) - -files = os.listdir(source_folder) - -for filename in files: - if not filename.startswith('ILSVRC2012_val_') or not filename.endswith( - '.JPEG'): - continue - - n_identifier = filename.split('_')[-1].split('.')[0] - folder_name = n_identifier - folder_path = os.path.join(source_folder, folder_name) - if not os.path.exists(folder_path): - os.makedirs(folder_path) - file_path = os.path.join(source_folder, filename) - destination = os.path.join(folder_path, filename) - shutil.move(file_path, destination) - -print("File organization complete.") - -if not os.path.exists(calib_data_path): - os.makedirs(calib_data_path) - -destination_folder = calib_data_path - -subfolders = os.listdir(source_folder) - -for subfolder in subfolders: - source_subfolder = os.path.join(source_folder, subfolder) - destination_subfolder = os.path.join(destination_folder, subfolder) - os.makedirs(destination_subfolder, exist_ok=True) - - files = os.listdir(source_subfolder) - - if files: - file_to_copy = files[0] - source_file = os.path.join(source_subfolder, file_to_copy) - destination_file = os.path.join(destination_subfolder, file_to_copy) - - shutil.copy(source_file, destination_file) - -print("Creating calibration dataset complete.") diff --git a/CNN-examples/quark_quantization/quark_quantize.py b/CNN-examples/quark_quantization/quark_quantize.py deleted file mode 100644 index aaf53cbe..00000000 --- a/CNN-examples/quark_quantization/quark_quantize.py +++ /dev/null @@ -1,138 +0,0 @@ -import os -import argparse -import onnx -import time -import numpy as np -from PIL import Image -from pathlib import Path -import onnxruntime as ort -from onnxruntime.quantization.calibrate 
import CalibrationMethod -from onnxruntime.quantization.quant_utils import QuantType -from quark.onnx import ModelQuantizer -from quark.onnx.quantization.config import Config, get_default_config -from utils import ImageDataReader, evaluate_onnx_model, get_npu_info, get_xclbin - -def preprocess_image(image_path): - image = Image.open(image_path) - image = image.resize((224, 224)) - image_array = np.array(image).astype(np.float32)/255 - image_array = np.transpose(image_array, (2, 0, 1)) - input_data = np.expand_dims(image_array, axis=0) - return input_data - -def benchmark_model(session, runs=100): - input_shape = session.get_inputs()[0].shape - input_shape = tuple(1 if isinstance(dim, str) else dim for dim in input_shape) - input_data = np.random.rand(*input_shape).astype(np.float32) - start_time = time.time() - for _ in range(runs): - outputs = session.run(None, {session.get_inputs()[0].name: input_data}) - end_time = time.time() - avg_time = (end_time - start_time) / runs - print('Average inference time over {} runs: {} ms'.format(runs, avg_time * 1000)) - -def main(args): - # Setup the Input model - input_model_path = args.model_input - output_model_path = args.model_output - calibration_dataset_path = args.calib_data - - # Get quantization configuration - if args.config == "XINT8": - quant_config = get_default_config("XINT8") - elif args.config == "A8W8": - quant_config = get_default_config("A8W8") - elif args.config == "A16W8": - quant_config = get_default_config("A16W8") - elif args.config == "BF16": - quant_config = get_default_config("BF16") - quant_config.extra_options["BF16QDQToCast"] = True - else: - quant_config = get_default_config("XINT8") - # Defines the quantization configuration for the whole model - config = Config(global_quant_config=quant_config) - print("The configuration of the quantization is {}".format(config)) - - # Define the calibration data reader - num_calib_data = 100 - calibration_dataset = ImageDataReader(calibration_dataset_path, 
input_model_path, data_size=num_calib_data, batch_size=1) - - # Create an ONNX Quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model if the flag is set - if args.quantize: - quant_model = quantizer.quantize_model(model_input=input_model_path, - model_output=output_model_path, - calibration_data_reader=calibration_dataset) - print("Model Size:") - print("Float32 model size: {:.2f} MB".format(os.path.getsize(input_model_path)/(1024 * 1024))) - print("{} quantized model size: {:.2f} MB".format(args.config, os.path.getsize(output_model_path)/(1024 * 1024))) - - # Evaluate the model if the flag is set - if args.evaluate: - print("Model Accuracy:") - top1_acc, top5_acc = evaluate_onnx_model(input_model_path, imagenet_data_path=calibration_dataset_path) - print("Float32 model accuracy: Top1 {:.3f}, Top5 {:.3f} ".format(top1_acc, top5_acc)) - model_name = os.path.basename(output_model_path)[:-5] - print('model_name:', model_name) - if 'XINT8' in model_name: - top1_acc, top5_acc = evaluate_onnx_model(output_model_path, imagenet_data_path=calibration_dataset_path) - print("{} quantized model accuracy (CPU): Top1 {:.3f}, Top5 {:.3f} ".format(args.config, top1_acc, top5_acc)) - top1_acc, top5_acc = evaluate_onnx_model(output_model_path, imagenet_data_path=calibration_dataset_path, device='npu') - print("{} quantized model accuracy (NPU): Top1 {:.3f}, Top5 {:.3f} ".format(args.config, top1_acc, top5_acc)) - else: - top1_acc, top5_acc = evaluate_onnx_model(output_model_path, imagenet_data_path=calibration_dataset_path, device='npu') - print("{} quantized model accuracy (NPU): Top1 {:.3f}, Top5 {:.3f} ".format(args.config, top1_acc, top5_acc)) - - # Benchmark the float/quantized models on CPU/NPU - if args.benchmark: - # Run the float model on CPU - model = onnx.load(input_model_path) - provider = ['CPUExecutionProvider'] - session = ort.InferenceSession(model.SerializeToString(), providers=provider) - print('Benchmarking CPU float model:') - 
benchmark_model(session) - - # Run the quantized model on CPU - quant_model = onnx.load(output_model_path) - session = ort.InferenceSession(quant_model.SerializeToString(), providers=provider) - print('Benchmarking CPU quantized model:') - benchmark_model(session) - - # Run quantized model on NPU - npu_device = get_npu_info() - quant_model = onnx.load(output_model_path) - provider = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{ - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey', - 'enable_cache_file_io_in_mem':'0' - }] - # Create session options - session_options = ort.SessionOptions() - session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - # For PHX/HPT, xclbin is required - if npu_device == 'PHX/HPT': - provider_options[0]['target'] = 'X1' - provider_options[0]['xclbin'] = get_xclbin(npu_device) - session = ort.InferenceSession(quant_model.SerializeToString(), - sess_options=session_options, - providers=provider, - provider_options=provider_options) - print('Benchmarking NPU quantized model:') - benchmark_model(session) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Quantize and evaluate ONNX models.") - parser.add_argument('--model_input', type=str, default='models/resnet50.onnx', help='Path to the input ONNX model.') - parser.add_argument('--model_output', type=str, default='models/resnet50_quant.onnx', help='Path to save the quantized ONNX model.') - parser.add_argument('--calib_data', type=str, default='calib_data', help='Path to the calibration dataset.') - parser.add_argument('--config', type=str, choices=["XINT8", "A8W8", "A16W8", "BF16"], default="XINT8", help='The model quantization configuration.') - parser.add_argument('--quantize', action='store_true', help='Flag to quantize the model.') - parser.add_argument('--evaluate', action='store_true', help='Flag to evaluate the model.') - parser.add_argument('--benchmark', 
action='store_true', help='Flag to benchmasrk the model.') - - args = parser.parse_args() - main(args) diff --git a/CNN-examples/quark_quantization/requirements.txt b/CNN-examples/quark_quantization/requirements.txt deleted file mode 100644 index 93a4fbf7..00000000 --- a/CNN-examples/quark_quantization/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -numpy==1.26.4 -Pillow==12.0.0 -protobuf==6.33.0 -rich==14.2.0 -torchvision==0.23.0 -timm==1.0.20 -numpy==1.26.4 \ No newline at end of file diff --git a/CNN-examples/quark_quantization/utils.py b/CNN-examples/quark_quantization/utils.py deleted file mode 100644 index 076e90ae..00000000 --- a/CNN-examples/quark_quantization/utils.py +++ /dev/null @@ -1,276 +0,0 @@ -import os -import subprocess -import shutil - -import onnxruntime as ort -import numpy as np -from torchvision import datasets, transforms -from torch.utils.data import DataLoader -from tqdm import tqdm -from pathlib import Path - -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - npu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): npu_type = 'KRK' - return npu_type - -def get_xclbin(npu_device): - xclbin_file = '' - if npu_device == 'STX' or npu_device=='KRK': - xclbin_file = '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - if npu_device == 'PHX/HPT': - xclbin_file = 
'{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - return xclbin_file - -def reorganize_imagenet_val(val_dir, mapping_file, output_dir): - # Read the mapping file - with open(mapping_file, 'r') as f: - lines = f.readlines() - - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - - # Process each line in the mapping file - for line in lines: - image_name, class_label = line.strip().split() - - # Create class directory if it doesn't exist - class_dir = os.path.join(output_dir, class_label) - os.makedirs(class_dir, exist_ok=True) - - # Move the image to the class directory - src = os.path.join(val_dir, image_name) - dst = os.path.join(class_dir, image_name) - shutil.move(src, dst) - -# Example usage -# reorganize_imagenet_val('path/to/val_images', 'path/to/mapping_file.txt', 'path/to/output_dir') -def get_npu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - npu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): npu_type = 'KRK' - return npu_type - -def evaluate_onnx_model(onnx_model_path, imagenet_data_path, batch_size=1, device='cpu'): - # Load the ONNX model - if device == 'npu': - npu_device = get_npu_info() - provider = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - model_name = os.path.basename(onnx_model_path)[:-5] - print(cache_dir) - provider_options = [{}] - if npu_device == 
'PHX/HPT': - provider_options[0]['target'] = 'X1' - provider_options[0]['xclbin'] = get_xclbin(npu_device) - session = ort.InferenceSession(onnx_model_path, providers=provider, - provider_options=provider_options) - else: - # session = ort.InferenceSession(onnx_model_path) - if 'BF16' in onnx_model_path: - from quark.onnx import get_library_path - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device='cpu')) - session = ort.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider'], sess_options=sess_options) - else: - session = ort.InferenceSession(onnx_model_path) - - input_name = session.get_inputs()[0].name - - # Define the preprocessing transformations - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - - # Load the ImageNet validation dataset - imagenet_data = datasets.ImageFolder(root=imagenet_data_path, transform=preprocess) - data_loader = DataLoader(imagenet_data, batch_size=batch_size, shuffle=False) - - top1_correct = 0 - top5_correct = 0 - total = 0 - - # Evaluate the model - for images, labels in tqdm(data_loader, desc="Evaluating"): - # Run inference - outputs = session.run(None, {input_name: images.numpy()}) - outputs = outputs[0] - - # Calculate top-1 and top-5 predictions - top1_predictions = np.argmax(outputs, axis=1) - top5_predictions = np.argsort(outputs, axis=1)[:, -5:] - - # Update top-1 accuracy - top1_correct += (top1_predictions == labels.numpy()).sum() - - # Update top-5 accuracy - for i, label in enumerate(labels.numpy()): - if label in top5_predictions[i]: - top5_correct += 1 - - total += labels.size(0) - - top1_accuracy = top1_correct / total - top5_accuracy = top5_correct / total - - return top1_accuracy, top5_accuracy - -# print(f"Accuracy: {accuracy * 100:.2f}%") -import numpy -from PIL import Image -import 
onnxruntime -from onnxruntime.quantization.calibrate import CalibrationDataReader - -def _preprocess_images(images_folder: str, - height: int, - width: int, - size_limit=0, - batch_size=100): - """ - Loads a batch of images and preprocess them - parameter images_folder: path to folder storing images - parameter height: image height in pixels - parameter width: image width in pixels - parameter size_limit: number of images to load. Default is 0 which means all images are picked. - return: list of matrices characterizing multiple images - """ - image_path = os.listdir(images_folder) - image_names = [] - for image_dir in image_path: - image_name = os.listdir(os.path.join(images_folder, image_dir)) - image_names.append(os.path.join(image_dir, image_name[0])) - if size_limit > 0 and len(image_names) >= size_limit: - batch_filenames = [image_names[i] for i in range(size_limit)] - else: - batch_filenames = image_names - unconcatenated_batch_data = [] - - batch_data = [] - for index, image_name in enumerate(batch_filenames): - image_filepath = images_folder + "/" + image_name - pillow_img = Image.new("RGB", (width, height)) - pillow_img.paste(Image.open(image_filepath).resize((width, height))) - image_array = numpy.array(pillow_img) / 255.0 - mean = numpy.array([0.485, 0.456, 0.406]) - image_array = (image_array - mean) - std = numpy.array([0.229, 0.224, 0.225]) - nchw_data = image_array / std - nchw_data = nchw_data.transpose((2, 0, 1)) - nchw_data = numpy.expand_dims(nchw_data, axis=0) - nchw_data = nchw_data.astype(numpy.float32) - unconcatenated_batch_data.append(nchw_data) - - if (index + 1) % batch_size == 0: - one_batch_data = numpy.concatenate(unconcatenated_batch_data, - axis=0) - unconcatenated_batch_data.clear() - batch_data.append(one_batch_data) - - return batch_data - -class ImageDataReader(CalibrationDataReader): - - def __init__(self, calibration_image_folder: str, model_path: str, data_size: int, batch_size: int): - self.enum_data = None - - # Use 
inference session to get input shape. - session = onnxruntime.InferenceSession( - model_path, providers=['CPUExecutionProvider']) - (_, _, height, width) = session.get_inputs()[0].shape - - # Convert image to input data - self.nhwc_data_list = _preprocess_images(calibration_image_folder, - height, width, data_size, batch_size) - self.input_name = session.get_inputs()[0].name - self.datasize = len(self.nhwc_data_list) - - def get_next(self): - if self.enum_data is None: - self.enum_data = iter([{ - self.input_name: nhwc_data - } for nhwc_data in self.nhwc_data_list]) - return next(self.enum_data, None) - - def rewind(self): - self.enum_data = None - - def reset(self): - self.enum_data = None - -import torch -from timm.data import create_loader, resolve_data_config, create_dataset -from typing import List, Any, Union -from timm.models import create_model - -def post_process_top1(output: torch.tensor) -> float: - _, preds_top1 = torch.max(output, 1) - return preds_top1 - -def getAccuracy_top1(preds: Union[torch.tensor, list], targets: Union[torch.tensor, list]) -> float: - assert len(preds) == len(targets) - assert len(preds) > 0 - count = 0 - for i in range(len(preds)): - pred = preds[i] - target = targets[i] - if pred == target: - count += 1 - return count / len(preds) - -global model_name -model_name = "resnet50" - -global calibration_dataset_path -calibration_dataset_path = "calib_data" - -def top1_accu(results: List[Union[torch.tensor, List[Any]]]) -> float: - """ - Calculate the top1 accuracy of the model. 
- :param results: the result of the model - :return: the top1 accuracy - """ - timm_model_name = model_name - calib_data_path = calibration_dataset_path - - timm_model = create_model( - timm_model_name, - pretrained=False, - ) - - data_config = resolve_data_config(model=timm_model, use_test_size=True) - - loader = create_loader(create_dataset('', calib_data_path), - input_size=data_config['input_size'], - batch_size=20, - use_prefetcher=False, - interpolation=data_config['interpolation'], - mean=data_config['mean'], - std=data_config['std'], - num_workers=2, - crop_pct=data_config['crop_pct']) - target = [] - for _, labels in loader: - target.extend(labels.data.tolist()) - outputs_top1 = post_process_top1(torch.tensor(numpy.squeeze(numpy.array(results)))) - top1_acc = getAccuracy_top1(outputs_top1, target) - return round(top1_acc, 2) \ No newline at end of file diff --git a/CNN-examples/quark_quantization/vaiml_config.json b/CNN-examples/quark_quantization/vaiml_config.json deleted file mode 100644 index 8d7f6408..00000000 --- a/CNN-examples/quark_quantization/vaiml_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "optimize_level": 1, - "preferred_data_storage": "auto" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/CNN-examples/torchvision_inference/classification.ipynb b/CNN-examples/torchvision_inference/classification.ipynb deleted file mode 100644 index 70e999ad..00000000 --- a/CNN-examples/torchvision_inference/classification.ipynb +++ /dev/null @@ -1,868 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "41a5cbbb-8d20-49ae-9dcc-90ed3f623793", - "metadata": {}, - "source": [ - "### Classification example inference with Ryzen AI" - ] - }, - { - "cell_type": 
"markdown", - "id": "777199ee-9183-40bb-8355-1fb470552f5f", - "metadata": {}, - "source": [ - "This example demonstrates the 5 steps of classification model inference on the embedded Neural Processing Unit (NPU) in your AMD Ryzen AI enabled PC. The steps are as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "ba8184db-eee4-499b-b5c0-c494e643bd06", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\n", - "[QUARK-INFO]: Checking custom ops library ...\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: The CPU version of custom ops library already exists.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Checked custom ops library.\u001b[0m\n" - ] - } - ], - "source": [ - "# Import necessary libraries\n", - "import os\n", - "import torch\n", - "import torch.nn as nn\n", - "import torchvision\n", - "import subprocess\n", - "import onnxruntime\n", - "import numpy as np\n", - "import onnx\n", - "import shutil\n", - "import time \n", - "from timeit import default_timer as timer\n", - "from quark.onnx import ModelQuantizer \n", - "from quark.onnx.quantization.config import Config, get_default_config \n", - "from utils_custom import ImageDataReader, evaluate_onnx_model \n", - "import json \n", - "import shutil\n", - "import sys" - ] - }, - { - "cell_type": "markdown", - "id": "3470d814-78bb-481e-b512-d431d267c003", - "metadata": {}, - "source": [ - "#### 1. Get Model" - ] - }, - { - "cell_type": "markdown", - "id": "2cd67743-027b-4988-a467-b4a4a173259e", - "metadata": {}, - "source": [ - "Here, we'll use the resnet50 model as an example. You may choose any classification models train with Imagenet from torchvision." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "738304be-b948-4f34-a69d-ba4d2277e0c0", - "metadata": {}, - "outputs": [], - "source": [ - "# ---------------- Model Setup ---------------- #\n", - "\n", - "# Define directories\n", - "models_dir = \"models\"\n", - "os.makedirs(models_dir, exist_ok=True)\n", - "\n", - "# Load pre-trained ResNet50 model\n", - "model = torchvision.models.resnet50(weights=\"IMAGENET1K_V2\")\n", - "\n", - "# Save the model\n", - "model.to(\"cpu\")\n", - "torch.save(model, os.path.join(models_dir, \"resnet50.pt\"))\n" - ] - }, - { - "cell_type": "markdown", - "id": "ce87c669-4f5c-433a-bb37-676b40d4640f", - "metadata": {}, - "source": [ - "#### 2. Export to ONNX" - ] - }, - { - "cell_type": "markdown", - "id": "2f07e198-29b2-463a-b9df-10266d390249", - "metadata": {}, - "source": [ - "The model inference with Ryzen AI is based on onnxruntime. The following code is used for exporting a PyTorch model to the ONNX (Open Neural Network Exchange) format. The ONNX file is needed to use the AMD Quark Quantizer." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "73d3bad8-b5ea-4c08-bea6-5c92c973055b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Model exported to ONNX at: models\\resnet50.onnx\n" - ] - } - ], - "source": [ - "# Export model to ONNX\n", - "dummy_inputs = torch.randn(1, 3, 224, 224)\n", - "input_names = ['input']\n", - "output_names = ['output']\n", - "dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}\n", - "tmp_model_path = os.path.join(models_dir, \"resnet50.onnx\")\n", - "\n", - "torch.onnx.export(\n", - " model,\n", - " dummy_inputs,\n", - " tmp_model_path,\n", - " export_params=True,\n", - " opset_version=13, # Recommended opset\n", - " input_names=input_names,\n", - " output_names=output_names,\n", - " dynamic_axes=dynamic_axes,\n", - ")\n", - "\n", - "print(f\"✅ Model exported to ONNX at: {tmp_model_path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "aad6bfea-4270-4d2d-98cd-0a5497224265", - "metadata": {}, - "source": [ - "#### 3. Quantize Model" - ] - }, - { - "cell_type": "markdown", - "id": "0a3d5103-9e41-43c0-8298-fc4e5bd8d6d5", - "metadata": {}, - "source": [ - "Using the AMD Quark Quantizer and providing the newly exported ONNX model, we'll quantize the model. The quantization progress will need the calibration data from Imagenet. Download the data from [here](https://huggingface.co/datasets/imagenet-1k/tree/main/data) to download it.\n", - "You need to register on Hugging Face and download the following file:\n", - "**val_images.tar.gz**.\n", - "This file contains a subset of ImageNet images used specifically for calibration.\n", - "\n", - "Once downloaded, move the file to your working directory (val_images) and extract the dataset into the calib_data directory..\n", - "Below code will read the images from val_image folder and create a calib_data folder." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e0a4752b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File organization complete.\n", - "Creating calibration dataset complete.\n" - ] - } - ], - "source": [ - "#\n", - "# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.\n", - "# SPDX-License-Identifier: MIT\n", - "#\n", - "# if len(sys.argv) < 3:\n", - "# print(\"Usage: python prepare_val_data.py \")\n", - "# sys.exit(1)\n", - "\n", - "source_folder = 'val_images'\n", - "calib_data_path = 'calib_data'\n", - "\n", - "if not os.path.exists(source_folder):\n", - " print(\"The provided data path does not exist.\")\n", - " sys.exit(1)\n", - "\n", - "files = os.listdir(source_folder)\n", - "\n", - "for filename in files:\n", - " if not filename.startswith('ILSVRC2012_val_') or not filename.endswith(\n", - " '.JPEG'):\n", - " continue\n", - "\n", - " n_identifier = filename.split('_')[-1].split('.')[0]\n", - " folder_name = n_identifier\n", - " folder_path = os.path.join(source_folder, folder_name)\n", - " if not os.path.exists(folder_path):\n", - " os.makedirs(folder_path)\n", - " file_path = os.path.join(source_folder, filename)\n", - " destination = os.path.join(folder_path, filename)\n", - " shutil.move(file_path, destination)\n", - "\n", - "print(\"File organization complete.\")\n", - "\n", - "if not os.path.exists(calib_data_path):\n", - " os.makedirs(calib_data_path)\n", - "\n", - "destination_folder = calib_data_path\n", - "\n", - "subfolders = os.listdir(source_folder)\n", - "\n", - "for subfolder in subfolders:\n", - " source_subfolder = os.path.join(source_folder, subfolder)\n", - " destination_subfolder = os.path.join(destination_folder, subfolder)\n", - " os.makedirs(destination_subfolder, exist_ok=True)\n", - "\n", - " files = os.listdir(source_subfolder)\n", - "\n", - " if files:\n", - " file_to_copy = files[0]\n", - " source_file = os.path.join(source_subfolder, 
file_to_copy)\n", - " destination_file = os.path.join(destination_subfolder, file_to_copy)\n", - "\n", - " shutil.copy(source_file, destination_file)\n", - "\n", - "print(\"Creating calibration dataset complete.\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "67ae056f-a96c-47fb-b447-9de43e36e8a4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\n", - "[QUARK-INFO]: The input ONNX model models\\resnet50.onnx can create InferenceSession successfully\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[QUARK_INFO]: Time information:\n", - "2025-06-30 11:53:36.817912\n", - "[QUARK_INFO]: OS and CPU information:\n", - " system --- Windows\n", - " node --- xhdnucstr12\n", - " release --- 10\n", - " version --- 10.0.26100\n", - " machine --- AMD64\n", - " processor --- AMD64 Family 26 Model 36 Stepping 0, AuthenticAMD\n", - "[QUARK_INFO]: Tools version information:\n", - " python --- 3.10.0\n", - " onnx --- 1.18.0\n", - " onnxruntime --- 1.22.0.dev20250626\n", - " quark.onnx --- 0.9+1a74724+1a74724\n", - "[QUARK_INFO]: Quantized Configuration information:\n", - " model_input --- models\\resnet50.onnx\n", - " model_output --- models\\resnet50_quantized.onnx\n", - " calibration_data_reader --- \n", - " calibration_data_path --- None\n", - " quant_format --- QDQ\n", - " input_nodes --- []\n", - " output_nodes --- []\n", - " op_types_to_quantize --- []\n", - " extra_op_types_to_quantize --- []\n", - " per_channel --- False\n", - " reduce_range --- False\n", - " activation_type --- QUInt8\n", - " weight_type --- QInt8\n", - " nodes_to_quantize --- []\n", - " nodes_to_exclude --- []\n", - " subgraphs_to_exclude --- []\n", - " optimize_model --- True\n", - " use_external_data_format --- False\n", - " calibrate_method --- PowerOfTwoMethod.MinMSE\n", - " execution_providers --- ['CPUExecutionProvider']\n", - " enable_npu_cnn --- True\n", - " 
enable_npu_transformer --- False\n", - " specific_tensor_precision --- False\n", - " debug_mode --- False\n", - " convert_fp16_to_fp32 --- False\n", - " convert_nchw_to_nhwc --- False\n", - " include_cle --- True\n", - " include_sq --- False\n", - " include_rotation --- False\n", - " include_fast_ft --- False\n", - " extra_options --- {'ActivationSymmetric': True}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\n", - "[QUARK-INFO]: Obtained calibration data with 9 iters\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Removed initializers from input\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Simplified model sucessfully\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Loading model...\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: The input ONNX model C:/Users/akumar23/AppData/Local/Temp/vai.simp.5bw9altr/model_simp.onnx can run inference successfully\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Start CrossLayerEqualization...\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: CrossLayerEqualization pattern num: 32\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Total CrossLayerEqualization steps: 1\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: CrossLayerEqualization Done.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: optimize the model for better hardware compatibility.\u001b[0m\n", - "\u001b[33m\n", - "[QUARK-WARNING]: The opset version is 13 < 17. Skipping fusing layer normalization.\u001b[0m\n", - "\u001b[33m\n", - "[QUARK-WARNING]: The opset version is 13 < 20. 
Skipping fusing Gelu.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Start calibration...\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Start collecting data, runtime depends on your model size and the number of calibration dataset.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Finding optimal threshold for each tensor using PowerOfTwoMethod.MinMSE algorithm ...\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Use all calibration data to calculate min mse\u001b[0m\n", - "Computing range: 100%|███████████████████████████████████████████████████████████| 123/123 [01:13<00:00, 1.67tensor/s]\n", - "\u001b[32m\n", - "[QUARK-INFO]: Finished the calibration of PowerOfTwoMethod.MinMSE which costs 77.5s\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Remove QuantizeLinear & DequantizeLinear on certain operations(such as conv-relu).\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Rescale GlobalAveragePool /avgpool/GlobalAveragePool with factor 1.0048828125 to simulate DPU behavior.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Adjust the quantize info to meet the compiler constraints\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Input pos of pooling layer /avgpool/GlobalAveragePool is 1. Output pos of pooling layer /avgpool/GlobalAveragePool is 4.Modify opos from 4 to 1.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: Adjust the quantize info to meet the compiler constraints\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: The operation types and their corresponding quantities of the input float model is shown in the table below.\u001b[0m\n" - ] - }, - { - "data": { - "text/html": [ - "
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
-       "┃ Op Type               Float Model                    ┃\n",
-       "┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
-       "│ Conv                 │ 53                             │\n",
-       "│ Relu                 │ 49                             │\n",
-       "│ MaxPool              │ 1                              │\n",
-       "│ Add                  │ 16                             │\n",
-       "│ GlobalAveragePool    │ 1                              │\n",
-       "│ Flatten              │ 1                              │\n",
-       "│ Gemm                 │ 1                              │\n",
-       "├──────────────────────┼────────────────────────────────┤\n",
-       "│ Quantized model path │ models\\resnet50_quantized.onnx │\n",
-       "└──────────────────────┴────────────────────────────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mOp Type \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mFloat Model \u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│ Conv │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m53 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Relu │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m49 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ MaxPool │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Add │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m16 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ GlobalAveragePool │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Flatten │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Gemm │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m1 \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "├──────────────────────┼────────────────────────────────┤\n", - "│ Quantized model path │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mmodels\\resnet50_quantized.onnx\u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "└──────────────────────┴────────────────────────────────┘\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\n", - "[QUARK-INFO]: The quantized information for all operation types is shown in the table below.\u001b[0m\n", - "\u001b[32m\n", - "[QUARK-INFO]: The discrepancy between the operation types in the quantized model and the float model is due to the application of graph optimization.\u001b[0m\n" - ] - }, - { - "data": { - "text/html": [ - "
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓\n",
-       "┃ Op Type            Activation  Weights   Bias     ┃\n",
-       "┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━┩\n",
-       "│ Conv              │ UINT8(53)   INT8(53)  INT8(53) │\n",
-       "│ MaxPool           │ UINT8(1)                       │\n",
-       "│ Add               │ UINT8(16)                      │\n",
-       "│ GlobalAveragePool │ UINT8(1)                       │\n",
-       "│ Flatten           │ UINT8(1)                       │\n",
-       "│ Gemm              │ UINT8(1)    INT8(1)   INT8(1)  │\n",
-       "└───────────────────┴────────────┴──────────┴──────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mOp Type \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mActivation\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mWeights \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mBias \u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━┩\n", - "│ Conv │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(53) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mINT8(53)\u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mINT8(53)\u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ MaxPool │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(1) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Add │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(16) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ GlobalAveragePool │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(1) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Flatten │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(1) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "│ Gemm │\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mUINT8(1) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mINT8(1) \u001b[0m\u001b[1;38;5;46m 
\u001b[0m│\u001b[1;38;5;46m \u001b[0m\u001b[1;38;5;46mINT8(1) \u001b[0m\u001b[1;38;5;46m \u001b[0m│\n", - "└───────────────────┴────────────┴──────────┴──────────┘\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Quark Quantized model saved at: models\\resnet50_quantized.onnx\n" - ] - } - ], - "source": [ - "# ---------------- Quark Quantization ---------------- #\n", - "\n", - "# Define dataset directory\n", - "calib_dir = \"calib_data\" \n", - "\n", - "# Set input & output ONNX model paths\n", - "input_model_path = tmp_model_path\n", - "output_model_path = os.path.join(models_dir, \"resnet50_quantized.onnx\")\n", - "\n", - "# Preprocessing transformations\n", - "preprocess = torchvision.transforms.Compose([\n", - " torchvision.transforms.Resize(256),\n", - " torchvision.transforms.CenterCrop(224),\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", - "])\n", - "\n", - "# Load dataset\n", - "calib_dataset = torchvision.datasets.ImageFolder(root=calib_dir, transform=preprocess)\n", - "\n", - "#Data set \n", - "num_calib_data = 54 \n", - "calib_dataset = torch.utils.data.Subset(calib_dataset, range(num_calib_data))\n", - "\n", - "# Define DataLoader for Calibration\n", - "calibration_dataloader = torch.utils.data.DataLoader(calib_dataset, batch_size=6, shuffle=False)\n", - "\n", - "# Configure Quark Quantization\n", - "quant_config = get_default_config(\"XINT8\") # Use XINT8 quantization \n", - "config = Config(global_quant_config=quant_config)\n", - "\n", - "# Create an ONNX Quantizer \n", - "quantizer = ModelQuantizer(config) \n", - "\n", - "# Perform Quark Quantization \n", - "quant_model = quantizer.quantize_model(\n", - " model_input=input_model_path, \n", - " model_output=output_model_path, \n", - " calibration_data_reader=ImageDataReader(calibration_dataloader) # Use ImageDataReader 
from utils_custom\n", - ")\n", - "\n", - "print(f\"✅ Quark Quantized model saved at: {output_model_path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "46397081-c10c-4630-8299-563e81dea7b6", - "metadata": {}, - "source": [ - "#### 4. Model inference on CPU / iGPU / NPU with single image" - ] - }, - { - "cell_type": "markdown", - "id": "5c5cb104-5c6f-4043-a812-b5ee4a48af57", - "metadata": {}, - "source": [ - "Now we have successfully quantized the model, and we will use the onnxruntime to do the inference on CPU, iGPU and NPU." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "b98ec05e-9b2e-4cc0-b3af-6d640c167074", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Image size: (224, 224)\n" - ] - } - ], - "source": [ - "from PIL import Image\n", - "\n", - "def load_labels(path):\n", - " with open(path) as f:\n", - " data = json.load(f)\n", - " return np.asarray(data)\n", - "\n", - "def preprocess_image(input):\n", - " normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n", - " \n", - " transform = torchvision.transforms.Compose([\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Resize((224, 224)),\n", - " normalize,\n", - " ])\n", - " img_tensor = transform(input).unsqueeze(0)\n", - " return img_tensor.numpy()\n", - "\n", - "def softmax(x):\n", - " x = x.reshape(-1)\n", - " e_x = np.exp(x - np.max(x))\n", - " return e_x / e_x.sum(axis=0)\n", - "\n", - "def postprocess(result):\n", - " return softmax(np.array(result)).tolist()\n", - "\n", - "labels = load_labels('data/imagenet-simple-labels.json')\n", - "image = Image.open('data/dog.jpg')\n", - "\n", - "print(\"Image size: \", image.size)\n", - "input_data = preprocess_image(image)" - ] - }, - { - "cell_type": "markdown", - "id": "e8db896e-fe9b-4167-9b0e-758040d50dd4", - "metadata": {}, - "source": [ - "#### CPU Inference" - ] - }, - { - "cell_type": "code", - 
"execution_count": 9, - "id": "fd83dc38-b0ad-4001-bb8f-31b6b6f0cc52", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "Final top prediction is: Golden Retriever\n", - "----------------------------------------\n", - "Inference time: 37.91 ms\n", - "----------------------------------------\n", - "------------ Top 5 labels are: ----------------------------\n", - "['Golden Retriever' 'Labrador Retriever' 'Norwich Terrier'\n", - " 'Curly-coated Retriever' 'Flat-Coated Retriever']\n", - "-----------------------------------------------------------\n" - ] - } - ], - "source": [ - "# Run inference on CPU\n", - "onnx_model_path = output_model_path\n", - "cpu_options = onnxruntime.SessionOptions()\n", - "\n", - "cpu_session = onnxruntime.InferenceSession(\n", - " onnx_model_path,\n", - " providers=['CPUExecutionProvider'],\n", - " sess_options=cpu_options,\n", - ")\n", - "\n", - "start = timer()\n", - "cpu_outputs = cpu_session.run(None, {'input': input_data})\n", - "end = timer()\n", - "\n", - "cpu_results = postprocess(cpu_outputs)\n", - "inference_time = np.round((end - start) * 1000, 2)\n", - "idx = np.argmax(cpu_results)\n", - "\n", - "print('----------------------------------------')\n", - "print(f'Final top prediction is: {labels[idx]}')\n", - "print('----------------------------------------')\n", - "print(f'Inference time: {inference_time} ms')\n", - "print('----------------------------------------')\n", - "\n", - "sort_idx = np.flip(np.squeeze(np.argsort(cpu_results)))\n", - "print('------------ Top 5 labels are: ----------------------------')\n", - "print(labels[sort_idx[:5]])\n", - "print('-----------------------------------------------------------')" - ] - }, - { - "cell_type": "markdown", - "id": "2d267505-2078-44f0-9db2-e150b6e7af76", - "metadata": {}, - "source": [ - "#### iGPU Inference" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": 
"05b83ff1-63a1-40cf-b4a5-013857b42a53", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------\n", - "Final top prediction is: Golden Retriever\n", - "----------------------------------------\n", - "----------------------------------------\n", - "Inference time: 101.22 ms\n", - "----------------------------------------\n", - "------------ Top 5 labels are: ----------------------------\n", - "['Golden Retriever' 'Labrador Retriever' 'Norwich Terrier'\n", - " 'Curly-coated Retriever' 'Flat-Coated Retriever']\n", - "-----------------------------------------------------------\n" - ] - } - ], - "source": [ - "#iGPU inference\n", - "dml_options = onnxruntime.SessionOptions()\n", - "\n", - "# Create Inference Session to run the quantized model on the iGPU\n", - "dml_session = onnxruntime.InferenceSession(\n", - " onnx_model_path,\n", - " providers = ['DmlExecutionProvider'],\n", - " provider_options = [{\"device_id\": \"0\"}]\n", - ")\n", - "start = time.time()\n", - "dml_outputs = dml_session.run(None, {'input': input_data})\n", - "end = time.time()\n", - "\n", - "dml_results = postprocess(dml_outputs)\n", - "inference_time = np.round((end - start) * 1000, 2)\n", - "idx = np.argmax(dml_results)\n", - "\n", - "print('----------------------------------------')\n", - "print('Final top prediction is: ' + labels[idx])\n", - "print('----------------------------------------')\n", - "\n", - "print('----------------------------------------')\n", - "print('Inference time: ' + str(inference_time) + \" ms\")\n", - "print('----------------------------------------')\n", - "\n", - "sort_idx = np.flip(np.squeeze(np.argsort(dml_results)))\n", - "print('------------ Top 5 labels are: ----------------------------')\n", - "print(labels[sort_idx[:5]])\n", - "print('-----------------------------------------------------------') " - ] - }, - { - "cell_type": "markdown", - "id": 
"29c9156d-6186-4256-9e3d-3826d6a3e0b8", - "metadata": {}, - "source": [ - "#### NPU Inference" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "128e098d", - "metadata": {}, - "outputs": [], - "source": [ - "# set the RYZEN_AI_INSTALLATION_PATH location.\n", - "# Reader can find the installation path either under C:\\Program Files or the path defined at the time of installation.\n", - "# example\n", - "os.environ['RYZEN_AI_INSTALLATION_PATH']='C:\\Program Files\\RyzenAI\\1.5.0-0627'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c122300c-c9c7-43b7-8e58-38a053a7f227", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "APU Type: STX\n", - "C:\\Program Files\\RyzenAI\u0001.5.0-0627\n", - "Setting xclbin file for STX\n", - "C:\\Program Files\\RyzenAI\\1.5.0-0627\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_4x4_Overlay.xclbin\n", - "----------------------------------------\n", - "Final top prediction is: Golden Retriever\n", - "----------------------------------------\n", - "----------------------------------------\n", - "Inference time: 12.82 ms\n", - "----------------------------------------\n", - "------------ Top 5 labels are: ----------------------------\n", - "['Golden Retriever' 'Labrador Retriever' 'Norwich Terrier'\n", - " 'Curly-coated Retriever' 'Flat-Coated Retriever']\n", - "-----------------------------------------------------------\n" - ] - } - ], - "source": [ - "#NPU inference\n", - "\n", - "# Before running, we need to set the ENV variable for the specific NPU we have\n", - "# Run pnputil as a subprocess to enumerate PCI devices\n", - "command = r'pnputil /enum-devices /bus PCI /deviceids '\n", - "process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n", - "stdout, stderr = process.communicate()\n", - "# Check for supported Hardware IDs\n", - "npu_type = ''\n", - "if 'PCI\\\\VEN_1022&DEV_1502&REV_00' in 
stdout.decode(): npu_type = 'PHX/HPT'\n", - "if 'PCI\\\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX'\n", - "if 'PCI\\\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX'\n", - "if 'PCI\\\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX'\n", - "\n", - "print(f\"APU Type: {npu_type}\")\n", - "\n", - "install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH']\n", - "print(install_dir)\n", - "\n", - "match npu_type:\n", - " case 'PHX/HPT':\n", - " print(\"Setting provider options for PHX/HPT\")\n", - " xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'phoenix', '4x4.xclbin')\n", - " provider_options = [{\n", - " 'target': 'X1',\n", - " 'xclbin': xclbin_file,\n", - " 'ai_analyzer_visualization': True,\n", - " 'ai_analyzer_profiling': True,\n", - " }]\n", - " case 'STX':\n", - " print(\"Setting provider options for STX\")\n", - " provider_options = [{\n", - " 'ai_analyzer_visualization': True,\n", - " 'ai_analyzer_profiling': True,\n", - " }]\n", - " case _:\n", - " print(\"Unrecognized APU type. 
Exiting.\")\n", - " exit()\n", - "\n", - "# Create session options\n", - "session_options = ort.SessionOptions()\n", - "session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal\n", - "\n", - "npu_session = onnxruntime.InferenceSession(\n", - " onnx_model_path,\n", - " sess_options = session_options,\n", - " providers = ['VitisAIExecutionProvider'],\n", - " provider_options = provider_options\n", - ")\n", - "\n", - "start = time.time()\n", - "npu_outputs = npu_session.run(None, {'input': input_data})\n", - "end = time.time()\n", - "\n", - "npu_results = postprocess(npu_outputs)\n", - "inference_time = np.round((end - start) * 1000, 2)\n", - "idx = np.argmax(npu_results)\n", - "\n", - "print('----------------------------------------')\n", - "print('Final top prediction is: ' + labels[idx])\n", - "print('----------------------------------------')\n", - "\n", - "print('----------------------------------------')\n", - "print('Inference time: ' + str(inference_time) + \" ms\")\n", - "print('----------------------------------------')\n", - "\n", - "sort_idx = np.flip(np.squeeze(np.argsort(npu_results)))\n", - "print('------------ Top 5 labels are: ----------------------------')\n", - "print(labels[sort_idx[:5]])\n", - "print('-----------------------------------------------------------')" - ] - }, - { - "cell_type": "markdown", - "id": "371b1bae-6afa-434a-9393-373a5aad3ac9", - "metadata": {}, - "source": [ - "#### 5. Model Analysis on NPU" - ] - }, - { - "cell_type": "markdown", - "id": "8ee5f148-094f-4e91-87c7-bce623a5af2e", - "metadata": {}, - "source": [ - "After NPU inference, there are several '.json' files generated by the Ryzen AI tracing tool, which could be open by the AI Analyzer for further optimization." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "fb04c788-f16a-4ce5-a7e8-6e414a45fda6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "^C\n" - ] - } - ], - "source": [ - "!aianalyzer ./ -p 8001" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9535c38b-4bb7-400a-bc75-be3b2018fe7b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "clone-0627", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/CNN-examples/torchvision_inference/classification.py b/CNN-examples/torchvision_inference/classification.py deleted file mode 100644 index a532ef64..00000000 --- a/CNN-examples/torchvision_inference/classification.py +++ /dev/null @@ -1,265 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- - -# Import necessary libraries -import os -import torch -import torch.nn as nn -import torchvision -import subprocess -import onnxruntime -import numpy as np -import onnx -import shutil -import time -from timeit import default_timer as timer -from quark.onnx import ModelQuantizer -from quark.onnx.quantization.config import Config, get_default_config -from utils_custom import ImageDataReader, evaluate_onnx_model -import json -from classification_utils import calib_data_formatting - -# ---------------- Model Setup ---------------- # - -# Define directories -models_dir = "models" -os.makedirs(models_dir, exist_ok=True) - -# Load pre-trained ResNet50 model -model = torchvision.models.resnet50(weights="IMAGENET1K_V2") - -# Save the model -model.to("cpu") -torch.save(model, os.path.join(models_dir, "resnet50.pt")) - -# Export model to ONNX -dummy_inputs = torch.randn(1, 3, 224, 224) -input_names = ['input'] -output_names = ['output'] -dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} -tmp_model_path = os.path.join(models_dir, "resnet50.onnx") - -torch.onnx.export( - model, - dummy_inputs, - tmp_model_path, - export_params=True, - opset_version=17, # Recommended opset - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, -) - -print(f" Model exported to ONNX at: {tmp_model_path}") - -# ---------------- Quark Quantization ---------------- # - -# Define dataset directory -calib_dir = "calib_data" -# fomat val_images and store it in calib_data for calibeeration. 
-os.makedirs(calib_dir, exist_ok=True) -calib_data_formatting() - -# Set input & output ONNX model paths -input_model_path = tmp_model_path -output_model_path = os.path.join(models_dir, "resnet50_quantized.onnx") - -# Preprocessing transformations -preprocess = torchvision.transforms.Compose([ - torchvision.transforms.Resize(256), - torchvision.transforms.CenterCrop(224), - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), -]) - -# Load dataset -calib_dataset = torchvision.datasets.ImageFolder(root=calib_dir, transform=preprocess) - -#Data set -num_calib_data = 600 -calib_dataset = torch.utils.data.Subset(calib_dataset, range(num_calib_data)) - -# Define DataLoader for Calibration -calibration_dataloader = torch.utils.data.DataLoader(calib_dataset, batch_size=10, shuffle=False) - -# Configure Quark Quantization -quant_config = get_default_config("XINT8") # Use XINT8 quantization -config = Config(global_quant_config=quant_config) - -# Create an ONNX Quantizer -quantizer = ModelQuantizer(config) - -# Perform Quark Quantization -quant_model = quantizer.quantize_model( - model_input=input_model_path, - model_output=output_model_path, - calibration_data_reader=ImageDataReader(calibration_dataloader) # Use ImageDataReader from utils_custom -) - -print(f" Quark Quantized model saved at: {output_model_path}") - - -# ---------------- Inference & Evaluation ---------------- # - -from PIL import Image - -def load_labels(path): - with open(path) as f: - data = json.load(f) - return np.asarray(data) - -def preprocess_image(input): - normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - - transform = torchvision.transforms.Compose([ - torchvision.transforms.ToTensor(), - torchvision.transforms.Resize((224, 224)), - normalize, - ]) - img_tensor = transform(input).unsqueeze(0) - return img_tensor.numpy() - -def softmax(x): - x = x.reshape(-1) - e_x = np.exp(x 
- np.max(x)) - return e_x / e_x.sum(axis=0) - -def postprocess(result): - return softmax(np.array(result)).tolist() - -labels = load_labels('data/imagenet-simple-labels.json') -image = Image.open('data/dog.jpg') - -print("Image size: ", image.size) -input_data = preprocess_image(image) - -# Run inference on CPU -onnx_model_path = output_model_path -cpu_options = onnxruntime.SessionOptions() - -cpu_session = onnxruntime.InferenceSession( - onnx_model_path, - providers=['CPUExecutionProvider'], - sess_options=cpu_options, -) - -start = timer() -cpu_outputs = cpu_session.run(None, {'input': input_data}) -end = timer() - -cpu_results = postprocess(cpu_outputs) -inference_time = np.round((end - start) * 1000, 2) -idx = np.argmax(cpu_results) - -print('----------------------------------------') -print(f'Final top prediction is: {labels[idx]}') -print('----------------------------------------') -print(f'Inference time: {inference_time} ms') -print('----------------------------------------') - -sort_idx = np.flip(np.squeeze(np.argsort(cpu_results))) -print('------------ Top 5 labels are: ----------------------------') -print(labels[sort_idx[:5]]) -print('-----------------------------------------------------------') - -#iGPU inference -dml_options = onnxruntime.SessionOptions() - -# Create Inference Session to run the quantized model on the iGPU -dml_session = onnxruntime.InferenceSession( - onnx_model_path, - providers = ['DmlExecutionProvider'], - provider_options = [{"device_id": "0"}] -) -start = time.time() -dml_outputs = dml_session.run(None, {'input': input_data}) -end = time.time() - -dml_results = postprocess(dml_outputs) -inference_time = np.round((end - start) * 1000, 2) -idx = np.argmax(dml_results) - -print('----------------------------------------') -print('Final top prediction is: ' + labels[idx]) -print('----------------------------------------') - -print('----------------------------------------') -print('Inference time: ' + str(inference_time) + " ms") 
-print('----------------------------------------') - -sort_idx = np.flip(np.squeeze(np.argsort(dml_results))) -print('------------ Top 5 labels are: ----------------------------') -print(labels[sort_idx[:5]]) -print('-----------------------------------------------------------') - -#NPU inference - -# Before running, we need to set the ENV variable for the specific NPU we have -# Run pnputil as a subprocess to enumerate PCI devices -command = r'pnputil /enum-devices /bus PCI /deviceids ' -process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -stdout, stderr = process.communicate() -# Check for supported Hardware IDs -npu_type = '' -if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT' -if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX' -if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX' -if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX' - -print(f"NPU Type: {npu_type}") - -install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH'] -match npu_type: - case 'PHX/HPT': - print("Setting provider options for PHX/HPT") - xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'phoenix', '4x4.xclbin') - provider_options = [{ - 'target': 'X1', - 'xclbin': xclbin_file, - 'ai_analyzer_visualization': True, - 'ai_analyzer_profiling': True, - }] - case 'STX': - print("Setting provider options for STX") - xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'strix', 'AMD_AIE2P_4x4_Overlay.xclbin') - provider_options = [{ - 'ai_analyzer_visualization': True, - 'ai_analyzer_profiling': True, - }] - case _: - print("Unrecognized APU type. 
Exiting.") - exit() - -# Create session options -session_options = ort.SessionOptions() -session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - -npu_session = onnxruntime.InferenceSession( - onnx_model_path, - sess_options = session_options, - providers = ['VitisAIExecutionProvider'], - provider_options = provider_options -) - -start = time.time() -npu_outputs = npu_session.run(None, {'input': input_data}) -end = time.time() - -npu_results = postprocess(npu_outputs) -inference_time = np.round((end - start) * 1000, 2) -idx = np.argmax(npu_results) - -print('----------------------------------------') -print('Final top prediction is: ' + labels[idx]) -print('----------------------------------------') - -print('----------------------------------------') -print('Inference time: ' + str(inference_time) + " ms") -print('----------------------------------------') - -sort_idx = np.flip(np.squeeze(np.argsort(npu_results))) -print('------------ Top 5 labels are: ----------------------------') -print(labels[sort_idx[:5]]) -print('-----------------------------------------------------------') diff --git a/CNN-examples/torchvision_inference/classification_utils.py b/CNN-examples/torchvision_inference/classification_utils.py deleted file mode 100644 index 23183bad..00000000 --- a/CNN-examples/torchvision_inference/classification_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -from pathlib import Path -import sys -import os -import shutil - - -def get_directories(): - current_dir = Path(__file__).resolve().parent - - # models directory for resnet sample - models_dir = current_dir / "models" - models_dir.mkdir(parents=True, exist_ok=True) - - return current_dir, models_dir - -def calib_data_formatting(): - - source_folder = 'val_images' - calib_data_path = 'calib_data' - - if not os.path.exists(source_folder): - print("The provided data path does not exist.") - sys.exit(1) - - files = os.listdir(source_folder) - - for filename in files: - if not filename.startswith('ILSVRC2012_val_') or not filename.endswith( - '.JPEG'): - continue - - n_identifier = filename.split('_')[-1].split('.')[0] - folder_name = n_identifier - folder_path = os.path.join(source_folder, folder_name) - if not os.path.exists(folder_path): - os.makedirs(folder_path) - file_path = os.path.join(source_folder, filename) - destination = os.path.join(folder_path, filename) - shutil.move(file_path, destination) - - print("File organization complete.") - - if not os.path.exists(calib_data_path): - os.makedirs(calib_data_path) - - destination_folder = calib_data_path - - subfolders = os.listdir(source_folder) - - for subfolder in subfolders: - source_subfolder = os.path.join(source_folder, subfolder) - destination_subfolder = os.path.join(destination_folder, subfolder) - os.makedirs(destination_subfolder, exist_ok=True) - - files = os.listdir(source_subfolder) - - if files: - file_to_copy = files[0] - source_file = os.path.join(source_subfolder, file_to_copy) - destination_file = os.path.join(destination_subfolder, file_to_copy) - - shutil.copy(source_file, destination_file) - - print("Creating calibration dataset complete.") diff --git a/CNN-examples/torchvision_inference/data/dog.jpg b/CNN-examples/torchvision_inference/data/dog.jpg deleted file mode 100644 index 99d42c28..00000000 Binary files 
a/CNN-examples/torchvision_inference/data/dog.jpg and /dev/null differ diff --git a/CNN-examples/torchvision_inference/data/imagenet-simple-labels.json b/CNN-examples/torchvision_inference/data/imagenet-simple-labels.json deleted file mode 100644 index 7e7739ce..00000000 --- a/CNN-examples/torchvision_inference/data/imagenet-simple-labels.json +++ /dev/null @@ -1,1000 +0,0 @@ -["tench", -"goldfish", -"great white shark", -"tiger shark", -"hammerhead shark", -"electric ray", -"stingray", -"cock", -"hen", -"ostrich", -"brambling", -"goldfinch", -"house finch", -"junco", -"indigo bunting", -"American robin", -"bulbul", -"jay", -"magpie", -"chickadee", -"American dipper", -"kite", -"bald eagle", -"vulture", -"great grey owl", -"fire salamander", -"smooth newt", -"newt", -"spotted salamander", -"axolotl", -"American bullfrog", -"tree frog", -"tailed frog", -"loggerhead sea turtle", -"leatherback sea turtle", -"mud turtle", -"terrapin", -"box turtle", -"banded gecko", -"green iguana", -"Carolina anole", -"desert grassland whiptail lizard", -"agama", -"frilled-necked lizard", -"alligator lizard", -"Gila monster", -"European green lizard", -"chameleon", -"Komodo dragon", -"Nile crocodile", -"American alligator", -"triceratops", -"worm snake", -"ring-necked snake", -"eastern hog-nosed snake", -"smooth green snake", -"kingsnake", -"garter snake", -"water snake", -"vine snake", -"night snake", -"boa constrictor", -"African rock python", -"Indian cobra", -"green mamba", -"sea snake", -"Saharan horned viper", -"eastern diamondback rattlesnake", -"sidewinder", -"trilobite", -"harvestman", -"scorpion", -"yellow garden spider", -"barn spider", -"European garden spider", -"southern black widow", -"tarantula", -"wolf spider", -"tick", -"centipede", -"black grouse", -"ptarmigan", -"ruffed grouse", -"prairie grouse", -"peacock", -"quail", -"partridge", -"grey parrot", -"macaw", -"sulphur-crested cockatoo", -"lorikeet", -"coucal", -"bee eater", -"hornbill", -"hummingbird", -"jacamar", 
-"toucan", -"duck", -"red-breasted merganser", -"goose", -"black swan", -"tusker", -"echidna", -"platypus", -"wallaby", -"koala", -"wombat", -"jellyfish", -"sea anemone", -"brain coral", -"flatworm", -"nematode", -"conch", -"snail", -"slug", -"sea slug", -"chiton", -"chambered nautilus", -"Dungeness crab", -"rock crab", -"fiddler crab", -"red king crab", -"American lobster", -"spiny lobster", -"crayfish", -"hermit crab", -"isopod", -"white stork", -"black stork", -"spoonbill", -"flamingo", -"little blue heron", -"great egret", -"bittern", -"crane (bird)", -"limpkin", -"common gallinule", -"American coot", -"bustard", -"ruddy turnstone", -"dunlin", -"common redshank", -"dowitcher", -"oystercatcher", -"pelican", -"king penguin", -"albatross", -"grey whale", -"killer whale", -"dugong", -"sea lion", -"Chihuahua", -"Japanese Chin", -"Maltese", -"Pekingese", -"Shih Tzu", -"King Charles Spaniel", -"Papillon", -"toy terrier", -"Rhodesian Ridgeback", -"Afghan Hound", -"Basset Hound", -"Beagle", -"Bloodhound", -"Bluetick Coonhound", -"Black and Tan Coonhound", -"Treeing Walker Coonhound", -"English foxhound", -"Redbone Coonhound", -"borzoi", -"Irish Wolfhound", -"Italian Greyhound", -"Whippet", -"Ibizan Hound", -"Norwegian Elkhound", -"Otterhound", -"Saluki", -"Scottish Deerhound", -"Weimaraner", -"Staffordshire Bull Terrier", -"American Staffordshire Terrier", -"Bedlington Terrier", -"Border Terrier", -"Kerry Blue Terrier", -"Irish Terrier", -"Norfolk Terrier", -"Norwich Terrier", -"Yorkshire Terrier", -"Wire Fox Terrier", -"Lakeland Terrier", -"Sealyham Terrier", -"Airedale Terrier", -"Cairn Terrier", -"Australian Terrier", -"Dandie Dinmont Terrier", -"Boston Terrier", -"Miniature Schnauzer", -"Giant Schnauzer", -"Standard Schnauzer", -"Scottish Terrier", -"Tibetan Terrier", -"Australian Silky Terrier", -"Soft-coated Wheaten Terrier", -"West Highland White Terrier", -"Lhasa Apso", -"Flat-Coated Retriever", -"Curly-coated Retriever", -"Golden Retriever", -"Labrador 
Retriever", -"Chesapeake Bay Retriever", -"German Shorthaired Pointer", -"Vizsla", -"English Setter", -"Irish Setter", -"Gordon Setter", -"Brittany Spaniel", -"Clumber Spaniel", -"English Springer Spaniel", -"Welsh Springer Spaniel", -"Cocker Spaniels", -"Sussex Spaniel", -"Irish Water Spaniel", -"Kuvasz", -"Schipperke", -"Groenendael", -"Malinois", -"Briard", -"Australian Kelpie", -"Komondor", -"Old English Sheepdog", -"Shetland Sheepdog", -"collie", -"Border Collie", -"Bouvier des Flandres", -"Rottweiler", -"German Shepherd Dog", -"Dobermann", -"Miniature Pinscher", -"Greater Swiss Mountain Dog", -"Bernese Mountain Dog", -"Appenzeller Sennenhund", -"Entlebucher Sennenhund", -"Boxer", -"Bullmastiff", -"Tibetan Mastiff", -"French Bulldog", -"Great Dane", -"St. Bernard", -"husky", -"Alaskan Malamute", -"Siberian Husky", -"Dalmatian", -"Affenpinscher", -"Basenji", -"pug", -"Leonberger", -"Newfoundland", -"Pyrenean Mountain Dog", -"Samoyed", -"Pomeranian", -"Chow Chow", -"Keeshond", -"Griffon Bruxellois", -"Pembroke Welsh Corgi", -"Cardigan Welsh Corgi", -"Toy Poodle", -"Miniature Poodle", -"Standard Poodle", -"Mexican hairless dog", -"grey wolf", -"Alaskan tundra wolf", -"red wolf", -"coyote", -"dingo", -"dhole", -"African wild dog", -"hyena", -"red fox", -"kit fox", -"Arctic fox", -"grey fox", -"tabby cat", -"tiger cat", -"Persian cat", -"Siamese cat", -"Egyptian Mau", -"cougar", -"lynx", -"leopard", -"snow leopard", -"jaguar", -"lion", -"tiger", -"cheetah", -"brown bear", -"American black bear", -"polar bear", -"sloth bear", -"mongoose", -"meerkat", -"tiger beetle", -"ladybug", -"ground beetle", -"longhorn beetle", -"leaf beetle", -"dung beetle", -"rhinoceros beetle", -"weevil", -"fly", -"bee", -"ant", -"grasshopper", -"cricket", -"stick insect", -"cockroach", -"mantis", -"cicada", -"leafhopper", -"lacewing", -"dragonfly", -"damselfly", -"red admiral", -"ringlet", -"monarch butterfly", -"small white", -"sulphur butterfly", -"gossamer-winged butterfly", -"starfish", 
-"sea urchin", -"sea cucumber", -"cottontail rabbit", -"hare", -"Angora rabbit", -"hamster", -"porcupine", -"fox squirrel", -"marmot", -"beaver", -"guinea pig", -"common sorrel", -"zebra", -"pig", -"wild boar", -"warthog", -"hippopotamus", -"ox", -"water buffalo", -"bison", -"ram", -"bighorn sheep", -"Alpine ibex", -"hartebeest", -"impala", -"gazelle", -"dromedary", -"llama", -"weasel", -"mink", -"European polecat", -"black-footed ferret", -"otter", -"skunk", -"badger", -"armadillo", -"three-toed sloth", -"orangutan", -"gorilla", -"chimpanzee", -"gibbon", -"siamang", -"guenon", -"patas monkey", -"baboon", -"macaque", -"langur", -"black-and-white colobus", -"proboscis monkey", -"marmoset", -"white-headed capuchin", -"howler monkey", -"titi", -"Geoffroy's spider monkey", -"common squirrel monkey", -"ring-tailed lemur", -"indri", -"Asian elephant", -"African bush elephant", -"red panda", -"giant panda", -"snoek", -"eel", -"coho salmon", -"rock beauty", -"clownfish", -"sturgeon", -"garfish", -"lionfish", -"pufferfish", -"abacus", -"abaya", -"academic gown", -"accordion", -"acoustic guitar", -"aircraft carrier", -"airliner", -"airship", -"altar", -"ambulance", -"amphibious vehicle", -"analog clock", -"apiary", -"apron", -"waste container", -"assault rifle", -"backpack", -"bakery", -"balance beam", -"balloon", -"ballpoint pen", -"Band-Aid", -"banjo", -"baluster", -"barbell", -"barber chair", -"barbershop", -"barn", -"barometer", -"barrel", -"wheelbarrow", -"baseball", -"basketball", -"bassinet", -"bassoon", -"swimming cap", -"bath towel", -"bathtub", -"station wagon", -"lighthouse", -"beaker", -"military cap", -"beer bottle", -"beer glass", -"bell-cot", -"bib", -"tandem bicycle", -"bikini", -"ring binder", -"binoculars", -"birdhouse", -"boathouse", -"bobsleigh", -"bolo tie", -"poke bonnet", -"bookcase", -"bookstore", -"bottle cap", -"bow", -"bow tie", -"brass", -"bra", -"breakwater", -"breastplate", -"broom", -"bucket", -"buckle", -"bulletproof vest", -"high-speed 
train", -"butcher shop", -"taxicab", -"cauldron", -"candle", -"cannon", -"canoe", -"can opener", -"cardigan", -"car mirror", -"carousel", -"tool kit", -"carton", -"car wheel", -"automated teller machine", -"cassette", -"cassette player", -"castle", -"catamaran", -"CD player", -"cello", -"mobile phone", -"chain", -"chain-link fence", -"chain mail", -"chainsaw", -"chest", -"chiffonier", -"chime", -"china cabinet", -"Christmas stocking", -"church", -"movie theater", -"cleaver", -"cliff dwelling", -"cloak", -"clogs", -"cocktail shaker", -"coffee mug", -"coffeemaker", -"coil", -"combination lock", -"computer keyboard", -"confectionery store", -"container ship", -"convertible", -"corkscrew", -"cornet", -"cowboy boot", -"cowboy hat", -"cradle", -"crane (machine)", -"crash helmet", -"crate", -"infant bed", -"Crock Pot", -"croquet ball", -"crutch", -"cuirass", -"dam", -"desk", -"desktop computer", -"rotary dial telephone", -"diaper", -"digital clock", -"digital watch", -"dining table", -"dishcloth", -"dishwasher", -"disc brake", -"dock", -"dog sled", -"dome", -"doormat", -"drilling rig", -"drum", -"drumstick", -"dumbbell", -"Dutch oven", -"electric fan", -"electric guitar", -"electric locomotive", -"entertainment center", -"envelope", -"espresso machine", -"face powder", -"feather boa", -"filing cabinet", -"fireboat", -"fire engine", -"fire screen sheet", -"flagpole", -"flute", -"folding chair", -"football helmet", -"forklift", -"fountain", -"fountain pen", -"four-poster bed", -"freight car", -"French horn", -"frying pan", -"fur coat", -"garbage truck", -"gas mask", -"gas pump", -"goblet", -"go-kart", -"golf ball", -"golf cart", -"gondola", -"gong", -"gown", -"grand piano", -"greenhouse", -"grille", -"grocery store", -"guillotine", -"barrette", -"hair spray", -"half-track", -"hammer", -"hamper", -"hair dryer", -"hand-held computer", -"handkerchief", -"hard disk drive", -"harmonica", -"harp", -"harvester", -"hatchet", -"holster", -"home theater", -"honeycomb", -"hook", 
-"hoop skirt", -"horizontal bar", -"horse-drawn vehicle", -"hourglass", -"iPod", -"clothes iron", -"jack-o'-lantern", -"jeans", -"jeep", -"T-shirt", -"jigsaw puzzle", -"pulled rickshaw", -"joystick", -"kimono", -"knee pad", -"knot", -"lab coat", -"ladle", -"lampshade", -"laptop computer", -"lawn mower", -"lens cap", -"paper knife", -"library", -"lifeboat", -"lighter", -"limousine", -"ocean liner", -"lipstick", -"slip-on shoe", -"lotion", -"speaker", -"loupe", -"sawmill", -"magnetic compass", -"mail bag", -"mailbox", -"tights", -"tank suit", -"manhole cover", -"maraca", -"marimba", -"mask", -"match", -"maypole", -"maze", -"measuring cup", -"medicine chest", -"megalith", -"microphone", -"microwave oven", -"military uniform", -"milk can", -"minibus", -"miniskirt", -"minivan", -"missile", -"mitten", -"mixing bowl", -"mobile home", -"Model T", -"modem", -"monastery", -"monitor", -"moped", -"mortar", -"square academic cap", -"mosque", -"mosquito net", -"scooter", -"mountain bike", -"tent", -"computer mouse", -"mousetrap", -"moving van", -"muzzle", -"nail", -"neck brace", -"necklace", -"nipple", -"notebook computer", -"obelisk", -"oboe", -"ocarina", -"odometer", -"oil filter", -"organ", -"oscilloscope", -"overskirt", -"bullock cart", -"oxygen mask", -"packet", -"paddle", -"paddle wheel", -"padlock", -"paintbrush", -"pajamas", -"palace", -"pan flute", -"paper towel", -"parachute", -"parallel bars", -"park bench", -"parking meter", -"passenger car", -"patio", -"payphone", -"pedestal", -"pencil case", -"pencil sharpener", -"perfume", -"Petri dish", -"photocopier", -"plectrum", -"Pickelhaube", -"picket fence", -"pickup truck", -"pier", -"piggy bank", -"pill bottle", -"pillow", -"ping-pong ball", -"pinwheel", -"pirate ship", -"pitcher", -"hand plane", -"planetarium", -"plastic bag", -"plate rack", -"plow", -"plunger", -"Polaroid camera", -"pole", -"police van", -"poncho", -"billiard table", -"soda bottle", -"pot", -"potter's wheel", -"power drill", -"prayer rug", -"printer", 
-"prison", -"projectile", -"projector", -"hockey puck", -"punching bag", -"purse", -"quill", -"quilt", -"race car", -"racket", -"radiator", -"radio", -"radio telescope", -"rain barrel", -"recreational vehicle", -"reel", -"reflex camera", -"refrigerator", -"remote control", -"restaurant", -"revolver", -"rifle", -"rocking chair", -"rotisserie", -"eraser", -"rugby ball", -"ruler", -"running shoe", -"safe", -"safety pin", -"salt shaker", -"sandal", -"sarong", -"saxophone", -"scabbard", -"weighing scale", -"school bus", -"schooner", -"scoreboard", -"CRT screen", -"screw", -"screwdriver", -"seat belt", -"sewing machine", -"shield", -"shoe store", -"shoji", -"shopping basket", -"shopping cart", -"shovel", -"shower cap", -"shower curtain", -"ski", -"ski mask", -"sleeping bag", -"slide rule", -"sliding door", -"slot machine", -"snorkel", -"snowmobile", -"snowplow", -"soap dispenser", -"soccer ball", -"sock", -"solar thermal collector", -"sombrero", -"soup bowl", -"space bar", -"space heater", -"space shuttle", -"spatula", -"motorboat", -"spider web", -"spindle", -"sports car", -"spotlight", -"stage", -"steam locomotive", -"through arch bridge", -"steel drum", -"stethoscope", -"scarf", -"stone wall", -"stopwatch", -"stove", -"strainer", -"tram", -"stretcher", -"couch", -"stupa", -"submarine", -"suit", -"sundial", -"sunglass", -"sunglasses", -"sunscreen", -"suspension bridge", -"mop", -"sweatshirt", -"swimsuit", -"swing", -"switch", -"syringe", -"table lamp", -"tank", -"tape player", -"teapot", -"teddy bear", -"television", -"tennis ball", -"thatched roof", -"front curtain", -"thimble", -"threshing machine", -"throne", -"tile roof", -"toaster", -"tobacco shop", -"toilet seat", -"torch", -"totem pole", -"tow truck", -"toy store", -"tractor", -"semi-trailer truck", -"tray", -"trench coat", -"tricycle", -"trimaran", -"tripod", -"triumphal arch", -"trolleybus", -"trombone", -"tub", -"turnstile", -"typewriter keyboard", -"umbrella", -"unicycle", -"upright piano", -"vacuum 
cleaner", -"vase", -"vault", -"velvet", -"vending machine", -"vestment", -"viaduct", -"violin", -"volleyball", -"waffle iron", -"wall clock", -"wallet", -"wardrobe", -"military aircraft", -"sink", -"washing machine", -"water bottle", -"water jug", -"water tower", -"whiskey jug", -"whistle", -"wig", -"window screen", -"window shade", -"Windsor tie", -"wine bottle", -"wing", -"wok", -"wooden spoon", -"wool", -"split-rail fence", -"shipwreck", -"yawl", -"yurt", -"website", -"comic book", -"crossword", -"traffic sign", -"traffic light", -"dust jacket", -"menu", -"plate", -"guacamole", -"consomme", -"hot pot", -"trifle", -"ice cream", -"ice pop", -"baguette", -"bagel", -"pretzel", -"cheeseburger", -"hot dog", -"mashed potato", -"cabbage", -"broccoli", -"cauliflower", -"zucchini", -"spaghetti squash", -"acorn squash", -"butternut squash", -"cucumber", -"artichoke", -"bell pepper", -"cardoon", -"mushroom", -"Granny Smith", -"strawberry", -"orange", -"lemon", -"fig", -"pineapple", -"banana", -"jackfruit", -"custard apple", -"pomegranate", -"hay", -"carbonara", -"chocolate syrup", -"dough", -"meatloaf", -"pizza", -"pot pie", -"burrito", -"red wine", -"espresso", -"cup", -"eggnog", -"alp", -"bubble", -"cliff", -"coral reef", -"geyser", -"lakeshore", -"promontory", -"shoal", -"seashore", -"valley", -"volcano", -"baseball player", -"bridegroom", -"scuba diver", -"rapeseed", -"daisy", -"yellow lady's slipper", -"corn", -"acorn", -"rose hip", -"horse chestnut seed", -"coral fungus", -"agaric", -"gyromitra", -"stinkhorn mushroom", -"earth star", -"hen-of-the-woods", -"bolete", -"ear of corn", -"toilet paper"] diff --git a/CNN-examples/torchvision_inference/requirements.txt b/CNN-examples/torchvision_inference/requirements.txt deleted file mode 100644 index a7f99950..00000000 --- a/CNN-examples/torchvision_inference/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -torch==2.8.0 -torchvision==0.23.0 -tqdm==4.67.1 -ipykernel==7.1.0 -matplotlib==3.10.8 \ No newline at end of file diff 
--git a/CNN-examples/torchvision_inference/utils_custom.py b/CNN-examples/torchvision_inference/utils_custom.py deleted file mode 100644 index 98ac1837..00000000 --- a/CNN-examples/torchvision_inference/utils_custom.py +++ /dev/null @@ -1,34 +0,0 @@ -import torch -import numpy as np -import onnxruntime - -class ImageDataReader: - """Data reader class for ONNX calibration.""" - def __init__(self, dataloader): - self.dataloader = iter(dataloader) - - def get_next(self): - try: - images, _ = next(self.dataloader) # Ignore labels - return {"input": images.numpy()} - except StopIteration: - return None - -def evaluate_onnx_model(onnx_model_path, dataset, batch_size=8): - """Evaluates ONNX model accuracy using given dataset.""" - session = onnxruntime.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider']) - - correct = 0 - total = 0 - dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False) - - for images, labels in dataloader: - inputs = {"input": images.numpy()} - outputs = session.run(None, inputs) - predictions = np.argmax(outputs, axis=1) - - correct += (predictions.argmax(axis=1) == labels.numpy()).sum() - total += labels.size(0) - - accuracy = 100 * correct / total - return accuracy diff --git a/Demos/.gitkeep b/Demos/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/Demos/ASR/Whisper/README.md b/Demos/ASR/Whisper/README.md deleted file mode 100644 index 3f4bee5d..00000000 --- a/Demos/ASR/Whisper/README.md +++ /dev/null @@ -1,158 +0,0 @@ - - - - -

Ryzen™ AI Automatic Speech Recognition

-
- -# Automatic Speech Recognition using OpenAI Whisper - -Unlock fast, on-device speech recognition with RyzenAI and OpenAI’s Whisper. This demo walks you through preparing and running OpenAI's Whisper (base, small, medium) for fast, local ASR on AMD NPU. - -## Features - -* 🚀 Download NPU Optimized Whisper ONNX models from HF -* ⚡ Run ASR locally on CPU or NPU -* 📊 Evaluate ASR on LibriSpeech samples and report WER/CER -* 🎧 Supports transcription of audio files and microphone input -* ⏱️ Reports Performance using RTF and TTFT - -## 🔗 Quick Links -- [Prerequisites](#prerequisites) -- [Accelerate Whisper on AMD NPU](#accelerate-whisper-on-amd-npu) - - [Why run on NPU?](#why-run-on-npu) - - [Set up VitisEP Configuration for NPU](#set-up-vitisep-configuration-for-npu) -- [ Usage](#usage) - - [Transcribe Audio File](#transcribe-audio-file) - - [Transcribe from Microphone](#transcribe-from-microphone) - - [Evaluate on Dataset](#evaluate-on-dataset) -- [ Notes](#notes) - -## 📦 Prerequisites - -1. **Install Ryzen AI SDK** - Follow [RyzenAI documentation](https://ryzenai.docs.amd.com/en/latest/inst.html#) to install SDK and drivers. - -2. **Activate environment** - - ```bash - conda activate ryzen-ai- - ``` - -3. **Clone repository** - - ```bash - git clone https://github.com/amd/RyzenAI-SW.git - cd RyzenAI-SW/demo/ASR/Whisper - ``` - -4. **Install dependencies** - - ```bash - pip install -r requirements.txt - ``` - -## ⚡Accelerate Whisper on AMD NPU - -### Why run on NPU? - -* Offloads compute from CPU onto NPU, freeing up CPU for other tasks. -* Delivers higher throughput and lower power consumption when running AI workloads -* Optimized execution of Whisper’s encoder and decoder models. -* Runs models with BFP16 precision for near-FP32 accuracy and INT8-like performance. - -#### NPU Run for Whisper-Base -When running inference on the NPU, 100% of the encoder operators and 93.4% of the decoder operators are executed on the NPU. 
-```bash - #encoder operations - [Vitis AI EP] No. of Operators : VAIML 225 - [Vitis AI EP] No. of Subgraphs : VAIML 1 - - #decoder operations - [Vitis AI EP] No. of Operators : CPU 24 VAIML 341 - [Vitis AI EP] No. of Subgraphs : VAIML 2 -``` -#### Set up VitisEP Configuration for NPU - -* Edit `config/model_config.json` to specify Execution Providers. -* For NPU: - - * Set `cache_key` and `cache_dir` - * Use corresponding `vitisai_config` from `config/` - -Example: - -```json -{ - "config_file": "config/vitisai_config_whisper_decoder.json", - "cache_dir": "./cache", - "cache_key": "whisper_medium_decoder" -} -``` -#### ⚠️ Special Instructions for Whisper-Medium -When running whisper-medium on NPU, it is recommended to add the following flags to `configs\vitisai_config_whisper_encoder.json` incase of compilation issues. -```json -"vaiml_config": { - "optimize_level": 3, - "aiecompiler_args": "--system-stack-size=512" -} -``` -These settings: - -- optimize_level=3: Enables aggressive optimizations for larger models. -- --system-stack-size=512: Increases the AI Engine system stack size to handle Whisper-Medium’s higher resource demand. - -## 🚀 Usage - -### Transcribe Audio File -Use this to transcribe a pre-recorded `.wav` file into text using the Whisper mode -```bash -python run_whisper.py \ - --model-type \ - --device npu \ - --input path/to/audio.wav -``` -- Replace with whisper-base, whisper-small, or whisper-medium. - -- Replace path/to/audio.wav with your audio file. - -For example, run whisper-large-v3-turbo -```bash -python run_whisper.py --model-type whisper-large-v3-turbo --device npu --input audio_files\1089-134686-0000.wav -``` - -### Transcribe from Microphone -Run real-time speech-to-text by capturing audio from your microphone. 
This allows you to speak and see live transcription: - -```bash -python run_whisper.py \ - --model-type \ - --device npu \ - --input mic \ - --duration 0 -``` -- --duration 0 means continuous recording until stopped (Ctrl+C) or detects silence for a set duration - -- Ideal for demos and testing live ASR performance. - -### Evaluate on Dataset -Run batch evaluation on a dataset (e.g., LibriSpeech samples) to measure model performance with metrics like WER, CER, and RTF: -```bash -python run_whisper.py \ - --model-type \ - --device npu \ - --eval-dir eval_dataset/LibriSpeech-samples \ - --results-dir results -``` -- --eval-dir specifies the dataset directory. - -- --results-dir is where evaluation reports (WER, CER, TTFT, RTF) will be saved. - -- Useful for benchmarking and validating models. - -## Notes - -* First run on NPU may take \~15 min for model compilation. -* Ensure paths for encoder, decoder, and config files are correct. -* Supports CPU and NPU devices. - diff --git a/Demos/ASR/Whisper/audio_files/1089-134686-0000.wav b/Demos/ASR/Whisper/audio_files/1089-134686-0000.wav deleted file mode 100644 index 2cdc6df3..00000000 Binary files a/Demos/ASR/Whisper/audio_files/1089-134686-0000.wav and /dev/null differ diff --git a/Demos/ASR/Whisper/audio_files/61-52s.wav b/Demos/ASR/Whisper/audio_files/61-52s.wav deleted file mode 100644 index b94fb03b..00000000 Binary files a/Demos/ASR/Whisper/audio_files/61-52s.wav and /dev/null differ diff --git a/Demos/ASR/Whisper/config/model_config.json b/Demos/ASR/Whisper/config/model_config.json deleted file mode 100644 index ffd53868..00000000 --- a/Demos/ASR/Whisper/config/model_config.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "whisper": { - "base": { - "npu": { - "encoder": { - "cache_key": "whisper_base_encoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_encoder.json" - }, - "decoder": { - "cache_key": "whisper_base_decoder", - "cache_dir": "./cache/", - "config_file": 
"./config/vitisai_config_whisper_decoder.json" - } - }, - "cpu": { - "encoder": {}, - "decoder": {} - } - }, - "small": { - "npu": { - "encoder": { - "cache_key": "whisper_small_encoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_encoder.json" - }, - "decoder": { - "cache_key": "whisper_small_decoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_decoder.json" - } - }, - "cpu": { - "encoder": {}, - "decoder": {} - } - }, - "medium": { - "npu": { - "encoder": { - "cache_key": "whisper_medium_encoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_encoder.json" - }, - "decoder": { - "cache_key": "whisper_medium_decoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_decoder.json" - } - }, - "cpu": { - "encoder": {}, - "decoder": {} - } - }, - "large-v3-turbo": { - "npu": { - "encoder": { - "cache_key": "whisper_large_turbo_encoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_encoder.json" - }, - "decoder": { - "cache_key": "whisper_large_turbo_decoder", - "cache_dir": "./cache/", - "config_file": "./config/vitisai_config_whisper_decoder.json" - } - }, - "cpu": { - "encoder": {}, - "decoder": {} - } - } - } -} \ No newline at end of file diff --git a/Demos/ASR/Whisper/config/vitisai_config_whisper_decoder.json b/Demos/ASR/Whisper/config/vitisai_config_whisper_decoder.json deleted file mode 100644 index 83216e30..00000000 --- a/Demos/ASR/Whisper/config/vitisai_config_whisper_decoder.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "optimize_level": 3, - "aiecompiler_args": "--system-stack-size=512" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file 
diff --git a/Demos/ASR/Whisper/config/vitisai_config_whisper_encoder.json b/Demos/ASR/Whisper/config/vitisai_config_whisper_encoder.json deleted file mode 100644 index c13fad79..00000000 --- a/Demos/ASR/Whisper/config/vitisai_config_whisper_encoder.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "optimize_level": 3, - "fe_experiment": "use-accurate-mode=LayerNorm2PassAdf", - "aiecompiler_args": "--system-stack-size=512" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/all_transcripts.txt b/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/all_transcripts.txt deleted file mode 100644 index cc654b69..00000000 --- a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/all_transcripts.txt +++ /dev/null @@ -1,2619 +0,0 @@ -1089-134686-0000 HE HOPED THERE WOULD BE STEW FOR DINNER TURNIPS AND CARROTS AND BRUISED POTATOES AND FAT MUTTON PIECES TO BE LADLED OUT IN THICK PEPPERED FLOUR FATTENED SAUCE -1089-134686-0001 STUFF IT INTO YOU HIS BELLY COUNSELLED HIM -1089-134686-0002 AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS -1089-134686-0003 HELLO BERTIE ANY GOOD IN YOUR MIND -1089-134686-0004 NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND -1089-134686-0005 THE MUSIC CAME NEARER AND HE RECALLED THE WORDS THE WORDS OF SHELLEY'S FRAGMENT UPON THE MOON WANDERING COMPANIONLESS PALE FOR WEARINESS -1089-134686-0006 THE DULL LIGHT FELL MORE FAINTLY UPON THE PAGE WHEREON ANOTHER EQUATION BEGAN TO UNFOLD ITSELF SLOWLY AND TO SPREAD ABROAD ITS WIDENING TAIL -1089-134686-0007 A COLD LUCID INDIFFERENCE REIGNED IN HIS SOUL -1089-134686-0008 THE CHAOS IN WHICH HIS ARDOUR EXTINGUISHED ITSELF WAS A 
COLD INDIFFERENT KNOWLEDGE OF HIMSELF -1089-134686-0009 AT MOST BY AN ALMS GIVEN TO A BEGGAR WHOSE BLESSING HE FLED FROM HE MIGHT HOPE WEARILY TO WIN FOR HIMSELF SOME MEASURE OF ACTUAL GRACE -1089-134686-0010 WELL NOW ENNIS I DECLARE YOU HAVE A HEAD AND SO HAS MY STICK -1089-134686-0011 ON SATURDAY MORNINGS WHEN THE SODALITY MET IN THE CHAPEL TO RECITE THE LITTLE OFFICE HIS PLACE WAS A CUSHIONED KNEELING DESK AT THE RIGHT OF THE ALTAR FROM WHICH HE LED HIS WING OF BOYS THROUGH THE RESPONSES -1089-134686-0012 HER EYES SEEMED TO REGARD HIM WITH MILD PITY HER HOLINESS A STRANGE LIGHT GLOWING FAINTLY UPON HER FRAIL FLESH DID NOT HUMILIATE THE SINNER WHO APPROACHED HER -1089-134686-0013 IF EVER HE WAS IMPELLED TO CAST SIN FROM HIM AND TO REPENT THE IMPULSE THAT MOVED HIM WAS THE WISH TO BE HER KNIGHT -1089-134686-0014 HE TRIED TO THINK HOW IT COULD BE -1089-134686-0015 BUT THE DUSK DEEPENING IN THE SCHOOLROOM COVERED OVER HIS THOUGHTS THE BELL RANG -1089-134686-0016 THEN YOU CAN ASK HIM QUESTIONS ON THE CATECHISM DEDALUS -1089-134686-0017 STEPHEN LEANING BACK AND DRAWING IDLY ON HIS SCRIBBLER LISTENED TO THE TALK ABOUT HIM WHICH HERON CHECKED FROM TIME TO TIME BY SAYING -1089-134686-0018 IT WAS STRANGE TOO THAT HE FOUND AN ARID PLEASURE IN FOLLOWING UP TO THE END THE RIGID LINES OF THE DOCTRINES OF THE CHURCH AND PENETRATING INTO OBSCURE SILENCES ONLY TO HEAR AND FEEL THE MORE DEEPLY HIS OWN CONDEMNATION -1089-134686-0019 THE SENTENCE OF SAINT JAMES WHICH SAYS THAT HE WHO OFFENDS AGAINST ONE COMMANDMENT BECOMES GUILTY OF ALL HAD SEEMED TO HIM FIRST A SWOLLEN PHRASE UNTIL HE HAD BEGUN TO GROPE IN THE DARKNESS OF HIS OWN STATE -1089-134686-0020 IF A MAN HAD STOLEN A POUND IN HIS YOUTH AND HAD USED THAT POUND TO AMASS A HUGE FORTUNE HOW MUCH WAS HE OBLIGED TO GIVE BACK THE POUND HE HAD STOLEN ONLY OR THE POUND TOGETHER WITH THE COMPOUND INTEREST ACCRUING UPON IT OR ALL HIS HUGE FORTUNE -1089-134686-0021 IF A LAYMAN IN GIVING BAPTISM POUR THE WATER BEFORE SAYING THE WORDS 
IS THE CHILD BAPTIZED -1089-134686-0022 HOW COMES IT THAT WHILE THE FIRST BEATITUDE PROMISES THE KINGDOM OF HEAVEN TO THE POOR OF HEART THE SECOND BEATITUDE PROMISES ALSO TO THE MEEK THAT THEY SHALL POSSESS THE LAND -1089-134686-0023 WHY WAS THE SACRAMENT OF THE EUCHARIST INSTITUTED UNDER THE TWO SPECIES OF BREAD AND WINE IF JESUS CHRIST BE PRESENT BODY AND BLOOD SOUL AND DIVINITY IN THE BREAD ALONE AND IN THE WINE ALONE -1089-134686-0024 IF THE WINE CHANGE INTO VINEGAR AND THE HOST CRUMBLE INTO CORRUPTION AFTER THEY HAVE BEEN CONSECRATED IS JESUS CHRIST STILL PRESENT UNDER THEIR SPECIES AS GOD AND AS MAN -1089-134686-0025 A GENTLE KICK FROM THE TALL BOY IN THE BENCH BEHIND URGED STEPHEN TO ASK A DIFFICULT QUESTION -1089-134686-0026 THE RECTOR DID NOT ASK FOR A CATECHISM TO HEAR THE LESSON FROM -1089-134686-0027 HE CLASPED HIS HANDS ON THE DESK AND SAID -1089-134686-0028 THE RETREAT WILL BEGIN ON WEDNESDAY AFTERNOON IN HONOUR OF SAINT FRANCIS XAVIER WHOSE FEAST DAY IS SATURDAY -1089-134686-0029 ON FRIDAY CONFESSION WILL BE HEARD ALL THE AFTERNOON AFTER BEADS -1089-134686-0030 BEWARE OF MAKING THAT MISTAKE -1089-134686-0031 STEPHEN'S HEART BEGAN SLOWLY TO FOLD AND FADE WITH FEAR LIKE A WITHERING FLOWER -1089-134686-0032 HE IS CALLED AS YOU KNOW THE APOSTLE OF THE INDIES -1089-134686-0033 A GREAT SAINT SAINT FRANCIS XAVIER -1089-134686-0034 THE RECTOR PAUSED AND THEN SHAKING HIS CLASPED HANDS BEFORE HIM WENT ON -1089-134686-0035 HE HAD THE FAITH IN HIM THAT MOVES MOUNTAINS -1089-134686-0036 A GREAT SAINT SAINT FRANCIS XAVIER -1089-134686-0037 IN THE SILENCE THEIR DARK FIRE KINDLED THE DUSK INTO A TAWNY GLOW -1089-134691-0000 HE COULD WAIT NO LONGER -1089-134691-0001 FOR A FULL HOUR HE HAD PACED UP AND DOWN WAITING BUT HE COULD WAIT NO LONGER -1089-134691-0002 HE SET OFF ABRUPTLY FOR THE BULL WALKING RAPIDLY LEST HIS FATHER'S SHRILL WHISTLE MIGHT CALL HIM BACK AND IN A FEW MOMENTS HE HAD ROUNDED THE CURVE AT THE POLICE BARRACK AND WAS SAFE -1089-134691-0003 THE 
UNIVERSITY -1089-134691-0004 PRIDE AFTER SATISFACTION UPLIFTED HIM LIKE LONG SLOW WAVES -1089-134691-0005 WHOSE FEET ARE AS THE FEET OF HARTS AND UNDERNEATH THE EVERLASTING ARMS -1089-134691-0006 THE PRIDE OF THAT DIM IMAGE BROUGHT BACK TO HIS MIND THE DIGNITY OF THE OFFICE HE HAD REFUSED -1089-134691-0007 SOON THE WHOLE BRIDGE WAS TREMBLING AND RESOUNDING -1089-134691-0008 THE UNCOUTH FACES PASSED HIM TWO BY TWO STAINED YELLOW OR RED OR LIVID BY THE SEA AND AS HE STROVE TO LOOK AT THEM WITH EASE AND INDIFFERENCE A FAINT STAIN OF PERSONAL SHAME AND COMMISERATION ROSE TO HIS OWN FACE -1089-134691-0009 ANGRY WITH HIMSELF HE TRIED TO HIDE HIS FACE FROM THEIR EYES BY GAZING DOWN SIDEWAYS INTO THE SHALLOW SWIRLING WATER UNDER THE BRIDGE BUT HE STILL SAW A REFLECTION THEREIN OF THEIR TOP HEAVY SILK HATS AND HUMBLE TAPE LIKE COLLARS AND LOOSELY HANGING CLERICAL CLOTHES BROTHER HICKEY -1089-134691-0010 BROTHER MAC ARDLE BROTHER KEOGH -1089-134691-0011 THEIR PIETY WOULD BE LIKE THEIR NAMES LIKE THEIR FACES LIKE THEIR CLOTHES AND IT WAS IDLE FOR HIM TO TELL HIMSELF THAT THEIR HUMBLE AND CONTRITE HEARTS IT MIGHT BE PAID A FAR RICHER TRIBUTE OF DEVOTION THAN HIS HAD EVER BEEN A GIFT TENFOLD MORE ACCEPTABLE THAN HIS ELABORATE ADORATION -1089-134691-0012 IT WAS IDLE FOR HIM TO MOVE HIMSELF TO BE GENEROUS TOWARDS THEM TO TELL HIMSELF THAT IF HE EVER CAME TO THEIR GATES STRIPPED OF HIS PRIDE BEATEN AND IN BEGGAR'S WEEDS THAT THEY WOULD BE GENEROUS TOWARDS HIM LOVING HIM AS THEMSELVES -1089-134691-0013 IDLE AND EMBITTERING FINALLY TO ARGUE AGAINST HIS OWN DISPASSIONATE CERTITUDE THAT THE COMMANDMENT OF LOVE BADE US NOT TO LOVE OUR NEIGHBOUR AS OURSELVES WITH THE SAME AMOUNT AND INTENSITY OF LOVE BUT TO LOVE HIM AS OURSELVES WITH THE SAME KIND OF LOVE -1089-134691-0014 THE PHRASE AND THE DAY AND THE SCENE HARMONIZED IN A CHORD -1089-134691-0015 WORDS WAS IT THEIR COLOURS -1089-134691-0016 THEY WERE VOYAGING ACROSS THE DESERTS OF THE SKY A HOST OF NOMADS ON THE MARCH VOYAGING HIGH 
OVER IRELAND WESTWARD BOUND -1089-134691-0017 THE EUROPE THEY HAD COME FROM LAY OUT THERE BEYOND THE IRISH SEA EUROPE OF STRANGE TONGUES AND VALLEYED AND WOODBEGIRT AND CITADELLED AND OF ENTRENCHED AND MARSHALLED RACES -1089-134691-0018 AGAIN AGAIN -1089-134691-0019 A VOICE FROM BEYOND THE WORLD WAS CALLING -1089-134691-0020 HELLO STEPHANOS HERE COMES THE DEDALUS -1089-134691-0021 THEIR DIVING STONE POISED ON ITS RUDE SUPPORTS AND ROCKING UNDER THEIR PLUNGES AND THE ROUGH HEWN STONES OF THE SLOPING BREAKWATER OVER WHICH THEY SCRAMBLED IN THEIR HORSEPLAY GLEAMED WITH COLD WET LUSTRE -1089-134691-0022 HE STOOD STILL IN DEFERENCE TO THEIR CALLS AND PARRIED THEIR BANTER WITH EASY WORDS -1089-134691-0023 IT WAS A PAIN TO SEE THEM AND A SWORD LIKE PAIN TO SEE THE SIGNS OF ADOLESCENCE THAT MADE REPELLENT THEIR PITIABLE NAKEDNESS -1089-134691-0024 STEPHANOS DEDALOS -1089-134691-0025 A MOMENT BEFORE THE GHOST OF THE ANCIENT KINGDOM OF THE DANES HAD LOOKED FORTH THROUGH THE VESTURE OF THE HAZEWRAPPED CITY -1188-133604-0000 YOU WILL FIND ME CONTINUALLY SPEAKING OF FOUR MEN TITIAN HOLBEIN TURNER AND TINTORET IN ALMOST THE SAME TERMS -1188-133604-0001 THEY UNITE EVERY QUALITY AND SOMETIMES YOU WILL FIND ME REFERRING TO THEM AS COLORISTS SOMETIMES AS CHIAROSCURISTS -1188-133604-0002 BY BEING STUDIOUS OF COLOR THEY ARE STUDIOUS OF DIVISION AND WHILE THE CHIAROSCURIST DEVOTES HIMSELF TO THE REPRESENTATION OF DEGREES OF FORCE IN ONE THING UNSEPARATED LIGHT THE COLORISTS HAVE FOR THEIR FUNCTION THE ATTAINMENT OF BEAUTY BY ARRANGEMENT OF THE DIVISIONS OF LIGHT -1188-133604-0003 MY FIRST AND PRINCIPAL REASON WAS THAT THEY ENFORCED BEYOND ALL RESISTANCE ON ANY STUDENT WHO MIGHT ATTEMPT TO COPY THEM THIS METHOD OF LAYING PORTIONS OF DISTINCT HUE SIDE BY SIDE -1188-133604-0004 SOME OF THE TOUCHES INDEED WHEN THE TINT HAS BEEN MIXED WITH MUCH WATER HAVE BEEN LAID IN LITTLE DROPS OR PONDS SO THAT THE PIGMENT MIGHT CRYSTALLIZE HARD AT THE EDGE -1188-133604-0005 IT IS THE HEAD OF A PARROT 
WITH A LITTLE FLOWER IN HIS BEAK FROM A PICTURE OF CARPACCIO'S ONE OF HIS SERIES OF THE LIFE OF SAINT GEORGE -1188-133604-0006 THEN HE COMES TO THE BEAK OF IT -1188-133604-0007 THE BROWN GROUND BENEATH IS LEFT FOR THE MOST PART ONE TOUCH OF BLACK IS PUT FOR THE HOLLOW TWO DELICATE LINES OF DARK GRAY DEFINE THE OUTER CURVE AND ONE LITTLE QUIVERING TOUCH OF WHITE DRAWS THE INNER EDGE OF THE MANDIBLE -1188-133604-0008 FOR BELIEVE ME THE FINAL PHILOSOPHY OF ART CAN ONLY RATIFY THEIR OPINION THAT THE BEAUTY OF A COCK ROBIN IS TO BE RED AND OF A GRASS PLOT TO BE GREEN AND THE BEST SKILL OF ART IS IN INSTANTLY SEIZING ON THE MANIFOLD DELICIOUSNESS OF LIGHT WHICH YOU CAN ONLY SEIZE BY PRECISION OF INSTANTANEOUS TOUCH -1188-133604-0009 NOW YOU WILL SEE IN THESE STUDIES THAT THE MOMENT THE WHITE IS INCLOSED PROPERLY AND HARMONIZED WITH THE OTHER HUES IT BECOMES SOMEHOW MORE PRECIOUS AND PEARLY THAN THE WHITE PAPER AND THAT I AM NOT AFRAID TO LEAVE A WHOLE FIELD OF UNTREATED WHITE PAPER ALL ROUND IT BEING SURE THAT EVEN THE LITTLE DIAMONDS IN THE ROUND WINDOW WILL TELL AS JEWELS IF THEY ARE GRADATED JUSTLY -1188-133604-0010 BUT IN THIS VIGNETTE COPIED FROM TURNER YOU HAVE THE TWO PRINCIPLES BROUGHT OUT PERFECTLY -1188-133604-0011 THEY ARE BEYOND ALL OTHER WORKS THAT I KNOW EXISTING DEPENDENT FOR THEIR EFFECT ON LOW SUBDUED TONES THEIR FAVORITE CHOICE IN TIME OF DAY BEING EITHER DAWN OR TWILIGHT AND EVEN THEIR BRIGHTEST SUNSETS PRODUCED CHIEFLY OUT OF GRAY PAPER -1188-133604-0012 IT MAY BE THAT A GREAT COLORIST WILL USE HIS UTMOST FORCE OF COLOR AS A SINGER HIS FULL POWER OF VOICE BUT LOUD OR LOW THE VIRTUE IS IN BOTH CASES ALWAYS IN REFINEMENT NEVER IN LOUDNESS -1188-133604-0013 IT MUST REMEMBER BE ONE OR THE OTHER -1188-133604-0014 DO NOT THEREFORE THINK THAT THE GOTHIC SCHOOL IS AN EASY ONE -1188-133604-0015 THE LAW OF THAT SCHOOL IS THAT EVERYTHING SHALL BE SEEN CLEARLY OR AT LEAST ONLY IN SUCH MIST OR FAINTNESS AS SHALL BE DELIGHTFUL AND I HAVE NO DOUBT THAT THE BEST 
INTRODUCTION TO IT WOULD BE THE ELEMENTARY PRACTICE OF PAINTING EVERY STUDY ON A GOLDEN GROUND -1188-133604-0016 THIS AT ONCE COMPELS YOU TO UNDERSTAND THAT THE WORK IS TO BE IMAGINATIVE AND DECORATIVE THAT IT REPRESENTS BEAUTIFUL THINGS IN THE CLEAREST WAY BUT NOT UNDER EXISTING CONDITIONS AND THAT IN FACT YOU ARE PRODUCING JEWELER'S WORK RATHER THAN PICTURES -1188-133604-0017 THAT A STYLE IS RESTRAINED OR SEVERE DOES NOT MEAN THAT IT IS ALSO ERRONEOUS -1188-133604-0018 IN ALL EARLY GOTHIC ART INDEED YOU WILL FIND FAILURE OF THIS KIND ESPECIALLY DISTORTION AND RIGIDITY WHICH ARE IN MANY RESPECTS PAINFULLY TO BE COMPARED WITH THE SPLENDID REPOSE OF CLASSIC ART -1188-133604-0019 THE LARGE LETTER CONTAINS INDEED ENTIRELY FEEBLE AND ILL DRAWN FIGURES THAT IS MERELY CHILDISH AND FAILING WORK OF AN INFERIOR HAND IT IS NOT CHARACTERISTIC OF GOTHIC OR ANY OTHER SCHOOL -1188-133604-0020 BUT OBSERVE YOU CAN ONLY DO THIS ON ONE CONDITION THAT OF STRIVING ALSO TO CREATE IN REALITY THE BEAUTY WHICH YOU SEEK IN IMAGINATION -1188-133604-0021 IT WILL BE WHOLLY IMPOSSIBLE FOR YOU TO RETAIN THE TRANQUILLITY OF TEMPER AND FELICITY OF FAITH NECESSARY FOR NOBLE PURIST PAINTING UNLESS YOU ARE ACTIVELY ENGAGED IN PROMOTING THE FELICITY AND PEACE OF PRACTICAL LIFE -1188-133604-0022 YOU MUST LOOK AT HIM IN THE FACE FIGHT HIM CONQUER HIM WITH WHAT SCATHE YOU MAY YOU NEED NOT THINK TO KEEP OUT OF THE WAY OF HIM -1188-133604-0023 THE COLORIST SAYS FIRST OF ALL AS MY DELICIOUS PAROQUET WAS RUBY SO THIS NASTY VIPER SHALL BE BLACK AND THEN IS THE QUESTION CAN I ROUND HIM OFF EVEN THOUGH HE IS BLACK AND MAKE HIM SLIMY AND YET SPRINGY AND CLOSE DOWN CLOTTED LIKE A POOL OF BLACK BLOOD ON THE EARTH ALL THE SAME -1188-133604-0024 NOTHING WILL BE MORE PRECIOUS TO YOU I THINK IN THE PRACTICAL STUDY OF ART THAN THE CONVICTION WHICH WILL FORCE ITSELF ON YOU MORE AND MORE EVERY HOUR OF THE WAY ALL THINGS ARE BOUND TOGETHER LITTLE AND GREAT IN SPIRIT AND IN MATTER -1188-133604-0025 YOU KNOW I HAVE JUST 
BEEN TELLING YOU HOW THIS SCHOOL OF MATERIALISM AND CLAY INVOLVED ITSELF AT LAST IN CLOUD AND FIRE -1188-133604-0026 HERE IS AN EQUALLY TYPICAL GREEK SCHOOL LANDSCAPE BY WILSON LOST WHOLLY IN GOLDEN MIST THE TREES SO SLIGHTLY DRAWN THAT YOU DON'T KNOW IF THEY ARE TREES OR TOWERS AND NO CARE FOR COLOR WHATEVER PERFECTLY DECEPTIVE AND MARVELOUS EFFECT OF SUNSHINE THROUGH THE MIST APOLLO AND THE PYTHON -1188-133604-0027 NOW HERE IS RAPHAEL EXACTLY BETWEEN THE TWO TREES STILL DRAWN LEAF BY LEAF WHOLLY FORMAL BUT BEAUTIFUL MIST COMING GRADUALLY INTO THE DISTANCE -1188-133604-0028 WELL THEN LAST HERE IS TURNER'S GREEK SCHOOL OF THE HIGHEST CLASS AND YOU DEFINE HIS ART ABSOLUTELY AS FIRST THE DISPLAYING INTENSELY AND WITH THE STERNEST INTELLECT OF NATURAL FORM AS IT IS AND THEN THE ENVELOPMENT OF IT WITH CLOUD AND FIRE -1188-133604-0029 ONLY THERE ARE TWO SORTS OF CLOUD AND FIRE -1188-133604-0030 HE KNOWS THEM BOTH -1188-133604-0031 THERE'S ONE AND THERE'S ANOTHER THE DUDLEY AND THE FLINT -1188-133604-0032 IT IS ONLY A PENCIL OUTLINE BY EDWARD BURNE JONES IN ILLUSTRATION OF THE STORY OF PSYCHE IT IS THE INTRODUCTION OF PSYCHE AFTER ALL HER TROUBLES INTO HEAVEN -1188-133604-0033 EVERY PLANT IN THE GRASS IS SET FORMALLY GROWS PERFECTLY AND MAY BE REALIZED COMPLETELY -1188-133604-0034 EXQUISITE ORDER AND UNIVERSAL WITH ETERNAL LIFE AND LIGHT THIS IS THE FAITH AND EFFORT OF THE SCHOOLS OF CRYSTAL AND YOU MAY DESCRIBE AND COMPLETE THEIR WORK QUITE LITERALLY BY TAKING ANY VERSES OF CHAUCER IN HIS TENDER MOOD AND OBSERVING HOW HE INSISTS ON THE CLEARNESS AND BRIGHTNESS FIRST AND THEN ON THE ORDER -1188-133604-0035 THUS IN CHAUCER'S DREAM -1188-133604-0036 IN BOTH THESE HIGH MYTHICAL SUBJECTS THE SURROUNDING NATURE THOUGH SUFFERING IS STILL DIGNIFIED AND BEAUTIFUL -1188-133604-0037 EVERY LINE IN WHICH THE MASTER TRACES IT EVEN WHERE SEEMINGLY NEGLIGENT IS LOVELY AND SET DOWN WITH A MEDITATIVE CALMNESS WHICH MAKES THESE TWO ETCHINGS CAPABLE OF BEING PLACED BESIDE THE MOST TRANQUIL 
WORK OF HOLBEIN OR DUERER -1188-133604-0038 BUT NOW HERE IS A SUBJECT OF WHICH YOU WILL WONDER AT FIRST WHY TURNER DREW IT AT ALL -1188-133604-0039 IT HAS NO BEAUTY WHATSOEVER NO SPECIALTY OF PICTURESQUENESS AND ALL ITS LINES ARE CRAMPED AND POOR -1188-133604-0040 THE CRAMPNESS AND THE POVERTY ARE ALL INTENDED -1188-133604-0041 IT IS A GLEANER BRINGING DOWN HER ONE SHEAF OF CORN TO AN OLD WATERMILL ITSELF MOSSY AND RENT SCARCELY ABLE TO GET ITS STONES TO TURN -1188-133604-0042 THE SCENE IS ABSOLUTELY ARCADIAN -1188-133604-0043 SEE THAT YOUR LIVES BE IN NOTHING WORSE THAN A BOY'S CLIMBING FOR HIS ENTANGLED KITE -1188-133604-0044 IT WILL BE WELL FOR YOU IF YOU JOIN NOT WITH THOSE WHO INSTEAD OF KITES FLY FALCONS WHO INSTEAD OF OBEYING THE LAST WORDS OF THE GREAT CLOUD SHEPHERD TO FEED HIS SHEEP LIVE THE LIVES HOW MUCH LESS THAN VANITY OF THE WAR WOLF AND THE GIER EAGLE -121-121726-0000 ALSO A POPULAR CONTRIVANCE WHEREBY LOVE MAKING MAY BE SUSPENDED BUT NOT STOPPED DURING THE PICNIC SEASON -121-121726-0001 HARANGUE THE TIRESOME PRODUCT OF A TIRELESS TONGUE -121-121726-0002 ANGOR PAIN PAINFUL TO HEAR -121-121726-0003 HAY FEVER A HEART TROUBLE CAUSED BY FALLING IN LOVE WITH A GRASS WIDOW -121-121726-0004 HEAVEN A GOOD PLACE TO BE RAISED TO -121-121726-0005 HEDGE A FENCE -121-121726-0006 HEREDITY THE CAUSE OF ALL OUR FAULTS -121-121726-0007 HORSE SENSE A DEGREE OF WISDOM THAT KEEPS ONE FROM BETTING ON THE RACES -121-121726-0008 HOSE MAN'S EXCUSE FOR WETTING THE WALK -121-121726-0009 HOTEL A PLACE WHERE A GUEST OFTEN GIVES UP GOOD DOLLARS FOR POOR QUARTERS -121-121726-0010 HOUSECLEANING A DOMESTIC UPHEAVAL THAT MAKES IT EASY FOR THE GOVERNMENT TO ENLIST ALL THE SOLDIERS IT NEEDS -121-121726-0011 HUSBAND THE NEXT THING TO A WIFE -121-121726-0012 HUSSY WOMAN AND BOND TIE -121-121726-0013 TIED TO A WOMAN -121-121726-0014 HYPOCRITE A HORSE DEALER -121-123852-0000 THOSE PRETTY WRONGS THAT LIBERTY COMMITS WHEN I AM SOMETIME ABSENT FROM THY HEART THY BEAUTY AND THY YEARS FULL 
WELL BEFITS FOR STILL TEMPTATION FOLLOWS WHERE THOU ART -121-123852-0001 AY ME -121-123852-0002 NO MATTER THEN ALTHOUGH MY FOOT DID STAND UPON THE FARTHEST EARTH REMOV'D FROM THEE FOR NIMBLE THOUGHT CAN JUMP BOTH SEA AND LAND AS SOON AS THINK THE PLACE WHERE HE WOULD BE BUT AH -121-123852-0003 THOUGHT KILLS ME THAT I AM NOT THOUGHT TO LEAP LARGE LENGTHS OF MILES WHEN THOU ART GONE BUT THAT SO MUCH OF EARTH AND WATER WROUGHT I MUST ATTEND TIME'S LEISURE WITH MY MOAN RECEIVING NOUGHT BY ELEMENTS SO SLOW BUT HEAVY TEARS BADGES OF EITHER'S WOE -121-123852-0004 MY HEART DOTH PLEAD THAT THOU IN HIM DOST LIE A CLOSET NEVER PIERC'D WITH CRYSTAL EYES BUT THE DEFENDANT DOTH THAT PLEA DENY AND SAYS IN HIM THY FAIR APPEARANCE LIES -121-123859-0000 YOU ARE MY ALL THE WORLD AND I MUST STRIVE TO KNOW MY SHAMES AND PRAISES FROM YOUR TONGUE NONE ELSE TO ME NOR I TO NONE ALIVE THAT MY STEEL'D SENSE OR CHANGES RIGHT OR WRONG -121-123859-0001 O TIS THE FIRST TIS FLATTERY IN MY SEEING AND MY GREAT MIND MOST KINGLY DRINKS IT UP MINE EYE WELL KNOWS WHAT WITH HIS GUST IS GREEING AND TO HIS PALATE DOTH PREPARE THE CUP IF IT BE POISON'D TIS THE LESSER SIN THAT MINE EYE LOVES IT AND DOTH FIRST BEGIN -121-123859-0002 BUT RECKONING TIME WHOSE MILLION'D ACCIDENTS CREEP IN TWIXT VOWS AND CHANGE DECREES OF KINGS TAN SACRED BEAUTY BLUNT THE SHARP'ST INTENTS DIVERT STRONG MINDS TO THE COURSE OF ALTERING THINGS ALAS WHY FEARING OF TIME'S TYRANNY MIGHT I NOT THEN SAY NOW I LOVE YOU BEST WHEN I WAS CERTAIN O'ER INCERTAINTY CROWNING THE PRESENT DOUBTING OF THE REST -121-123859-0003 LOVE IS A BABE THEN MIGHT I NOT SAY SO TO GIVE FULL GROWTH TO THAT WHICH STILL DOTH GROW -121-123859-0004 SO I RETURN REBUK'D TO MY CONTENT AND GAIN BY ILL THRICE MORE THAN I HAVE SPENT -121-127105-0000 IT WAS THIS OBSERVATION THAT DREW FROM DOUGLAS NOT IMMEDIATELY BUT LATER IN THE EVENING A REPLY THAT HAD THE INTERESTING CONSEQUENCE TO WHICH I CALL ATTENTION -121-127105-0001 SOMEONE ELSE TOLD A STORY NOT PARTICULARLY 
EFFECTIVE WHICH I SAW HE WAS NOT FOLLOWING -121-127105-0002 CRIED ONE OF THE WOMEN HE TOOK NO NOTICE OF HER HE LOOKED AT ME BUT AS IF INSTEAD OF ME HE SAW WHAT HE SPOKE OF -121-127105-0003 THERE WAS A UNANIMOUS GROAN AT THIS AND MUCH REPROACH AFTER WHICH IN HIS PREOCCUPIED WAY HE EXPLAINED -121-127105-0004 THE STORY'S WRITTEN -121-127105-0005 I COULD WRITE TO MY MAN AND ENCLOSE THE KEY HE COULD SEND DOWN THE PACKET AS HE FINDS IT -121-127105-0006 THE OTHERS RESENTED POSTPONEMENT BUT IT WAS JUST HIS SCRUPLES THAT CHARMED ME -121-127105-0007 TO THIS HIS ANSWER WAS PROMPT OH THANK GOD NO AND IS THE RECORD YOURS -121-127105-0008 HE HUNG FIRE AGAIN A WOMAN'S -121-127105-0009 SHE HAS BEEN DEAD THESE TWENTY YEARS -121-127105-0010 SHE SENT ME THE PAGES IN QUESTION BEFORE SHE DIED -121-127105-0011 SHE WAS THE MOST AGREEABLE WOMAN I'VE EVER KNOWN IN HER POSITION SHE WOULD HAVE BEEN WORTHY OF ANY WHATEVER -121-127105-0012 IT WASN'T SIMPLY THAT SHE SAID SO BUT THAT I KNEW SHE HADN'T I WAS SURE I COULD SEE -121-127105-0013 YOU'LL EASILY JUDGE WHY WHEN YOU HEAR BECAUSE THE THING HAD BEEN SUCH A SCARE HE CONTINUED TO FIX ME -121-127105-0014 YOU ARE ACUTE -121-127105-0015 HE QUITTED THE FIRE AND DROPPED BACK INTO HIS CHAIR -121-127105-0016 PROBABLY NOT TILL THE SECOND POST -121-127105-0017 IT WAS ALMOST THE TONE OF HOPE EVERYBODY WILL STAY -121-127105-0018 CRIED THE LADIES WHOSE DEPARTURE HAD BEEN FIXED -121-127105-0019 MISSUS GRIFFIN HOWEVER EXPRESSED THE NEED FOR A LITTLE MORE LIGHT -121-127105-0020 WHO WAS IT SHE WAS IN LOVE WITH THE STORY WILL TELL I TOOK UPON MYSELF TO REPLY OH I CAN'T WAIT FOR THE STORY THE STORY WON'T TELL SAID DOUGLAS NOT IN ANY LITERAL VULGAR WAY MORE'S THE PITY THEN -121-127105-0021 WON'T YOU TELL DOUGLAS -121-127105-0022 WELL IF I DON'T KNOW WHO SHE WAS IN LOVE WITH I KNOW WHO HE WAS -121-127105-0023 LET ME SAY HERE DISTINCTLY TO HAVE DONE WITH IT THAT THIS NARRATIVE FROM AN EXACT TRANSCRIPT OF MY OWN MADE MUCH LATER IS WHAT I SHALL PRESENTLY GIVE 
-121-127105-0024 POOR DOUGLAS BEFORE HIS DEATH WHEN IT WAS IN SIGHT COMMITTED TO ME THE MANUSCRIPT THAT REACHED HIM ON THE THIRD OF THESE DAYS AND THAT ON THE SAME SPOT WITH IMMENSE EFFECT HE BEGAN TO READ TO OUR HUSHED LITTLE CIRCLE ON THE NIGHT OF THE FOURTH -121-127105-0025 THE DEPARTING LADIES WHO HAD SAID THEY WOULD STAY DIDN'T OF COURSE THANK HEAVEN STAY THEY DEPARTED IN CONSEQUENCE OF ARRANGEMENTS MADE IN A RAGE OF CURIOSITY AS THEY PROFESSED PRODUCED BY THE TOUCHES WITH WHICH HE HAD ALREADY WORKED US UP -121-127105-0026 THE FIRST OF THESE TOUCHES CONVEYED THAT THE WRITTEN STATEMENT TOOK UP THE TALE AT A POINT AFTER IT HAD IN A MANNER BEGUN -121-127105-0027 HE HAD FOR HIS OWN TOWN RESIDENCE A BIG HOUSE FILLED WITH THE SPOILS OF TRAVEL AND THE TROPHIES OF THE CHASE BUT IT WAS TO HIS COUNTRY HOME AN OLD FAMILY PLACE IN ESSEX THAT HE WISHED HER IMMEDIATELY TO PROCEED -121-127105-0028 THE AWKWARD THING WAS THAT THEY HAD PRACTICALLY NO OTHER RELATIONS AND THAT HIS OWN AFFAIRS TOOK UP ALL HIS TIME -121-127105-0029 THERE WERE PLENTY OF PEOPLE TO HELP BUT OF COURSE THE YOUNG LADY WHO SHOULD GO DOWN AS GOVERNESS WOULD BE IN SUPREME AUTHORITY -121-127105-0030 I DON'T ANTICIPATE -121-127105-0031 SHE WAS YOUNG UNTRIED NERVOUS IT WAS A VISION OF SERIOUS DUTIES AND LITTLE COMPANY OF REALLY GREAT LONELINESS -121-127105-0032 YES BUT THAT'S JUST THE BEAUTY OF HER PASSION -121-127105-0033 IT WAS THE BEAUTY OF IT -121-127105-0034 IT SOUNDED DULL IT SOUNDED STRANGE AND ALL THE MORE SO BECAUSE OF HIS MAIN CONDITION WHICH WAS -121-127105-0035 SHE PROMISED TO DO THIS AND SHE MENTIONED TO ME THAT WHEN FOR A MOMENT DISBURDENED DELIGHTED HE HELD HER HAND THANKING HER FOR THE SACRIFICE SHE ALREADY FELT REWARDED -121-127105-0036 BUT WAS THAT ALL HER REWARD ONE OF THE LADIES ASKED -1221-135766-0000 HOW STRANGE IT SEEMED TO THE SAD WOMAN AS SHE WATCHED THE GROWTH AND THE BEAUTY THAT BECAME EVERY DAY MORE BRILLIANT AND THE INTELLIGENCE THAT THREW ITS QUIVERING SUNSHINE OVER THE TINY 
FEATURES OF THIS CHILD -1221-135766-0001 GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONOURED BOSOM TO CONNECT HER PARENT FOR EVER WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN -1221-135766-0002 YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION -1221-135766-0003 THE CHILD HAD A NATIVE GRACE WHICH DOES NOT INVARIABLY CO EXIST WITH FAULTLESS BEAUTY ITS ATTIRE HOWEVER SIMPLE ALWAYS IMPRESSED THE BEHOLDER AS IF IT WERE THE VERY GARB THAT PRECISELY BECAME IT BEST -1221-135766-0004 THIS OUTWARD MUTABILITY INDICATED AND DID NOT MORE THAN FAIRLY EXPRESS THE VARIOUS PROPERTIES OF HER INNER LIFE -1221-135766-0005 HESTER COULD ONLY ACCOUNT FOR THE CHILD'S CHARACTER AND EVEN THEN MOST VAGUELY AND IMPERFECTLY BY RECALLING WHAT SHE HERSELF HAD BEEN DURING THAT MOMENTOUS PERIOD WHILE PEARL WAS IMBIBING HER SOUL FROM THE SPIRITUAL WORLD AND HER BODILY FRAME FROM ITS MATERIAL OF EARTH -1221-135766-0006 THEY WERE NOW ILLUMINATED BY THE MORNING RADIANCE OF A YOUNG CHILD'S DISPOSITION BUT LATER IN THE DAY OF EARTHLY EXISTENCE MIGHT BE PROLIFIC OF THE STORM AND WHIRLWIND -1221-135766-0007 HESTER PRYNNE NEVERTHELESS THE LOVING MOTHER OF THIS ONE CHILD RAN LITTLE RISK OF ERRING ON THE SIDE OF UNDUE SEVERITY -1221-135766-0008 MINDFUL HOWEVER OF HER OWN ERRORS AND MISFORTUNES SHE EARLY SOUGHT TO IMPOSE A TENDER BUT STRICT CONTROL OVER THE INFANT IMMORTALITY THAT WAS COMMITTED TO HER CHARGE -1221-135766-0009 AS TO ANY OTHER KIND OF DISCIPLINE WHETHER ADDRESSED TO HER MIND OR HEART LITTLE PEARL MIGHT OR MIGHT NOT BE WITHIN ITS REACH IN ACCORDANCE WITH THE CAPRICE THAT RULED THE MOMENT -1221-135766-0010 IT WAS A LOOK SO INTELLIGENT YET INEXPLICABLE PERVERSE SOMETIMES SO MALICIOUS BUT GENERALLY ACCOMPANIED BY A WILD FLOW OF SPIRITS THAT HESTER COULD NOT HELP QUESTIONING AT SUCH MOMENTS WHETHER PEARL WAS A HUMAN CHILD -1221-135766-0011 BEHOLDING IT HESTER 
WAS CONSTRAINED TO RUSH TOWARDS THE CHILD TO PURSUE THE LITTLE ELF IN THE FLIGHT WHICH SHE INVARIABLY BEGAN TO SNATCH HER TO HER BOSOM WITH A CLOSE PRESSURE AND EARNEST KISSES NOT SO MUCH FROM OVERFLOWING LOVE AS TO ASSURE HERSELF THAT PEARL WAS FLESH AND BLOOD AND NOT UTTERLY DELUSIVE -1221-135766-0012 BROODING OVER ALL THESE MATTERS THE MOTHER FELT LIKE ONE WHO HAS EVOKED A SPIRIT BUT BY SOME IRREGULARITY IN THE PROCESS OF CONJURATION HAS FAILED TO WIN THE MASTER WORD THAT SHOULD CONTROL THIS NEW AND INCOMPREHENSIBLE INTELLIGENCE -1221-135766-0013 PEARL WAS A BORN OUTCAST OF THE INFANTILE WORLD -1221-135766-0014 PEARL SAW AND GAZED INTENTLY BUT NEVER SOUGHT TO MAKE ACQUAINTANCE -1221-135766-0015 IF SPOKEN TO SHE WOULD NOT SPEAK AGAIN -1221-135767-0000 HESTER PRYNNE WENT ONE DAY TO THE MANSION OF GOVERNOR BELLINGHAM WITH A PAIR OF GLOVES WHICH SHE HAD FRINGED AND EMBROIDERED TO HIS ORDER AND WHICH WERE TO BE WORN ON SOME GREAT OCCASION OF STATE FOR THOUGH THE CHANCES OF A POPULAR ELECTION HAD CAUSED THIS FORMER RULER TO DESCEND A STEP OR TWO FROM THE HIGHEST RANK HE STILL HELD AN HONOURABLE AND INFLUENTIAL PLACE AMONG THE COLONIAL MAGISTRACY -1221-135767-0001 ANOTHER AND FAR MORE IMPORTANT REASON THAN THE DELIVERY OF A PAIR OF EMBROIDERED GLOVES IMPELLED HESTER AT THIS TIME TO SEEK AN INTERVIEW WITH A PERSONAGE OF SO MUCH POWER AND ACTIVITY IN THE AFFAIRS OF THE SETTLEMENT -1221-135767-0002 AT THAT EPOCH OF PRISTINE SIMPLICITY HOWEVER MATTERS OF EVEN SLIGHTER PUBLIC INTEREST AND OF FAR LESS INTRINSIC WEIGHT THAN THE WELFARE OF HESTER AND HER CHILD WERE STRANGELY MIXED UP WITH THE DELIBERATIONS OF LEGISLATORS AND ACTS OF STATE -1221-135767-0003 THE PERIOD WAS HARDLY IF AT ALL EARLIER THAN THAT OF OUR STORY WHEN A DISPUTE CONCERNING THE RIGHT OF PROPERTY IN A PIG NOT ONLY CAUSED A FIERCE AND BITTER CONTEST IN THE LEGISLATIVE BODY OF THE COLONY BUT RESULTED IN AN IMPORTANT MODIFICATION OF THE FRAMEWORK ITSELF OF THE LEGISLATURE -1221-135767-0004 WE HAVE SPOKEN OF 
PEARL'S RICH AND LUXURIANT BEAUTY A BEAUTY THAT SHONE WITH DEEP AND VIVID TINTS A BRIGHT COMPLEXION EYES POSSESSING INTENSITY BOTH OF DEPTH AND GLOW AND HAIR ALREADY OF A DEEP GLOSSY BROWN AND WHICH IN AFTER YEARS WOULD BE NEARLY AKIN TO BLACK -1221-135767-0005 IT WAS THE SCARLET LETTER IN ANOTHER FORM THE SCARLET LETTER ENDOWED WITH LIFE -1221-135767-0006 THE MOTHER HERSELF AS IF THE RED IGNOMINY WERE SO DEEPLY SCORCHED INTO HER BRAIN THAT ALL HER CONCEPTIONS ASSUMED ITS FORM HAD CAREFULLY WROUGHT OUT THE SIMILITUDE LAVISHING MANY HOURS OF MORBID INGENUITY TO CREATE AN ANALOGY BETWEEN THE OBJECT OF HER AFFECTION AND THE EMBLEM OF HER GUILT AND TORTURE -1221-135767-0007 BUT IN TRUTH PEARL WAS THE ONE AS WELL AS THE OTHER AND ONLY IN CONSEQUENCE OF THAT IDENTITY HAD HESTER CONTRIVED SO PERFECTLY TO REPRESENT THE SCARLET LETTER IN HER APPEARANCE -1221-135767-0008 COME THEREFORE AND LET US FLING MUD AT THEM -1221-135767-0009 BUT PEARL WHO WAS A DAUNTLESS CHILD AFTER FROWNING STAMPING HER FOOT AND SHAKING HER LITTLE HAND WITH A VARIETY OF THREATENING GESTURES SUDDENLY MADE A RUSH AT THE KNOT OF HER ENEMIES AND PUT THEM ALL TO FLIGHT -1221-135767-0010 SHE SCREAMED AND SHOUTED TOO WITH A TERRIFIC VOLUME OF SOUND WHICH DOUBTLESS CAUSED THE HEARTS OF THE FUGITIVES TO QUAKE WITHIN THEM -1221-135767-0011 IT WAS FURTHER DECORATED WITH STRANGE AND SEEMINGLY CABALISTIC FIGURES AND DIAGRAMS SUITABLE TO THE QUAINT TASTE OF THE AGE WHICH HAD BEEN DRAWN IN THE STUCCO WHEN NEWLY LAID ON AND HAD NOW GROWN HARD AND DURABLE FOR THE ADMIRATION OF AFTER TIMES -1221-135767-0012 THEY APPROACHED THE DOOR WHICH WAS OF AN ARCHED FORM AND FLANKED ON EACH SIDE BY A NARROW TOWER OR PROJECTION OF THE EDIFICE IN BOTH OF WHICH WERE LATTICE WINDOWS THE WOODEN SHUTTERS TO CLOSE OVER THEM AT NEED -1221-135767-0013 LIFTING THE IRON HAMMER THAT HUNG AT THE PORTAL HESTER PRYNNE GAVE A SUMMONS WHICH WAS ANSWERED BY ONE OF THE GOVERNOR'S BOND SERVANT A FREE BORN ENGLISHMAN BUT NOW A SEVEN YEARS SLAVE 
-1221-135767-0014 YEA HIS HONOURABLE WORSHIP IS WITHIN BUT HE HATH A GODLY MINISTER OR TWO WITH HIM AND LIKEWISE A LEECH -1221-135767-0015 YE MAY NOT SEE HIS WORSHIP NOW -1221-135767-0016 WITH MANY VARIATIONS SUGGESTED BY THE NATURE OF HIS BUILDING MATERIALS DIVERSITY OF CLIMATE AND A DIFFERENT MODE OF SOCIAL LIFE GOVERNOR BELLINGHAM HAD PLANNED HIS NEW HABITATION AFTER THE RESIDENCES OF GENTLEMEN OF FAIR ESTATE IN HIS NATIVE LAND -1221-135767-0017 ON THE TABLE IN TOKEN THAT THE SENTIMENT OF OLD ENGLISH HOSPITALITY HAD NOT BEEN LEFT BEHIND STOOD A LARGE PEWTER TANKARD AT THE BOTTOM OF WHICH HAD HESTER OR PEARL PEEPED INTO IT THEY MIGHT HAVE SEEN THE FROTHY REMNANT OF A RECENT DRAUGHT OF ALE -1221-135767-0018 LITTLE PEARL WHO WAS AS GREATLY PLEASED WITH THE GLEAMING ARMOUR AS SHE HAD BEEN WITH THE GLITTERING FRONTISPIECE OF THE HOUSE SPENT SOME TIME LOOKING INTO THE POLISHED MIRROR OF THE BREASTPLATE -1221-135767-0019 MOTHER CRIED SHE I SEE YOU HERE LOOK LOOK -1221-135767-0020 IN TRUTH SHE SEEMED ABSOLUTELY HIDDEN BEHIND IT -1221-135767-0021 PEARL ACCORDINGLY RAN TO THE BOW WINDOW AT THE FURTHER END OF THE HALL AND LOOKED ALONG THE VISTA OF A GARDEN WALK CARPETED WITH CLOSELY SHAVEN GRASS AND BORDERED WITH SOME RUDE AND IMMATURE ATTEMPT AT SHRUBBERY -1221-135767-0022 BUT THE PROPRIETOR APPEARED ALREADY TO HAVE RELINQUISHED AS HOPELESS THE EFFORT TO PERPETUATE ON THIS SIDE OF THE ATLANTIC IN A HARD SOIL AND AMID THE CLOSE STRUGGLE FOR SUBSISTENCE THE NATIVE ENGLISH TASTE FOR ORNAMENTAL GARDENING -1221-135767-0023 THERE WERE A FEW ROSE BUSHES HOWEVER AND A NUMBER OF APPLE TREES PROBABLY THE DESCENDANTS OF THOSE PLANTED BY THE REVEREND MISTER BLACKSTONE THE FIRST SETTLER OF THE PENINSULA THAT HALF MYTHOLOGICAL PERSONAGE WHO RIDES THROUGH OUR EARLY ANNALS SEATED ON THE BACK OF A BULL -1221-135767-0024 PEARL SEEING THE ROSE BUSHES BEGAN TO CRY FOR A RED ROSE AND WOULD NOT BE PACIFIED -1284-1180-0000 HE WORE BLUE SILK STOCKINGS BLUE KNEE PANTS WITH GOLD BUCKLES A BLUE 
RUFFLED WAIST AND A JACKET OF BRIGHT BLUE BRAIDED WITH GOLD -1284-1180-0001 HIS HAT HAD A PEAKED CROWN AND A FLAT BRIM AND AROUND THE BRIM WAS A ROW OF TINY GOLDEN BELLS THAT TINKLED WHEN HE MOVED -1284-1180-0002 INSTEAD OF SHOES THE OLD MAN WORE BOOTS WITH TURNOVER TOPS AND HIS BLUE COAT HAD WIDE CUFFS OF GOLD BRAID -1284-1180-0003 FOR A LONG TIME HE HAD WISHED TO EXPLORE THE BEAUTIFUL LAND OF OZ IN WHICH THEY LIVED -1284-1180-0004 WHEN THEY WERE OUTSIDE UNC SIMPLY LATCHED THE DOOR AND STARTED UP THE PATH -1284-1180-0005 NO ONE WOULD DISTURB THEIR LITTLE HOUSE EVEN IF ANYONE CAME SO FAR INTO THE THICK FOREST WHILE THEY WERE GONE -1284-1180-0006 AT THE FOOT OF THE MOUNTAIN THAT SEPARATED THE COUNTRY OF THE MUNCHKINS FROM THE COUNTRY OF THE GILLIKINS THE PATH DIVIDED -1284-1180-0007 HE KNEW IT WOULD TAKE THEM TO THE HOUSE OF THE CROOKED MAGICIAN WHOM HE HAD NEVER SEEN BUT WHO WAS THEIR NEAREST NEIGHBOR -1284-1180-0008 ALL THE MORNING THEY TRUDGED UP THE MOUNTAIN PATH AND AT NOON UNC AND OJO SAT ON A FALLEN TREE TRUNK AND ATE THE LAST OF THE BREAD WHICH THE OLD MUNCHKIN HAD PLACED IN HIS POCKET -1284-1180-0009 THEN THEY STARTED ON AGAIN AND TWO HOURS LATER CAME IN SIGHT OF THE HOUSE OF DOCTOR PIPT -1284-1180-0010 UNC KNOCKED AT THE DOOR OF THE HOUSE AND A CHUBBY PLEASANT FACED WOMAN DRESSED ALL IN BLUE OPENED IT AND GREETED THE VISITORS WITH A SMILE -1284-1180-0011 I AM MY DEAR AND ALL STRANGERS ARE WELCOME TO MY HOME -1284-1180-0012 WE HAVE COME FROM A FAR LONELIER PLACE THAN THIS A LONELIER PLACE -1284-1180-0013 AND YOU MUST BE OJO THE UNLUCKY SHE ADDED -1284-1180-0014 OJO HAD NEVER EATEN SUCH A FINE MEAL IN ALL HIS LIFE -1284-1180-0015 WE ARE TRAVELING REPLIED OJO AND WE STOPPED AT YOUR HOUSE JUST TO REST AND REFRESH OURSELVES -1284-1180-0016 THE WOMAN SEEMED THOUGHTFUL -1284-1180-0017 AT ONE END STOOD A GREAT FIREPLACE IN WHICH A BLUE LOG WAS BLAZING WITH A BLUE FLAME AND OVER THE FIRE HUNG FOUR KETTLES IN A ROW ALL BUBBLING AND STEAMING AT A GREAT RATE 
-1284-1180-0018 IT TAKES ME SEVERAL YEARS TO MAKE THIS MAGIC POWDER BUT AT THIS MOMENT I AM PLEASED TO SAY IT IS NEARLY DONE YOU SEE I AM MAKING IT FOR MY GOOD WIFE MARGOLOTTE WHO WANTS TO USE SOME OF IT FOR A PURPOSE OF HER OWN -1284-1180-0019 YOU MUST KNOW SAID MARGOLOTTE WHEN THEY WERE ALL SEATED TOGETHER ON THE BROAD WINDOW SEAT THAT MY HUSBAND FOOLISHLY GAVE AWAY ALL THE POWDER OF LIFE HE FIRST MADE TO OLD MOMBI THE WITCH WHO USED TO LIVE IN THE COUNTRY OF THE GILLIKINS TO THE NORTH OF HERE -1284-1180-0020 THE FIRST LOT WE TESTED ON OUR GLASS CAT WHICH NOT ONLY BEGAN TO LIVE BUT HAS LIVED EVER SINCE -1284-1180-0021 I THINK THE NEXT GLASS CAT THE MAGICIAN MAKES WILL HAVE NEITHER BRAINS NOR HEART FOR THEN IT WILL NOT OBJECT TO CATCHING MICE AND MAY PROVE OF SOME USE TO US -1284-1180-0022 I'M AFRAID I DON'T KNOW MUCH ABOUT THE LAND OF OZ -1284-1180-0023 YOU SEE I'VE LIVED ALL MY LIFE WITH UNC NUNKIE THE SILENT ONE AND THERE WAS NO ONE TO TELL ME ANYTHING -1284-1180-0024 THAT IS ONE REASON YOU ARE OJO THE UNLUCKY SAID THE WOMAN IN A SYMPATHETIC TONE -1284-1180-0025 I THINK I MUST SHOW YOU MY PATCHWORK GIRL SAID MARGOLOTTE LAUGHING AT THE BOY'S ASTONISHMENT FOR SHE IS RATHER DIFFICULT TO EXPLAIN -1284-1180-0026 BUT FIRST I WILL TELL YOU THAT FOR MANY YEARS I HAVE LONGED FOR A SERVANT TO HELP ME WITH THE HOUSEWORK AND TO COOK THE MEALS AND WASH THE DISHES -1284-1180-0027 YET THAT TASK WAS NOT SO EASY AS YOU MAY SUPPOSE -1284-1180-0028 A BED QUILT MADE OF PATCHES OF DIFFERENT KINDS AND COLORS OF CLOTH ALL NEATLY SEWED TOGETHER -1284-1180-0029 SOMETIMES IT IS CALLED A CRAZY QUILT BECAUSE THE PATCHES AND COLORS ARE SO MIXED UP -1284-1180-0030 WHEN I FOUND IT I SAID TO MYSELF THAT IT WOULD DO NICELY FOR MY SERVANT GIRL FOR WHEN SHE WAS BROUGHT TO LIFE SHE WOULD NOT BE PROUD NOR HAUGHTY AS THE GLASS CAT IS FOR SUCH A DREADFUL MIXTURE OF COLORS WOULD DISCOURAGE HER FROM TRYING TO BE AS DIGNIFIED AS THE BLUE MUNCHKINS ARE -1284-1180-0031 AT THE EMERALD CITY WHERE OUR 
PRINCESS OZMA LIVES GREEN IS THE POPULAR COLOR -1284-1180-0032 I WILL SHOW YOU WHAT A GOOD JOB I DID AND SHE WENT TO A TALL CUPBOARD AND THREW OPEN THE DOORS -1284-1181-0000 OJO EXAMINED THIS CURIOUS CONTRIVANCE WITH WONDER -1284-1181-0001 MARGOLOTTE HAD FIRST MADE THE GIRL'S FORM FROM THE PATCHWORK QUILT AND THEN SHE HAD DRESSED IT WITH A PATCHWORK SKIRT AND AN APRON WITH POCKETS IN IT USING THE SAME GAY MATERIAL THROUGHOUT -1284-1181-0002 THE HEAD OF THE PATCHWORK GIRL WAS THE MOST CURIOUS PART OF HER -1284-1181-0003 THE HAIR WAS OF BROWN YARN AND HUNG DOWN ON HER NECK IN SEVERAL NEAT BRAIDS -1284-1181-0004 GOLD IS THE MOST COMMON METAL IN THE LAND OF OZ AND IS USED FOR MANY PURPOSES BECAUSE IT IS SOFT AND PLIABLE -1284-1181-0005 NO I FORGOT ALL ABOUT THE BRAINS EXCLAIMED THE WOMAN -1284-1181-0006 WELL THAT MAY BE TRUE AGREED MARGOLOTTE BUT ON THE CONTRARY A SERVANT WITH TOO MUCH BRAINS IS SURE TO BECOME INDEPENDENT AND HIGH AND MIGHTY AND FEEL ABOVE HER WORK -1284-1181-0007 SHE POURED INTO THE DISH A QUANTITY FROM EACH OF THESE BOTTLES -1284-1181-0008 I THINK THAT WILL DO SHE CONTINUED FOR THE OTHER QUALITIES ARE NOT NEEDED IN A SERVANT -1284-1181-0009 SHE RAN TO HER HUSBAND'S SIDE AT ONCE AND HELPED HIM LIFT THE FOUR KETTLES FROM THE FIRE -1284-1181-0010 THEIR CONTENTS HAD ALL BOILED AWAY LEAVING IN THE BOTTOM OF EACH KETTLE A FEW GRAINS OF FINE WHITE POWDER -1284-1181-0011 VERY CAREFULLY THE MAGICIAN REMOVED THIS POWDER PLACING IT ALL TOGETHER IN A GOLDEN DISH WHERE HE MIXED IT WITH A GOLDEN SPOON -1284-1181-0012 NO ONE SAW HIM DO THIS FOR ALL WERE LOOKING AT THE POWDER OF LIFE BUT SOON THE WOMAN REMEMBERED WHAT SHE HAD BEEN DOING AND CAME BACK TO THE CUPBOARD -1284-1181-0013 OJO BECAME A BIT UNEASY AT THIS FOR HE HAD ALREADY PUT QUITE A LOT OF THE CLEVERNESS POWDER IN THE DISH BUT HE DARED NOT INTERFERE AND SO HE COMFORTED HIMSELF WITH THE THOUGHT THAT ONE CANNOT HAVE TOO MUCH CLEVERNESS -1284-1181-0014 HE SELECTED A SMALL GOLD BOTTLE WITH A PEPPER BOX TOP SO 
THAT THE POWDER MIGHT BE SPRINKLED ON ANY OBJECT THROUGH THE SMALL HOLES -1284-1181-0015 MOST PEOPLE TALK TOO MUCH SO IT IS A RELIEF TO FIND ONE WHO TALKS TOO LITTLE -1284-1181-0016 I AM NOT ALLOWED TO PERFORM MAGIC EXCEPT FOR MY OWN AMUSEMENT HE TOLD HIS VISITORS AS HE LIGHTED A PIPE WITH A CROOKED STEM AND BEGAN TO SMOKE -1284-1181-0017 THE WIZARD OF OZ WHO USED TO BE A HUMBUG AND KNEW NO MAGIC AT ALL HAS BEEN TAKING LESSONS OF GLINDA AND I'M TOLD HE IS GETTING TO BE A PRETTY GOOD WIZARD BUT HE IS MERELY THE ASSISTANT OF THE GREAT SORCERESS -1284-1181-0018 IT TRULY IS ASSERTED THE MAGICIAN -1284-1181-0019 I NOW USE THEM AS ORNAMENTAL STATUARY IN MY GARDEN -1284-1181-0020 DEAR ME WHAT A CHATTERBOX YOU'RE GETTING TO BE UNC REMARKED THE MAGICIAN WHO WAS PLEASED WITH THE COMPLIMENT -1284-1181-0021 ASKED THE VOICE IN SCORNFUL ACCENTS -1284-134647-0000 THE GRATEFUL APPLAUSE OF THE CLERGY HAS CONSECRATED THE MEMORY OF A PRINCE WHO INDULGED THEIR PASSIONS AND PROMOTED THEIR INTEREST -1284-134647-0001 THE EDICT OF MILAN THE GREAT CHARTER OF TOLERATION HAD CONFIRMED TO EACH INDIVIDUAL OF THE ROMAN WORLD THE PRIVILEGE OF CHOOSING AND PROFESSING HIS OWN RELIGION -1284-134647-0002 BUT THIS INESTIMABLE PRIVILEGE WAS SOON VIOLATED WITH THE KNOWLEDGE OF TRUTH THE EMPEROR IMBIBED THE MAXIMS OF PERSECUTION AND THE SECTS WHICH DISSENTED FROM THE CATHOLIC CHURCH WERE AFFLICTED AND OPPRESSED BY THE TRIUMPH OF CHRISTIANITY -1284-134647-0003 CONSTANTINE EASILY BELIEVED THAT THE HERETICS WHO PRESUMED TO DISPUTE HIS OPINIONS OR TO OPPOSE HIS COMMANDS WERE GUILTY OF THE MOST ABSURD AND CRIMINAL OBSTINACY AND THAT A SEASONABLE APPLICATION OF MODERATE SEVERITIES MIGHT SAVE THOSE UNHAPPY MEN FROM THE DANGER OF AN EVERLASTING CONDEMNATION -1284-134647-0004 SOME OF THE PENAL REGULATIONS WERE COPIED FROM THE EDICTS OF DIOCLETIAN AND THIS METHOD OF CONVERSION WAS APPLAUDED BY THE SAME BISHOPS WHO HAD FELT THE HAND OF OPPRESSION AND PLEADED FOR THE RIGHTS OF HUMANITY -1284-134647-0005 THEY 
ASSERTED WITH CONFIDENCE AND ALMOST WITH EXULTATION THAT THE APOSTOLICAL SUCCESSION WAS INTERRUPTED THAT ALL THE BISHOPS OF EUROPE AND ASIA WERE INFECTED BY THE CONTAGION OF GUILT AND SCHISM AND THAT THE PREROGATIVES OF THE CATHOLIC CHURCH WERE CONFINED TO THE CHOSEN PORTION OF THE AFRICAN BELIEVERS WHO ALONE HAD PRESERVED INVIOLATE THE INTEGRITY OF THEIR FAITH AND DISCIPLINE -1284-134647-0006 BISHOPS VIRGINS AND EVEN SPOTLESS INFANTS WERE SUBJECTED TO THE DISGRACE OF A PUBLIC PENANCE BEFORE THEY COULD BE ADMITTED TO THE COMMUNION OF THE DONATISTS -1284-134647-0007 PROSCRIBED BY THE CIVIL AND ECCLESIASTICAL POWERS OF THE EMPIRE THE DONATISTS STILL MAINTAINED IN SOME PROVINCES PARTICULARLY IN NUMIDIA THEIR SUPERIOR NUMBERS AND FOUR HUNDRED BISHOPS ACKNOWLEDGED THE JURISDICTION OF THEIR PRIMATE -1320-122612-0000 SINCE THE PERIOD OF OUR TALE THE ACTIVE SPIRIT OF THE COUNTRY HAS SURROUNDED IT WITH A BELT OF RICH AND THRIVING SETTLEMENTS THOUGH NONE BUT THE HUNTER OR THE SAVAGE IS EVER KNOWN EVEN NOW TO PENETRATE ITS WILD RECESSES -1320-122612-0001 THE DEWS WERE SUFFERED TO EXHALE AND THE SUN HAD DISPERSED THE MISTS AND WAS SHEDDING A STRONG AND CLEAR LIGHT IN THE FOREST WHEN THE TRAVELERS RESUMED THEIR JOURNEY -1320-122612-0002 AFTER PROCEEDING A FEW MILES THE PROGRESS OF HAWKEYE WHO LED THE ADVANCE BECAME MORE DELIBERATE AND WATCHFUL -1320-122612-0003 HE OFTEN STOPPED TO EXAMINE THE TREES NOR DID HE CROSS A RIVULET WITHOUT ATTENTIVELY CONSIDERING THE QUANTITY THE VELOCITY AND THE COLOR OF ITS WATERS -1320-122612-0004 DISTRUSTING HIS OWN JUDGMENT HIS APPEALS TO THE OPINION OF CHINGACHGOOK WERE FREQUENT AND EARNEST -1320-122612-0005 YET HERE ARE WE WITHIN A SHORT RANGE OF THE SCAROONS AND NOT A SIGN OF A TRAIL HAVE WE CROSSED -1320-122612-0006 LET US RETRACE OUR STEPS AND EXAMINE AS WE GO WITH KEENER EYES -1320-122612-0007 CHINGACHGOOK HAD CAUGHT THE LOOK AND MOTIONING WITH HIS HAND HE BADE HIM SPEAK -1320-122612-0008 THE EYES OF THE WHOLE PARTY FOLLOWED THE UNEXPECTED 
MOVEMENT AND READ THEIR SUCCESS IN THE AIR OF TRIUMPH THAT THE YOUTH ASSUMED -1320-122612-0009 IT WOULD HAVE BEEN MORE WONDERFUL HAD HE SPOKEN WITHOUT A BIDDING -1320-122612-0010 SEE SAID UNCAS POINTING NORTH AND SOUTH AT THE EVIDENT MARKS OF THE BROAD TRAIL ON EITHER SIDE OF HIM THE DARK HAIR HAS GONE TOWARD THE FOREST -1320-122612-0011 IF A ROCK OR A RIVULET OR A BIT OF EARTH HARDER THAN COMMON SEVERED THE LINKS OF THE CLEW THEY FOLLOWED THE TRUE EYE OF THE SCOUT RECOVERED THEM AT A DISTANCE AND SELDOM RENDERED THE DELAY OF A SINGLE MOMENT NECESSARY -1320-122612-0012 EXTINGUISHED BRANDS WERE LYING AROUND A SPRING THE OFFALS OF A DEER WERE SCATTERED ABOUT THE PLACE AND THE TREES BORE EVIDENT MARKS OF HAVING BEEN BROWSED BY THE HORSES -1320-122612-0013 A CIRCLE OF A FEW HUNDRED FEET IN CIRCUMFERENCE WAS DRAWN AND EACH OF THE PARTY TOOK A SEGMENT FOR HIS PORTION -1320-122612-0014 THE EXAMINATION HOWEVER RESULTED IN NO DISCOVERY -1320-122612-0015 THE WHOLE PARTY CROWDED TO THE SPOT WHERE UNCAS POINTED OUT THE IMPRESSION OF A MOCCASIN IN THE MOIST ALLUVION -1320-122612-0016 RUN BACK UNCAS AND BRING ME THE SIZE OF THE SINGER'S FOOT -1320-122617-0000 NOTWITHSTANDING THE HIGH RESOLUTION OF HAWKEYE HE FULLY COMPREHENDED ALL THE DIFFICULTIES AND DANGER HE WAS ABOUT TO INCUR -1320-122617-0001 IN HIS RETURN TO THE CAMP HIS ACUTE AND PRACTISED INTELLECTS WERE INTENTLY ENGAGED IN DEVISING MEANS TO COUNTERACT A WATCHFULNESS AND SUSPICION ON THE PART OF HIS ENEMIES THAT HE KNEW WERE IN NO DEGREE INFERIOR TO HIS OWN -1320-122617-0002 IN OTHER WORDS WHILE HE HAD IMPLICIT FAITH IN THE ABILITY OF BALAAM'S ASS TO SPEAK HE WAS SOMEWHAT SKEPTICAL ON THE SUBJECT OF A BEAR'S SINGING AND YET HE HAD BEEN ASSURED OF THE LATTER ON THE TESTIMONY OF HIS OWN EXQUISITE ORGANS -1320-122617-0003 THERE WAS SOMETHING IN HIS AIR AND MANNER THAT BETRAYED TO THE SCOUT THE UTTER CONFUSION OF THE STATE OF HIS MIND -1320-122617-0004 THE INGENIOUS HAWKEYE WHO RECALLED THE HASTY MANNER IN WHICH THE OTHER 
HAD ABANDONED HIS POST AT THE BEDSIDE OF THE SICK WOMAN WAS NOT WITHOUT HIS SUSPICIONS CONCERNING THE SUBJECT OF SO MUCH SOLEMN DELIBERATION -1320-122617-0005 THE BEAR SHOOK HIS SHAGGY SIDES AND THEN A WELL KNOWN VOICE REPLIED -1320-122617-0006 CAN THESE THINGS BE RETURNED DAVID BREATHING MORE FREELY AS THE TRUTH BEGAN TO DAWN UPON HIM -1320-122617-0007 COME COME RETURNED HAWKEYE UNCASING HIS HONEST COUNTENANCE THE BETTER TO ASSURE THE WAVERING CONFIDENCE OF HIS COMPANION YOU MAY SEE A SKIN WHICH IF IT BE NOT AS WHITE AS ONE OF THE GENTLE ONES HAS NO TINGE OF RED TO IT THAT THE WINDS OF THE HEAVEN AND THE SUN HAVE NOT BESTOWED NOW LET US TO BUSINESS -1320-122617-0008 THE YOUNG MAN IS IN BONDAGE AND MUCH I FEAR HIS DEATH IS DECREED -1320-122617-0009 I GREATLY MOURN THAT ONE SO WELL DISPOSED SHOULD DIE IN HIS IGNORANCE AND I HAVE SOUGHT A GOODLY HYMN CAN YOU LEAD ME TO HIM -1320-122617-0010 THE TASK WILL NOT BE DIFFICULT RETURNED DAVID HESITATING THOUGH I GREATLY FEAR YOUR PRESENCE WOULD RATHER INCREASE THAN MITIGATE HIS UNHAPPY FORTUNES -1320-122617-0011 THE LODGE IN WHICH UNCAS WAS CONFINED WAS IN THE VERY CENTER OF THE VILLAGE AND IN A SITUATION PERHAPS MORE DIFFICULT THAN ANY OTHER TO APPROACH OR LEAVE WITHOUT OBSERVATION -1320-122617-0012 FOUR OR FIVE OF THE LATTER ONLY LINGERED ABOUT THE DOOR OF THE PRISON OF UNCAS WARY BUT CLOSE OBSERVERS OF THE MANNER OF THEIR CAPTIVE -1320-122617-0013 DELIVERED IN A STRONG TONE OF ASSENT ANNOUNCED THE GRATIFICATION THE SAVAGE WOULD RECEIVE IN WITNESSING SUCH AN EXHIBITION OF WEAKNESS IN AN ENEMY SO LONG HATED AND SO MUCH FEARED -1320-122617-0014 THEY DREW BACK A LITTLE FROM THE ENTRANCE AND MOTIONED TO THE SUPPOSED CONJURER TO ENTER -1320-122617-0015 BUT THE BEAR INSTEAD OF OBEYING MAINTAINED THE SEAT IT HAD TAKEN AND GROWLED -1320-122617-0016 THE CUNNING MAN IS AFRAID THAT HIS BREATH WILL BLOW UPON HIS BROTHERS AND TAKE AWAY THEIR COURAGE TOO CONTINUED DAVID IMPROVING THE HINT HE RECEIVED THEY MUST STAND FURTHER OFF 
-1320-122617-0017 THEN AS IF SATISFIED OF THEIR SAFETY THE SCOUT LEFT HIS POSITION AND SLOWLY ENTERED THE PLACE -1320-122617-0018 IT WAS SILENT AND GLOOMY BEING TENANTED SOLELY BY THE CAPTIVE AND LIGHTED BY THE DYING EMBERS OF A FIRE WHICH HAD BEEN USED FOR THE PURPOSED OF COOKERY -1320-122617-0019 UNCAS OCCUPIED A DISTANT CORNER IN A RECLINING ATTITUDE BEING RIGIDLY BOUND BOTH HANDS AND FEET BY STRONG AND PAINFUL WITHES -1320-122617-0020 THE SCOUT WHO HAD LEFT DAVID AT THE DOOR TO ASCERTAIN THEY WERE NOT OBSERVED THOUGHT IT PRUDENT TO PRESERVE HIS DISGUISE UNTIL ASSURED OF THEIR PRIVACY -1320-122617-0021 WHAT SHALL WE DO WITH THE MINGOES AT THE DOOR THEY COUNT SIX AND THIS SINGER IS AS GOOD AS NOTHING -1320-122617-0022 THE DELAWARES ARE CHILDREN OF THE TORTOISE AND THEY OUTSTRIP THE DEER -1320-122617-0023 UNCAS WHO HAD ALREADY APPROACHED THE DOOR IN READINESS TO LEAD THE WAY NOW RECOILED AND PLACED HIMSELF ONCE MORE IN THE BOTTOM OF THE LODGE -1320-122617-0024 BUT HAWKEYE WHO WAS TOO MUCH OCCUPIED WITH HIS OWN THOUGHTS TO NOTE THE MOVEMENT CONTINUED SPEAKING MORE TO HIMSELF THAN TO HIS COMPANION -1320-122617-0025 SO UNCAS YOU HAD BETTER TAKE THE LEAD WHILE I WILL PUT ON THE SKIN AGAIN AND TRUST TO CUNNING FOR WANT OF SPEED -1320-122617-0026 WELL WHAT CAN'T BE DONE BY MAIN COURAGE IN WAR MUST BE DONE BY CIRCUMVENTION -1320-122617-0027 AS SOON AS THESE DISPOSITIONS WERE MADE THE SCOUT TURNED TO DAVID AND GAVE HIM HIS PARTING INSTRUCTIONS -1320-122617-0028 MY PURSUITS ARE PEACEFUL AND MY TEMPER I HUMBLY TRUST IS GREATLY GIVEN TO MERCY AND LOVE RETURNED DAVID A LITTLE NETTLED AT SO DIRECT AN ATTACK ON HIS MANHOOD BUT THERE ARE NONE WHO CAN SAY THAT I HAVE EVER FORGOTTEN MY FAITH IN THE LORD EVEN IN THE GREATEST STRAITS -1320-122617-0029 IF YOU ARE NOT THEN KNOCKED ON THE HEAD YOUR BEING A NON COMPOSSER WILL PROTECT YOU AND YOU'LL THEN HAVE A GOOD REASON TO EXPECT TO DIE IN YOUR BED -1320-122617-0030 SO CHOOSE FOR YOURSELF TO MAKE A RUSH OR TARRY HERE -1320-122617-0031 
BRAVELY AND GENEROUSLY HAS HE BATTLED IN MY BEHALF AND THIS AND MORE WILL I DARE IN HIS SERVICE -1320-122617-0032 KEEP SILENT AS LONG AS MAY BE AND IT WOULD BE WISE WHEN YOU DO SPEAK TO BREAK OUT SUDDENLY IN ONE OF YOUR SHOUTINGS WHICH WILL SERVE TO REMIND THE INDIANS THAT YOU ARE NOT ALTOGETHER AS RESPONSIBLE AS MEN SHOULD BE -1320-122617-0033 IF HOWEVER THEY TAKE YOUR SCALP AS I TRUST AND BELIEVE THEY WILL NOT DEPEND ON IT UNCAS AND I WILL NOT FORGET THE DEED BUT REVENGE IT AS BECOMES TRUE WARRIORS AND TRUSTY FRIENDS -1320-122617-0034 HOLD SAID DAVID PERCEIVING THAT WITH THIS ASSURANCE THEY WERE ABOUT TO LEAVE HIM I AM AN UNWORTHY AND HUMBLE FOLLOWER OF ONE WHO TAUGHT NOT THE DAMNABLE PRINCIPLE OF REVENGE -1320-122617-0035 THEN HEAVING A HEAVY SIGH PROBABLY AMONG THE LAST HE EVER DREW IN PINING FOR A CONDITION HE HAD SO LONG ABANDONED HE ADDED IT IS WHAT I WOULD WISH TO PRACTISE MYSELF AS ONE WITHOUT A CROSS OF BLOOD THOUGH IT IS NOT ALWAYS EASY TO DEAL WITH AN INDIAN AS YOU WOULD WITH A FELLOW CHRISTIAN -1320-122617-0036 GOD BLESS YOU FRIEND I DO BELIEVE YOUR SCENT IS NOT GREATLY WRONG WHEN THE MATTER IS DULY CONSIDERED AND KEEPING ETERNITY BEFORE THE EYES THOUGH MUCH DEPENDS ON THE NATURAL GIFTS AND THE FORCE OF TEMPTATION -1320-122617-0037 THE DELAWARE DOG HE SAID LEANING FORWARD AND PEERING THROUGH THE DIM LIGHT TO CATCH THE EXPRESSION OF THE OTHER'S FEATURES IS HE AFRAID -1320-122617-0038 WILL THE HURONS HEAR HIS GROANS -1320-122617-0039 THE MOHICAN STARTED ON HIS FEET AND SHOOK HIS SHAGGY COVERING AS THOUGH THE ANIMAL HE COUNTERFEITED WAS ABOUT TO MAKE SOME DESPERATE EFFORT -1320-122617-0040 HE HAD NO OCCASION TO DELAY FOR AT THE NEXT INSTANT A BURST OF CRIES FILLED THE OUTER AIR AND RAN ALONG THE WHOLE EXTENT OF THE VILLAGE -1320-122617-0041 UNCAS CAST HIS SKIN AND STEPPED FORTH IN HIS OWN BEAUTIFUL PROPORTIONS -1580-141083-0000 I WILL ENDEAVOUR IN MY STATEMENT TO AVOID SUCH TERMS AS WOULD SERVE TO LIMIT THE EVENTS TO ANY PARTICULAR PLACE OR GIVE A CLUE AS 
TO THE PEOPLE CONCERNED -1580-141083-0001 I HAD ALWAYS KNOWN HIM TO BE RESTLESS IN HIS MANNER BUT ON THIS PARTICULAR OCCASION HE WAS IN SUCH A STATE OF UNCONTROLLABLE AGITATION THAT IT WAS CLEAR SOMETHING VERY UNUSUAL HAD OCCURRED -1580-141083-0002 MY FRIEND'S TEMPER HAD NOT IMPROVED SINCE HE HAD BEEN DEPRIVED OF THE CONGENIAL SURROUNDINGS OF BAKER STREET -1580-141083-0003 WITHOUT HIS SCRAPBOOKS HIS CHEMICALS AND HIS HOMELY UNTIDINESS HE WAS AN UNCOMFORTABLE MAN -1580-141083-0004 I HAD TO READ IT OVER CAREFULLY AS THE TEXT MUST BE ABSOLUTELY CORRECT -1580-141083-0005 I WAS ABSENT RATHER MORE THAN AN HOUR -1580-141083-0006 THE ONLY DUPLICATE WHICH EXISTED SO FAR AS I KNEW WAS THAT WHICH BELONGED TO MY SERVANT BANNISTER A MAN WHO HAS LOOKED AFTER MY ROOM FOR TEN YEARS AND WHOSE HONESTY IS ABSOLUTELY ABOVE SUSPICION -1580-141083-0007 THE MOMENT I LOOKED AT MY TABLE I WAS AWARE THAT SOMEONE HAD RUMMAGED AMONG MY PAPERS -1580-141083-0008 THE PROOF WAS IN THREE LONG SLIPS I HAD LEFT THEM ALL TOGETHER -1580-141083-0009 THE ALTERNATIVE WAS THAT SOMEONE PASSING HAD OBSERVED THE KEY IN THE DOOR HAD KNOWN THAT I WAS OUT AND HAD ENTERED TO LOOK AT THE PAPERS -1580-141083-0010 I GAVE HIM A LITTLE BRANDY AND LEFT HIM COLLAPSED IN A CHAIR WHILE I MADE A MOST CAREFUL EXAMINATION OF THE ROOM -1580-141083-0011 A BROKEN TIP OF LEAD WAS LYING THERE ALSO -1580-141083-0012 NOT ONLY THIS BUT ON THE TABLE I FOUND A SMALL BALL OF BLACK DOUGH OR CLAY WITH SPECKS OF SOMETHING WHICH LOOKS LIKE SAWDUST IN IT -1580-141083-0013 ABOVE ALL THINGS I DESIRE TO SETTLE THE MATTER QUIETLY AND DISCREETLY -1580-141083-0014 TO THE BEST OF MY BELIEF THEY WERE ROLLED UP -1580-141083-0015 DID ANYONE KNOW THAT THESE PROOFS WOULD BE THERE NO ONE SAVE THE PRINTER -1580-141083-0016 I WAS IN SUCH A HURRY TO COME TO YOU YOU LEFT YOUR DOOR OPEN -1580-141083-0017 SO IT SEEMS TO ME -1580-141083-0018 NOW MISTER SOAMES AT YOUR DISPOSAL -1580-141083-0019 ABOVE WERE THREE STUDENTS ONE ON EACH STORY -1580-141083-0020 THEN 
HE APPROACHED IT AND STANDING ON TIPTOE WITH HIS NECK CRANED HE LOOKED INTO THE ROOM -1580-141083-0021 THERE IS NO OPENING EXCEPT THE ONE PANE SAID OUR LEARNED GUIDE -1580-141083-0022 I AM AFRAID THERE ARE NO SIGNS HERE SAID HE -1580-141083-0023 ONE COULD HARDLY HOPE FOR ANY UPON SO DRY A DAY -1580-141083-0024 YOU LEFT HIM IN A CHAIR YOU SAY WHICH CHAIR BY THE WINDOW THERE -1580-141083-0025 THE MAN ENTERED AND TOOK THE PAPERS SHEET BY SHEET FROM THE CENTRAL TABLE -1580-141083-0026 AS A MATTER OF FACT HE COULD NOT SAID SOAMES FOR I ENTERED BY THE SIDE DOOR -1580-141083-0027 HOW LONG WOULD IT TAKE HIM TO DO THAT USING EVERY POSSIBLE CONTRACTION A QUARTER OF AN HOUR NOT LESS -1580-141083-0028 THEN HE TOSSED IT DOWN AND SEIZED THE NEXT -1580-141083-0029 HE WAS IN THE MIDST OF THAT WHEN YOUR RETURN CAUSED HIM TO MAKE A VERY HURRIED RETREAT VERY HURRIED SINCE HE HAD NOT TIME TO REPLACE THE PAPERS WHICH WOULD TELL YOU THAT HE HAD BEEN THERE -1580-141083-0030 MISTER SOAMES WAS SOMEWHAT OVERWHELMED BY THIS FLOOD OF INFORMATION -1580-141083-0031 HOLMES HELD OUT A SMALL CHIP WITH THE LETTERS N N AND A SPACE OF CLEAR WOOD AFTER THEM YOU SEE -1580-141083-0032 WATSON I HAVE ALWAYS DONE YOU AN INJUSTICE THERE ARE OTHERS -1580-141083-0033 I WAS HOPING THAT IF THE PAPER ON WHICH HE WROTE WAS THIN SOME TRACE OF IT MIGHT COME THROUGH UPON THIS POLISHED SURFACE NO I SEE NOTHING -1580-141083-0034 AS HOLMES DREW THE CURTAIN I WAS AWARE FROM SOME LITTLE RIGIDITY AND ALERTNESS OF HIS ATTITUDE THAT HE WAS PREPARED FOR AN EMERGENCY -1580-141083-0035 HOLMES TURNED AWAY AND STOOPED SUDDENLY TO THE FLOOR HALLOA WHAT'S THIS -1580-141083-0036 HOLMES HELD IT OUT ON HIS OPEN PALM IN THE GLARE OF THE ELECTRIC LIGHT -1580-141083-0037 WHAT COULD HE DO HE CAUGHT UP EVERYTHING WHICH WOULD BETRAY HIM AND HE RUSHED INTO YOUR BEDROOM TO CONCEAL HIMSELF -1580-141083-0038 I UNDERSTAND YOU TO SAY THAT THERE ARE THREE STUDENTS WHO USE THIS STAIR AND ARE IN THE HABIT OF PASSING YOUR DOOR YES THERE ARE 
-1580-141083-0039 AND THEY ARE ALL IN FOR THIS EXAMINATION YES -1580-141083-0040 ONE HARDLY LIKES TO THROW SUSPICION WHERE THERE ARE NO PROOFS -1580-141083-0041 LET US HEAR THE SUSPICIONS I WILL LOOK AFTER THE PROOFS -1580-141083-0042 MY SCHOLAR HAS BEEN LEFT VERY POOR BUT HE IS HARD WORKING AND INDUSTRIOUS HE WILL DO WELL -1580-141083-0043 THE TOP FLOOR BELONGS TO MILES MC LAREN -1580-141083-0044 I DARE NOT GO SO FAR AS THAT BUT OF THE THREE HE IS PERHAPS THE LEAST UNLIKELY -1580-141083-0045 HE WAS STILL SUFFERING FROM THIS SUDDEN DISTURBANCE OF THE QUIET ROUTINE OF HIS LIFE -1580-141083-0046 BUT I HAVE OCCASIONALLY DONE THE SAME THING AT OTHER TIMES -1580-141083-0047 DID YOU LOOK AT THESE PAPERS ON THE TABLE -1580-141083-0048 HOW CAME YOU TO LEAVE THE KEY IN THE DOOR -1580-141083-0049 ANYONE IN THE ROOM COULD GET OUT YES SIR -1580-141083-0050 I REALLY DON'T THINK HE KNEW MUCH ABOUT IT MISTER HOLMES -1580-141083-0051 ONLY FOR A MINUTE OR SO -1580-141083-0052 OH I WOULD NOT VENTURE TO SAY SIR -1580-141083-0053 YOU HAVEN'T SEEN ANY OF THEM NO SIR -1580-141084-0000 IT WAS THE INDIAN WHOSE DARK SILHOUETTE APPEARED SUDDENLY UPON HIS BLIND -1580-141084-0001 HE WAS PACING SWIFTLY UP AND DOWN HIS ROOM -1580-141084-0002 THIS SET OF ROOMS IS QUITE THE OLDEST IN THE COLLEGE AND IT IS NOT UNUSUAL FOR VISITORS TO GO OVER THEM -1580-141084-0003 NO NAMES PLEASE SAID HOLMES AS WE KNOCKED AT GILCHRIST'S DOOR -1580-141084-0004 OF COURSE HE DID NOT REALIZE THAT IT WAS I WHO WAS KNOCKING BUT NONE THE LESS HIS CONDUCT WAS VERY UNCOURTEOUS AND INDEED UNDER THE CIRCUMSTANCES RATHER SUSPICIOUS -1580-141084-0005 THAT IS VERY IMPORTANT SAID HOLMES -1580-141084-0006 YOU DON'T SEEM TO REALIZE THE POSITION -1580-141084-0007 TO MORROW IS THE EXAMINATION -1580-141084-0008 I CANNOT ALLOW THE EXAMINATION TO BE HELD IF ONE OF THE PAPERS HAS BEEN TAMPERED WITH THE SITUATION MUST BE FACED -1580-141084-0009 IT IS POSSIBLE THAT I MAY BE IN A POSITION THEN TO INDICATE SOME COURSE OF ACTION 
-1580-141084-0010 I WILL TAKE THE BLACK CLAY WITH ME ALSO THE PENCIL CUTTINGS GOOD BYE -1580-141084-0011 WHEN WE WERE OUT IN THE DARKNESS OF THE QUADRANGLE WE AGAIN LOOKED UP AT THE WINDOWS -1580-141084-0012 THE FOUL MOUTHED FELLOW AT THE TOP -1580-141084-0013 HE IS THE ONE WITH THE WORST RECORD -1580-141084-0014 WHY BANNISTER THE SERVANT WHAT'S HIS GAME IN THE MATTER -1580-141084-0015 HE IMPRESSED ME AS BEING A PERFECTLY HONEST MAN -1580-141084-0016 MY FRIEND DID NOT APPEAR TO BE DEPRESSED BY HIS FAILURE BUT SHRUGGED HIS SHOULDERS IN HALF HUMOROUS RESIGNATION -1580-141084-0017 NO GOOD MY DEAR WATSON -1580-141084-0018 I THINK SO YOU HAVE FORMED A CONCLUSION -1580-141084-0019 YES MY DEAR WATSON I HAVE SOLVED THE MYSTERY -1580-141084-0020 LOOK AT THAT HE HELD OUT HIS HAND -1580-141084-0021 ON THE PALM WERE THREE LITTLE PYRAMIDS OF BLACK DOUGHY CLAY -1580-141084-0022 AND ONE MORE THIS MORNING -1580-141084-0023 IN A FEW HOURS THE EXAMINATION WOULD COMMENCE AND HE WAS STILL IN THE DILEMMA BETWEEN MAKING THE FACTS PUBLIC AND ALLOWING THE CULPRIT TO COMPETE FOR THE VALUABLE SCHOLARSHIP -1580-141084-0024 HE COULD HARDLY STAND STILL SO GREAT WAS HIS MENTAL AGITATION AND HE RAN TOWARDS HOLMES WITH TWO EAGER HANDS OUTSTRETCHED THANK HEAVEN THAT YOU HAVE COME -1580-141084-0025 YOU KNOW HIM I THINK SO -1580-141084-0026 IF THIS MATTER IS NOT TO BECOME PUBLIC WE MUST GIVE OURSELVES CERTAIN POWERS AND RESOLVE OURSELVES INTO A SMALL PRIVATE COURT MARTIAL -1580-141084-0027 NO SIR CERTAINLY NOT -1580-141084-0028 THERE WAS NO MAN SIR -1580-141084-0029 HIS TROUBLED BLUE EYES GLANCED AT EACH OF US AND FINALLY RESTED WITH AN EXPRESSION OF BLANK DISMAY UPON BANNISTER IN THE FARTHER CORNER -1580-141084-0030 JUST CLOSE THE DOOR SAID HOLMES -1580-141084-0031 WE WANT TO KNOW MISTER GILCHRIST HOW YOU AN HONOURABLE MAN EVER CAME TO COMMIT SUCH AN ACTION AS THAT OF YESTERDAY -1580-141084-0032 FOR A MOMENT GILCHRIST WITH UPRAISED HAND TRIED TO CONTROL HIS WRITHING FEATURES -1580-141084-0033 COME 
COME SAID HOLMES KINDLY IT IS HUMAN TO ERR AND AT LEAST NO ONE CAN ACCUSE YOU OF BEING A CALLOUS CRIMINAL -1580-141084-0034 WELL WELL DON'T TROUBLE TO ANSWER LISTEN AND SEE THAT I DO YOU NO INJUSTICE -1580-141084-0035 HE COULD EXAMINE THE PAPERS IN HIS OWN OFFICE -1580-141084-0036 THE INDIAN I ALSO THOUGHT NOTHING OF -1580-141084-0037 WHEN I APPROACHED YOUR ROOM I EXAMINED THE WINDOW -1580-141084-0038 NO ONE LESS THAN THAT WOULD HAVE A CHANCE -1580-141084-0039 I ENTERED AND I TOOK YOU INTO MY CONFIDENCE AS TO THE SUGGESTIONS OF THE SIDE TABLE -1580-141084-0040 HE RETURNED CARRYING HIS JUMPING SHOES WHICH ARE PROVIDED AS YOU ARE AWARE WITH SEVERAL SHARP SPIKES -1580-141084-0041 NO HARM WOULD HAVE BEEN DONE HAD IT NOT BEEN THAT AS HE PASSED YOUR DOOR HE PERCEIVED THE KEY WHICH HAD BEEN LEFT BY THE CARELESSNESS OF YOUR SERVANT -1580-141084-0042 A SUDDEN IMPULSE CAME OVER HIM TO ENTER AND SEE IF THEY WERE INDEED THE PROOFS -1580-141084-0043 HE PUT HIS SHOES ON THE TABLE -1580-141084-0044 GLOVES SAID THE YOUNG MAN -1580-141084-0045 SUDDENLY HE HEARD HIM AT THE VERY DOOR THERE WAS NO POSSIBLE ESCAPE -1580-141084-0046 HAVE I TOLD THE TRUTH MISTER GILCHRIST -1580-141084-0047 I HAVE A LETTER HERE MISTER SOAMES WHICH I WROTE TO YOU EARLY THIS MORNING IN THE MIDDLE OF A RESTLESS NIGHT -1580-141084-0048 IT WILL BE CLEAR TO YOU FROM WHAT I HAVE SAID THAT ONLY YOU COULD HAVE LET THIS YOUNG MAN OUT SINCE YOU WERE LEFT IN THE ROOM AND MUST HAVE LOCKED THE DOOR WHEN YOU WENT OUT -1580-141084-0049 IT WAS SIMPLE ENOUGH SIR IF YOU ONLY HAD KNOWN BUT WITH ALL YOUR CLEVERNESS IT WAS IMPOSSIBLE THAT YOU COULD KNOW -1580-141084-0050 IF MISTER SOAMES SAW THEM THE GAME WAS UP -1995-1826-0000 IN THE DEBATE BETWEEN THE SENIOR SOCIETIES HER DEFENCE OF THE FIFTEENTH AMENDMENT HAD BEEN NOT ONLY A NOTABLE BIT OF REASONING BUT DELIVERED WITH REAL ENTHUSIASM -1995-1826-0001 THE SOUTH SHE HAD NOT THOUGHT OF SERIOUSLY AND YET KNOWING OF ITS DELIGHTFUL HOSPITALITY AND MILD CLIMATE SHE WAS NOT AVERSE 
TO CHARLESTON OR NEW ORLEANS -1995-1826-0002 JOHN TAYLOR WHO HAD SUPPORTED HER THROUGH COLLEGE WAS INTERESTED IN COTTON -1995-1826-0003 BETTER GO HE HAD COUNSELLED SENTENTIOUSLY -1995-1826-0004 MIGHT LEARN SOMETHING USEFUL DOWN THERE -1995-1826-0005 BUT JOHN THERE'S NO SOCIETY JUST ELEMENTARY WORK -1995-1826-0006 BEEN LOOKING UP TOOMS COUNTY -1995-1826-0007 FIND SOME CRESSWELLS THERE BIG PLANTATIONS RATED AT TWO HUNDRED AND FIFTY THOUSAND DOLLARS -1995-1826-0008 SOME OTHERS TOO BIG COTTON COUNTY -1995-1826-0009 YOU OUGHT TO KNOW JOHN IF I TEACH NEGROES I'LL SCARCELY SEE MUCH OF PEOPLE IN MY OWN CLASS -1995-1826-0010 AT ANY RATE I SAY GO -1995-1826-0011 HERE SHE WAS TEACHING DIRTY CHILDREN AND THE SMELL OF CONFUSED ODORS AND BODILY PERSPIRATION WAS TO HER AT TIMES UNBEARABLE -1995-1826-0012 SHE WANTED A GLANCE OF THE NEW BOOKS AND PERIODICALS AND TALK OF GREAT PHILANTHROPIES AND REFORMS -1995-1826-0013 SO FOR THE HUNDREDTH TIME SHE WAS THINKING TODAY AS SHE WALKED ALONE UP THE LANE BACK OF THE BARN AND THEN SLOWLY DOWN THROUGH THE BOTTOMS -1995-1826-0014 COTTON SHE PAUSED -1995-1826-0015 SHE HAD ALMOST FORGOTTEN THAT IT WAS HERE WITHIN TOUCH AND SIGHT -1995-1826-0016 THE GLIMMERING SEA OF DELICATE LEAVES WHISPERED AND MURMURED BEFORE HER STRETCHING AWAY TO THE NORTHWARD -1995-1826-0017 THERE MIGHT BE A BIT OF POETRY HERE AND THERE BUT MOST OF THIS PLACE WAS SUCH DESPERATE PROSE -1995-1826-0018 HER REGARD SHIFTED TO THE GREEN STALKS AND LEAVES AGAIN AND SHE STARTED TO MOVE AWAY -1995-1826-0019 COTTON IS A WONDERFUL THING IS IT NOT BOYS SHE SAID RATHER PRIMLY -1995-1826-0020 MISS TAYLOR DID NOT KNOW MUCH ABOUT COTTON BUT AT LEAST ONE MORE REMARK SEEMED CALLED FOR -1995-1826-0021 DON'T KNOW WELL OF ALL THINGS INWARDLY COMMENTED MISS TAYLOR LITERALLY BORN IN COTTON AND OH WELL AS MUCH AS TO ASK WHAT'S THE USE SHE TURNED AGAIN TO GO -1995-1826-0022 I SUPPOSE THOUGH IT'S TOO EARLY FOR THEM THEN CAME THE EXPLOSION -1995-1826-0023 GOOBERS DON'T GROW ON THE TOPS OF VINES BUT 
UNDERGROUND ON THE ROOTS LIKE YAMS IS THAT SO -1995-1826-0024 THE GOLDEN FLEECE IT'S THE SILVER FLEECE HE HARKENED -1995-1826-0025 SOME TIME YOU'LL TELL ME PLEASE WON'T YOU -1995-1826-0026 NOW FOR ONE LITTLE HALF HOUR SHE HAD BEEN A WOMAN TALKING TO A BOY NO NOT EVEN THAT SHE HAD BEEN TALKING JUST TALKING THERE WERE NO PERSONS IN THE CONVERSATION JUST THINGS ONE THING COTTON -1995-1836-0000 THE HON CHARLES SMITH MISS SARAH'S BROTHER WAS WALKING SWIFTLY UPTOWN FROM MISTER EASTERLY'S WALL STREET OFFICE AND HIS FACE WAS PALE -1995-1836-0001 AT LAST THE COTTON COMBINE WAS TO ALL APPEARANCES AN ASSURED FACT AND HE WAS SLATED FOR THE SENATE -1995-1836-0002 WHY SHOULD HE NOT BE AS OTHER MEN -1995-1836-0003 SHE WAS NOT HERSELF A NOTABLY INTELLIGENT WOMAN SHE GREATLY ADMIRED INTELLIGENCE OR WHATEVER LOOKED TO HER LIKE INTELLIGENCE IN OTHERS -1995-1836-0004 AS SHE AWAITED HER GUESTS SHE SURVEYED THE TABLE WITH BOTH SATISFACTION AND DISQUIETUDE FOR HER SOCIAL FUNCTIONS WERE FEW TONIGHT THERE WERE SHE CHECKED THEM OFF ON HER FINGERS SIR JAMES CREIGHTON THE RICH ENGLISH MANUFACTURER AND LADY CREIGHTON MISTER AND MISSUS VANDERPOOL MISTER HARRY CRESSWELL AND HIS SISTER JOHN TAYLOR AND HIS SISTER AND MISTER CHARLES SMITH WHOM THE EVENING PAPERS MENTIONED AS LIKELY TO BE UNITED STATES SENATOR FROM NEW JERSEY A SELECTION OF GUESTS THAT HAD BEEN DETERMINED UNKNOWN TO THE HOSTESS BY THE MEETING OF COTTON INTERESTS EARLIER IN THE DAY -1995-1836-0005 MISSUS GREY HAD MET SOUTHERNERS BEFORE BUT NOT INTIMATELY AND SHE ALWAYS HAD IN MIND VIVIDLY THEIR CRUELTY TO POOR NEGROES A SUBJECT SHE MADE A POINT OF INTRODUCING FORTHWITH -1995-1836-0006 SHE WAS THEREFORE MOST AGREEABLY SURPRISED TO HEAR MISTER CRESSWELL EXPRESS HIMSELF SO CORDIALLY AS APPROVING OF NEGRO EDUCATION -1995-1836-0007 BUT YOU BELIEVE IN SOME EDUCATION ASKED MARY TAYLOR -1995-1836-0008 I BELIEVE IN THE TRAINING OF PEOPLE TO THEIR HIGHEST CAPACITY THE ENGLISHMAN HERE HEARTILY SECONDED HIM -1995-1836-0009 BUT CRESSWELL ADDED 
SIGNIFICANTLY CAPACITY DIFFERS ENORMOUSLY BETWEEN RACES -1995-1836-0010 THE VANDERPOOLS WERE SURE OF THIS AND THE ENGLISHMAN INSTANCING INDIA BECAME QUITE ELOQUENT MISSUS GREY WAS MYSTIFIED BUT HARDLY DARED ADMIT IT THE GENERAL TREND OF THE CONVERSATION SEEMED TO BE THAT MOST INDIVIDUALS NEEDED TO BE SUBMITTED TO THE SHARPEST SCRUTINY BEFORE BEING ALLOWED MUCH EDUCATION AND AS FOR THE LOWER RACES IT WAS SIMPLY CRIMINAL TO OPEN SUCH USELESS OPPORTUNITIES TO THEM -1995-1836-0011 POSITIVELY HEROIC ADDED CRESSWELL AVOIDING HIS SISTER'S EYES -1995-1836-0012 BUT WE'RE NOT ER EXACTLY WELCOMED -1995-1836-0013 MARY TAYLOR HOWEVER RELATED THE TALE OF ZORA TO MISSUS GREY'S PRIVATE EAR LATER -1995-1836-0014 FORTUNATELY SAID MISTER VANDERPOOL NORTHERNERS AND SOUTHERNERS ARE ARRIVING AT A BETTER MUTUAL UNDERSTANDING ON MOST OF THESE MATTERS -1995-1837-0000 HE KNEW THE SILVER FLEECE HIS AND ZORA'S MUST BE RUINED -1995-1837-0001 IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT -1995-1837-0002 AH THE SWAMP THE CRUEL SWAMP -1995-1837-0003 THE REVELATION OF HIS LOVE LIGHTED AND BRIGHTENED SLOWLY TILL IT FLAMED LIKE A SUNRISE OVER HIM AND LEFT HIM IN BURNING WONDER -1995-1837-0004 HE PANTED TO KNOW IF SHE TOO KNEW OR KNEW AND CARED NOT OR CARED AND KNEW NOT -1995-1837-0005 SHE WAS SO STRANGE AND HUMAN A CREATURE -1995-1837-0006 THE WORLD WAS WATER VEILED IN MISTS -1995-1837-0007 THEN OF A SUDDEN AT MIDDAY THE SUN SHOT OUT HOT AND STILL NO BREATH OF AIR STIRRED THE SKY WAS LIKE BLUE STEEL THE EARTH STEAMED -1995-1837-0008 WHERE WAS THE USE OF IMAGINING -1995-1837-0009 THE LAGOON HAD BEEN LEVEL WITH THE DYKES A WEEK AGO AND NOW -1995-1837-0010 PERHAPS SHE TOO MIGHT BE THERE WAITING WEEPING -1995-1837-0011 HE STARTED AT THE THOUGHT HE HURRIED FORTH SADLY -1995-1837-0012 HE SPLASHED AND STAMPED ALONG FARTHER AND FARTHER ONWARD UNTIL HE NEARED THE RAMPART OF THE CLEARING AND PUT FOOT UPON THE 
TREE BRIDGE -1995-1837-0013 THEN HE LOOKED DOWN THE LAGOON WAS DRY -1995-1837-0014 HE STOOD A MOMENT BEWILDERED THEN TURNED AND RUSHED UPON THE ISLAND A GREAT SHEET OF DAZZLING SUNLIGHT SWEPT THE PLACE AND BENEATH LAY A MIGHTY MASS OF OLIVE GREEN THICK TALL WET AND WILLOWY -1995-1837-0015 THE SQUARES OF COTTON SHARP EDGED HEAVY WERE JUST ABOUT TO BURST TO BOLLS -1995-1837-0016 FOR ONE LONG MOMENT HE PAUSED STUPID AGAPE WITH UTTER AMAZEMENT THEN LEANED DIZZILY AGAINST A TREE -1995-1837-0017 HE GAZED ABOUT PERPLEXED ASTONISHED -1995-1837-0018 HERE LAY THE READING OF THE RIDDLE WITH INFINITE WORK AND PAIN SOME ONE HAD DUG A CANAL FROM THE LAGOON TO THE CREEK INTO WHICH THE FORMER HAD DRAINED BY A LONG AND CROOKED WAY THUS ALLOWING IT TO EMPTY DIRECTLY -1995-1837-0019 HE SAT DOWN WEAK BEWILDERED AND ONE THOUGHT WAS UPPERMOST ZORA -1995-1837-0020 THE YEARS OF THE DAYS OF HER DYING WERE TEN -1995-1837-0021 THE HOPE AND DREAM OF HARVEST WAS UPON THE LAND -1995-1837-0022 UP IN THE SICK ROOM ZORA LAY ON THE LITTLE WHITE BED -1995-1837-0023 THE NET AND WEB OF ENDLESS THINGS HAD BEEN CRAWLING AND CREEPING AROUND HER SHE HAD STRUGGLED IN DUMB SPEECHLESS TERROR AGAINST SOME MIGHTY GRASPING THAT STROVE FOR HER LIFE WITH GNARLED AND CREEPING FINGERS BUT NOW AT LAST WEAKLY SHE OPENED HER EYES AND QUESTIONED -1995-1837-0024 FOR A WHILE SHE LAY IN HER CHAIR IN HAPPY DREAMY PLEASURE AT SUN AND BIRD AND TREE -1995-1837-0025 SHE ROSE WITH A FLEETING GLANCE GATHERED THE SHAWL ROUND HER THEN GLIDING FORWARD WAVERING TREMULOUS SLIPPED ACROSS THE ROAD AND INTO THE SWAMP -1995-1837-0026 SHE HAD BEEN BORN WITHIN ITS BORDERS WITHIN ITS BORDERS SHE HAD LIVED AND GROWN AND WITHIN ITS BORDERS SHE HAD MET HER LOVE -1995-1837-0027 ON SHE HURRIED UNTIL SWEEPING DOWN TO THE LAGOON AND THE ISLAND LO THE COTTON LAY BEFORE HER -1995-1837-0028 THE CHAIR WAS EMPTY BUT HE KNEW -1995-1837-0029 HE DARTED THROUGH THE TREES AND PAUSED A TALL MAN STRONGLY BUT SLIMLY MADE -2094-142345-0000 IT IS A VERY FINE OLD 
PLACE OF RED BRICK SOFTENED BY A PALE POWDERY LICHEN WHICH HAS DISPERSED ITSELF WITH HAPPY IRREGULARITY SO AS TO BRING THE RED BRICK INTO TERMS OF FRIENDLY COMPANIONSHIP WITH THE LIMESTONE ORNAMENTS SURROUNDING THE THREE GABLES THE WINDOWS AND THE DOOR PLACE -2094-142345-0001 BUT THE WINDOWS ARE PATCHED WITH WOODEN PANES AND THE DOOR I THINK IS LIKE THE GATE IT IS NEVER OPENED -2094-142345-0002 FOR IT IS A SOLID HEAVY HANDSOME DOOR AND MUST ONCE HAVE BEEN IN THE HABIT OF SHUTTING WITH A SONOROUS BANG BEHIND A LIVERIED LACKEY WHO HAD JUST SEEN HIS MASTER AND MISTRESS OFF THE GROUNDS IN A CARRIAGE AND PAIR -2094-142345-0003 A LARGE OPEN FIREPLACE WITH RUSTY DOGS IN IT AND A BARE BOARDED FLOOR AT THE FAR END FLEECES OF WOOL STACKED UP IN THE MIDDLE OF THE FLOOR SOME EMPTY CORN BAGS -2094-142345-0004 AND WHAT THROUGH THE LEFT HAND WINDOW -2094-142345-0005 SEVERAL CLOTHES HORSES A PILLION A SPINNING WHEEL AND AN OLD BOX WIDE OPEN AND STUFFED FULL OF COLOURED RAGS -2094-142345-0006 AT THE EDGE OF THIS BOX THERE LIES A GREAT WOODEN DOLL WHICH SO FAR AS MUTILATION IS CONCERNED BEARS A STRONG RESEMBLANCE TO THE FINEST GREEK SCULPTURE AND ESPECIALLY IN THE TOTAL LOSS OF ITS NOSE -2094-142345-0007 THE HISTORY OF THE HOUSE IS PLAIN NOW -2094-142345-0008 BUT THERE IS ALWAYS A STRONGER SENSE OF LIFE WHEN THE SUN IS BRILLIANT AFTER RAIN AND NOW HE IS POURING DOWN HIS BEAMS AND MAKING SPARKLES AMONG THE WET STRAW AND LIGHTING UP EVERY PATCH OF VIVID GREEN MOSS ON THE RED TILES OF THE COW SHED AND TURNING EVEN THE MUDDY WATER THAT IS HURRYING ALONG THE CHANNEL TO THE DRAIN INTO A MIRROR FOR THE YELLOW BILLED DUCKS WHO ARE SEIZING THE OPPORTUNITY OF GETTING A DRINK WITH AS MUCH BODY IN IT AS POSSIBLE -2094-142345-0009 FOR THE GREAT BARN DOORS ARE THROWN WIDE OPEN AND MEN ARE BUSY THERE MENDING THE HARNESS UNDER THE SUPERINTENDENCE OF MISTER GOBY THE WHITTAW OTHERWISE SADDLER WHO ENTERTAINS THEM WITH THE LATEST TREDDLESTON GOSSIP -2094-142345-0010 HETTY SORREL OFTEN TOOK THE 
OPPORTUNITY WHEN HER AUNT'S BACK WAS TURNED OF LOOKING AT THE PLEASING REFLECTION OF HERSELF IN THOSE POLISHED SURFACES FOR THE OAK TABLE WAS USUALLY TURNED UP LIKE A SCREEN AND WAS MORE FOR ORNAMENT THAN FOR USE AND SHE COULD SEE HERSELF SOMETIMES IN THE GREAT ROUND PEWTER DISHES THAT WERE RANGED ON THE SHELVES ABOVE THE LONG DEAL DINNER TABLE OR IN THE HOBS OF THE GRATE WHICH ALWAYS SHONE LIKE JASPER -2094-142345-0011 DO NOT SUPPOSE HOWEVER THAT MISSUS POYSER WAS ELDERLY OR SHREWISH IN HER APPEARANCE SHE WAS A GOOD LOOKING WOMAN NOT MORE THAN EIGHT AND THIRTY OF FAIR COMPLEXION AND SANDY HAIR WELL SHAPEN LIGHT FOOTED -2094-142345-0012 THE FAMILY LIKENESS BETWEEN HER AND HER NIECE DINAH MORRIS WITH THE CONTRAST BETWEEN HER KEENNESS AND DINAH'S SERAPHIC GENTLENESS OF EXPRESSION MIGHT HAVE SERVED A PAINTER AS AN EXCELLENT SUGGESTION FOR A MARTHA AND MARY -2094-142345-0013 HER TONGUE WAS NOT LESS KEEN THAN HER EYE AND WHENEVER A DAMSEL CAME WITHIN EARSHOT SEEMED TO TAKE UP AN UNFINISHED LECTURE AS A BARREL ORGAN TAKES UP A TUNE PRECISELY AT THE POINT WHERE IT HAD LEFT OFF -2094-142345-0014 THE FACT THAT IT WAS CHURNING DAY WAS ANOTHER REASON WHY IT WAS INCONVENIENT TO HAVE THE WHITTAWS AND WHY CONSEQUENTLY MISSUS POYSER SHOULD SCOLD MOLLY THE HOUSEMAID WITH UNUSUAL SEVERITY -2094-142345-0015 TO ALL APPEARANCE MOLLY HAD GOT THROUGH HER AFTER DINNER WORK IN AN EXEMPLARY MANNER HAD CLEANED HERSELF WITH GREAT DISPATCH AND NOW CAME TO ASK SUBMISSIVELY IF SHE SHOULD SIT DOWN TO HER SPINNING TILL MILKING TIME -2094-142345-0016 SPINNING INDEED -2094-142345-0017 I NEVER KNEW YOUR EQUALS FOR GALLOWSNESS -2094-142345-0018 WHO TAUGHT YOU TO SCRUB A FLOOR I SHOULD LIKE TO KNOW -2094-142345-0019 COMB THE WOOL FOR THE WHITTAWS INDEED -2094-142345-0020 THAT'S WHAT YOU'D LIKE TO BE DOING IS IT -2094-142345-0021 THAT'S THE WAY WITH YOU THAT'S THE ROAD YOU'D ALL LIKE TO GO HEADLONGS TO RUIN -2094-142345-0022 MISTER OTTLEY'S INDEED -2094-142345-0023 YOU'RE A RARE UN FOR SITTING DOWN TO 
YOUR WORK A LITTLE WHILE AFTER IT'S TIME TO PUT BY -2094-142345-0024 MUNNY MY IRON'S TWITE TOLD PEASE PUT IT DOWN TO WARM -2094-142345-0025 COLD IS IT MY DARLING BLESS YOUR SWEET FACE -2094-142345-0026 SHE'S GOING TO PUT THE IRONING THINGS AWAY -2094-142345-0027 MUNNY I TOULD IKE TO DO INTO DE BARN TO TOMMY TO SEE DE WHITTAWD -2094-142345-0028 NO NO NO TOTTY UD GET HER FEET WET SAID MISSUS POYSER CARRYING AWAY HER IRON -2094-142345-0029 DID EVER ANYBODY SEE THE LIKE SCREAMED MISSUS POYSER RUNNING TOWARDS THE TABLE WHEN HER EYE HAD FALLEN ON THE BLUE STREAM -2094-142345-0030 TOTTY HOWEVER HAD DESCENDED FROM HER CHAIR WITH GREAT SWIFTNESS AND WAS ALREADY IN RETREAT TOWARDS THE DAIRY WITH A SORT OF WADDLING RUN AND AN AMOUNT OF FAT ON THE NAPE OF HER NECK WHICH MADE HER LOOK LIKE THE METAMORPHOSIS OF A WHITE SUCKLING PIG -2094-142345-0031 AND SHE WAS VERY FOND OF YOU TOO AUNT RACHEL -2094-142345-0032 I OFTEN HEARD HER TALK OF YOU IN THE SAME SORT OF WAY -2094-142345-0033 WHEN SHE HAD THAT BAD ILLNESS AND I WAS ONLY ELEVEN YEARS OLD SHE USED TO SAY YOU'LL HAVE A FRIEND ON EARTH IN YOUR AUNT RACHEL IF I'M TAKEN FROM YOU FOR SHE HAS A KIND HEART AND I'M SURE I'VE FOUND IT SO -2094-142345-0034 AND THERE'S LINEN IN THE HOUSE AS I COULD WELL SPARE YOU FOR I'VE GOT LOTS O SHEETING AND TABLE CLOTHING AND TOWELLING AS ISN'T MADE UP -2094-142345-0035 BUT NOT MORE THAN WHAT'S IN THE BIBLE AUNT SAID DINAH -2094-142345-0036 NAY DEAR AUNT YOU NEVER HEARD ME SAY THAT ALL PEOPLE ARE CALLED TO FORSAKE THEIR WORK AND THEIR FAMILIES -2094-142345-0037 WE CAN ALL BE SERVANTS OF GOD WHEREVER OUR LOT IS CAST BUT HE GIVES US DIFFERENT SORTS OF WORK ACCORDING AS HE FITS US FOR IT AND CALLS US TO IT -2094-142345-0038 I CAN NO MORE HELP SPENDING MY LIFE IN TRYING TO DO WHAT I CAN FOR THE SOULS OF OTHERS THAN YOU COULD HELP RUNNING IF YOU HEARD LITTLE TOTTY CRYING AT THE OTHER END OF THE HOUSE THE VOICE WOULD GO TO YOUR HEART YOU WOULD THINK THE DEAR CHILD WAS IN TROUBLE OR IN DANGER AND YOU 
COULDN'T REST WITHOUT RUNNING TO HELP HER AND COMFORT HER -2094-142345-0039 I'VE STRONG ASSURANCE THAT NO EVIL WILL HAPPEN TO YOU AND MY UNCLE AND THE CHILDREN FROM ANYTHING I'VE DONE -2094-142345-0040 I DIDN'T PREACH WITHOUT DIRECTION -2094-142345-0041 DIRECTION -2094-142345-0042 I HANNA COMMON PATIENCE WITH YOU -2094-142345-0043 BY THIS TIME THE TWO GENTLEMEN HAD REACHED THE PALINGS AND HAD GOT DOWN FROM THEIR HORSES IT WAS PLAIN THEY MEANT TO COME IN -2094-142345-0044 SAID MISTER IRWINE WITH HIS STATELY CORDIALITY -2094-142345-0045 OH SIR DON'T MENTION IT SAID MISSUS POYSER -2094-142345-0046 I DELIGHT IN YOUR KITCHEN -2094-142345-0047 POYSER IS NOT AT HOME IS HE -2094-142345-0048 SAID CAPTAIN DONNITHORNE SEATING HIMSELF WHERE HE COULD SEE ALONG THE SHORT PASSAGE TO THE OPEN DAIRY DOOR -2094-142345-0049 NO SIR HE ISN'T HE'S GONE TO ROSSETER TO SEE MISTER WEST THE FACTOR ABOUT THE WOOL -2094-142345-0050 BUT THERE'S FATHER THE BARN SIR IF HE'D BE OF ANY USE -2094-142345-0051 NO THANK YOU I'LL JUST LOOK AT THE WHELPS AND LEAVE A MESSAGE ABOUT THEM WITH YOUR SHEPHERD -2094-142345-0052 I MUST COME ANOTHER DAY AND SEE YOUR HUSBAND I WANT TO HAVE A CONSULTATION WITH HIM ABOUT HORSES -2094-142345-0053 FOR IF HE'S ANYWHERE ON THE FARM WE CAN SEND FOR HIM IN A MINUTE -2094-142345-0054 OH SIR SAID MISSUS POYSER RATHER ALARMED YOU WOULDN'T LIKE IT AT ALL -2094-142345-0055 BUT YOU KNOW MORE ABOUT THAT THAN I DO SIR -2094-142345-0056 I THINK I SHOULD BE DOING YOU A SERVICE TO TURN YOU OUT OF SUCH A PLACE -2094-142345-0057 I KNOW HIS FARM IS IN BETTER ORDER THAN ANY OTHER WITHIN TEN MILES OF US AND AS FOR THE KITCHEN HE ADDED SMILING I DON'T BELIEVE THERE'S ONE IN THE KINGDOM TO BEAT IT -2094-142345-0058 BY THE BY I'VE NEVER SEEN YOUR DAIRY I MUST SEE YOUR DAIRY MISSUS POYSER -2094-142345-0059 THIS MISSUS POYSER SAID BLUSHING AND BELIEVING THAT THE CAPTAIN WAS REALLY INTERESTED IN HER MILK PANS AND WOULD ADJUST HIS OPINION OF HER TO THE APPEARANCE OF HER DAIRY -2094-142345-0060 
OH I'VE NO DOUBT IT'S IN CAPITAL ORDER -2300-131720-0000 THE PARIS PLANT LIKE THAT AT THE CRYSTAL PALACE WAS A TEMPORARY EXHIBIT -2300-131720-0001 THE LONDON PLANT WAS LESS TEMPORARY BUT NOT PERMANENT SUPPLYING BEFORE IT WAS TORN OUT NO FEWER THAN THREE THOUSAND LAMPS IN HOTELS CHURCHES STORES AND DWELLINGS IN THE VICINITY OF HOLBORN VIADUCT -2300-131720-0002 THERE MESSRS JOHNSON AND HAMMER PUT INTO PRACTICE MANY OF THE IDEAS NOW STANDARD IN THE ART AND SECURED MUCH USEFUL DATA FOR THE WORK IN NEW YORK OF WHICH THE STORY HAS JUST BEEN TOLD -2300-131720-0003 THE DYNAMO ELECTRIC MACHINE THOUGH SMALL WAS ROBUST FOR UNDER ALL THE VARYING SPEEDS OF WATER POWER AND THE VICISSITUDES OF THE PLANT TO WHICH IT BELONGED IT CONTINUED IN ACTIVE USE UNTIL EIGHTEEN NINETY NINE SEVENTEEN YEARS -2300-131720-0004 OWING TO HIS INSISTENCE ON LOW PRESSURE DIRECT CURRENT FOR USE IN DENSELY POPULATED DISTRICTS AS THE ONLY SAFE AND TRULY UNIVERSAL PROFITABLE WAY OF DELIVERING ELECTRICAL ENERGY TO THE CONSUMERS EDISON HAS BEEN FREQUENTLY SPOKEN OF AS AN OPPONENT OF THE ALTERNATING CURRENT -2300-131720-0005 WHY IF WE ERECT A STATION AT THE FALLS IT IS A GREAT ECONOMY TO GET IT UP TO THE CITY -2300-131720-0006 THERE SEEMS NO GOOD REASON FOR BELIEVING THAT IT WILL CHANGE -2300-131720-0007 BROAD AS THE PRAIRIES AND FREE IN THOUGHT AS THE WINDS THAT SWEEP THEM HE IS IDIOSYNCRATICALLY OPPOSED TO LOOSE AND WASTEFUL METHODS TO PLANS OF EMPIRE THAT NEGLECT THE POOR AT THE GATE -2300-131720-0008 EVERYTHING HE HAS DONE HAS BEEN AIMED AT THE CONSERVATION OF ENERGY THE CONTRACTION OF SPACE THE INTENSIFICATION OF CULTURE -2300-131720-0009 FOR SOME YEARS IT WAS NOT FOUND FEASIBLE TO OPERATE MOTORS ON ALTERNATING CURRENT CIRCUITS AND THAT REASON WAS OFTEN URGED AGAINST IT SERIOUSLY -2300-131720-0010 IT COULD NOT BE USED FOR ELECTROPLATING OR DEPOSITION NOR COULD IT CHARGE STORAGE BATTERIES ALL OF WHICH ARE EASILY WITHIN THE ABILITY OF THE DIRECT CURRENT -2300-131720-0011 BUT WHEN IT CAME TO BE A QUESTION 
OF LIGHTING A SCATTERED SUBURB A GROUP OF DWELLINGS ON THE OUTSKIRTS A REMOTE COUNTRY RESIDENCE OR A FARM HOUSE THE ALTERNATING CURRENT IN ALL ELEMENTS SAVE ITS DANGER WAS AND IS IDEAL -2300-131720-0012 EDISON WAS INTOLERANT OF SHAM AND SHODDY AND NOTHING WOULD SATISFY HIM THAT COULD NOT STAND CROSS EXAMINATION BY MICROSCOPE TEST TUBE AND GALVANOMETER -2300-131720-0013 UNLESS HE COULD SECURE AN ENGINE OF SMOOTHER RUNNING AND MORE EXACTLY GOVERNED AND REGULATED THAN THOSE AVAILABLE FOR HIS DYNAMO AND LAMP EDISON REALIZED THAT HE WOULD FIND IT ALMOST IMPOSSIBLE TO GIVE A STEADY LIGHT -2300-131720-0014 MISTER EDISON WAS A LEADER FAR AHEAD OF THE TIME -2300-131720-0015 HE OBTAINED THE DESIRED SPEED AND LOAD WITH A FRICTION BRAKE ALSO REGULATOR OF SPEED BUT WAITED FOR AN INDICATOR TO VERIFY IT -2300-131720-0016 THEN AGAIN THERE WAS NO KNOWN WAY TO LUBRICATE AN ENGINE FOR CONTINUOUS RUNNING AND MISTER EDISON INFORMED ME THAT AS A MARINE ENGINE STARTED BEFORE THE SHIP LEFT NEW YORK AND CONTINUED RUNNING UNTIL IT REACHED ITS HOME PORT SO AN ENGINE FOR HIS PURPOSES MUST PRODUCE LIGHT AT ALL TIMES -2300-131720-0017 EDISON HAD INSTALLED HIS HISTORIC FIRST GREAT CENTRAL STATION SYSTEM IN NEW YORK ON THE MULTIPLE ARC SYSTEM COVERED BY HIS FEEDER AND MAIN INVENTION WHICH RESULTED IN A NOTABLE SAVING IN THE COST OF CONDUCTORS AS AGAINST A STRAIGHT TWO WIRE SYSTEM THROUGHOUT OF THE TREE KIND -2300-131720-0018 HE SOON FORESAW THAT STILL GREATER ECONOMY WOULD BE NECESSARY FOR COMMERCIAL SUCCESS NOT ALONE FOR THE LARGER TERRITORY OPENING BUT FOR THE COMPACT DISTRICTS OF LARGE CITIES -2300-131720-0019 THE STRONG POSITION HELD BY THE EDISON SYSTEM UNDER THE STRENUOUS COMPETITION THAT WAS ALREADY SPRINGING UP WAS ENORMOUSLY IMPROVED BY THE INTRODUCTION OF THE THREE WIRE SYSTEM AND IT GAVE AN IMMEDIATE IMPETUS TO INCANDESCENT LIGHTING -2300-131720-0020 IT WAS SPECIALLY SUITED FOR A TRIAL PLANT ALSO IN THE EARLY DAYS WHEN A YIELD OF SIX OR EIGHT LAMPS TO THE HORSE POWER WAS CONSIDERED 
SUBJECT FOR CONGRATULATION -2300-131720-0021 THE STREET CONDUCTORS WERE OF THE OVERHEAD POLE LINE CONSTRUCTION AND WERE INSTALLED BY THE CONSTRUCTION COMPANY THAT HAD BEEN ORGANIZED BY EDISON TO BUILD AND EQUIP CENTRAL STATIONS -2300-131720-0022 MEANWHILE HE HAD CALLED UPON ME TO MAKE A REPORT OF THE THREE WIRE SYSTEM KNOWN IN ENGLAND AS THE HOPKINSON BOTH DOCTOR JOHN HOPKINSON AND MISTER EDISON BEING INDEPENDENT INVENTORS AT PRACTICALLY THE SAME TIME -2300-131720-0023 I THINK HE WAS PERHAPS MORE APPRECIATIVE THAN I WAS OF THE DISCIPLINE OF THE EDISON CONSTRUCTION DEPARTMENT AND THOUGHT IT WOULD BE WELL FOR US TO WAIT UNTIL THE MORNING OF THE FOURTH BEFORE WE STARTED UP -2300-131720-0024 BUT THE PLANT RAN AND IT WAS THE FIRST THREE WIRE STATION IN THIS COUNTRY -2300-131720-0025 THEY WERE LATER USED AS RESERVE MACHINES AND FINALLY WITH THE ENGINE RETIRED FROM SERVICE AS PART OF THE COLLECTION OF EDISONIA BUT THEY REMAIN IN PRACTICALLY AS GOOD CONDITION AS WHEN INSTALLED IN EIGHTEEN EIGHTY THREE -2300-131720-0026 THE ARC LAMP INSTALLED OUTSIDE A CUSTOMER'S PREMISES OR IN A CIRCUIT FOR PUBLIC STREET LIGHTING BURNED SO MANY HOURS NIGHTLY SO MANY NIGHTS IN THE MONTH AND WAS PAID FOR AT THAT RATE SUBJECT TO REBATE FOR HOURS WHEN THE LAMP MIGHT BE OUT THROUGH ACCIDENT -2300-131720-0027 EDISON HELD THAT THE ELECTRICITY SOLD MUST BE MEASURED JUST LIKE GAS OR WATER AND HE PROCEEDED TO DEVELOP A METER -2300-131720-0028 THERE WAS INFINITE SCEPTICISM AROUND HIM ON THE SUBJECT AND WHILE OTHER INVENTORS WERE ALSO GIVING THE SUBJECT THEIR THOUGHT THE PUBLIC TOOK IT FOR GRANTED THAT ANYTHING SO UTTERLY INTANGIBLE AS ELECTRICITY THAT COULD NOT BE SEEN OR WEIGHED AND ONLY GAVE SECONDARY EVIDENCE OF ITSELF AT THE EXACT POINT OF USE COULD NOT BE BROUGHT TO ACCURATE REGISTRATION -2300-131720-0029 HENCE THE EDISON ELECTROLYTIC METER IS NO LONGER USED DESPITE ITS EXCELLENT QUALITIES -2300-131720-0030 THE PRINCIPLE EMPLOYED IN THE EDISON ELECTROLYTIC METER IS THAT WHICH EXEMPLIFIES THE 
POWER OF ELECTRICITY TO DECOMPOSE A CHEMICAL SUBSTANCE -2300-131720-0031 ASSOCIATED WITH THIS SIMPLE FORM OF APPARATUS WERE VARIOUS INGENIOUS DETAILS AND REFINEMENTS TO SECURE REGULARITY OF OPERATION FREEDOM FROM INACCURACY AND IMMUNITY FROM SUCH TAMPERING AS WOULD PERMIT THEFT OF CURRENT OR DAMAGE -2300-131720-0032 THE STANDARD EDISON METER PRACTICE WAS TO REMOVE THE CELLS ONCE A MONTH TO THE METER ROOM OF THE CENTRAL STATION COMPANY FOR EXAMINATION ANOTHER SET BEING SUBSTITUTED -2300-131720-0033 IN DECEMBER EIGHTEEN EIGHTY EIGHT MISTER W J JENKS READ AN INTERESTING PAPER BEFORE THE AMERICAN INSTITUTE OF ELECTRICAL ENGINEERS ON THE SIX YEARS OF PRACTICAL EXPERIENCE HAD UP TO THAT TIME WITH THE METER THEN MORE GENERALLY IN USE THAN ANY OTHER -2300-131720-0034 THE OTHERS HAVING BEEN IN OPERATION TOO SHORT A TIME TO SHOW DEFINITE RESULTS ALTHOUGH THEY ALSO WENT QUICKLY TO A DIVIDEND BASIS -2300-131720-0035 IN THIS CONNECTION IT SHOULD BE MENTIONED THAT THE ASSOCIATION OF EDISON ILLUMINATING COMPANIES IN THE SAME YEAR ADOPTED RESOLUTIONS UNANIMOUSLY TO THE EFFECT THAT THE EDISON METER WAS ACCURATE AND THAT ITS USE WAS NOT EXPENSIVE FOR STATIONS ABOVE ONE THOUSAND LIGHTS AND THAT THE BEST FINANCIAL RESULTS WERE INVARIABLY SECURED IN A STATION SELLING CURRENT BY METER -2300-131720-0036 THE METER CONTINUED IN GENERAL SERVICE DURING EIGHTEEN NINETY NINE AND PROBABLY UP TO THE CLOSE OF THE CENTURY -2300-131720-0037 HE WEIGHED AND REWEIGHED THE METER PLATES AND PURSUED EVERY LINE OF INVESTIGATION IMAGINABLE BUT ALL IN VAIN -2300-131720-0038 HE FELT HE WAS UP AGAINST IT AND THAT PERHAPS ANOTHER KIND OF A JOB WOULD SUIT HIM BETTER -2300-131720-0039 THE PROBLEM WAS SOLVED -2300-131720-0040 WE WERE MORE INTERESTED IN THE TECHNICAL CONDITION OF THE STATION THAN IN THE COMMERCIAL PART -2300-131720-0041 WE HAD METERS IN WHICH THERE WERE TWO BOTTLES OF LIQUID -237-126133-0000 HERE SHE WOULD STAY COMFORTED AND SOOTHED AMONG THE LOVELY PLANTS AND RICH EXOTICS REJOICING THE HEART OF 
OLD TURNER THE GARDENER WHO SINCE POLLY'S FIRST RAPTUROUS ENTRANCE HAD TAKEN HER INTO HIS GOOD GRACES FOR ALL TIME -237-126133-0001 EVERY CHANCE SHE COULD STEAL AFTER PRACTICE HOURS WERE OVER AND AFTER THE CLAMOROUS DEMANDS OF THE BOYS UPON HER TIME WERE FULLY SATISFIED WAS SEIZED TO FLY ON THE WINGS OF THE WIND TO THE FLOWERS -237-126133-0002 THEN DEAR SAID MISSUS WHITNEY YOU MUST BE KINDER TO HER THAN EVER THINK WHAT IT WOULD BE FOR ONE OF YOU TO BE AWAY FROM HOME EVEN AMONG FRIENDS -237-126133-0003 SOMEHOW OF ALL THE DAYS WHEN THE HOME FEELING WAS THE STRONGEST THIS DAY IT SEEMED AS IF SHE COULD BEAR IT NO LONGER -237-126133-0004 IF SHE COULD ONLY SEE PHRONSIE FOR JUST ONE MOMENT -237-126133-0005 OH SHE'S ALWAYS AT THE PIANO SAID VAN SHE MUST BE THERE NOW SOMEWHERE AND THEN SOMEBODY LAUGHED -237-126133-0006 AT THIS THE BUNDLE OPENED SUDDENLY AND OUT POPPED PHRONSIE -237-126133-0007 BUT POLLY COULDN'T SPEAK AND IF JASPER HADN'T CAUGHT HER JUST IN TIME SHE WOULD HAVE TUMBLED OVER BACKWARD FROM THE STOOL PHRONSIE AND ALL -237-126133-0008 ASKED PHRONSIE WITH HER LITTLE FACE CLOSE TO POLLY'S OWN -237-126133-0009 NOW YOU'LL STAY CRIED VAN SAY POLLY WON'T YOU -237-126133-0010 OH YOU ARE THE DEAREST AND BEST MISTER KING I EVER SAW BUT HOW DID YOU MAKE MAMMY LET HER COME -237-126133-0011 ISN'T HE SPLENDID CRIED JASPER IN INTENSE PRIDE SWELLING UP FATHER KNEW HOW TO DO IT -237-126133-0012 THERE THERE HE SAID SOOTHINGLY PATTING HER BROWN FUZZY HEAD -237-126133-0013 I KNOW GASPED POLLY CONTROLLING HER SOBS I WON'T ONLY I CAN'T THANK YOU -237-126133-0014 ASKED PHRONSIE IN INTENSE INTEREST SLIPPING DOWN OUT OF POLLY'S ARMS AND CROWDING UP CLOSE TO JASPER'S SIDE -237-126133-0015 YES ALL ALONE BY HIMSELF ASSERTED JASPER VEHEMENTLY AND WINKING FURIOUSLY TO THE OTHERS TO STOP THEIR LAUGHING HE DID NOW TRULY PHRONSIE -237-126133-0016 OH NO JASPER I MUST GO BY MY VERY OWN SELF -237-126133-0017 THERE JAP YOU'VE CAUGHT IT LAUGHED PERCY WHILE THE OTHERS SCREAMED AT THE SIGHT OF 
JASPER'S FACE -237-126133-0018 DON'T MIND IT POLLY WHISPERED JASPER TWASN'T HER FAULT -237-126133-0019 DEAR ME EJACULATED THE OLD GENTLEMAN IN THE UTMOST AMAZEMENT AND SUCH A TIME AS I'VE HAD TO GET HER HERE TOO -237-126133-0020 HOW DID HER MOTHER EVER LET HER GO -237-126133-0021 SHE ASKED IMPULSIVELY I DIDN'T BELIEVE YOU COULD PERSUADE HER FATHER -237-126133-0022 I DIDN'T HAVE ANY FEARS IF I WORKED IT RIGHTLY SAID THE OLD GENTLEMAN COMPLACENTLY -237-126133-0023 HE CRIED IN HIGH DUDGEON JUST AS IF HE OWNED THE WHOLE OF THE PEPPERS AND COULD DISPOSE OF THEM ALL TO SUIT HIS FANCY -237-126133-0024 AND THE OLD GENTLEMAN WAS SO DELIGHTED WITH HIS SUCCESS THAT HE HAD TO BURST OUT INTO A SERIES OF SHORT HAPPY BITS OF LAUGHTER THAT OCCUPIED QUITE A SPACE OF TIME -237-126133-0025 AT LAST HE CAME OUT OF THEM AND WIPED HIS FACE VIGOROUSLY -237-134493-0000 IT IS SIXTEEN YEARS SINCE JOHN BERGSON DIED -237-134493-0001 HIS WIFE NOW LIES BESIDE HIM AND THE WHITE SHAFT THAT MARKS THEIR GRAVES GLEAMS ACROSS THE WHEAT FIELDS -237-134493-0002 FROM THE NORWEGIAN GRAVEYARD ONE LOOKS OUT OVER A VAST CHECKER BOARD MARKED OFF IN SQUARES OF WHEAT AND CORN LIGHT AND DARK DARK AND LIGHT -237-134493-0003 FROM THE GRAVEYARD GATE ONE CAN COUNT A DOZEN GAYLY PAINTED FARMHOUSES THE GILDED WEATHER VANES ON THE BIG RED BARNS WINK AT EACH OTHER ACROSS THE GREEN AND BROWN AND YELLOW FIELDS -237-134493-0004 THE AIR AND THE EARTH ARE CURIOUSLY MATED AND INTERMINGLED AS IF THE ONE WERE THE BREATH OF THE OTHER -237-134493-0005 HE WAS A SPLENDID FIGURE OF A BOY TALL AND STRAIGHT AS A YOUNG PINE TREE WITH A HANDSOME HEAD AND STORMY GRAY EYES DEEPLY SET UNDER A SERIOUS BROW -237-134493-0006 THAT'S NOT MUCH OF A JOB FOR AN ATHLETE HERE I'VE BEEN TO TOWN AND BACK -237-134493-0007 ALEXANDRA LETS YOU SLEEP LATE -237-134493-0008 SHE GATHERED UP HER REINS -237-134493-0009 PLEASE WAIT FOR ME MARIE EMIL COAXED -237-134493-0010 I NEVER SEE LOU'S SCYTHE OVER HERE -237-134493-0011 HOW BROWN YOU'VE GOT SINCE YOU CAME 
HOME I WISH I HAD AN ATHLETE TO MOW MY ORCHARD -237-134493-0012 I GET WET TO MY KNEES WHEN I GO DOWN TO PICK CHERRIES -237-134493-0013 INDEED HE HAD LOOKED AWAY WITH THE PURPOSE OF NOT SEEING IT -237-134493-0014 THEY THINK YOU'RE PROUD BECAUSE YOU'VE BEEN AWAY TO SCHOOL OR SOMETHING -237-134493-0015 THERE WAS SOMETHING INDIVIDUAL ABOUT THE GREAT FARM A MOST UNUSUAL TRIMNESS AND CARE FOR DETAIL -237-134493-0016 ON EITHER SIDE OF THE ROAD FOR A MILE BEFORE YOU REACHED THE FOOT OF THE HILL STOOD TALL OSAGE ORANGE HEDGES THEIR GLOSSY GREEN MARKING OFF THE YELLOW FIELDS -237-134493-0017 ANY ONE THEREABOUTS WOULD HAVE TOLD YOU THAT THIS WAS ONE OF THE RICHEST FARMS ON THE DIVIDE AND THAT THE FARMER WAS A WOMAN ALEXANDRA BERGSON -237-134493-0018 THERE IS EVEN A WHITE ROW OF BEEHIVES IN THE ORCHARD UNDER THE WALNUT TREES -237-134500-0000 FRANK READ ENGLISH SLOWLY AND THE MORE HE READ ABOUT THIS DIVORCE CASE THE ANGRIER HE GREW -237-134500-0001 MARIE SIGHED -237-134500-0002 A BRISK WIND HAD COME UP AND WAS DRIVING PUFFY WHITE CLOUDS ACROSS THE SKY -237-134500-0003 THE ORCHARD WAS SPARKLING AND RIPPLING IN THE SUN -237-134500-0004 THAT INVITATION DECIDED HER -237-134500-0005 OH BUT I'M GLAD TO GET THIS PLACE MOWED -237-134500-0006 JUST SMELL THE WILD ROSES THEY ARE ALWAYS SO SPICY AFTER A RAIN -237-134500-0007 WE NEVER HAD SO MANY OF THEM IN HERE BEFORE -237-134500-0008 I SUPPOSE IT'S THE WET SEASON WILL YOU HAVE TO CUT THEM TOO -237-134500-0009 I SUPPOSE THAT'S THE WET SEASON TOO THEN -237-134500-0010 IT'S EXCITING TO SEE EVERYTHING GROWING SO FAST AND TO GET THE GRASS CUT -237-134500-0011 AREN'T YOU SPLASHED LOOK AT THE SPIDER WEBS ALL OVER THE GRASS -237-134500-0012 IN A FEW MOMENTS HE HEARD THE CHERRIES DROPPING SMARTLY INTO THE PAIL AND HE BEGAN TO SWING HIS SCYTHE WITH THAT LONG EVEN STROKE THAT FEW AMERICAN BOYS EVER LEARN -237-134500-0013 MARIE PICKED CHERRIES AND SANG SOFTLY TO HERSELF STRIPPING ONE GLITTERING BRANCH AFTER ANOTHER SHIVERING WHEN SHE CAUGHT A SHOWER 
OF RAINDROPS ON HER NECK AND HAIR -237-134500-0014 AND EMIL MOWED HIS WAY SLOWLY DOWN TOWARD THE CHERRY TREES -237-134500-0015 THAT SUMMER THE RAINS HAD BEEN SO MANY AND OPPORTUNE THAT IT WAS ALMOST MORE THAN SHABATA AND HIS MAN COULD DO TO KEEP UP WITH THE CORN THE ORCHARD WAS A NEGLECTED WILDERNESS -237-134500-0016 I DON'T KNOW ALL OF THEM BUT I KNOW LINDENS ARE -237-134500-0017 IF I FEEL THAT WAY I FEEL THAT WAY -237-134500-0018 HE REACHED UP AMONG THE BRANCHES AND BEGAN TO PICK THE SWEET INSIPID FRUIT LONG IVORY COLORED BERRIES TIPPED WITH FAINT PINK LIKE WHITE CORAL THAT FALL TO THE GROUND UNHEEDED ALL SUMMER THROUGH -237-134500-0019 HE DROPPED A HANDFUL INTO HER LAP -237-134500-0020 YES DON'T YOU -237-134500-0021 OH EVER SO MUCH ONLY HE SEEMS KIND OF STAID AND SCHOOL TEACHERY -237-134500-0022 WHEN SHE USED TO TELL ME ABOUT HIM I ALWAYS WONDERED WHETHER SHE WASN'T A LITTLE IN LOVE WITH HIM -237-134500-0023 IT WOULD SERVE YOU ALL RIGHT IF SHE WALKED OFF WITH CARL -237-134500-0024 I LIKE TO TALK TO CARL ABOUT NEW YORK AND WHAT A FELLOW CAN DO THERE -237-134500-0025 OH EMIL -237-134500-0026 SURELY YOU ARE NOT THINKING OF GOING OFF THERE -237-134500-0027 MARIE'S FACE FELL UNDER HIS BROODING GAZE -237-134500-0028 I'M SURE ALEXANDRA HOPES YOU WILL STAY ON HERE SHE MURMURED -237-134500-0029 I DON'T WANT TO STAND AROUND AND LOOK ON -237-134500-0030 I WANT TO BE DOING SOMETHING ON MY OWN ACCOUNT -237-134500-0031 SOMETIMES I DON'T WANT TO DO ANYTHING AT ALL AND SOMETIMES I WANT TO PULL THE FOUR CORNERS OF THE DIVIDE TOGETHER HE THREW OUT HIS ARM AND BROUGHT IT BACK WITH A JERK SO LIKE A TABLE CLOTH -237-134500-0032 I GET TIRED OF SEEING MEN AND HORSES GOING UP AND DOWN UP AND DOWN -237-134500-0033 I WISH YOU WEREN'T SO RESTLESS AND DIDN'T GET SO WORKED UP OVER THINGS SHE SAID SADLY -237-134500-0034 THANK YOU HE RETURNED SHORTLY -237-134500-0035 AND YOU NEVER USED TO BE CROSS TO ME -237-134500-0036 I CAN'T PLAY WITH YOU LIKE A LITTLE BOY ANY MORE HE SAID SLOWLY THAT'S 
WHAT YOU MISS MARIE -237-134500-0037 BUT EMIL IF I UNDERSTAND THEN ALL OUR GOOD TIMES ARE OVER WE CAN NEVER DO NICE THINGS TOGETHER ANY MORE -237-134500-0038 AND ANYHOW THERE'S NOTHING TO UNDERSTAND -237-134500-0039 THAT WON'T LAST IT WILL GO AWAY AND THINGS WILL BE JUST AS THEY USED TO -237-134500-0040 I PRAY FOR YOU BUT THAT'S NOT THE SAME AS IF YOU PRAYED YOURSELF -237-134500-0041 I CAN'T PRAY TO HAVE THE THINGS I WANT HE SAID SLOWLY AND I WON'T PRAY NOT TO HAVE THEM NOT IF I'M DAMNED FOR IT -237-134500-0042 THEN ALL OUR GOOD TIMES ARE OVER -260-123286-0000 SATURDAY AUGUST FIFTEENTH THE SEA UNBROKEN ALL ROUND NO LAND IN SIGHT -260-123286-0001 THE HORIZON SEEMS EXTREMELY DISTANT -260-123286-0002 ALL MY DANGER AND SUFFERINGS WERE NEEDED TO STRIKE A SPARK OF HUMAN FEELING OUT OF HIM BUT NOW THAT I AM WELL HIS NATURE HAS RESUMED ITS SWAY -260-123286-0003 YOU SEEM ANXIOUS MY UNCLE I SAID SEEING HIM CONTINUALLY WITH HIS GLASS TO HIS EYE ANXIOUS -260-123286-0004 ONE MIGHT BE WITH LESS REASON THAN NOW -260-123286-0005 I AM NOT COMPLAINING THAT THE RATE IS SLOW BUT THAT THE SEA IS SO WIDE -260-123286-0006 WE ARE LOSING TIME AND THE FACT IS I HAVE NOT COME ALL THIS WAY TO TAKE A LITTLE SAIL UPON A POND ON A RAFT -260-123286-0007 HE CALLED THIS SEA A POND AND OUR LONG VOYAGE TAKING A LITTLE SAIL -260-123286-0008 THEREFORE DON'T TALK TO ME ABOUT VIEWS AND PROSPECTS -260-123286-0009 I TAKE THIS AS MY ANSWER AND I LEAVE THE PROFESSOR TO BITE HIS LIPS WITH IMPATIENCE -260-123286-0010 SUNDAY AUGUST SIXTEENTH -260-123286-0011 NOTHING NEW WEATHER UNCHANGED THE WIND FRESHENS -260-123286-0012 BUT THERE SEEMED NO REASON TO FEAR -260-123286-0013 THE SHADOW OF THE RAFT WAS CLEARLY OUTLINED UPON THE SURFACE OF THE WAVES -260-123286-0014 TRULY THIS SEA IS OF INFINITE WIDTH -260-123286-0015 IT MUST BE AS WIDE AS THE MEDITERRANEAN OR THE ATLANTIC AND WHY NOT -260-123286-0016 THESE THOUGHTS AGITATED ME ALL DAY AND MY IMAGINATION SCARCELY CALMED DOWN AFTER SEVERAL HOURS SLEEP 
-260-123286-0017 I SHUDDER AS I RECALL THESE MONSTERS TO MY REMEMBRANCE -260-123286-0018 I SAW AT THE HAMBURG MUSEUM THE SKELETON OF ONE OF THESE CREATURES THIRTY FEET IN LENGTH -260-123286-0019 I SUPPOSE PROFESSOR LIEDENBROCK WAS OF MY OPINION TOO AND EVEN SHARED MY FEARS FOR AFTER HAVING EXAMINED THE PICK HIS EYES TRAVERSED THE OCEAN FROM SIDE TO SIDE -260-123286-0020 TUESDAY AUGUST EIGHTEENTH -260-123286-0021 DURING HIS WATCH I SLEPT -260-123286-0022 TWO HOURS AFTERWARDS A TERRIBLE SHOCK AWOKE ME -260-123286-0023 THE RAFT WAS HEAVED UP ON A WATERY MOUNTAIN AND PITCHED DOWN AGAIN AT A DISTANCE OF TWENTY FATHOMS -260-123286-0024 THERE'S A WHALE A WHALE CRIED THE PROFESSOR -260-123286-0025 FLIGHT WAS OUT OF THE QUESTION NOW THE REPTILES ROSE THEY WHEELED AROUND OUR LITTLE RAFT WITH A RAPIDITY GREATER THAN THAT OF EXPRESS TRAINS -260-123286-0026 TWO MONSTERS ONLY WERE CREATING ALL THIS COMMOTION AND BEFORE MY EYES ARE TWO REPTILES OF THE PRIMITIVE WORLD -260-123286-0027 I CAN DISTINGUISH THE EYE OF THE ICHTHYOSAURUS GLOWING LIKE A RED HOT COAL AND AS LARGE AS A MAN'S HEAD -260-123286-0028 ITS JAW IS ENORMOUS AND ACCORDING TO NATURALISTS IT IS ARMED WITH NO LESS THAN ONE HUNDRED AND EIGHTY TWO TEETH -260-123286-0029 THOSE HUGE CREATURES ATTACKED EACH OTHER WITH THE GREATEST ANIMOSITY -260-123286-0030 SUDDENLY THE ICHTHYOSAURUS AND THE PLESIOSAURUS DISAPPEAR BELOW LEAVING A WHIRLPOOL EDDYING IN THE WATER -260-123286-0031 AS FOR THE ICHTHYOSAURUS HAS HE RETURNED TO HIS SUBMARINE CAVERN -260-123288-0000 THE ROARINGS BECOME LOST IN THE DISTANCE -260-123288-0001 THE WEATHER IF WE MAY USE THAT TERM WILL CHANGE BEFORE LONG -260-123288-0002 THE ATMOSPHERE IS CHARGED WITH VAPOURS PERVADED WITH THE ELECTRICITY GENERATED BY THE EVAPORATION OF SALINE WATERS -260-123288-0003 THE ELECTRIC LIGHT CAN SCARCELY PENETRATE THROUGH THE DENSE CURTAIN WHICH HAS DROPPED OVER THE THEATRE ON WHICH THE BATTLE OF THE ELEMENTS IS ABOUT TO BE WAGED -260-123288-0004 THE AIR IS HEAVY THE SEA IS 
CALM -260-123288-0005 FROM TIME TO TIME A FLEECY TUFT OF MIST WITH YET SOME GLEAMING LIGHT LEFT UPON IT DROPS DOWN UPON THE DENSE FLOOR OF GREY AND LOSES ITSELF IN THE OPAQUE AND IMPENETRABLE MASS -260-123288-0006 THE ATMOSPHERE IS EVIDENTLY CHARGED AND SURCHARGED WITH ELECTRICITY -260-123288-0007 THE WIND NEVER LULLS BUT TO ACQUIRE INCREASED STRENGTH THE VAST BANK OF HEAVY CLOUDS IS A HUGE RESERVOIR OF FEARFUL WINDY GUSTS AND RUSHING STORMS -260-123288-0008 THERE'S A HEAVY STORM COMING ON I CRIED POINTING TOWARDS THE HORIZON -260-123288-0009 THOSE CLOUDS SEEM AS IF THEY WERE GOING TO CRUSH THE SEA -260-123288-0010 ON THE MAST ALREADY I SEE THE LIGHT PLAY OF A LAMBENT SAINT ELMO'S FIRE THE OUTSTRETCHED SAIL CATCHES NOT A BREATH OF WIND AND HANGS LIKE A SHEET OF LEAD -260-123288-0011 BUT IF WE HAVE NOW CEASED TO ADVANCE WHY DO WE YET LEAVE THAT SAIL LOOSE WHICH AT THE FIRST SHOCK OF THE TEMPEST MAY CAPSIZE US IN A MOMENT -260-123288-0012 THAT WILL BE SAFEST NO NO NEVER -260-123288-0013 THE PILED UP VAPOURS CONDENSE INTO WATER AND THE AIR PUT INTO VIOLENT ACTION TO SUPPLY THE VACUUM LEFT BY THE CONDENSATION OF THE MISTS ROUSES ITSELF INTO A WHIRLWIND -260-123288-0014 HANS STIRS NOT -260-123288-0015 FROM THE UNDER SURFACE OF THE CLOUDS THERE ARE CONTINUAL EMISSIONS OF LURID LIGHT ELECTRIC MATTER IS IN CONTINUAL EVOLUTION FROM THEIR COMPONENT MOLECULES THE GASEOUS ELEMENTS OF THE AIR NEED TO BE SLAKED WITH MOISTURE FOR INNUMERABLE COLUMNS OF WATER RUSH UPWARDS INTO THE AIR AND FALL BACK AGAIN IN WHITE FOAM -260-123288-0016 I REFER TO THE THERMOMETER IT INDICATES THE FIGURE IS OBLITERATED -260-123288-0017 IS THE ATMOSPHERIC CONDITION HAVING ONCE REACHED THIS DENSITY TO BECOME FINAL -260-123288-0018 THE RAFT BEARS ON STILL TO THE SOUTH EAST -260-123288-0019 AT NOON THE VIOLENCE OF THE STORM REDOUBLES -260-123288-0020 EACH OF US IS LASHED TO SOME PART OF THE RAFT -260-123288-0021 THE WAVES RISE ABOVE OUR HEADS -260-123288-0022 THEY SEEM TO BE WE ARE LOST BUT I AM NOT SURE 
-260-123288-0023 HE NODS HIS CONSENT -260-123288-0024 THE FIREBALL HALF OF IT WHITE HALF AZURE BLUE AND THE SIZE OF A TEN INCH SHELL MOVED SLOWLY ABOUT THE RAFT BUT REVOLVING ON ITS OWN AXIS WITH ASTONISHING VELOCITY AS IF WHIPPED ROUND BY THE FORCE OF THE WHIRLWIND -260-123288-0025 HERE IT COMES THERE IT GLIDES NOW IT IS UP THE RAGGED STUMP OF THE MAST THENCE IT LIGHTLY LEAPS ON THE PROVISION BAG DESCENDS WITH A LIGHT BOUND AND JUST SKIMS THE POWDER MAGAZINE HORRIBLE -260-123288-0026 WE SHALL BE BLOWN UP BUT NO THE DAZZLING DISK OF MYSTERIOUS LIGHT NIMBLY LEAPS ASIDE IT APPROACHES HANS WHO FIXES HIS BLUE EYE UPON IT STEADILY IT THREATENS THE HEAD OF MY UNCLE WHO FALLS UPON HIS KNEES WITH HIS HEAD DOWN TO AVOID IT -260-123288-0027 A SUFFOCATING SMELL OF NITROGEN FILLS THE AIR IT ENTERS THE THROAT IT FILLS THE LUNGS -260-123288-0028 WE SUFFER STIFLING PAINS -260-123440-0000 AND HOW ODD THE DIRECTIONS WILL LOOK -260-123440-0001 POOR ALICE -260-123440-0002 IT WAS THE WHITE RABBIT RETURNING SPLENDIDLY DRESSED WITH A PAIR OF WHITE KID GLOVES IN ONE HAND AND A LARGE FAN IN THE OTHER HE CAME TROTTING ALONG IN A GREAT HURRY MUTTERING TO HIMSELF AS HE CAME OH THE DUCHESS THE DUCHESS -260-123440-0003 OH WON'T SHE BE SAVAGE IF I'VE KEPT HER WAITING -260-123440-0004 ALICE TOOK UP THE FAN AND GLOVES AND AS THE HALL WAS VERY HOT SHE KEPT FANNING HERSELF ALL THE TIME SHE WENT ON TALKING DEAR DEAR HOW QUEER EVERYTHING IS TO DAY -260-123440-0005 AND YESTERDAY THINGS WENT ON JUST AS USUAL -260-123440-0006 I WONDER IF I'VE BEEN CHANGED IN THE NIGHT -260-123440-0007 I ALMOST THINK I CAN REMEMBER FEELING A LITTLE DIFFERENT -260-123440-0008 I'LL TRY IF I KNOW ALL THE THINGS I USED TO KNOW -260-123440-0009 I SHALL NEVER GET TO TWENTY AT THAT RATE -260-123440-0010 HOW CHEERFULLY HE SEEMS TO GRIN HOW NEATLY SPREAD HIS CLAWS AND WELCOME LITTLE FISHES IN WITH GENTLY SMILING JAWS -260-123440-0011 NO I'VE MADE UP MY MIND ABOUT IT IF I'M MABEL I'LL STAY DOWN HERE -260-123440-0012 IT'LL BE NO 
USE THEIR PUTTING THEIR HEADS DOWN AND SAYING COME UP AGAIN DEAR -260-123440-0013 I AM SO VERY TIRED OF BEING ALL ALONE HERE -260-123440-0014 AND I DECLARE IT'S TOO BAD THAT IT IS -260-123440-0015 I WISH I HADN'T CRIED SO MUCH SAID ALICE AS SHE SWAM ABOUT TRYING TO FIND HER WAY OUT -260-123440-0016 I SHALL BE PUNISHED FOR IT NOW I SUPPOSE BY BEING DROWNED IN MY OWN TEARS -260-123440-0017 THAT WILL BE A QUEER THING TO BE SURE -260-123440-0018 I AM VERY TIRED OF SWIMMING ABOUT HERE O MOUSE -260-123440-0019 CRIED ALICE AGAIN FOR THIS TIME THE MOUSE WAS BRISTLING ALL OVER AND SHE FELT CERTAIN IT MUST BE REALLY OFFENDED -260-123440-0020 WE WON'T TALK ABOUT HER ANY MORE IF YOU'D RATHER NOT WE INDEED -2830-3979-0000 WE WANT YOU TO HELP US PUBLISH SOME LEADING WORK OF LUTHER'S FOR THE GENERAL AMERICAN MARKET WILL YOU DO IT -2830-3979-0001 THE CONDITION IS THAT I WILL BE PERMITTED TO MAKE LUTHER TALK AMERICAN STREAMLINE HIM SO TO SPEAK BECAUSE YOU WILL NEVER GET PEOPLE WHETHER IN OR OUTSIDE THE LUTHERAN CHURCH ACTUALLY TO READ LUTHER UNLESS WE MAKE HIM TALK AS HE WOULD TALK TODAY TO AMERICANS -2830-3979-0002 LET US BEGIN WITH THAT HIS COMMENTARY ON GALATIANS -2830-3979-0003 THE UNDERTAKING WHICH SEEMED SO ATTRACTIVE WHEN VIEWED AS A LITERARY TASK PROVED A MOST DIFFICULT ONE AND AT TIMES BECAME OPPRESSIVE -2830-3979-0004 IT WAS WRITTEN IN LATIN -2830-3979-0005 THE WORK HAD TO BE CONDENSED -2830-3979-0006 A WORD SHOULD NOW BE SAID ABOUT THE ORIGIN OF LUTHER'S COMMENTARY ON GALATIANS -2830-3979-0007 MUCH LATER WHEN A FRIEND OF HIS WAS PREPARING AN EDITION OF ALL HIS LATIN WORKS HE REMARKED TO HIS HOME CIRCLE IF I HAD MY WAY ABOUT IT THEY WOULD REPUBLISH ONLY THOSE OF MY BOOKS WHICH HAVE DOCTRINE MY GALATIANS FOR INSTANCE -2830-3979-0008 IN OTHER WORDS THESE THREE MEN TOOK DOWN THE LECTURES WHICH LUTHER ADDRESSED TO HIS STUDENTS IN THE COURSE OF GALATIANS AND ROERER PREPARED THE MANUSCRIPT FOR THE PRINTER -2830-3979-0009 IT PRESENTS LIKE NO OTHER OF LUTHER'S WRITINGS THE 
CENTRAL THOUGHT OF CHRISTIANITY THE JUSTIFICATION OF THE SINNER FOR THE SAKE OF CHRIST'S MERITS ALONE -2830-3979-0010 BUT THE ESSENCE OF LUTHER'S LECTURES IS THERE -2830-3979-0011 THE LORD WHO HAS GIVEN US POWER TO TEACH AND TO HEAR LET HIM ALSO GIVE US THE POWER TO SERVE AND TO DO LUKE TWO -2830-3979-0012 THE WORD OF OUR GOD SHALL STAND FOREVER -2830-3980-0000 IN EVERY WAY THEY SOUGHT TO UNDERMINE THE AUTHORITY OF SAINT PAUL -2830-3980-0001 THEY SAID TO THE GALATIANS YOU HAVE NO RIGHT TO THINK HIGHLY OF PAUL -2830-3980-0002 HE WAS THE LAST TO TURN TO CHRIST -2830-3980-0003 PAUL CAME LATER AND IS BENEATH US -2830-3980-0004 INDEED HE PERSECUTED THE CHURCH OF CHRIST FOR A LONG TIME -2830-3980-0005 DO YOU SUPPOSE THAT GOD FOR THE SAKE OF A FEW LUTHERAN HERETICS WOULD DISOWN HIS ENTIRE CHURCH -2830-3980-0006 AGAINST THESE BOASTING FALSE APOSTLES PAUL BOLDLY DEFENDS HIS APOSTOLIC AUTHORITY AND MINISTRY -2830-3980-0007 AS THE AMBASSADOR OF A GOVERNMENT IS HONORED FOR HIS OFFICE AND NOT FOR HIS PRIVATE PERSON SO THE MINISTER OF CHRIST SHOULD EXALT HIS OFFICE IN ORDER TO GAIN AUTHORITY AMONG MEN -2830-3980-0008 PAUL TAKES PRIDE IN HIS MINISTRY NOT TO HIS OWN PRAISE BUT TO THE PRAISE OF GOD -2830-3980-0009 PAUL AN APOSTLE NOT OF MEN ET CETERA -2830-3980-0010 EITHER HE CALLS MINISTERS THROUGH THE AGENCY OF MEN OR HE CALLS THEM DIRECTLY AS HE CALLED THE PROPHETS AND APOSTLES -2830-3980-0011 PAUL DECLARES THAT THE FALSE APOSTLES WERE CALLED OR SENT NEITHER BY MEN NOR BY MAN -2830-3980-0012 THE MOST THEY COULD CLAIM IS THAT THEY WERE SENT BY OTHERS -2830-3980-0013 HE MENTIONS THE APOSTLES FIRST BECAUSE THEY WERE APPOINTED DIRECTLY BY GOD -2830-3980-0014 THE CALL IS NOT TO BE TAKEN LIGHTLY -2830-3980-0015 FOR A PERSON TO POSSESS KNOWLEDGE IS NOT ENOUGH -2830-3980-0016 IT SPOILS ONE'S BEST WORK -2830-3980-0017 WHEN I WAS A YOUNG MAN I THOUGHT PAUL WAS MAKING TOO MUCH OF HIS CALL -2830-3980-0018 I DID NOT THEN REALIZE THE IMPORTANCE OF THE MINISTRY -2830-3980-0019 I KNEW NOTHING 
OF THE DOCTRINE OF FAITH BECAUSE WE WERE TAUGHT SOPHISTRY INSTEAD OF CERTAINTY AND NOBODY UNDERSTOOD SPIRITUAL BOASTING -2830-3980-0020 THIS IS NO SINFUL PRIDE IT IS HOLY PRIDE -2830-3980-0021 AND GOD THE FATHER WHO RAISED HIM FROM THE DEAD -2830-3980-0022 THE CLAUSE SEEMS SUPERFLUOUS ON FIRST SIGHT -2830-3980-0023 THESE PERVERTERS OF THE RIGHTEOUSNESS OF CHRIST RESIST THE FATHER AND THE SON AND THE WORKS OF THEM BOTH -2830-3980-0024 IN THIS WHOLE EPISTLE PAUL TREATS OF THE RESURRECTION OF CHRIST -2830-3980-0025 BY HIS RESURRECTION CHRIST WON THE VICTORY OVER LAW SIN FLESH WORLD DEVIL DEATH HELL AND EVERY EVIL -2830-3980-0026 VERSE TWO -2830-3980-0027 AND ALL THE BRETHREN WHICH ARE WITH ME -2830-3980-0028 THIS SHOULD GO FAR IN SHUTTING THE MOUTHS OF THE FALSE APOSTLES -2830-3980-0029 ALTHOUGH THE BRETHREN WITH ME ARE NOT APOSTLES LIKE MYSELF YET THEY ARE ALL OF ONE MIND WITH ME THINK WRITE AND TEACH AS I DO -2830-3980-0030 THEY DO NOT GO WHERE THE ENEMIES OF THE GOSPEL PREDOMINATE THEY GO WHERE THE CHRISTIANS ARE -2830-3980-0031 WHY DO THEY NOT INVADE THE CATHOLIC PROVINCES AND PREACH THEIR DOCTRINE TO GODLESS PRINCES BISHOPS AND DOCTORS AS WE HAVE DONE BY THE HELP OF GOD -2830-3980-0032 WE LOOK FOR THAT REWARD WHICH EYE HATH NOT SEEN NOR EAR HEARD NEITHER HATH ENTERED INTO THE HEART OF MAN -2830-3980-0033 NOT ALL THE GALATIANS HAD BECOME PERVERTED -2830-3980-0034 THESE MEANS CANNOT BE CONTAMINATED -2830-3980-0035 THEY REMAIN DIVINE REGARDLESS OF MEN'S OPINION -2830-3980-0036 WHEREVER THE MEANS OF GRACE ARE FOUND THERE IS THE HOLY CHURCH EVEN THOUGH ANTICHRIST REIGNS THERE -2830-3980-0037 SO MUCH FOR THE TITLE OF THE EPISTLE NOW FOLLOWS THE GREETING OF THE APOSTLE VERSE THREE -2830-3980-0038 GRACE BE TO YOU AND PEACE FROM GOD THE FATHER AND FROM OUR LORD JESUS CHRIST -2830-3980-0039 THE TERMS OF GRACE AND PEACE ARE COMMON TERMS WITH PAUL AND ARE NOW PRETTY WELL UNDERSTOOD -2830-3980-0040 THE GREETING OF THE APOSTLE IS REFRESHING -2830-3980-0041 GRACE INVOLVES THE 
REMISSION OF SINS PEACE AND A HAPPY CONSCIENCE -2830-3980-0042 THE WORLD BRANDS THIS A PERNICIOUS DOCTRINE -2830-3980-0043 EXPERIENCE PROVES THIS -2830-3980-0044 HOWEVER THE GRACE AND PEACE OF GOD WILL -2830-3980-0045 MEN SHOULD NOT SPECULATE ABOUT THE NATURE OF GOD -2830-3980-0046 WAS IT NOT ENOUGH TO SAY FROM GOD THE FATHER -2830-3980-0047 TO DO SO IS TO LOSE GOD ALTOGETHER BECAUSE GOD BECOMES INTOLERABLE WHEN WE SEEK TO MEASURE AND TO COMPREHEND HIS INFINITE MAJESTY -2830-3980-0048 HE CAME DOWN TO EARTH LIVED AMONG MEN SUFFERED WAS CRUCIFIED AND THEN HE DIED STANDING CLEARLY BEFORE US SO THAT OUR HEARTS AND EYES MAY FASTEN UPON HIM -2830-3980-0049 EMBRACE HIM AND FORGET ABOUT THE NATURE OF GOD -2830-3980-0050 DID NOT CHRIST HIMSELF SAY I AM THE WAY AND THE TRUTH AND THE LIFE NO MAN COMETH UNTO THE FATHER BUT BY ME -2830-3980-0051 WHEN YOU ARGUE ABOUT THE NATURE OF GOD APART FROM THE QUESTION OF JUSTIFICATION YOU MAY BE AS PROFOUND AS YOU LIKE -2830-3980-0052 WE ARE TO HEAR CHRIST WHO HAS BEEN APPOINTED BY THE FATHER AS OUR DIVINE TEACHER -2830-3980-0053 AT THE SAME TIME PAUL CONFIRMS OUR CREED THAT CHRIST IS VERY GOD -2830-3980-0054 THAT CHRIST IS VERY GOD IS APPARENT IN THAT PAUL ASCRIBES TO HIM DIVINE POWERS EQUALLY WITH THE FATHER AS FOR INSTANCE THE POWER TO DISPENSE GRACE AND PEACE -2830-3980-0055 TO BESTOW PEACE AND GRACE LIES IN THE PROVINCE OF GOD WHO ALONE CAN CREATE THESE BLESSINGS THE ANGELS CANNOT -2830-3980-0056 OTHERWISE PAUL SHOULD HAVE WRITTEN GRACE FROM GOD THE FATHER AND PEACE FROM OUR LORD JESUS CHRIST -2830-3980-0057 THE ARIANS TOOK CHRIST FOR A NOBLE AND PERFECT CREATURE SUPERIOR EVEN TO THE ANGELS BECAUSE BY HIM GOD CREATED HEAVEN AND EARTH -2830-3980-0058 MOHAMMED ALSO SPEAKS HIGHLY OF CHRIST -2830-3980-0059 PAUL STICKS TO HIS THEME -2830-3980-0060 HE NEVER LOSES SIGHT OF THE PURPOSE OF HIS EPISTLE -2830-3980-0061 NOT GOLD OR SILVER OR PASCHAL LAMBS OR AN ANGEL BUT HIMSELF WHAT FOR -2830-3980-0062 NOT FOR A CROWN OR A KINGDOM OR OUR 
GOODNESS BUT FOR OUR SINS -2830-3980-0063 UNDERSCORE THESE WORDS FOR THEY ARE FULL OF COMFORT FOR SORE CONSCIENCES -2830-3980-0064 HOW MAY WE OBTAIN REMISSION OF OUR SINS -2830-3980-0065 PAUL ANSWERS THE MAN WHO IS NAMED JESUS CHRIST AND THE SON OF GOD GAVE HIMSELF FOR OUR SINS -2830-3980-0066 SINCE CHRIST WAS GIVEN FOR OUR SINS IT STANDS TO REASON THAT THEY CANNOT BE PUT AWAY BY OUR OWN EFFORTS -2830-3980-0067 THIS SENTENCE ALSO DEFINES OUR SINS AS GREAT SO GREAT IN FACT THAT THE WHOLE WORLD COULD NOT MAKE AMENDS FOR A SINGLE SIN -2830-3980-0068 THE GREATNESS OF THE RANSOM CHRIST THE SON OF GOD INDICATES THIS -2830-3980-0069 THE VICIOUS CHARACTER OF SIN IS BROUGHT OUT BY THE WORDS WHO GAVE HIMSELF FOR OUR SINS -2830-3980-0070 BUT WE ARE CARELESS WE MAKE LIGHT OF SIN -2830-3980-0071 WE THINK THAT BY SOME LITTLE WORK OR MERIT WE CAN DISMISS SIN -2830-3980-0072 THIS PASSAGE THEN BEARS OUT THE FACT THAT ALL MEN ARE SOLD UNDER SIN -2830-3980-0073 THIS ATTITUDE SPRINGS FROM A FALSE CONCEPTION OF SIN THE CONCEPTION THAT SIN IS A SMALL MATTER EASILY TAKEN CARE OF BY GOOD WORKS THAT WE MUST PRESENT OURSELVES UNTO GOD WITH A GOOD CONSCIENCE THAT WE MUST FEEL NO SIN BEFORE WE MAY FEEL THAT CHRIST WAS GIVEN FOR OUR SINS -2830-3980-0074 THIS ATTITUDE IS UNIVERSAL AND PARTICULARLY DEVELOPED IN THOSE WHO CONSIDER THEMSELVES BETTER THAN OTHERS -2830-3980-0075 BUT THE REAL SIGNIFICANCE AND COMFORT OF THE WORDS FOR OUR SINS IS LOST UPON THEM -2830-3980-0076 ON THE OTHER HAND WE ARE NOT TO REGARD THEM AS SO TERRIBLE THAT WE MUST DESPAIR -2961-960-0000 HE PASSES ABRUPTLY FROM PERSONS TO IDEAS AND NUMBERS AND FROM IDEAS AND NUMBERS TO PERSONS FROM THE HEAVENS TO MAN FROM ASTRONOMY TO PHYSIOLOGY HE CONFUSES OR RATHER DOES NOT DISTINGUISH SUBJECT AND OBJECT FIRST AND FINAL CAUSES AND IS DREAMING OF GEOMETRICAL FIGURES LOST IN A FLUX OF SENSE -2961-960-0001 THE INFLUENCE WITH THE TIMAEUS HAS EXERCISED UPON POSTERITY IS DUE PARTLY TO A MISUNDERSTANDING -2961-960-0002 IN THE SUPPOSED 
DEPTHS OF THIS DIALOGUE THE NEO PLATONISTS FOUND HIDDEN MEANINGS AND CONNECTIONS WITH THE JEWISH AND CHRISTIAN SCRIPTURES AND OUT OF THEM THEY ELICITED DOCTRINES QUITE AT VARIANCE WITH THE SPIRIT OF PLATO -2961-960-0003 THEY WERE ABSORBED IN HIS THEOLOGY AND WERE UNDER THE DOMINION OF HIS NAME WHILE THAT WHICH WAS TRULY GREAT AND TRULY CHARACTERISTIC IN HIM HIS EFFORT TO REALIZE AND CONNECT ABSTRACTIONS WAS NOT UNDERSTOOD BY THEM AT ALL -2961-960-0004 THERE IS NO DANGER OF THE MODERN COMMENTATORS ON THE TIMAEUS FALLING INTO THE ABSURDITIES OF THE NEO PLATONISTS -2961-960-0005 IN THE PRESENT DAY WE ARE WELL AWARE THAT AN ANCIENT PHILOSOPHER IS TO BE INTERPRETED FROM HIMSELF AND BY THE CONTEMPORARY HISTORY OF THOUGHT -2961-960-0006 THE FANCIES OF THE NEO PLATONISTS ARE ONLY INTERESTING TO US BECAUSE THEY EXHIBIT A PHASE OF THE HUMAN MIND WHICH PREVAILED WIDELY IN THE FIRST CENTURIES OF THE CHRISTIAN ERA AND IS NOT WHOLLY EXTINCT IN OUR OWN DAY -2961-960-0007 BUT THEY HAVE NOTHING TO DO WITH THE INTERPRETATION OF PLATO AND IN SPIRIT THEY ARE OPPOSED TO HIM -2961-960-0008 WE DO NOT KNOW HOW PLATO WOULD HAVE ARRANGED HIS OWN DIALOGUES OR WHETHER THE THOUGHT OF ARRANGING ANY OF THEM BESIDES THE TWO TRILOGIES WHICH HE HAS EXPRESSLY CONNECTED WAS EVER PRESENT TO HIS MIND -2961-960-0009 THE DIALOGUE IS PRIMARILY CONCERNED WITH THE ANIMAL CREATION INCLUDING UNDER THIS TERM THE HEAVENLY BODIES AND WITH MAN ONLY AS ONE AMONG THE ANIMALS -2961-960-0010 BUT HE HAS NOT AS YET DEFINED THIS INTERMEDIATE TERRITORY WHICH LIES SOMEWHERE BETWEEN MEDICINE AND MATHEMATICS AND HE WOULD HAVE FELT THAT THERE WAS AS GREAT AN IMPIETY IN RANKING THEORIES OF PHYSICS FIRST IN THE ORDER OF KNOWLEDGE AS IN PLACING THE BODY BEFORE THE SOUL -2961-960-0011 WITH HERACLEITUS HE ACKNOWLEDGES THE PERPETUAL FLUX LIKE ANAXAGORAS HE ASSERTS THE PREDOMINANCE OF MIND ALTHOUGH ADMITTING AN ELEMENT OF NECESSITY WHICH REASON IS INCAPABLE OF SUBDUING LIKE THE PYTHAGOREANS HE SUPPOSES THE MYSTERY OF THE WORLD TO 
BE CONTAINED IN NUMBER -2961-960-0012 MANY IF NOT ALL THE ELEMENTS OF THE PRE SOCRATIC PHILOSOPHY ARE INCLUDED IN THE TIMAEUS -2961-960-0013 IT IS PROBABLE THAT THE RELATION OF THE IDEAS TO GOD OR OF GOD TO THE WORLD WAS DIFFERENTLY CONCEIVED BY HIM AT DIFFERENT TIMES OF HIS LIFE -2961-960-0014 THE IDEAS ALSO REMAIN BUT THEY HAVE BECOME TYPES IN NATURE FORMS OF MEN ANIMALS BIRDS FISHES -2961-960-0015 THE STYLE AND PLAN OF THE TIMAEUS DIFFER GREATLY FROM THAT OF ANY OTHER OF THE PLATONIC DIALOGUES -2961-960-0016 BUT PLATO HAS NOT THE SAME MASTERY OVER HIS INSTRUMENT WHICH HE EXHIBITS IN THE PHAEDRUS OR SYMPOSIUM -2961-960-0017 NOTHING CAN EXCEED THE BEAUTY OR ART OF THE INTRODUCTION IN WHICH HE IS USING WORDS AFTER HIS ACCUSTOMED MANNER -2961-960-0018 BUT IN THE REST OF THE WORK THE POWER OF LANGUAGE SEEMS TO FAIL HIM AND THE DRAMATIC FORM IS WHOLLY GIVEN UP -2961-960-0019 HE COULD WRITE IN ONE STYLE BUT NOT IN ANOTHER AND THE GREEK LANGUAGE HAD NOT AS YET BEEN FASHIONED BY ANY POET OR PHILOSOPHER TO DESCRIBE PHYSICAL PHENOMENA -2961-960-0020 AND HENCE WE FIND THE SAME SORT OF CLUMSINESS IN THE TIMAEUS OF PLATO WHICH CHARACTERIZES THE PHILOSOPHICAL POEM OF LUCRETIUS -2961-960-0021 THERE IS A WANT OF FLOW AND OFTEN A DEFECT OF RHYTHM THE MEANING IS SOMETIMES OBSCURE AND THERE IS A GREATER USE OF APPOSITION AND MORE OF REPETITION THAN OCCURS IN PLATO'S EARLIER WRITINGS -2961-960-0022 PLATO HAD NOT THE COMMAND OF HIS MATERIALS WHICH WOULD HAVE ENABLED HIM TO PRODUCE A PERFECT WORK OF ART -2961-961-0000 SOCRATES BEGINS THE TIMAEUS WITH A SUMMARY OF THE REPUBLIC -2961-961-0001 AND NOW HE DESIRES TO SEE THE IDEAL STATE SET IN MOTION HE WOULD LIKE TO KNOW HOW SHE BEHAVED IN SOME GREAT STRUGGLE -2961-961-0002 AND THEREFORE TO YOU I TURN TIMAEUS CITIZEN OF LOCRIS WHO ARE AT ONCE A PHILOSOPHER AND A STATESMAN AND TO YOU CRITIAS WHOM ALL ATHENIANS KNOW TO BE SIMILARLY ACCOMPLISHED AND TO HERMOCRATES WHO IS ALSO FITTED BY NATURE AND EDUCATION TO SHARE IN OUR DISCOURSE 
-2961-961-0003 I WILL IF TIMAEUS APPROVES I APPROVE -2961-961-0004 LISTEN THEN SOCRATES TO A TALE OF SOLON'S WHO BEING THE FRIEND OF DROPIDAS MY GREAT GRANDFATHER TOLD IT TO MY GRANDFATHER CRITIAS AND HE TOLD ME -2961-961-0005 SOME POEMS OF SOLON WERE RECITED BY THE BOYS -2961-961-0006 AND WHAT WAS THE SUBJECT OF THE POEM SAID THE PERSON WHO MADE THE REMARK -2961-961-0007 THE SUBJECT WAS A VERY NOBLE ONE HE DESCRIBED THE MOST FAMOUS ACTION IN WHICH THE ATHENIAN PEOPLE WERE EVER ENGAGED -2961-961-0008 BUT THE MEMORY OF THEIR EXPLOITS HAS PASSED AWAY OWING TO THE LAPSE OF TIME AND THE EXTINCTION OF THE ACTORS -2961-961-0009 TELL US SAID THE OTHER THE WHOLE STORY AND WHERE SOLON HEARD THE STORY -2961-961-0010 BUT IN EGYPT THE TRADITIONS OF OUR OWN AND OTHER LANDS ARE BY US REGISTERED FOR EVER IN OUR TEMPLES -2961-961-0011 THE GENEALOGIES WHICH YOU HAVE RECITED TO US OUT OF YOUR OWN ANNALS SOLON ARE A MERE CHILDREN'S STORY -2961-961-0012 FOR IN THE TIMES BEFORE THE GREAT FLOOD ATHENS WAS THE GREATEST AND BEST OF CITIES AND DID THE NOBLEST DEEDS AND HAD THE BEST CONSTITUTION OF ANY UNDER THE FACE OF HEAVEN -2961-961-0013 SOLON MARVELLED AND DESIRED TO BE INFORMED OF THE PARTICULARS -2961-961-0014 NINE THOUSAND YEARS HAVE ELAPSED SINCE SHE FOUNDED YOURS AND EIGHT THOUSAND SINCE SHE FOUNDED OURS AS OUR ANNALS RECORD -2961-961-0015 MANY LAWS EXIST AMONG US WHICH ARE THE COUNTERPART OF YOURS AS THEY WERE IN THE OLDEN TIME -2961-961-0016 I WILL BRIEFLY DESCRIBE THEM TO YOU AND YOU SHALL READ THE ACCOUNT OF THEM AT YOUR LEISURE IN THE SACRED REGISTERS -2961-961-0017 OBSERVE AGAIN WHAT CARE THE LAW TOOK IN THE PURSUIT OF WISDOM SEARCHING OUT THE DEEP THINGS OF THE WORLD AND APPLYING THEM TO THE USE OF MAN -2961-961-0018 THE MOST FAMOUS OF THEM ALL WAS THE OVERTHROW OF THE ISLAND OF ATLANTIS -2961-961-0019 FOR AT THE PERIL OF HER OWN EXISTENCE AND WHEN THE OTHER HELLENES HAD DESERTED HER SHE REPELLED THE INVADER AND OF HER OWN ACCORD GAVE LIBERTY TO ALL THE NATIONS WITHIN THE 
PILLARS -2961-961-0020 THIS IS THE EXPLANATION OF THE SHALLOWS WHICH ARE FOUND IN THAT PART OF THE ATLANTIC OCEAN -2961-961-0021 BUT I WOULD NOT SPEAK AT THE TIME BECAUSE I WANTED TO REFRESH MY MEMORY -2961-961-0022 THEN NOW LET ME EXPLAIN TO YOU THE ORDER OF OUR ENTERTAINMENT FIRST TIMAEUS WHO IS A NATURAL PHILOSOPHER WILL SPEAK OF THE ORIGIN OF THE WORLD GOING DOWN TO THE CREATION OF MAN AND THEN I SHALL RECEIVE THE MEN WHOM HE HAS CREATED AND SOME OF WHOM WILL HAVE BEEN EDUCATED BY YOU AND INTRODUCE THEM TO YOU AS THE LOST ATHENIAN CITIZENS OF WHOM THE EGYPTIAN RECORD SPOKE -3570-5694-0000 BUT ALREADY AT A POINT IN ECONOMIC EVOLUTION FAR ANTEDATING THE EMERGENCE OF THE LADY SPECIALISED CONSUMPTION OF GOODS AS AN EVIDENCE OF PECUNIARY STRENGTH HAD BEGUN TO WORK OUT IN A MORE OR LESS ELABORATE SYSTEM -3570-5694-0001 THE UTILITY OF CONSUMPTION AS AN EVIDENCE OF WEALTH IS TO BE CLASSED AS A DERIVATIVE GROWTH -3570-5694-0002 SUCH CONSUMPTION AS FALLS TO THE WOMEN IS MERELY INCIDENTAL TO THEIR WORK IT IS A MEANS TO THEIR CONTINUED LABOUR AND NOT A CONSUMPTION DIRECTED TO THEIR OWN COMFORT AND FULNESS OF LIFE -3570-5694-0003 WITH A FURTHER ADVANCE IN CULTURE THIS TABU MAY CHANGE INTO SIMPLE CUSTOM OF A MORE OR LESS RIGOROUS CHARACTER BUT WHATEVER BE THE THEORETICAL BASIS OF THE DISTINCTION WHICH IS MAINTAINED WHETHER IT BE A TABU OR A LARGER CONVENTIONALITY THE FEATURES OF THE CONVENTIONAL SCHEME OF CONSUMPTION DO NOT CHANGE EASILY -3570-5694-0004 IN THE NATURE OF THINGS LUXURIES AND THE COMFORTS OF LIFE BELONG TO THE LEISURE CLASS -3570-5694-0005 UNDER THE TABU CERTAIN VICTUALS AND MORE PARTICULARLY CERTAIN BEVERAGES ARE STRICTLY RESERVED FOR THE USE OF THE SUPERIOR CLASS -3570-5694-0006 DRUNKENNESS AND THE OTHER PATHOLOGICAL CONSEQUENCES OF THE FREE USE OF STIMULANTS THEREFORE TEND IN THEIR TURN TO BECOME HONORIFIC AS BEING A MARK AT THE SECOND REMOVE OF THE SUPERIOR STATUS OF THOSE WHO ARE ABLE TO AFFORD THE INDULGENCE -3570-5694-0007 IT HAS EVEN HAPPENED THAT THE 
NAME FOR CERTAIN DISEASED CONDITIONS OF THE BODY ARISING FROM SUCH AN ORIGIN HAS PASSED INTO EVERYDAY SPEECH AS A SYNONYM FOR NOBLE OR GENTLE -3570-5694-0008 THE CONSUMPTION OF LUXURIES IN THE TRUE SENSE IS A CONSUMPTION DIRECTED TO THE COMFORT OF THE CONSUMER HIMSELF AND IS THEREFORE A MARK OF THE MASTER -3570-5694-0009 WITH MANY QUALIFICATIONS WITH MORE QUALIFICATIONS AS THE PATRIARCHAL TRADITION HAS GRADUALLY WEAKENED THE GENERAL RULE IS FELT TO BE RIGHT AND BINDING THAT WOMEN SHOULD CONSUME ONLY FOR THE BENEFIT OF THEIR MASTERS -3570-5694-0010 THE OBJECTION OF COURSE PRESENTS ITSELF THAT EXPENDITURE ON WOMEN'S DRESS AND HOUSEHOLD PARAPHERNALIA IS AN OBVIOUS EXCEPTION TO THIS RULE BUT IT WILL APPEAR IN THE SEQUEL THAT THIS EXCEPTION IS MUCH MORE OBVIOUS THAN SUBSTANTIAL -3570-5694-0011 THE CUSTOM OF FESTIVE GATHERINGS PROBABLY ORIGINATED IN MOTIVES OF CONVIVIALITY AND RELIGION THESE MOTIVES ARE ALSO PRESENT IN THE LATER DEVELOPMENT BUT THEY DO NOT CONTINUE TO BE THE SOLE MOTIVES -3570-5694-0012 THERE IS A MORE OR LESS ELABORATE SYSTEM OF RANK AND GRADES -3570-5694-0013 THIS DIFFERENTIATION IS FURTHERED BY THE INHERITANCE OF WEALTH AND THE CONSEQUENT INHERITANCE OF GENTILITY -3570-5694-0014 MANY OF THESE AFFILIATED GENTLEMEN OF LEISURE ARE AT THE SAME TIME LESSER MEN OF SUBSTANCE IN THEIR OWN RIGHT SO THAT SOME OF THEM ARE SCARCELY AT ALL OTHERS ONLY PARTIALLY TO BE RATED AS VICARIOUS CONSUMERS -3570-5694-0015 SO MANY OF THEM HOWEVER AS MAKE UP THE RETAINER AND HANGERS ON OF THE PATRON MAY BE CLASSED AS VICARIOUS CONSUMER WITHOUT QUALIFICATION -3570-5694-0016 MANY OF THESE AGAIN AND ALSO MANY OF THE OTHER ARISTOCRACY OF LESS DEGREE HAVE IN TURN ATTACHED TO THEIR PERSONS A MORE OR LESS COMPREHENSIVE GROUP OF VICARIOUS CONSUMER IN THE PERSONS OF THEIR WIVES AND CHILDREN THEIR SERVANTS RETAINERS ET CETERA -3570-5694-0017 THE WEARING OF UNIFORMS OR LIVERIES IMPLIES A CONSIDERABLE DEGREE OF DEPENDENCE AND MAY EVEN BE SAID TO BE A MARK OF SERVITUDE REAL OR OSTENSIBLE 
-3570-5694-0018 THE WEARERS OF UNIFORMS AND LIVERIES MAY BE ROUGHLY DIVIDED INTO TWO CLASSES THE FREE AND THE SERVILE OR THE NOBLE AND THE IGNOBLE -3570-5694-0019 BUT THE GENERAL DISTINCTION IS NOT ON THAT ACCOUNT TO BE OVERLOOKED -3570-5694-0020 SO THOSE OFFICES WHICH ARE BY RIGHT THE PROPER EMPLOYMENT OF THE LEISURE CLASS ARE NOBLE SUCH AS GOVERNMENT FIGHTING HUNTING THE CARE OF ARMS AND ACCOUTREMENTS AND THE LIKE IN SHORT THOSE WHICH MAY BE CLASSED AS OSTENSIBLY PREDATORY EMPLOYMENTS -3570-5694-0021 WHENEVER AS IN THESE CASES THE MENIAL SERVICE IN QUESTION HAS TO DO DIRECTLY WITH THE PRIMARY LEISURE EMPLOYMENTS OF FIGHTING AND HUNTING IT EASILY ACQUIRES A REFLECTED HONORIFIC CHARACTER -3570-5694-0022 THE LIVERY BECOMES OBNOXIOUS TO NEARLY ALL WHO ARE REQUIRED TO WEAR IT -3570-5695-0000 IN A GENERAL WAY THOUGH NOT WHOLLY NOR CONSISTENTLY THESE TWO GROUPS COINCIDE -3570-5695-0001 THE DEPENDENT WHO WAS FIRST DELEGATED FOR THESE DUTIES WAS THE WIFE OR THE CHIEF WIFE AND AS WOULD BE EXPECTED IN THE LATER DEVELOPMENT OF THE INSTITUTION WHEN THE NUMBER OF PERSONS BY WHOM THESE DUTIES ARE CUSTOMARILY PERFORMED GRADUALLY NARROWS THE WIFE REMAINS THE LAST -3570-5695-0002 BUT AS WE DESCEND THE SOCIAL SCALE THE POINT IS PRESENTLY REACHED WHERE THE DUTIES OF VICARIOUS LEISURE AND CONSUMPTION DEVOLVE UPON THE WIFE ALONE -3570-5695-0003 IN THE COMMUNITIES OF THE WESTERN CULTURE THIS POINT IS AT PRESENT FOUND AMONG THE LOWER MIDDLE CLASS -3570-5695-0004 IF BEAUTY OR COMFORT IS ACHIEVED AND IT IS A MORE OR LESS FORTUITOUS CIRCUMSTANCE IF THEY ARE THEY MUST BE ACHIEVED BY MEANS AND METHODS THAT COMMEND THEMSELVES TO THE GREAT ECONOMIC LAW OF WASTED EFFORT -3570-5695-0005 THE MAN OF THE HOUSEHOLD ALSO CAN DO SOMETHING IN THIS DIRECTION AND INDEED HE COMMONLY DOES BUT WITH A STILL LOWER DESCENT INTO THE LEVELS OF INDIGENCE ALONG THE MARGIN OF THE SLUMS THE MAN AND PRESENTLY ALSO THE CHILDREN VIRTUALLY CEASE TO CONSUME VALUABLE GOODS FOR APPEARANCES AND THE WOMAN REMAINS VIRTUALLY 
THE SOLE EXPONENT OF THE HOUSEHOLD'S PECUNIARY DECENCY -3570-5695-0006 VERY MUCH OF SQUALOR AND DISCOMFORT WILL BE ENDURED BEFORE THE LAST TRINKET OR THE LAST PRETENSE OF PECUNIARY DECENCY IS PUT AWAY -3570-5695-0007 THERE IS NO CLASS AND NO COUNTRY THAT HAS YIELDED SO ABJECTLY BEFORE THE PRESSURE OF PHYSICAL WANT AS TO DENY THEMSELVES ALL GRATIFICATION OF THIS HIGHER OR SPIRITUAL NEED -3570-5695-0008 THE QUESTION IS WHICH OF THE TWO METHODS WILL MOST EFFECTIVELY REACH THE PERSONS WHOSE CONVICTIONS IT IS DESIRED TO AFFECT -3570-5695-0009 EACH WILL THEREFORE SERVE ABOUT EQUALLY WELL DURING THE EARLIER STAGES OF SOCIAL GROWTH -3570-5695-0010 THE MODERN ORGANIZATION OF INDUSTRY WORKS IN THE SAME DIRECTION ALSO BY ANOTHER LINE -3570-5695-0011 IT IS EVIDENT THEREFORE THAT THE PRESENT TREND OF THE DEVELOPMENT IS IN THE DIRECTION OF HEIGHTENING THE UTILITY OF CONSPICUOUS CONSUMPTION AS COMPARED WITH LEISURE -3570-5695-0012 IT IS ALSO NOTICEABLE THAT THE SERVICEABILITY OF CONSUMPTION AS A MEANS OF REPUTE AS WELL AS THE INSISTENCE ON IT AS AN ELEMENT OF DECENCY IS AT ITS BEST IN THOSE PORTIONS OF THE COMMUNITY WHERE THE HUMAN CONTACT OF THE INDIVIDUAL IS WIDEST AND THE MOBILITY OF THE POPULATION IS GREATEST -3570-5695-0013 CONSUMPTION BECOMES A LARGER ELEMENT IN THE STANDARD OF LIVING IN THE CITY THAN IN THE COUNTRY -3570-5695-0014 AMONG THE COUNTRY POPULATION ITS PLACE IS TO SOME EXTENT TAKEN BY SAVINGS AND HOME COMFORTS KNOWN THROUGH THE MEDIUM OF NEIGHBORHOOD GOSSIP SUFFICIENTLY TO SERVE THE LIKE GENERAL PURPOSE OF PECUNIARY REPUTE -3570-5695-0015 THE RESULT IS A GREAT MOBILITY OF THE LABOR EMPLOYED IN PRINTING PERHAPS GREATER THAN IN ANY OTHER EQUALLY WELL DEFINED AND CONSIDERABLE BODY OF WORKMEN -3570-5696-0000 UNDER THE SIMPLE TEST OF EFFECTIVENESS FOR ADVERTISING WE SHOULD EXPECT TO FIND LEISURE AND THE CONSPICUOUS CONSUMPTION OF GOODS DIVIDING THE FIELD OF PECUNIARY EMULATION PRETTY EVENLY BETWEEN THEM AT THE OUTSET -3570-5696-0001 BUT THE ACTUAL COURSE OF 
DEVELOPMENT HAS BEEN SOMEWHAT DIFFERENT FROM THIS IDEAL SCHEME LEISURE HELD THE FIRST PLACE AT THE START AND CAME TO HOLD A RANK VERY MUCH ABOVE WASTEFUL CONSUMPTION OF GOODS BOTH AS A DIRECT EXPONENT OF WEALTH AND AS AN ELEMENT IN THE STANDARD OF DECENCY DURING THE QUASI PEACEABLE CULTURE -3570-5696-0002 OTHER CIRCUMSTANCES PERMITTING THAT INSTINCT DISPOSES MEN TO LOOK WITH FAVOR UPON PRODUCTIVE EFFICIENCY AND ON WHATEVER IS OF HUMAN USE -3570-5696-0003 A RECONCILIATION BETWEEN THE TWO CONFLICTING REQUIREMENTS IS EFFECTED BY A RESORT TO MAKE BELIEVE MANY AND INTRICATE POLITE OBSERVANCES AND SOCIAL DUTIES OF A CEREMONIAL NATURE ARE DEVELOPED MANY ORGANIZATIONS ARE FOUNDED WITH SOME SPECIOUS OBJECT OF AMELIORATION EMBODIED IN THEIR OFFICIAL STYLE AND TITLE THERE IS MUCH COMING AND GOING AND A DEAL OF TALK TO THE END THAT THE TALKERS MAY NOT HAVE OCCASION TO REFLECT ON WHAT IS THE EFFECTUAL ECONOMIC VALUE OF THEIR TRAFFIC -3570-5696-0004 THE SALIENT FEATURES OF THIS DEVELOPMENT OF DOMESTIC SERVICE HAVE ALREADY BEEN INDICATED -3570-5696-0005 THROUGHOUT THE ENTIRE EVOLUTION OF CONSPICUOUS EXPENDITURE WHETHER OF GOODS OR OF SERVICES OR HUMAN LIFE RUNS THE OBVIOUS IMPLICATION THAT IN ORDER TO EFFECTUALLY MEND THE CONSUMER'S GOOD FAME IT MUST BE AN EXPENDITURE OF SUPERFLUITIES -3570-5696-0006 AS USED IN THE SPEECH OF EVERYDAY LIFE THE WORD CARRIES AN UNDERTONE OF DEPRECATION -3570-5696-0007 THE USE OF THE WORD WASTE AS A TECHNICAL TERM THEREFORE IMPLIES NO DEPRECATION OF THE MOTIVES OR OF THE ENDS SOUGHT BY THE CONSUMER UNDER THIS CANON OF CONSPICUOUS WASTE -3570-5696-0008 BUT IT IS ON OTHER GROUNDS WORTH NOTING THAT THE TERM WASTE IN THE LANGUAGE OF EVERYDAY LIFE IMPLIES DEPRECATION OF WHAT IS CHARACTERIZED AS WASTEFUL -3570-5696-0009 IN STRICT ACCURACY NOTHING SHOULD BE INCLUDED UNDER THE HEAD OF CONSPICUOUS WASTE BUT SUCH EXPENDITURE AS IS INCURRED ON THE GROUND OF AN INVIDIOUS PECUNIARY COMPARISON -3570-5696-0010 AN ARTICLE MAY BE USEFUL AND WASTEFUL BOTH AND ITS 
UTILITY TO THE CONSUMER MAY BE MADE UP OF USE AND WASTE IN THE MOST VARYING PROPORTIONS -3575-170457-0000 AND OFTEN HAS MY MOTHER SAID WHILE ON HER LAP I LAID MY HEAD SHE FEARED FOR TIME I WAS NOT MADE BUT FOR ETERNITY -3575-170457-0001 WHY ARE WE TO BE DENIED EACH OTHER'S SOCIETY -3575-170457-0002 WHY ARE WE TO BE DIVIDED -3575-170457-0003 SURELY IT MUST BE BECAUSE WE ARE IN DANGER OF LOVING EACH OTHER TOO WELL OF LOSING SIGHT OF THE CREATOR IN IDOLATRY OF THE CREATURE -3575-170457-0004 WE USED TO DISPUTE ABOUT POLITICS AND RELIGION -3575-170457-0005 SHE A TORY AND CLERGYMAN'S DAUGHTER WAS ALWAYS IN A MINORITY OF ONE IN OUR HOUSE OF VIOLENT DISSENT AND RADICALISM -3575-170457-0006 HER FEEBLE HEALTH GAVE HER HER YIELDING MANNER FOR SHE COULD NEVER OPPOSE ANY ONE WITHOUT GATHERING UP ALL HER STRENGTH FOR THE STRUGGLE -3575-170457-0007 HE SPOKE FRENCH PERFECTLY I HAVE BEEN TOLD WHEN NEED WAS BUT DELIGHTED USUALLY IN TALKING THE BROADEST YORKSHIRE -3575-170457-0008 AND SO LIFE AND DEATH HAVE DISPERSED THE CIRCLE OF VIOLENT RADICALS AND DISSENTERS INTO WHICH TWENTY YEARS AGO THE LITTLE QUIET RESOLUTE CLERGYMAN'S DAUGHTER WAS RECEIVED AND BY WHOM SHE WAS TRULY LOVED AND HONOURED -3575-170457-0009 JANUARY AND FEBRUARY OF EIGHTEEN THIRTY SEVEN HAD PASSED AWAY AND STILL THERE WAS NO REPLY FROM SOUTHEY -3575-170457-0010 I AM NOT DEPRECIATING IT WHEN I SAY THAT IN THESE TIMES IT IS NOT RARE -3575-170457-0011 BUT IT IS NOT WITH A VIEW TO DISTINCTION THAT YOU SHOULD CULTIVATE THIS TALENT IF YOU CONSULT YOUR OWN HAPPINESS -3575-170457-0012 YOU WILL SAY THAT A WOMAN HAS NO NEED OF SUCH A CAUTION THERE CAN BE NO PERIL IN IT FOR HER -3575-170457-0013 THE MORE SHE IS ENGAGED IN HER PROPER DUTIES THE LESS LEISURE WILL SHE HAVE FOR IT EVEN AS AN ACCOMPLISHMENT AND A RECREATION -3575-170457-0014 TO THOSE DUTIES YOU HAVE NOT YET BEEN CALLED AND WHEN YOU ARE YOU WILL BE LESS EAGER FOR CELEBRITY -3575-170457-0015 BUT DO NOT SUPPOSE THAT I DISPARAGE THE GIFT WHICH YOU POSSESS NOR THAT I 
WOULD DISCOURAGE YOU FROM EXERCISING IT I ONLY EXHORT YOU SO TO THINK OF IT AND SO TO USE IT AS TO RENDER IT CONDUCIVE TO YOUR OWN PERMANENT GOOD -3575-170457-0016 FAREWELL MADAM -3575-170457-0017 THOUGH I MAY BE BUT AN UNGRACIOUS ADVISER YOU WILL ALLOW ME THEREFORE TO SUBSCRIBE MYSELF WITH THE BEST WISHES FOR YOUR HAPPINESS HERE AND HEREAFTER YOUR TRUE FRIEND ROBERT SOUTHEY -3575-170457-0018 SIR MARCH SIXTEENTH -3575-170457-0019 I HAD NOT VENTURED TO HOPE FOR SUCH A REPLY SO CONSIDERATE IN ITS TONE SO NOBLE IN ITS SPIRIT -3575-170457-0020 I KNOW THE FIRST LETTER I WROTE TO YOU WAS ALL SENSELESS TRASH FROM BEGINNING TO END BUT I AM NOT ALTOGETHER THE IDLE DREAMING BEING IT WOULD SEEM TO DENOTE -3575-170457-0021 I THOUGHT IT THEREFORE MY DUTY WHEN I LEFT SCHOOL TO BECOME A GOVERNESS -3575-170457-0022 IN THE EVENINGS I CONFESS I DO THINK BUT I NEVER TROUBLE ANY ONE ELSE WITH MY THOUGHTS -3575-170457-0023 I CAREFULLY AVOID ANY APPEARANCE OF PREOCCUPATION AND ECCENTRICITY WHICH MIGHT LEAD THOSE I LIVE AMONGST TO SUSPECT THE NATURE OF MY PURSUITS -3575-170457-0024 I DON'T ALWAYS SUCCEED FOR SOMETIMES WHEN I'M TEACHING OR SEWING I WOULD RATHER BE READING OR WRITING BUT I TRY TO DENY MYSELF AND MY FATHER'S APPROBATION AMPLY REWARDED ME FOR THE PRIVATION -3575-170457-0025 AGAIN I THANK YOU THIS INCIDENT I SUPPOSE WILL BE RENEWED NO MORE IF I LIVE TO BE AN OLD WOMAN I SHALL REMEMBER IT THIRTY YEARS HENCE AS A BRIGHT DREAM -3575-170457-0026 P S PRAY SIR EXCUSE ME FOR WRITING TO YOU A SECOND TIME I COULD NOT HELP WRITING PARTLY TO TELL YOU HOW THANKFUL I AM FOR YOUR KINDNESS AND PARTLY TO LET YOU KNOW THAT YOUR ADVICE SHALL NOT BE WASTED HOWEVER SORROWFULLY AND RELUCTANTLY IT MAY BE AT FIRST FOLLOWED C B -3575-170457-0027 I CANNOT DENY MYSELF THE GRATIFICATION OF INSERTING SOUTHEY'S REPLY -3575-170457-0028 KESWICK MARCH TWENTY SECOND EIGHTEEN THIRTY SEVEN DEAR MADAM -3575-170457-0029 YOUR LETTER HAS GIVEN ME GREAT PLEASURE AND I SHOULD NOT FORGIVE MYSELF IF I DID NOT TELL YOU 
SO -3575-170457-0030 OF THIS SECOND LETTER ALSO SHE SPOKE AND TOLD ME THAT IT CONTAINED AN INVITATION FOR HER TO GO AND SEE THE POET IF EVER SHE VISITED THE LAKES -3575-170457-0031 ON AUGUST TWENTY SEVENTH EIGHTEEN THIRTY SEVEN SHE WRITES -3575-170457-0032 COME COME I AM GETTING REALLY TIRED OF YOUR ABSENCE -3575-170457-0033 SATURDAY AFTER SATURDAY COMES ROUND AND I CAN HAVE NO HOPE OF HEARING YOUR KNOCK AT THE DOOR AND THEN BEING TOLD THAT MISS E IS COME OH DEAR -3575-170457-0034 IN THIS MONOTONOUS LIFE OF MINE THAT WAS A PLEASANT EVENT -3575-170457-0035 I WISH IT WOULD RECUR AGAIN BUT IT WILL TAKE TWO OR THREE INTERVIEWS BEFORE THE STIFFNESS THE ESTRANGEMENT OF THIS LONG SEPARATION WILL WEAR AWAY -3575-170457-0036 MY EYES FILL WITH TEARS WHEN I CONTRAST THE BLISS OF SUCH A STATE BRIGHTENED BY HOPES OF THE FUTURE WITH THE MELANCHOLY STATE I NOW LIVE IN UNCERTAIN THAT I EVER FELT TRUE CONTRITION WANDERING IN THOUGHT AND DEED LONGING FOR HOLINESS WHICH I SHALL NEVER NEVER OBTAIN SMITTEN AT TIMES TO THE HEART WITH THE CONVICTION THAT GHASTLY CALVINISTIC DOCTRINES ARE TRUE DARKENED IN SHORT BY THE VERY SHADOWS OF SPIRITUAL DEATH -3575-170457-0037 IF CHRISTIAN PERFECTION BE NECESSARY TO SALVATION I SHALL NEVER BE SAVED MY HEART IS A VERY HOTBED FOR SINFUL THOUGHTS AND WHEN I DECIDE ON AN ACTION I SCARCELY REMEMBER TO LOOK TO MY REDEEMER FOR DIRECTION -3575-170457-0038 AND MEANTIME I KNOW THE GREATNESS OF JEHOVAH I ACKNOWLEDGE THE PERFECTION OF HIS WORD I ADORE THE PURITY OF THE CHRISTIAN FAITH MY THEORY IS RIGHT MY PRACTICE HORRIBLY WRONG -3575-170457-0039 THE CHRISTMAS HOLIDAYS CAME AND SHE AND ANNE RETURNED TO THE PARSONAGE AND TO THAT HAPPY HOME CIRCLE IN WHICH ALONE THEIR NATURES EXPANDED AMONGST ALL OTHER PEOPLE THEY SHRIVELLED UP MORE OR LESS -3575-170457-0040 INDEED THERE WERE ONLY ONE OR TWO STRANGERS WHO COULD BE ADMITTED AMONG THE SISTERS WITHOUT PRODUCING THE SAME RESULT -3575-170457-0041 SHE WAS GONE OUT INTO THE VILLAGE ON SOME ERRAND WHEN AS SHE WAS 
DESCENDING THE STEEP STREET HER FOOT SLIPPED ON THE ICE AND SHE FELL IT WAS DARK AND NO ONE SAW HER MISCHANCE TILL AFTER A TIME HER GROANS ATTRACTED THE ATTENTION OF A PASSER BY -3575-170457-0042 UNFORTUNATELY THE FRACTURE COULD NOT BE SET TILL SIX O'CLOCK THE NEXT MORNING AS NO SURGEON WAS TO BE HAD BEFORE THAT TIME AND SHE NOW LIES AT OUR HOUSE IN A VERY DOUBTFUL AND DANGEROUS STATE -3575-170457-0043 HOWEVER REMEMBERING WHAT YOU TOLD ME NAMELY THAT YOU HAD COMMENDED THE MATTER TO A HIGHER DECISION THAN OURS AND THAT YOU WERE RESOLVED TO SUBMIT WITH RESIGNATION TO THAT DECISION WHATEVER IT MIGHT BE I HOLD IT MY DUTY TO YIELD ALSO AND TO BE SILENT IT MAY BE ALL FOR THE BEST -3575-170457-0044 AFTER THIS DISAPPOINTMENT I NEVER DARE RECKON WITH CERTAINTY ON THE ENJOYMENT OF A PLEASURE AGAIN IT SEEMS AS IF SOME FATALITY STOOD BETWEEN YOU AND ME -3575-170457-0045 I AM NOT GOOD ENOUGH FOR YOU AND YOU MUST BE KEPT FROM THE CONTAMINATION OF TOO INTIMATE SOCIETY -3575-170457-0046 A GOOD NEIGHBOUR OF THE BRONTES A CLEVER INTELLIGENT YORKSHIRE WOMAN WHO KEEPS A DRUGGIST'S SHOP IN HAWORTH AND FROM HER OCCUPATION HER EXPERIENCE AND EXCELLENT SENSE HOLDS THE POSITION OF VILLAGE DOCTRESS AND NURSE AND AS SUCH HAS BEEN A FRIEND IN MANY A TIME OF TRIAL AND SICKNESS AND DEATH IN THE HOUSEHOLDS ROUND TOLD ME A CHARACTERISTIC LITTLE INCIDENT CONNECTED WITH TABBY'S FRACTURED LEG -3575-170457-0047 TABBY HAD LIVED WITH THEM FOR TEN OR TWELVE YEARS AND WAS AS CHARLOTTE EXPRESSED IT ONE OF THE FAMILY -3575-170457-0048 HE REFUSED AT FIRST TO LISTEN TO THE CAREFUL ADVICE IT WAS REPUGNANT TO HIS LIBERAL NATURE -3575-170457-0049 THIS DECISION WAS COMMUNICATED TO THE GIRLS -3575-170457-0050 TABBY HAD TENDED THEM IN THEIR CHILDHOOD THEY AND NONE OTHER SHOULD TEND HER IN HER INFIRMITY AND AGE -3575-170457-0051 AT TEA TIME THEY WERE SAD AND SILENT AND THE MEAL WENT AWAY UNTOUCHED BY ANY OF THE THREE -3575-170457-0052 SHE HAD ANOTHER WEIGHT ON HER MIND THIS CHRISTMAS -3575-170457-0053 BUT ANNE HAD 
BEGUN TO SUFFER JUST BEFORE THE HOLIDAYS AND CHARLOTTE WATCHED OVER HER YOUNGER SISTERS WITH THE JEALOUS VIGILANCE OF SOME WILD CREATURE THAT CHANGES HER VERY NATURE IF DANGER THREATENS HER YOUNG -3575-170457-0054 STUNG BY ANXIETY FOR THIS LITTLE SISTER SHE UPBRAIDED MISS W FOR HER FANCIED INDIFFERENCE TO ANNE'S STATE OF HEALTH -3575-170457-0055 STILL HER HEART HAD RECEIVED A SHOCK IN THE PERCEPTION OF ANNE'S DELICACY AND ALL THESE HOLIDAYS SHE WATCHED OVER HER WITH THE LONGING FOND ANXIETY WHICH IS SO FULL OF SUDDEN PANGS OF FEAR -3575-170457-0056 I DOUBT WHETHER BRANWELL WAS MAINTAINING HIMSELF AT THIS TIME -3729-6852-0000 TO CELEBRATE THE ARRIVAL OF HER SON SILVIA GAVE A SPLENDID SUPPER TO WHICH SHE HAD INVITED ALL HER RELATIVES AND IT WAS A GOOD OPPORTUNITY FOR ME TO MAKE THEIR ACQUAINTANCE -3729-6852-0001 WITHOUT SAYING IT POSITIVELY SHE MADE ME UNDERSTAND THAT BEING HERSELF AN ILLUSTRIOUS MEMBER OF THE REPUBLIC OF LETTERS SHE WAS WELL AWARE THAT SHE WAS SPEAKING TO AN INSECT -3729-6852-0002 IN ORDER TO PLEASE HER I SPOKE TO HER OF THE ABBE CONTI AND I HAD OCCASION TO QUOTE TWO LINES OF THAT PROFOUND WRITER -3729-6852-0003 MADAM CORRECTED ME WITH A PATRONIZING AIR FOR MY PRONUNCIATION OF THE WORD SCEVRA WHICH MEANS DIVIDED SAYING THAT IT OUGHT TO BE PRONOUNCED SCEURA AND SHE ADDED THAT I OUGHT TO BE VERY GLAD TO HAVE LEARNED SO MUCH ON THE FIRST DAY OF MY ARRIVAL IN PARIS TELLING ME THAT IT WOULD BE AN IMPORTANT DAY IN MY LIFE -3729-6852-0004 HER FACE WAS AN ENIGMA FOR IT INSPIRED EVERYONE WITH THE WARMEST SYMPATHY AND YET IF YOU EXAMINED IT ATTENTIVELY THERE WAS NOT ONE BEAUTIFUL FEATURE SHE COULD NOT BE CALLED HANDSOME BUT NO ONE COULD HAVE THOUGHT HER UGLY -3729-6852-0005 SILVIA WAS THE ADORATION OF FRANCE AND HER TALENT WAS THE REAL SUPPORT OF ALL THE COMEDIES WHICH THE GREATEST AUTHORS WROTE FOR HER ESPECIALLY OF THE PLAYS OF MARIVAUX FOR WITHOUT HER HIS COMEDIES WOULD NEVER HAVE GONE TO POSTERITY -3729-6852-0006 SILVIA DID NOT THINK THAT HER GOOD CONDUCT 
WAS A MERIT FOR SHE KNEW THAT SHE WAS VIRTUOUS ONLY BECAUSE HER SELF LOVE COMPELLED HER TO BE SO AND SHE NEVER EXHIBITED ANY PRIDE OR ASSUMED ANY SUPERIORITY TOWARDS HER THEATRICAL SISTERS ALTHOUGH SATISFIED TO SHINE BY THEIR TALENT OR THEIR BEAUTY THEY CARED LITTLE ABOUT RENDERING THEMSELVES CONSPICUOUS BY THEIR VIRTUE -3729-6852-0007 TWO YEARS BEFORE HER DEATH I SAW HER PERFORM THE CHARACTER OF MARIANNE IN THE COMEDY OF MARIVAUX AND IN SPITE OF HER AGE AND DECLINING HEALTH THE ILLUSION WAS COMPLETE -3729-6852-0008 SHE WAS HONOURABLY BURIED IN THE CHURCH OF SAINT SAUVEUR WITHOUT THE SLIGHTEST OPPOSITION FROM THE VENERABLE PRIEST WHO FAR FROM SHARING THE ANTI CHRISTAIN INTOLERANCY OF THE CLERGY IN GENERAL SAID THAT HER PROFESSION AS AN ACTRESS HAD NOT HINDERED HER FROM BEING A GOOD CHRISTIAN AND THAT THE EARTH WAS THE COMMON MOTHER OF ALL HUMAN BEINGS AS JESUS CHRIST HAD BEEN THE SAVIOUR OF ALL MANKIND -3729-6852-0009 YOU WILL FORGIVE ME DEAR READER IF I HAVE MADE YOU ATTEND THE FUNERAL OF SILVIA TEN YEARS BEFORE HER DEATH BELIEVE ME I HAVE NO INTENTION OF PERFORMING A MIRACLE YOU MAY CONSOLE YOURSELF WITH THE IDEA THAT I SHALL SPARE YOU THAT UNPLEASANT TASK WHEN POOR SILVIA DIES -3729-6852-0010 I NEVER HAD ANY FAMILY -3729-6852-0011 I HAD A NAME I BELIEVE IN MY YOUNG DAYS BUT I HAVE FORGOTTEN IT SINCE I HAVE BEEN IN SERVICE -3729-6852-0012 I SHALL CALL YOU ESPRIT -3729-6852-0013 YOU DO ME A GREAT HONOUR -3729-6852-0014 HERE GO AND GET ME CHANGE FOR A LOUIS I HAVE IT SIR -3729-6852-0015 AT YOUR SERVICE SIR -3729-6852-0016 MADAME QUINSON BESIDES CAN ANSWER YOUR ENQUIRIES -3729-6852-0017 I SEE A QUANTITY OF CHAIRS FOR HIRE AT THE RATE OF ONE SOU MEN READING THE NEWSPAPER UNDER THE SHADE OF THE TREES GIRLS AND MEN BREAKFASTING EITHER ALONE OR IN COMPANY WAITERS WHO WERE RAPIDLY GOING UP AND DOWN A NARROW STAIRCASE HIDDEN UNDER THE FOLIAGE -3729-6852-0018 I SIT DOWN AT A SMALL TABLE A WAITER COMES IMMEDIATELY TO ENQUIRE MY WISHES -3729-6852-0019 I TELL HIM TO GIVE ME 
SOME COFFEE IF IT IS GOOD -3729-6852-0020 THEN TURNING TOWARDS ME HE SAYS THAT I LOOK LIKE A FOREIGNER AND WHEN I SAY THAT I AM AN ITALIAN HE BEGINS TO SPEAK TO ME OF THE COURT OF THE CITY OF THE THEATRES AND AT LAST HE OFFERS TO ACCOMPANY ME EVERYWHERE -3729-6852-0021 I THANK HIM AND TAKE MY LEAVE -3729-6852-0022 I ADDRESS HIM IN ITALIAN AND HE ANSWERS VERY WITTILY BUT HIS WAY OF SPEAKING MAKES ME SMILE AND I TELL HIM WHY -3729-6852-0023 MY REMARK PLEASES HIM BUT I SOON PROVE TO HIM THAT IT IS NOT THE RIGHT WAY TO SPEAK HOWEVER PERFECT MAY HAVE BEEN THE LANGUAGE OF THAT ANCIENT WRITER -3729-6852-0024 I SEE A CROWD IN ONE CORNER OF THE GARDEN EVERYBODY STANDING STILL AND LOOKING UP -3729-6852-0025 IS THERE NOT A MERIDIAN EVERYWHERE -3729-6852-0026 YES BUT THE MERIDIAN OF THE PALAIS ROYAL IS THE MOST EXACT -3729-6852-0027 THAT IS TRUE BADAUDERIE -3729-6852-0028 ALL THESE HONEST PERSONS ARE WAITING THEIR TURN TO GET THEIR SNUFF BOXES FILLED -3729-6852-0029 IT IS SOLD EVERYWHERE BUT FOR THE LAST THREE WEEKS NOBODY WILL USE ANY SNUFF BUT THAT SOLD AT THE CIVET CAT -3729-6852-0030 IS IT BETTER THAN ANYWHERE ELSE -3729-6852-0031 BUT HOW DID SHE MANAGE TO RENDER IT SO FASHIONABLE -3729-6852-0032 SIMPLY BY STOPPING HER CARRIAGE TWO OR THREE TIMES BEFORE THE SHOP TO HAVE HER SNUFF BOX FILLED AND BY SAYING ALOUD TO THE YOUNG GIRL WHO HANDED BACK THE BOX THAT HER SNUFF WAS THE VERY BEST IN PARIS -3729-6852-0033 YOU ARE NOW IN THE ONLY COUNTRY IN THE WORLD WHERE WIT CAN MAKE A FORTUNE BY SELLING EITHER A GENUINE OR A FALSE ARTICLE IN THE FIRST CASE IT RECEIVES THE WELCOME OF INTELLIGENT AND TALENTED PEOPLE AND IN THE SECOND FOOLS ARE ALWAYS READY TO REWARD IT FOR SILLINESS IS TRULY A CHARACTERISTIC OF THE PEOPLE HERE AND HOWEVER WONDERFUL IT MAY APPEAR SILLINESS IS THE DAUGHTER OF WIT -3729-6852-0034 LET A MAN RUN AND EVERYBODY WILL RUN AFTER HIM THE CROWD WILL NOT STOP UNLESS THE MAN IS PROVED TO BE MAD BUT TO PROVE IT IS INDEED A DIFFICULT TASK BECAUSE WE HAVE A CROWD OF MEN 
WHO MAD FROM THEIR BIRTH ARE STILL CONSIDERED WISE -3729-6852-0035 IT SEEMS TO ME I REPLIED THAT SUCH APPROVAL SUCH RATIFICATION OF THE OPINION EXPRESSED BY THE KING THE PRINCES OF THE BLOOD ET CETERA IS RATHER A PROOF OF THE AFFECTION FELT FOR THEM BY THE NATION FOR THE FRENCH CARRY THAT AFFECTION TO SUCH AN EXTENT THAT THEY BELIEVE THEM INFALLIBLE -3729-6852-0036 WHEN THE KING COMES TO PARIS EVERYBODY CALLS OUT VIVE LE ROI -3729-6852-0037 SHE INTRODUCED ME TO ALL HER GUESTS AND GAVE ME SOME PARTICULARS RESPECTING EVERY ONE OF THEM -3729-6852-0038 WHAT SIR I SAID TO HIM AM I FORTUNATE ENOUGH TO SEE YOU -3729-6852-0039 HE HIMSELF RECITED THE SAME PASSAGE IN FRENCH AND POLITELY POINTED OUT THE PARTS IN WHICH HE THOUGHT THAT I HAD IMPROVED ON THE ORIGINAL -3729-6852-0040 FOR THE FIRST DAY SIR I THINK THAT WHAT YOU HAVE DONE GIVES GREAT HOPES OF YOU AND WITHOUT ANY DOUBT YOU WILL MAKE RAPID PROGRESS -3729-6852-0041 I BELIEVE IT SIR AND THAT IS WHAT I FEAR THEREFORE THE PRINCIPAL OBJECT OF MY VISIT HERE IS TO DEVOTE MYSELF ENTIRELY TO THE STUDY OF THE FRENCH LANGUAGE -3729-6852-0042 I AM A VERY UNPLEASANT PUPIL ALWAYS ASKING QUESTIONS CURIOUS TROUBLESOME INSATIABLE AND EVEN SUPPOSING THAT I COULD MEET WITH THE TEACHER I REQUIRE I AM AFRAID I AM NOT RICH ENOUGH TO PAY HIM -3729-6852-0043 I RESIDE IN THE MARAIS RUE DE DOUZE PORTES -3729-6852-0044 I WILL MAKE YOU TRANSLATE THEM INTO FRENCH AND YOU NEED NOT BE AFRAID OF MY FINDING YOU INSATIABLE -3729-6852-0045 HE HAD A GOOD APPETITE COULD TELL A GOOD STORY WITHOUT LAUGHING WAS CELEBRATED FOR HIS WITTY REPARTEES AND HIS SOCIABLE MANNERS BUT HE SPENT HIS LIFE AT HOME SELDOM GOING OUT AND SEEING HARDLY ANYONE BECAUSE HE ALWAYS HAD A PIPE IN HIS MOUTH AND WAS SURROUNDED BY AT LEAST TWENTY CATS WITH WHICH HE WOULD AMUSE HIMSELF ALL DAY -3729-6852-0046 HIS HOUSEKEEPER HAD THE MANAGEMENT OF EVERYTHING SHE NEVER ALLOWED HIM TO BE IN NEED OF ANYTHING AND SHE GAVE NO ACCOUNT OF HIS MONEY WHICH SHE KEPT ALTOGETHER BECAUSE HE NEVER 
ASKED HER TO RENDER ANY ACCOUNTS -4077-13751-0000 ON THE SIXTH OF APRIL EIGHTEEN THIRTY THE CHURCH OF JESUS CHRIST OF LATTER DAY SAINTS WAS FORMALLY ORGANIZED AND THUS TOOK ON A LEGAL EXISTENCE -4077-13751-0001 ITS ORIGIN WAS SMALL A GERM AN INSIGNIFICANT SEED HARDLY TO BE THOUGHT OF AS LIKELY TO AROUSE OPPOSITION -4077-13751-0002 INSTEAD OF BUT SIX REGULARLY AFFILIATED MEMBERS AND AT MOST TWO SCORE OF ADHERENTS THE ORGANIZATION NUMBERS TODAY MANY HUNDRED THOUSAND SOULS -4077-13751-0003 IN PLACE OF A SINGLE HAMLET IN THE SMALLEST CORNER OF WHICH THE MEMBERS COULD HAVE CONGREGATED THERE NOW ARE ABOUT SEVENTY STAKES OF ZION AND ABOUT SEVEN HUNDRED ORGANIZED WARDS EACH WARD AND STAKE WITH ITS FULL COMPLEMENT OF OFFICERS AND PRIESTHOOD ORGANIZATIONS -4077-13751-0004 THE PRACTISE OF GATHERING ITS PROSELYTES INTO ONE PLACE PREVENTS THE BUILDING UP AND STRENGTHENING OF FOREIGN BRANCHES AND INASMUCH AS EXTENSIVE AND STRONG ORGANIZATIONS ARE SELDOM MET WITH ABROAD VERY ERRONEOUS IDEAS EXIST CONCERNING THE STRENGTH OF THE CHURCH -4077-13751-0005 NEVERTHELESS THE MUSTARD SEED AMONG THE SMALLEST OF ALL SEEDS HAS ATTAINED THE PROPORTIONS OF A TREE AND THE BIRDS OF THE AIR ARE NESTING IN ITS BRANCHES THE ACORN IS NOW AN OAK OFFERING PROTECTION AND THE SWEETS OF SATISFACTION TO EVERY EARNEST PILGRIM JOURNEYING ITS WAY FOR TRUTH -4077-13751-0006 THEIR EYES WERE FROM THE FIRST TURNED IN ANTICIPATION TOWARD THE EVENING SUN NOT MERELY THAT THE WORK OF PROSELYTING SHOULD BE CARRIED ON IN THE WEST BUT THAT THE HEADQUARTERS OF THE CHURCH SHOULD BE THERE ESTABLISHED -4077-13751-0007 THE BOOK OF MORMON HAD TAUGHT THE PEOPLE THE TRUE ORIGIN AND DESTINY OF THE AMERICAN INDIANS AND TOWARD THIS DARK SKINNED REMNANT OF A ONCE MIGHTY PEOPLE THE MISSIONARIES OF MORMONISM EARLY TURNED THEIR EYES AND WITH THEIR EYES WENT THEIR HEARTS AND THEIR HOPES -4077-13751-0008 IT IS NOTABLE THAT THE INDIAN TRIBES HAVE GENERALLY REGARDED THE RELIGION OF THE LATTER DAY SAINTS WITH FAVOR SEEING IN THE BOOK OF 
MORMON STRIKING AGREEMENT WITH THEIR OWN TRADITIONS -4077-13751-0009 THE FIRST WELL ESTABLISHED SEAT OF THE CHURCH WAS IN THE PRETTY LITTLE TOWN OF KIRTLAND OHIO ALMOST WITHIN SIGHT OF LAKE ERIE AND HERE SOON ROSE THE FIRST TEMPLE OF MODERN TIMES -4077-13751-0010 TO THE FERVENT LATTER DAY SAINT A TEMPLE IS NOT SIMPLY A CHURCH BUILDING A HOUSE FOR RELIGIOUS ASSEMBLY -4077-13751-0011 SOON THOUSANDS OF CONVERTS HAD RENTED OR PURCHASED HOMES IN MISSOURI INDEPENDENCE JACKSON COUNTY BEING THEIR CENTER BUT FROM THE FIRST THEY WERE UNPOPULAR AMONG THE MISSOURIANS -4077-13751-0012 THE LIEUTENANT GOVERNOR LILBURN W BOGGS AFTERWARD GOVERNOR WAS A PRONOUNCED MORMON HATER AND THROUGHOUT THE PERIOD OF THE TROUBLES HE MANIFESTED SYMPATHY WITH THE PERSECUTORS -4077-13751-0013 THEIR SUFFERINGS HAVE NEVER YET BEEN FITLY CHRONICLED BY HUMAN SCRIBE -4077-13751-0014 MAKING THEIR WAY ACROSS THE RIVER MOST OF THE REFUGEES FOUND SHELTER AMONG THE MORE HOSPITABLE PEOPLE OF CLAY COUNTY AND AFTERWARD ESTABLISHED THEMSELVES IN CALDWELL COUNTY THEREIN FOUNDING THE CITY OF FAR WEST -4077-13751-0015 A SMALL SETTLEMENT HAD BEEN FOUNDED BY MORMON FAMILIES ON SHOAL CREEK AND HERE ON THE THIRTIETH OF OCTOBER EIGHTEEN THIRTY EIGHT A COMPANY OF TWO HUNDRED AND FORTY FELL UPON THE HAPLESS SETTLERS AND BUTCHERED A SCORE -4077-13751-0016 BE IT SAID TO THE HONOR OF SOME OF THE OFFICERS ENTRUSTED WITH THE TERRIBLE COMMISSION THAT WHEN THEY LEARNED ITS TRUE SIGNIFICANCE THEY RESIGNED THEIR AUTHORITY RATHER THAN HAVE ANYTHING TO DO WITH WHAT THEY DESIGNATED A COLD BLOODED BUTCHERY -4077-13751-0017 OH WHAT A RECORD TO READ WHAT A PICTURE TO GAZE UPON HOW AWFUL THE FACT -4077-13751-0018 AMERICAN SCHOOL BOYS READ WITH EMOTIONS OF HORROR OF THE ALBIGENSES DRIVEN BEATEN AND KILLED WITH A PAPAL LEGATE DIRECTING THE BUTCHERY AND OF THE VAUDOIS HUNTED AND HOUNDED LIKE BEASTS AS THE EFFECT OF A ROYAL DECREE AND THEY YET SHALL READ IN THE HISTORY OF THEIR OWN COUNTRY OF SCENES AS TERRIBLE AS THESE IN THE EXHIBITION OF 
INJUSTICE AND INHUMAN HATE -4077-13751-0019 WHO BEGAN THE QUARREL WAS IT THE MORMONS -4077-13751-0020 AS A SAMPLE OF THE PRESS COMMENTS AGAINST THE BRUTALITY OF THE MISSOURIANS I QUOTE A PARAGRAPH FROM THE QUINCY ARGUS MARCH SIXTEENTH EIGHTEEN THIRTY NINE -4077-13751-0021 IT WILL BE OBSERVED THAT AN ORGANIZED MOB AIDED BY MANY OF THE CIVIL AND MILITARY OFFICERS OF MISSOURI WITH GOVERNOR BOGGS AT THEIR HEAD HAVE BEEN THE PROMINENT ACTORS IN THIS BUSINESS INCITED TOO IT APPEARS AGAINST THE MORMONS BY POLITICAL HATRED AND BY THE ADDITIONAL MOTIVES OF PLUNDER AND REVENGE -4077-13754-0000 THE ARMY FOUND THE PEOPLE IN POVERTY AND LEFT THEM IN COMPARATIVE WEALTH -4077-13754-0001 BUT A WORD FURTHER CONCERNING THE EXPEDITION IN GENERAL -4077-13754-0002 IT WAS THROUGH FLOYD'S ADVICE THAT BUCHANAN ORDERED THE MILITARY EXPEDITION TO UTAH OSTENSIBLY TO INSTALL CERTAIN FEDERAL OFFICIALS AND TO REPRESS AN ALLEGED INFANTILE REBELLION WHICH IN FACT HAD NEVER COME INTO EXISTENCE BUT IN REALITY TO FURTHER THE INTERESTS OF THE SECESSIONISTS -4077-13754-0003 MOREOVER HAD THE PEOPLE BEEN INCLINED TO REBELLION WHAT GREATER OPPORTUNITY COULD THEY HAVE WISHED -4077-13754-0004 ALREADY A NORTH AND A SOUTH WERE TALKED OF WHY NOT SET UP ALSO A WEST -4077-13754-0005 THEY KNEW NO NORTH NO SOUTH NO EAST NO WEST THEY STOOD POSITIVELY BY THE CONSTITUTION AND WOULD HAVE NOTHING TO DO IN THE BLOODY STRIFE BETWEEN BROTHERS UNLESS INDEED THEY WERE SUMMONED BY THE AUTHORITY TO WHICH THEY HAD ALREADY ONCE LOYALLY RESPONDED TO FURNISH MEN AND ARMS FOR THEIR COUNTRY'S NEED -4077-13754-0006 WHAT THE LATTER DAY SAINTS CALL CELESTIAL MARRIAGE IS CHARACTERISTIC OF THE CHURCH AND IS IN VERY GENERAL PRACTISE BUT OF CELESTIAL MARRIAGE PLURALITY OF WIVES WAS AN INCIDENT NEVER AN ESSENTIAL -4077-13754-0007 WE BELIEVE IN A LITERAL RESURRECTION AND AN ACTUAL HEREAFTER IN WHICH FUTURE STATE SHALL BE RECOGNIZED EVERY SANCTIFIED AND AUTHORIZED RELATIONSHIP EXISTING HERE ON EARTH OF PARENT AND CHILD BROTHER AND SISTER 
HUSBAND AND WIFE -4077-13754-0008 IT HAS BEEN MY PRIVILEGE TO TREAD THE SOIL OF MANY LANDS TO OBSERVE THE CUSTOMS AND STUDY THE HABITS OF MORE NATIONS THAN ONE AND I HAVE YET TO FIND THE PLACE AND MEET THE PEOPLE WHERE AND WITH WHOM THE PURITY OF MAN AND WOMAN IS HELD MORE PRECIOUS THAN AMONG THE MALIGNED MORMONS IN THE MOUNTAIN VALLEYS OF THE WEST -4077-13754-0009 AT THE INCEPTION OF PLURAL MARRIAGE AMONG THE LATTER DAY SAINTS THERE WAS NO LAW NATIONAL OR STATE AGAINST ITS PRACTISE -4077-13754-0010 IN EIGHTEEN SIXTY TWO A LAW WAS ENACTED WITH THE PURPOSE OF SUPPRESSING PLURAL MARRIAGE AND AS HAD BEEN PREDICTED IN THE NATIONAL SENATE PRIOR TO ITS PASSAGE IT LAY FOR MANY YEARS A DEAD LETTER -4077-13754-0011 FEDERAL JUDGES AND UNITED STATES ATTORNEYS IN UTAH WHO WERE NOT MORMONS NOR LOVERS OF MORMONISM REFUSED TO ENTERTAIN COMPLAINTS OR PROSECUTE CASES UNDER THE LAW BECAUSE OF ITS MANIFEST INJUSTICE AND INADEQUACY -4077-13754-0012 THIS MEANT THAT FOR AN ALLEGED MISDEMEANOR FOR WHICH CONGRESS PRESCRIBED A MAXIMUM PENALTY OF SIX MONTHS IMPRISONMENT AND A FINE OF THREE HUNDRED DOLLARS A MAN MIGHT BE IMPRISONED FOR LIFE AYE FOR MANY TERMS OF A MAN'S NATURAL LIFE DID THE COURT'S POWER TO ENFORCE ITS SENTENCES EXTEND SO FAR AND MIGHT BE FINED MILLIONS OF DOLLARS -4077-13754-0013 BEFORE THIS TRAVESTY ON THE ADMINISTRATION OF LAW COULD BE BROUGHT BEFORE THE COURT OF LAST RESORT AND THERE MEET WITH THE REVERSAL AND REBUKE IT DESERVED MEN WERE IMPRISONED UNDER SENTENCES OF MANY YEARS DURATION -4077-13754-0014 THE PEOPLE CONTESTED THESE MEASURES ONE BY ONE IN THE COURTS PRESENTING IN CASE AFTER CASE THE DIFFERENT PHASES OF THE SUBJECT AND URGING THE UNCONSTITUTIONALITY OF THE MEASURE -4077-13754-0015 THEN THE CHURCH WAS DISINCORPORATED AND ITS PROPERTY BOTH REAL AND PERSONAL CONFISCATED AND ESCHEATED TO THE GOVERNMENT OF THE UNITED STATES AND ALTHOUGH THE PERSONAL PROPERTY WAS SOON RESTORED REAL ESTATE OF GREAT VALUE LONG LAY IN THE HANDS OF THE COURT'S RECEIVER AND THE MORMON 
CHURCH HAD TO PAY THE NATIONAL GOVERNMENT HIGH RENTAL ON ITS OWN PROPERTY -4077-13754-0016 AND SO THE STORY OF MORMONISM RUNS ON ITS FINALE HAS NOT YET BEEN WRITTEN THE CURRENT PRESS PRESENTS CONTINUOUSLY NEW STAGES OF ITS PROGRESS NEW DEVELOPMENTS OF ITS PLAN -4446-2271-0000 MAINHALL LIKED ALEXANDER BECAUSE HE WAS AN ENGINEER -4446-2271-0001 HE HAD PRECONCEIVED IDEAS ABOUT EVERYTHING AND HIS IDEA ABOUT AMERICANS WAS THAT THEY SHOULD BE ENGINEERS OR MECHANICS -4446-2271-0002 IT'S TREMENDOUSLY WELL PUT ON TOO -4446-2271-0003 IT'S BEEN ON ONLY TWO WEEKS AND I'VE BEEN HALF A DOZEN TIMES ALREADY -4446-2271-0004 DO YOU KNOW ALEXANDER MAINHALL LOOKED WITH PERPLEXITY UP INTO THE TOP OF THE HANSOM AND RUBBED HIS PINK CHEEK WITH HIS GLOVED FINGER DO YOU KNOW I SOMETIMES THINK OF TAKING TO CRITICISM SERIOUSLY MYSELF -4446-2271-0005 SHE SAVES HER HAND TOO SHE'S AT HER BEST IN THE SECOND ACT -4446-2271-0006 HE'S BEEN WANTING TO MARRY HILDA THESE THREE YEARS AND MORE -4446-2271-0007 SHE DOESN'T TAKE UP WITH ANYBODY YOU KNOW -4446-2271-0008 IRENE BURGOYNE ONE OF HER FAMILY TOLD ME IN CONFIDENCE THAT THERE WAS A ROMANCE SOMEWHERE BACK IN THE BEGINNING -4446-2271-0009 MAINHALL VOUCHED FOR HER CONSTANCY WITH A LOFTINESS THAT MADE ALEXANDER SMILE EVEN WHILE A KIND OF RAPID EXCITEMENT WAS TINGLING THROUGH HIM -4446-2271-0010 HE'S ANOTHER WHO'S AWFULLY KEEN ABOUT HER LET ME INTRODUCE YOU -4446-2271-0011 SIR HARRY TOWNE MISTER BARTLEY ALEXANDER THE AMERICAN ENGINEER -4446-2271-0012 I SAY SIR HARRY THE LITTLE GIRL'S GOING FAMOUSLY TO NIGHT ISN'T SHE -4446-2271-0013 DO YOU KNOW I THOUGHT THE DANCE A BIT CONSCIOUS TO NIGHT FOR THE FIRST TIME -4446-2271-0014 WESTMERE AND I WERE BACK AFTER THE FIRST ACT AND WE THOUGHT SHE SEEMED QUITE UNCERTAIN OF HERSELF -4446-2271-0015 A LITTLE ATTACK OF NERVES POSSIBLY -4446-2271-0016 HE WAS BEGINNING TO FEEL A KEEN INTEREST IN THE SLENDER BAREFOOT DONKEY GIRL WHO SLIPPED IN AND OUT OF THE PLAY SINGING LIKE SOME ONE WINDING THROUGH A HILLY FIELD 
-4446-2271-0017 ONE NIGHT WHEN HE AND WINIFRED WERE SITTING TOGETHER ON THE BRIDGE HE TOLD HER THAT THINGS HAD HAPPENED WHILE HE WAS STUDYING ABROAD THAT HE WAS SORRY FOR ONE THING IN PARTICULAR AND HE ASKED HER WHETHER SHE THOUGHT SHE OUGHT TO KNOW ABOUT THEM -4446-2271-0018 SHE CONSIDERED A MOMENT AND THEN SAID NO I THINK NOT THOUGH I AM GLAD YOU ASK ME -4446-2271-0019 AFTER THAT IT WAS EASY TO FORGET ACTUALLY TO FORGET -4446-2271-0020 OF COURSE HE REFLECTED SHE ALWAYS HAD THAT COMBINATION OF SOMETHING HOMELY AND SENSIBLE AND SOMETHING UTTERLY WILD AND DAFT -4446-2271-0021 SHE MUST CARE ABOUT THE THEATRE A GREAT DEAL MORE THAN SHE USED TO -4446-2271-0022 I'M GLAD SHE'S HELD HER OWN SINCE -4446-2271-0023 AFTER ALL WE WERE AWFULLY YOUNG -4446-2271-0024 I SHOULDN'T WONDER IF SHE COULD LAUGH ABOUT IT WITH ME NOW -4446-2273-0000 HILDA WAS VERY NICE TO HIM AND HE SAT ON THE EDGE OF HIS CHAIR FLUSHED WITH HIS CONVERSATIONAL EFFORTS AND MOVING HIS CHIN ABOUT NERVOUSLY OVER HIS HIGH COLLAR -4446-2273-0001 THEY ASKED HIM TO COME TO SEE THEM IN CHELSEA AND THEY SPOKE VERY TENDERLY OF HILDA -4446-2273-0002 LAMB WOULDN'T CARE A GREAT DEAL ABOUT MANY OF THEM I FANCY -4446-2273-0003 WHEN BARTLEY ARRIVED AT BEDFORD SQUARE ON SUNDAY EVENING MARIE THE PRETTY LITTLE FRENCH GIRL MET HIM AT THE DOOR AND CONDUCTED HIM UPSTAIRS -4446-2273-0004 I SHOULD NEVER HAVE ASKED YOU IF MOLLY HAD BEEN HERE FOR I REMEMBER YOU DON'T LIKE ENGLISH COOKERY -4446-2273-0005 I HAVEN'T HAD A CHANCE YET TO TELL YOU WHAT A JOLLY LITTLE PLACE I THINK THIS IS -4446-2273-0006 THEY ARE ALL SKETCHES MADE ABOUT THE VILLA D'ESTE YOU SEE -4446-2273-0007 THOSE FELLOWS ARE ALL VERY LOYAL EVEN MAINHALL -4446-2273-0008 I'VE MANAGED TO SAVE SOMETHING EVERY YEAR AND THAT WITH HELPING MY THREE SISTERS NOW AND THEN AND TIDING POOR COUSIN MIKE OVER BAD SEASONS -4446-2273-0009 IT'S NOT PARTICULARLY RARE SHE SAID BUT SOME OF IT WAS MY MOTHER'S -4446-2273-0010 THERE WAS WATERCRESS SOUP AND SOLE AND A DELIGHTFUL OMELETTE 
STUFFED WITH MUSHROOMS AND TRUFFLES AND TWO SMALL RARE DUCKLINGS AND ARTICHOKES AND A DRY YELLOW RHONE WINE OF WHICH BARTLEY HAD ALWAYS BEEN VERY FOND -4446-2273-0011 THERE IS NOTHING ELSE THAT LOOKS SO JOLLY -4446-2273-0012 THANK YOU BUT I DON'T LIKE IT SO WELL AS THIS -4446-2273-0013 HAVE YOU BEEN IN PARIS MUCH THESE LATE YEARS -4446-2273-0014 THERE ARE FEW CHANGES IN THE OLD QUARTER -4446-2273-0015 DON'T I THOUGH I'M SO SORRY TO HEAR IT HOW DID HER SON TURN OUT -4446-2273-0016 HER HAIR IS STILL LIKE FLAX AND HER BLUE EYES ARE JUST LIKE A BABY'S AND SHE HAS THE SAME THREE FRECKLES ON HER LITTLE NOSE AND TALKS ABOUT GOING BACK TO HER BAINS DE MER -4446-2273-0017 HOW JOLLY IT WAS BEING YOUNG HILDA -4446-2273-0018 DO YOU REMEMBER THAT FIRST WALK WE TOOK TOGETHER IN PARIS -4446-2273-0019 COME WE'LL HAVE OUR COFFEE IN THE OTHER ROOM AND YOU CAN SMOKE -4446-2273-0020 I THINK WE DID SHE ANSWERED DEMURELY -4446-2273-0021 WHAT SHE WANTED FROM US WAS NEITHER OUR FLOWERS NOR OUR FRANCS BUT JUST OUR YOUTH -4446-2273-0022 THEY WERE BOTH REMEMBERING WHAT THE WOMAN HAD SAID WHEN SHE TOOK THE MONEY GOD GIVE YOU A HAPPY LOVE -4446-2273-0023 THE STRANGE WOMAN AND HER PASSIONATE SENTENCE THAT RANG OUT SO SHARPLY HAD FRIGHTENED THEM BOTH -4446-2273-0024 BARTLEY STARTED WHEN HILDA RANG THE LITTLE BELL BESIDE HER DEAR ME WHY DID YOU DO THAT -4446-2273-0025 IT WAS VERY JOLLY HE MURMURED LAZILY AS MARIE CAME IN TO TAKE AWAY THE COFFEE -4446-2273-0026 HAVE I TOLD YOU ABOUT MY NEW PLAY -4446-2273-0027 WHEN SHE FINISHED ALEXANDER SHOOK HIMSELF OUT OF A REVERIE -4446-2273-0028 NONSENSE OF COURSE I CAN'T REALLY SING EXCEPT THE WAY MY MOTHER AND GRANDMOTHER DID BEFORE ME -4446-2273-0029 IT'S REALLY TOO WARM IN THIS ROOM TO SING DON'T YOU FEEL IT -4446-2273-0030 ALEXANDER WENT OVER AND OPENED THE WINDOW FOR HER -4446-2273-0031 THERE JUST IN FRONT -4446-2273-0032 HE STOOD A LITTLE BEHIND HER AND TRIED TO STEADY HIMSELF AS HE SAID IT'S SOFT AND MISTY SEE HOW WHITE THE STARS ARE -4446-2273-0033 
FOR A LONG TIME NEITHER HILDA NOR BARTLEY SPOKE -4446-2273-0034 HE FELT A TREMOR RUN THROUGH THE SLENDER YELLOW FIGURE IN FRONT OF HIM -4446-2273-0035 BARTLEY LEANED OVER HER SHOULDER WITHOUT TOUCHING HER AND WHISPERED IN HER EAR YOU ARE GIVING ME A CHANCE YES -4446-2273-0036 ALEXANDER UNCLENCHED THE TWO HANDS AT HIS SIDES -4446-2275-0000 THE STOP AT QUEENSTOWN THE TEDIOUS PASSAGE UP THE MERSEY WERE THINGS THAT HE NOTED DIMLY THROUGH HIS GROWING IMPATIENCE -4446-2275-0001 SHE BLUSHED AND SMILED AND FUMBLED HIS CARD IN HER CONFUSION BEFORE SHE RAN UPSTAIRS -4446-2275-0002 ALEXANDER PACED UP AND DOWN THE HALLWAY BUTTONING AND UNBUTTONING HIS OVERCOAT UNTIL SHE RETURNED AND TOOK HIM UP TO HILDA'S LIVING ROOM -4446-2275-0003 THE ROOM WAS EMPTY WHEN HE ENTERED -4446-2275-0004 ALEXANDER DID NOT SIT DOWN -4446-2275-0005 I FELT IT IN MY BONES WHEN I WOKE THIS MORNING THAT SOMETHING SPLENDID WAS GOING TO TURN UP -4446-2275-0006 I THOUGHT IT MIGHT BE SISTER KATE OR COUSIN MIKE WOULD BE HAPPENING ALONG -4446-2275-0007 SHE PUSHED HIM TOWARD THE BIG CHAIR BY THE FIRE AND SAT DOWN ON A STOOL AT THE OPPOSITE SIDE OF THE HEARTH HER KNEES DRAWN UP TO HER CHIN LAUGHING LIKE A HAPPY LITTLE GIRL -4446-2275-0008 WHEN DID YOU COME BARTLEY AND HOW DID IT HAPPEN YOU HAVEN'T SPOKEN A WORD -4446-2275-0009 I GOT IN ABOUT TEN MINUTES AGO -4446-2275-0010 ALEXANDER LEANED FORWARD AND WARMED HIS HANDS BEFORE THE BLAZE -4446-2275-0011 BARTLEY BENT LOWER OVER THE FIRE -4446-2275-0012 SHE LOOKED AT HIS HEAVY SHOULDERS AND BIG DETERMINED HEAD THRUST FORWARD LIKE A CATAPULT IN LEASH -4446-2275-0013 I'LL DO ANYTHING YOU WISH ME TO BARTLEY SHE SAID TREMULOUSLY -4446-2275-0014 I CAN'T STAND SEEING YOU MISERABLE -4446-2275-0015 HE PULLED UP A WINDOW AS IF THE AIR WERE HEAVY -4446-2275-0016 HILDA WATCHED HIM FROM HER CORNER TREMBLING AND SCARCELY BREATHING DARK SHADOWS GROWING ABOUT HER EYES IT -4446-2275-0017 BUT IT'S WORSE NOW IT'S UNBEARABLE -4446-2275-0018 I GET NOTHING BUT MISERY OUT OF EITHER 
-4446-2275-0019 THE WORLD IS ALL THERE JUST AS IT USED TO BE BUT I CAN'T GET AT IT ANY MORE -4446-2275-0020 IT WAS MYSELF I WAS DEFYING HILDA -4446-2275-0021 HILDA'S FACE QUIVERED BUT SHE WHISPERED YES I THINK IT MUST HAVE BEEN -4446-2275-0022 BUT WHY DIDN'T YOU TELL ME WHEN YOU WERE HERE IN THE SUMMER -4446-2275-0023 ALEXANDER GROANED I MEANT TO BUT SOMEHOW I COULDN'T -4446-2275-0024 SHE PRESSED HIS HAND GENTLY IN GRATITUDE -4446-2275-0025 WEREN'T YOU HAPPY THEN AT ALL -4446-2275-0026 SHE CLOSED HER EYES AND TOOK A DEEP BREATH AS IF TO DRAW IN AGAIN THE FRAGRANCE OF THOSE DAYS -4446-2275-0027 HE MOVED UNEASILY AND HIS CHAIR CREAKED -4446-2275-0028 YES YES SHE HURRIED PULLING HER HAND GENTLY AWAY FROM HIM -4446-2275-0029 PLEASE TELL ME ONE THING BARTLEY AT LEAST TELL ME THAT YOU BELIEVE I THOUGHT I WAS MAKING YOU HAPPY -4446-2275-0030 YES HILDA I KNOW THAT HE SAID SIMPLY -4446-2275-0031 I UNDERSTAND BARTLEY I WAS WRONG -4446-2275-0032 BUT I DIDN'T KNOW YOU'VE ONLY TO TELL ME NOW -4446-2275-0033 WHAT I MEAN IS THAT I WANT YOU TO PROMISE NEVER TO SEE ME AGAIN NO MATTER HOW OFTEN I COME NO MATTER HOW HARD I BEG -4446-2275-0034 KEEP AWAY IF YOU WISH WHEN HAVE I EVER FOLLOWED YOU -4446-2275-0035 ALEXANDER ROSE AND SHOOK HIMSELF ANGRILY YES I KNOW I'M COWARDLY -4446-2275-0036 HE TOOK HER ROUGHLY IN HIS ARMS DO YOU KNOW WHAT I MEAN -4446-2275-0037 OH BARTLEY WHAT AM I TO DO -4446-2275-0038 I WILL ASK THE LEAST IMAGINABLE BUT I MUST HAVE SOMETHING -4446-2275-0039 I MUST KNOW ABOUT YOU -4446-2275-0040 THE SIGHT OF YOU BARTLEY TO SEE YOU LIVING AND HAPPY AND SUCCESSFUL CAN I NEVER MAKE YOU UNDERSTAND WHAT THAT MEANS TO ME -4446-2275-0041 YOU SEE LOVING SOME ONE AS I LOVE YOU MAKES THE WHOLE WORLD DIFFERENT -4446-2275-0042 AND THEN YOU CAME BACK NOT CARING VERY MUCH BUT IT MADE NO DIFFERENCE -4446-2275-0043 BARTLEY BENT OVER AND TOOK HER IN HIS ARMS KISSING HER MOUTH AND HER WET TIRED EYES -4446-2275-0044 DON'T CRY DON'T CRY HE WHISPERED -4446-2275-0045 WE'VE TORTURED EACH 
OTHER ENOUGH FOR TONIGHT -4507-16021-0000 CHAPTER ONE ORIGIN -4507-16021-0001 IT ENGENDERS A WHOLE WORLD LA PEGRE FOR WHICH READ THEFT AND A HELL LA PEGRENNE FOR WHICH READ HUNGER -4507-16021-0002 THUS IDLENESS IS THE MOTHER -4507-16021-0003 SHE HAS A SON THEFT AND A DAUGHTER HUNGER -4507-16021-0004 WHAT IS SLANG -4507-16021-0005 WE HAVE NEVER UNDERSTOOD THIS SORT OF OBJECTIONS -4507-16021-0006 SLANG IS ODIOUS -4507-16021-0007 SLANG MAKES ONE SHUDDER -4507-16021-0008 WHO DENIES THAT OF COURSE IT DOES -4507-16021-0009 WHEN IT IS A QUESTION OF PROBING A WOUND A GULF A SOCIETY SINCE WHEN HAS IT BEEN CONSIDERED WRONG TO GO TOO FAR TO GO TO THE BOTTOM -4507-16021-0010 WE HAVE ALWAYS THOUGHT THAT IT WAS SOMETIMES A COURAGEOUS ACT AND AT LEAST A SIMPLE AND USEFUL DEED WORTHY OF THE SYMPATHETIC ATTENTION WHICH DUTY ACCEPTED AND FULFILLED MERITS -4507-16021-0011 WHY SHOULD ONE NOT EXPLORE EVERYTHING AND STUDY EVERYTHING -4507-16021-0012 WHY SHOULD ONE HALT ON THE WAY -4507-16021-0013 NOTHING IS MORE LUGUBRIOUS THAN THE CONTEMPLATION THUS IN ITS NUDITY IN THE BROAD LIGHT OF THOUGHT OF THE HORRIBLE SWARMING OF SLANG -4507-16021-0014 NOW WHEN HAS HORROR EVER EXCLUDED STUDY -4507-16021-0015 SINCE WHEN HAS MALADY BANISHED MEDICINE -4507-16021-0016 CAN ONE IMAGINE A NATURALIST REFUSING TO STUDY THE VIPER THE BAT THE SCORPION THE CENTIPEDE THE TARANTULA AND ONE WHO WOULD CAST THEM BACK INTO THEIR DARKNESS SAYING OH HOW UGLY THAT IS -4507-16021-0017 HE WOULD BE LIKE A PHILOLOGIST REFUSING TO EXAMINE A FACT IN LANGUAGE A PHILOSOPHER HESITATING TO SCRUTINIZE A FACT IN HUMANITY -4507-16021-0018 WHAT IS SLANG PROPERLY SPEAKING -4507-16021-0019 IT IS THE LANGUAGE OF WRETCHEDNESS -4507-16021-0020 WE MAY BE STOPPED THE FACT MAY BE PUT TO US IN GENERAL TERMS WHICH IS ONE WAY OF ATTENUATING IT WE MAY BE TOLD THAT ALL TRADES PROFESSIONS IT MAY BE ADDED ALL THE ACCIDENTS OF THE SOCIAL HIERARCHY AND ALL FORMS OF INTELLIGENCE HAVE THEIR OWN SLANG -4507-16021-0021 THE PAINTER WHO SAYS MY GRINDER 
THE NOTARY WHO SAYS MY SKIP THE GUTTER THE HAIRDRESSER WHO SAYS MY MEALYBACK THE COBBLER WHO SAYS MY CUB TALKS SLANG -4507-16021-0022 THERE IS THE SLANG OF THE AFFECTED LADY AS WELL AS OF THE PRECIEUSES -4507-16021-0023 THE SUGAR MANUFACTURER WHO SAYS LOAF CLARIFIED LUMPS BASTARD COMMON BURNT THIS HONEST MANUFACTURER TALKS SLANG -4507-16021-0024 ALGEBRA MEDICINE BOTANY HAVE EACH THEIR SLANG -4507-16021-0025 TO MEET THE NEEDS OF THIS CONFLICT WRETCHEDNESS HAS INVENTED A LANGUAGE OF COMBAT WHICH IS SLANG -4507-16021-0026 TO KEEP AFLOAT AND TO RESCUE FROM OBLIVION TO HOLD ABOVE THE GULF WERE IT BUT A FRAGMENT OF SOME LANGUAGE WHICH MAN HAS SPOKEN AND WHICH WOULD OTHERWISE BE LOST THAT IS TO SAY ONE OF THE ELEMENTS GOOD OR BAD OF WHICH CIVILIZATION IS COMPOSED OR BY WHICH IT IS COMPLICATED TO EXTEND THE RECORDS OF SOCIAL OBSERVATION IS TO SERVE CIVILIZATION ITSELF -4507-16021-0027 PHOENICIAN VERY GOOD -4507-16021-0028 EVEN DIALECT LET THAT PASS -4507-16021-0029 TO THIS WE REPLY IN ONE WORD ONLY -4507-16021-0030 ASSUREDLY IF THE TONGUE WHICH A NATION OR A PROVINCE HAS SPOKEN IS WORTHY OF INTEREST THE LANGUAGE WHICH HAS BEEN SPOKEN BY A MISERY IS STILL MORE WORTHY OF ATTENTION AND STUDY -4507-16021-0031 AND THEN WE INSIST UPON IT THE STUDY OF SOCIAL DEFORMITIES AND INFIRMITIES AND THE TASK OF POINTING THEM OUT WITH A VIEW TO REMEDY IS NOT A BUSINESS IN WHICH CHOICE IS PERMITTED -4507-16021-0032 HE MUST DESCEND WITH HIS HEART FULL OF CHARITY AND SEVERITY AT THE SAME TIME AS A BROTHER AND AS A JUDGE TO THOSE IMPENETRABLE CASEMATES WHERE CRAWL PELL MELL THOSE WHO BLEED AND THOSE WHO DEAL THE BLOW THOSE WHO WEEP AND THOSE WHO CURSE THOSE WHO FAST AND THOSE WHO DEVOUR THOSE WHO ENDURE EVIL AND THOSE WHO INFLICT IT -4507-16021-0033 DO WE REALLY KNOW THE MOUNTAIN WELL WHEN WE ARE NOT ACQUAINTED WITH THE CAVERN -4507-16021-0034 THEY CONSTITUTE TWO DIFFERENT ORDERS OF FACTS WHICH CORRESPOND TO EACH OTHER WHICH ARE ALWAYS INTERLACED AND WHICH OFTEN BRING FORTH RESULTS 
-4507-16021-0035 TRUE HISTORY BEING A MIXTURE OF ALL THINGS THE TRUE HISTORIAN MINGLES IN EVERYTHING -4507-16021-0036 FACTS FORM ONE OF THESE AND IDEAS THE OTHER -4507-16021-0037 THERE IT CLOTHES ITSELF IN WORD MASKS IN METAPHOR RAGS -4507-16021-0038 IN THIS GUISE IT BECOMES HORRIBLE -4507-16021-0039 ONE PERCEIVES WITHOUT UNDERSTANDING IT A HIDEOUS MURMUR SOUNDING ALMOST LIKE HUMAN ACCENTS BUT MORE NEARLY RESEMBLING A HOWL THAN AN ARTICULATE WORD -4507-16021-0040 ONE THINKS ONE HEARS HYDRAS TALKING -4507-16021-0041 IT IS UNINTELLIGIBLE IN THE DARK -4507-16021-0042 IT IS BLACK IN MISFORTUNE IT IS BLACKER STILL IN CRIME THESE TWO BLACKNESSES AMALGAMATED COMPOSE SLANG -4507-16021-0043 THE EARTH IS NOT DEVOID OF RESEMBLANCE TO A JAIL -4507-16021-0044 LOOK CLOSELY AT LIFE -4507-16021-0045 IT IS SO MADE THAT EVERYWHERE WE FEEL THE SENSE OF PUNISHMENT -4507-16021-0046 EACH DAY HAS ITS OWN GREAT GRIEF OR ITS LITTLE CARE -4507-16021-0047 YESTERDAY YOU WERE TREMBLING FOR A HEALTH THAT IS DEAR TO YOU TO DAY YOU FEAR FOR YOUR OWN TO MORROW IT WILL BE ANXIETY ABOUT MONEY THE DAY AFTER TO MORROW THE DIATRIBE OF A SLANDERER THE DAY AFTER THAT THE MISFORTUNE OF SOME FRIEND THEN THE PREVAILING WEATHER THEN SOMETHING THAT HAS BEEN BROKEN OR LOST THEN A PLEASURE WITH WHICH YOUR CONSCIENCE AND YOUR VERTEBRAL COLUMN REPROACH YOU AGAIN THE COURSE OF PUBLIC AFFAIRS -4507-16021-0048 THIS WITHOUT RECKONING IN THE PAINS OF THE HEART AND SO IT GOES ON -4507-16021-0049 THERE IS HARDLY ONE DAY OUT OF A HUNDRED WHICH IS WHOLLY JOYOUS AND SUNNY -4507-16021-0050 AND YOU BELONG TO THAT SMALL CLASS WHO ARE HAPPY -4507-16021-0051 IN THIS WORLD EVIDENTLY THE VESTIBULE OF ANOTHER THERE ARE NO FORTUNATE -4507-16021-0052 THE REAL HUMAN DIVISION IS THIS THE LUMINOUS AND THE SHADY -4507-16021-0053 TO DIMINISH THE NUMBER OF THE SHADY TO AUGMENT THE NUMBER OF THE LUMINOUS THAT IS THE OBJECT -4507-16021-0054 THAT IS WHY WE CRY EDUCATION SCIENCE -4507-16021-0055 TO TEACH READING MEANS TO LIGHT THE FIRE EVERY 
SYLLABLE SPELLED OUT SPARKLES -4507-16021-0056 HOWEVER HE WHO SAYS LIGHT DOES NOT NECESSARILY SAY JOY -4507-16021-0057 PEOPLE SUFFER IN THE LIGHT EXCESS BURNS -4507-16021-0058 THE FLAME IS THE ENEMY OF THE WING -4507-16021-0059 TO BURN WITHOUT CEASING TO FLY THEREIN LIES THE MARVEL OF GENIUS -4970-29093-0000 YOU'LL NEVER DIG IT OUT OF THE ASTOR LIBRARY -4970-29093-0001 TO THE YOUNG AMERICAN HERE OR ELSEWHERE THE PATHS TO FORTUNE ARE INNUMERABLE AND ALL OPEN THERE IS INVITATION IN THE AIR AND SUCCESS IN ALL HIS WIDE HORIZON -4970-29093-0002 HE HAS NO TRADITIONS TO BIND HIM OR GUIDE HIM AND HIS IMPULSE IS TO BREAK AWAY FROM THE OCCUPATION HIS FATHER HAS FOLLOWED AND MAKE A NEW WAY FOR HIMSELF -4970-29093-0003 THE MODEST FELLOW WOULD HAVE LIKED FAME THRUST UPON HIM FOR SOME WORTHY ACHIEVEMENT IT MIGHT BE FOR A BOOK OR FOR THE SKILLFUL MANAGEMENT OF SOME GREAT NEWSPAPER OR FOR SOME DARING EXPEDITION LIKE THAT OF LIEUTENANT STRAIN OR DOCTOR KANE -4970-29093-0004 HE WAS UNABLE TO DECIDE EXACTLY WHAT IT SHOULD BE -4970-29093-0005 SOMETIMES HE THOUGHT HE WOULD LIKE TO STAND IN A CONSPICUOUS PULPIT AND HUMBLY PREACH THE GOSPEL OF REPENTANCE AND IT EVEN CROSSED HIS MIND THAT IT WOULD BE NOBLE TO GIVE HIMSELF TO A MISSIONARY LIFE TO SOME BENIGHTED REGION WHERE THE DATE PALM GROWS AND THE NIGHTINGALE'S VOICE IS IN TUNE AND THE BUL BUL SINGS ON THE OFF NIGHTS -4970-29093-0006 LAW SEEMED TO HIM WELL ENOUGH AS A SCIENCE BUT HE NEVER COULD DISCOVER A PRACTICAL CASE WHERE IT APPEARED TO HIM WORTH WHILE TO GO TO LAW AND ALL THE CLIENTS WHO STOPPED WITH THIS NEW CLERK IN THE ANTE ROOM OF THE LAW OFFICE WHERE HE WAS WRITING PHILIP INVARIABLY ADVISED TO SETTLE NO MATTER HOW BUT SETTLE GREATLY TO THE DISGUST OF HIS EMPLOYER WHO KNEW THAT JUSTICE BETWEEN MAN AND MAN COULD ONLY BE ATTAINED BY THE RECOGNIZED PROCESSES WITH THE ATTENDANT FEES -4970-29093-0007 IT IS SUCH A NOBLE AMBITION THAT IT IS A PITY IT HAS USUALLY SUCH A SHALLOW FOUNDATION -4970-29093-0008 HE WANTED TO BEGIN AT THE TOP 
OF THE LADDER -4970-29093-0009 PHILIP THEREFORE READ DILIGENTLY IN THE ASTOR LIBRARY PLANNED LITERARY WORKS THAT SHOULD COMPEL ATTENTION AND NURSED HIS GENIUS -4970-29093-0010 HE HAD NO FRIEND WISE ENOUGH TO TELL HIM TO STEP INTO THE DORKING CONVENTION THEN IN SESSION MAKE A SKETCH OF THE MEN AND WOMEN ON THE PLATFORM AND TAKE IT TO THE EDITOR OF THE DAILY GRAPEVINE AND SEE WHAT HE COULD GET A LINE FOR IT -4970-29093-0011 O VERY WELL SAID GRINGO TURNING AWAY WITH A SHADE OF CONTEMPT YOU'LL FIND IF YOU ARE GOING INTO LITERATURE AND NEWSPAPER WORK THAT YOU CAN'T AFFORD A CONSCIENCE LIKE THAT -4970-29093-0012 BUT PHILIP DID AFFORD IT AND HE WROTE THANKING HIS FRIENDS AND DECLINING BECAUSE HE SAID THE POLITICAL SCHEME WOULD FAIL AND OUGHT TO FAIL -4970-29093-0013 AND HE WENT BACK TO HIS BOOKS AND TO HIS WAITING FOR AN OPENING LARGE ENOUGH FOR HIS DIGNIFIED ENTRANCE INTO THE LITERARY WORLD -4970-29093-0014 WELL I'M GOING AS AN ENGINEER YOU CAN GO AS ONE -4970-29093-0015 YOU CAN BEGIN BY CARRYING A ROD AND PUTTING DOWN THE FIGURES -4970-29093-0016 NO ITS NOT TOO SOON -4970-29093-0017 I'VE BEEN READY TO GO ANYWHERE FOR SIX MONTHS -4970-29093-0018 THE TWO YOUNG MEN WHO WERE BY THIS TIME FULL OF THE ADVENTURE WENT DOWN TO THE WALL STREET OFFICE OF HENRY'S UNCLE AND HAD A TALK WITH THAT WILY OPERATOR -4970-29093-0019 THE NIGHT WAS SPENT IN PACKING UP AND WRITING LETTERS FOR PHILIP WOULD NOT TAKE SUCH AN IMPORTANT STEP WITHOUT INFORMING HIS FRIENDS -4970-29093-0020 WHY IT'S IN MISSOURI SOMEWHERE ON THE FRONTIER I THINK WE'LL GET A MAP -4970-29093-0021 I WAS AFRAID IT WAS NEARER HOME -4970-29093-0022 HE KNEW HIS UNCLE WOULD BE GLAD TO HEAR THAT HE HAD AT LAST TURNED HIS THOUGHTS TO A PRACTICAL MATTER -4970-29093-0023 HE WELL KNEW THE PERILS OF THE FRONTIER THE SAVAGE STATE OF SOCIETY THE LURKING INDIANS AND THE DANGERS OF FEVER -4970-29095-0000 SHE WAS TIRED OF OTHER THINGS -4970-29095-0001 SHE TRIED THIS MORNING AN AIR OR TWO UPON THE PIANO SANG A SIMPLE SONG IN A SWEET BUT 
SLIGHTLY METALLIC VOICE AND THEN SEATING HERSELF BY THE OPEN WINDOW READ PHILIP'S LETTER -4970-29095-0002 WELL MOTHER SAID THE YOUNG STUDENT LOOKING UP WITH A SHADE OF IMPATIENCE -4970-29095-0003 I HOPE THEE TOLD THE ELDERS THAT FATHER AND I ARE RESPONSIBLE FOR THE PIANO AND THAT MUCH AS THEE LOVES MUSIC THEE IS NEVER IN THE ROOM WHEN IT IS PLAYED -4970-29095-0004 I HEARD FATHER TELL COUSIN ABNER THAT HE WAS WHIPPED SO OFTEN FOR WHISTLING WHEN HE WAS A BOY THAT HE WAS DETERMINED TO HAVE WHAT COMPENSATION HE COULD GET NOW -4970-29095-0005 THY WAYS GREATLY TRY ME RUTH AND ALL THY RELATIONS -4970-29095-0006 IS THY FATHER WILLING THEE SHOULD GO AWAY TO A SCHOOL OF THE WORLD'S PEOPLE -4970-29095-0007 I HAVE NOT ASKED HIM RUTH REPLIED WITH A LOOK THAT MIGHT IMPLY THAT SHE WAS ONE OF THOSE DETERMINED LITTLE BODIES WHO FIRST MADE UP HER OWN MIND AND THEN COMPELLED OTHERS TO MAKE UP THEIRS IN ACCORDANCE WITH HERS -4970-29095-0008 MOTHER I'M GOING TO STUDY MEDICINE -4970-29095-0009 MARGARET BOLTON ALMOST LOST FOR A MOMENT HER HABITUAL PLACIDITY -4970-29095-0010 THEE STUDY MEDICINE -4970-29095-0011 DOES THEE THINK THEE COULD STAND IT SIX MONTHS -4970-29095-0012 AND BESIDES SUPPOSE THEE DOES LEARN MEDICINE -4970-29095-0013 I WILL PRACTICE IT -4970-29095-0014 WHERE THEE AND THY FAMILY ARE KNOWN -4970-29095-0015 IF I CAN GET PATIENTS -4970-29095-0016 RUTH SAT QUITE STILL FOR A TIME WITH FACE INTENT AND FLUSHED IT WAS OUT NOW -4970-29095-0017 THE SIGHT SEERS RETURNED IN HIGH SPIRITS FROM THE CITY -4970-29095-0018 RUTH ASKED THE ENTHUSIASTS IF THEY WOULD LIKE TO LIVE IN SUCH A SOUNDING MAUSOLEUM WITH ITS GREAT HALLS AND ECHOING ROOMS AND NO COMFORTABLE PLACE IN IT FOR THE ACCOMMODATION OF ANY BODY -4970-29095-0019 AND THEN THERE WAS BROAD STREET -4970-29095-0020 THERE CERTAINLY WAS NO END TO IT AND EVEN RUTH WAS PHILADELPHIAN ENOUGH TO BELIEVE THAT A STREET OUGHT NOT TO HAVE ANY END OR ARCHITECTURAL POINT UPON WHICH THE WEARY EYE COULD REST -4970-29095-0021 BUT NEITHER SAINT 
GIRARD NOR BROAD STREET NEITHER WONDERS OF THE MINT NOR THE GLORIES OF THE HALL WHERE THE GHOSTS OF OUR FATHERS SIT ALWAYS SIGNING THE DECLARATION IMPRESSED THE VISITORS SO MUCH AS THE SPLENDORS OF THE CHESTNUT STREET WINDOWS AND THE BARGAINS ON EIGHTH STREET -4970-29095-0022 IS THEE GOING TO THE YEARLY MEETING RUTH ASKED ONE OF THE GIRLS -4970-29095-0023 I HAVE NOTHING TO WEAR REPLIED THAT DEMURE PERSON -4970-29095-0024 IT HAS OCCUPIED MOTHER A LONG TIME TO FIND AT THE SHOPS THE EXACT SHADE FOR HER NEW BONNET -4970-29095-0025 AND THEE WON'T GO WHY SHOULD I -4970-29095-0026 IF I GO TO MEETING AT ALL I LIKE BEST TO SIT IN THE QUIET OLD HOUSE IN GERMANTOWN WHERE THE WINDOWS ARE ALL OPEN AND I CAN SEE THE TREES AND HEAR THE STIR OF THE LEAVES -4970-29095-0027 IT'S SUCH A CRUSH AT THE YEARLY MEETING AT ARCH STREET AND THEN THERE'S THE ROW OF SLEEK LOOKING YOUNG MEN WHO LINE THE CURBSTONE AND STARE AT US AS WE COME OUT -4970-29095-0028 HE DOESN'T SAY BUT IT'S ON THE FRONTIER AND ON THE MAP EVERYTHING BEYOND IT IS MARKED INDIANS AND DESERT AND LOOKS AS DESOLATE AS A WEDNESDAY MEETING HUMPH IT WAS TIME FOR HIM TO DO SOMETHING -4970-29095-0029 IS HE GOING TO START A DAILY NEWSPAPER AMONG THE KICK A POOS -4970-29095-0030 FATHER THEE'S UNJUST TO PHILIP HE'S GOING INTO BUSINESS -4970-29095-0031 HE DOESN'T SAY EXACTLY WHAT IT IS SAID RUTH A LITTLE DUBIOUSLY BUT IT'S SOMETHING ABOUT LAND AND RAILROADS AND THEE KNOWS FATHER THAT FORTUNES ARE MADE NOBODY KNOWS EXACTLY HOW IN A NEW COUNTRY -4970-29095-0032 BUT PHILIP IS HONEST AND HE HAS TALENT ENOUGH IF HE WILL STOP SCRIBBLING TO MAKE HIS WAY -4970-29095-0033 WHAT A BOX WOMEN ARE PUT INTO MEASURED FOR IT AND PUT IN YOUNG IF WE GO ANYWHERE IT'S IN A BOX VEILED AND PINIONED AND SHUT IN BY DISABILITIES -4970-29095-0034 WHY SHOULD I RUST AND BE STUPID AND SIT IN INACTION BECAUSE I AM A GIRL -4970-29095-0035 AND IF I HAD A FORTUNE WOULD THEE WANT ME TO LEAD A USELESS LIFE -4970-29095-0036 HAS THEE CONSULTED THY MOTHER ABOUT A CAREER I 
SUPPOSE IT IS A CAREER THEE WANTS -4970-29095-0037 BUT THAT WISE AND PLACID WOMAN UNDERSTOOD THE SWEET REBEL A GREAT DEAL BETTER THAN RUTH UNDERSTOOD HERSELF -4970-29095-0038 RUTH WAS GLAD TO HEAR THAT PHILIP HAD MADE A PUSH INTO THE WORLD AND SHE WAS SURE THAT HIS TALENT AND COURAGE WOULD MAKE A WAY FOR HIM -4992-23283-0000 BUT THE MORE FORGETFULNESS HAD THEN PREVAILED THE MORE POWERFUL WAS THE FORCE OF REMEMBRANCE WHEN SHE AWOKE -4992-23283-0001 MISS MILNER'S HEALTH IS NOT GOOD -4992-23283-0002 SAID MISSUS HORTON A FEW MINUTES AFTER -4992-23283-0003 SO THERE IS TO ME ADDED SANDFORD WITH A SARCASTIC SNEER -4992-23283-0004 AND YET YOU MUST OWN HER BEHAVIOUR HAS WARRANTED THEM HAS IT NOT BEEN IN THIS PARTICULAR INCOHERENT AND UNACCOUNTABLE -4992-23283-0005 NOT THAT I KNOW OF NOT ONE MORE THAT I KNOW OF HE REPLIED WITH ASTONISHMENT AT WHAT SHE HAD INSINUATED AND YET WITH A PERFECT ASSURANCE THAT SHE WAS IN THE WRONG -4992-23283-0006 PERHAPS I AM MISTAKEN ANSWERED SHE -4992-23283-0007 TO ASK ANY MORE QUESTIONS OF YOU I BELIEVE WOULD BE UNFAIR -4992-23283-0008 HE SEEMED TO WAIT FOR HER REPLY BUT AS SHE MADE NONE HE PROCEEDED -4992-23283-0009 OH MY LORD CRIED MISS WOODLEY WITH A MOST FORCIBLE ACCENT YOU ARE THE LAST PERSON ON EARTH SHE WOULD PARDON ME FOR ENTRUSTING -4992-23283-0010 BUT IN SUCH A CASE MISS MILNER'S ELECTION OF A HUSBAND SHALL NOT DIRECT MINE -4992-23283-0011 IF SHE DOES NOT KNOW HOW TO ESTIMATE HER OWN VALUE I DO -4992-23283-0012 INDEPENDENT OF HER FORTUNE SHE HAS BEAUTY TO CAPTIVATE THE HEART OF ANY MAN AND WITH ALL HER FOLLIES SHE HAS A FRANKNESS IN HER MANNER AN UNAFFECTED WISDOM IN HER THOUGHTS A VIVACITY IN HER CONVERSATION AND WITHAL A SOFTNESS IN HER DEMEANOUR THAT MIGHT ALONE ENGAGE THE AFFECTIONS OF A MAN OF THE NICEST SENTIMENTS AND THE STRONGEST UNDERSTANDING -4992-23283-0013 MY LORD MISS MILNER'S TASTE IS NOT A DEPRAVED ONE IT IS BUT TOO REFINED -4992-23283-0014 WHAT CAN YOU MEAN BY THAT MISS WOODLEY YOU TALK MYSTERIOUSLY -4992-23283-0015 IS 
SHE NOT AFRAID THAT I WILL THWART HER INCLINATIONS -4992-23283-0016 AGAIN HE SEARCHED HIS OWN THOUGHTS NOR INEFFECTUALLY AS BEFORE -4992-23283-0017 MISS WOODLEY WAS TOO LITTLE VERSED IN THE SUBJECT TO KNOW THIS WOULD HAVE BEEN NOT TO LOVE AT ALL AT LEAST NOT TO THE EXTENT OF BREAKING THROUGH ENGAGEMENTS AND ALL THE VARIOUS OBSTACLES THAT STILL MILITATED AGAINST THEIR UNION -4992-23283-0018 TO RELIEVE HER FROM BOTH HE LAID HIS HAND WITH FORCE UPON HIS HEART AND SAID DO YOU BELIEVE ME -4992-23283-0019 I WILL MAKE NO UNJUST USE OF WHAT I KNOW HE REPLIED WITH FIRMNESS I BELIEVE YOU MY LORD -4992-23283-0020 I HAVE NEVER YET HOWEVER BEEN VANQUISHED BY THEM AND EVEN UPON THIS OCCASION MY REASON SHALL COMBAT THEM TO THE LAST AND MY REASON SHALL FAIL ME BEFORE I DO WRONG -4992-41797-0000 YES DEAD THESE FOUR YEARS AN A GOOD JOB FOR HER TOO -4992-41797-0001 WELL AS I SAY IT'S AN AWFUL QUEER WORLD THEY CLAP ALL THE BURGLARS INTO JAIL AND THE MURDERERS AND THE WIFE BEATERS I'VE ALLERS THOUGHT A GENTLE REPROOF WOULD BE ENOUGH PUNISHMENT FOR A WIFE BEATER CAUSE HE PROBABLY HAS A LOT O PROVOCATION THAT NOBODY KNOWS AND THE FIREBUGS CAN'T THINK O THE RIGHT NAME SOMETHING LIKE CENDENARIES AN THE BREAKERS O THE PEACE AN WHAT NOT AN YET THE LAW HAS NOTHIN TO SAY TO A MAN LIKE HEN LORD -4992-41797-0002 GRANDFATHER WAS ALEXANDER CAREY L L D DOCTOR OF LAWS THAT IS -4992-41797-0003 MISTER POPHAM LAID DOWN HIS BRUSH -4992-41797-0004 I SWAN TO MAN HE EJACULATED IF YOU DON'T WORK HARD YOU CAN'T KEEP UP WITH THE TIMES DOCTOR OF LAWS -4992-41797-0005 DONE HE AIN'T DONE A THING HE'D OUGHTER SENCE HE WAS BORN -4992-41797-0006 HE KEEPS THE THOU SHALT NOT COMMANDMENTS FIRST RATE HEN LORD DOES -4992-41797-0007 HE GIVE UP HIS POSITION AND SHUT THE FAMILY UP IN THAT TOMB OF A HOUSE SO T HE COULD STUDY HIS BOOKS -4992-41797-0008 MISTER POPHAM EXAGGERATED NOTHING BUT ON THE CONTRARY LEFT MUCH UNSAID IN HIS NARRATIVE OF THE FAMILY AT THE HOUSE OF LORDS -4992-41797-0009 HENRY LORD WITH THE DEGREE OF PH D 
TO HIS CREDIT HAD BEEN PROFESSOR OF ZOOLOGY AT A NEW ENGLAND COLLEGE BUT HAD RESIGNED HIS POST IN ORDER TO WRITE A SERIES OF SCIENTIFIC TEXT BOOKS -4992-41797-0010 ALWAYS IRRITABLE COLD INDIFFERENT HE HAD GROWN RAPIDLY MORE SO AS YEARS WENT ON -4992-41797-0011 WHATEVER APPEALED TO HER SENSE OF BEAUTY WAS STRAIGHTWAY TRANSFERRED TO PAPER OR CANVAS -4992-41797-0012 SHE IS WILD TO KNOW HOW TO DO THINGS -4992-41797-0013 SHE MAKES EFFORT AFTER EFFORT TREMBLING WITH EAGERNESS AND WHEN SHE FAILS TO REPRODUCE WHAT SHE SEES SHE WORKS HERSELF INTO A FRENZY OF GRIEF AND DISAPPOINTMENT -4992-41797-0014 WHEN SHE COULD NOT MAKE A RABBIT OR A BIRD LOOK REAL ON PAPER SHE SEARCHED IN HER FATHER'S BOOKS FOR PICTURES OF ITS BONES -4992-41797-0015 CYRIL THERE MUST BE SOME BETTER WAY OF DOING I JUST DRAW THE OUTLINE OF AN ANIMAL AND THEN I PUT HAIRS OR FEATHERS ON IT THEY HAVE NO BODIES -4992-41797-0016 THEY COULDN'T RUN NOR MOVE THEY'RE JUST PASTEBOARD -4992-41797-0017 HE WOULDN'T SEARCH SO DON'T WORRY REPLIED CYRIL QUIETLY AND THE TWO LOOKED AT EACH OTHER AND KNEW THAT IT WAS SO -4992-41797-0018 THERE IN THE CEDAR HOLLOW THEN LIVED OLIVE LORD AN ANGRY RESENTFUL LITTLE CREATURE WEIGHED DOWN BY A FIERCE SENSE OF INJURY -4992-41797-0019 OLIVE'S MOURNFUL BLACK EYES MET NANCY'S SPARKLING BROWN ONES -4992-41797-0020 NANCY'S CURLY CHESTNUT CROP SHONE IN THE SUN AND OLIVE'S THICK BLACK PLAITS LOOKED BLACKER BY CONTRAST -4992-41797-0021 SHE'S WONDERFUL MORE WONDERFUL THAN ANYBODY WE'VE EVER SEEN ANYWHERE AND SHE DRAWS BETTER THAN THE TEACHER IN CHARLESTOWN -4992-41797-0022 SHE'S OLDER THAN I AM BUT SO TINY AND SAD AND SHY THAT SHE SEEMS LIKE A CHILD -4992-41806-0000 NATTY HARMON TRIED THE KITCHEN PUMP SECRETLY SEVERAL TIMES DURING THE EVENING FOR THE WATER HAD TO RUN UP HILL ALL THE WAY FROM THE WELL TO THE KITCHEN SINK AND HE BELIEVED THIS TO BE A CONTINUAL MIRACLE THAT MIGHT GIVE OUT AT ANY MOMENT -4992-41806-0001 TO NIGHT THERE WAS NO NEED OF EXTRA HEAT AND THERE WERE GREAT CEREMONIES TO 
BE OBSERVED IN LIGHTING THE FIRES ON THE HEARTHSTONES -4992-41806-0002 THEY BEGAN WITH THE ONE IN THE FAMILY SITTING ROOM COLONEL WHEELER RALPH THURSTON MISTER AND MISSUS BILL HARMON WITH NATTY AND RUFUS MISTER AND MISSUS POPHAM WITH DIGBY AND LALLIE JOY ALL STANDING IN ADMIRING GROUPS AND THRILLING WITH DELIGHT AT THE ORDER OF EVENTS -4992-41806-0003 KATHLEEN WAVED THE TORCH TO AND FRO AS SHE RECITED SOME BEAUTIFUL LINES WRITTEN FOR SOME SUCH PURPOSE AS THAT WHICH CALLED THEM TOGETHER TO NIGHT -4992-41806-0004 BURN FIRE BURN FLICKER FLICKER FLAME -4992-41806-0005 NEXT CAME OLIVE'S TURN TO HELP IN THE CEREMONIES -4992-41806-0006 RALPH THURSTON HAD FOUND A LINE OF LATIN FOR THEM IN HIS BELOVED HORACE TIBI SPLENDET FOCUS FOR YOU THE HEARTH FIRE SHINES -4992-41806-0007 OLIVE HAD PAINTED THE MOTTO ON A LONG NARROW PANEL OF CANVAS AND GIVING IT TO MISTER POPHAM STOOD BY THE FIRESIDE WHILE HE DEFTLY FITTED IT INTO THE PLACE PREPARED FOR IT -4992-41806-0008 OLIVE HAS ANOTHER LOVELY GIFT FOR THE YELLOW HOUSE SAID MOTHER CAREY RISING AND TO CARRY OUT THE NEXT PART OF THE PROGRAMME WE SHALL HAVE TO GO IN PROCESSION UPSTAIRS TO MY BEDROOM -4992-41806-0009 EXCLAIMED BILL HARMON TO HIS WIFE AS THEY WENT THROUGH THE LIGHTED HALL -4992-41806-0010 AIN'T THEY THE GREATEST -4992-41806-0011 MOTHER CAREY POURED COFFEE NANCY CHOCOLATE AND THE OTHERS HELPED SERVE THE SANDWICHES AND CAKE DOUGHNUTS AND TARTS -4992-41806-0012 AT THAT MOMENT THE GENTLEMAN ENTERED BEARING A HUGE OBJECT CONCEALED BY A PIECE OF GREEN FELT -4992-41806-0013 APPROACHING THE DINING TABLE HE CAREFULLY PLACED THE ARTICLE IN THE CENTRE AND REMOVED THE CLOTH -4992-41806-0014 THINKS I TO MYSELF I NEVER SEEN ANYTHING OSH POPHAM COULDN'T MEND IF HE TOOK TIME ENOUGH AND GLUE ENOUGH SO I CARRIED THIS LITTLE FELLER HOME IN A BUSHEL BASKET ONE NIGHT LAST MONTH AN I'VE SPENT ELEVEN EVENIN'S PUTTIN HIM TOGETHER -4992-41806-0015 MISSUS HARMON THOUGHT HE SANG TOO MUCH AND TOLD HER HUSBAND PRIVATELY THAT IF HE WAS A CANARY BIRD 
SHE SHOULD WANT TO KEEP A TABLE COVER OVER HIS HEAD MOST OF THE TIME BUT HE WAS IMMENSELY POPULAR WITH THE REST OF HIS AUDIENCE -4992-41806-0016 THE FACE OF THE MAHOGANY SHONE WITH DELIGHT AND WHY NOT WHEN IT WAS DOING EVERYTHING ALMOST EVERYTHING WITHIN THE SCOPE OF A PIANO AND YET THE FAMILY HAD ENJOYED WEEKS OF GOOD NOURISHING MEALS ON WHAT HAD BEEN SAVED BY ITS EXERTIONS -4992-41806-0017 WE SHUT OUR EYES THE FLOWERS BLOOM ON WE MURMUR BUT THE CORN EARS FILL WE CHOOSE THE SHADOW BUT THE SUN THAT CASTS IT SHINES BEHIND US STILL -5105-28233-0000 LENGTH OF SERVICE FOURTEEN YEARS THREE MONTHS AND FIVE DAYS -5105-28233-0001 HE SEEMED BORN TO PLEASE WITHOUT BEING CONSCIOUS OF THE POWER HE POSSESSED -5105-28233-0002 IT MUST BE OWNED AND NO ONE WAS MORE READY TO CONFESS IT THAN HIMSELF THAT HIS LITERARY ATTAINMENTS WERE BY NO MEANS OF A HIGH ORDER -5105-28233-0003 WE DON'T SPIN TOPS IS A FAVORITE SAYING AMONGST ARTILLERY OFFICERS INDICATING THAT THEY DO NOT SHIRK THEIR DUTY BY FRIVOLOUS PURSUITS BUT IT MUST BE CONFESSED THAT SERVADAC BEING NATURALLY IDLE WAS VERY MUCH GIVEN TO SPINNING TOPS -5105-28233-0004 ONCE IN ACTION HE WAS LEADING A DETACHMENT OF INFANTRY THROUGH AN INTRENCHMENT -5105-28233-0005 SOMETIMES HE WOULD WANDER ON FOOT UPON THE SANDY SHORE AND SOMETIMES HE WOULD ENJOY A RIDE ALONG THE SUMMIT OF THE CLIFF ALTOGETHER BEING IN NO HURRY AT ALL TO BRING HIS TASK TO AN END -5105-28233-0006 NO CATHEDRAL NOT EVEN BURGOS ITSELF COULD VIE WITH THE CHURCH AT MONTMARTRE -5105-28233-0007 BEN ZOOF'S MOST AMBITIOUS DESIRE WAS TO INDUCE THE CAPTAIN TO GO WITH HIM AND END HIS DAYS IN HIS MUCH LOVED HOME AND SO INCESSANTLY WERE SERVADAC'S EARS BESIEGED WITH DESCRIPTIONS OF THE UNPARALLELED BEAUTIES AND ADVANTAGES OF THIS EIGHTEENTH ARRONDISSEMENT OF PARIS THAT HE COULD SCARCELY HEAR THE NAME OF MONTMARTRE WITHOUT A CONSCIOUS THRILL OF AVERSION -5105-28233-0008 WHEN A PRIVATE IN THE EIGHTH CAVALRY HE HAD BEEN ON THE POINT OF QUITTING THE ARMY AT TWENTY EIGHT YEARS OF AGE 
BUT UNEXPECTEDLY HE HAD BEEN APPOINTED ORDERLY TO CAPTAIN SERVADAC -5105-28233-0009 THE BOND OF UNION THUS EFFECTED COULD NEVER BE SEVERED AND ALTHOUGH BEN ZOOF'S ACHIEVEMENTS HAD FAIRLY EARNED HIM THE RIGHT OF RETIREMENT HE FIRMLY DECLINED ALL HONORS OR ANY PENSION THAT MIGHT PART HIM FROM HIS SUPERIOR OFFICER -5105-28233-0010 UNLIKE HIS MASTER HE MADE NO PRETENSION TO ANY GIFT OF POETIC POWER BUT HIS INEXHAUSTIBLE MEMORY MADE HIM A LIVING ENCYCLOPAEDIA AND FOR HIS STOCK OF ANECDOTES AND TROOPER'S TALES HE WAS MATCHLESS -5105-28240-0000 FAST AS HIS LEGS COULD CARRY HIM SERVADAC HAD MADE HIS WAY TO THE TOP OF THE CLIFF -5105-28240-0001 IT WAS QUITE TRUE THAT A VESSEL WAS IN SIGHT HARDLY MORE THAN SIX MILES FROM THE SHORE BUT OWING TO THE INCREASE IN THE EARTH'S CONVEXITY AND THE CONSEQUENT LIMITATION OF THE RANGE OF VISION THE RIGGING OF THE TOPMASTS ALONE WAS VISIBLE ABOVE THE WATER -5105-28240-0002 EXCLAIMED SERVADAC KEEPING HIS EYE UNMOVED AT HIS TELESCOPE -5105-28240-0003 SHE IS UNDER SAIL BUT SHE IS COUNT TIMASCHEFF'S YACHT HE WAS RIGHT -5105-28240-0004 IF THE COUNT WERE ON BOARD A STRANGE FATALITY WAS BRINGING HIM TO THE PRESENCE OF HIS RIVAL -5105-28240-0005 HE RECKONED THEREFORE NOT ONLY UPON ASCERTAINING THE EXTENT OF THE LATE CATASTROPHE BUT UPON LEARNING ITS CAUSE -5105-28240-0006 THE WIND BEING ADVERSE THE DOBRYNA DID NOT MAKE VERY RAPID PROGRESS BUT AS THE WEATHER IN SPITE OF A FEW CLOUDS REMAINED CALM AND THE SEA WAS QUITE SMOOTH SHE WAS ENABLED TO HOLD A STEADY COURSE -5105-28240-0007 SERVADAC TOOK IT FOR GRANTED THAT THE DOBRYNA WAS ENDEAVORING TO PUT IN -5105-28240-0008 A NARROW CHANNEL FORMED A PASSAGE THROUGH THE RIDGE OF ROCKS THAT PROTECTED IT FROM THE OPEN SEA AND WHICH EVEN IN THE ROUGHEST WEATHER WOULD ENSURE THE CALMNESS OF ITS WATERS -5105-28240-0009 SLIGHTLY CHANGING HER COURSE SHE FIRST STRUCK HER MAINSAIL AND IN ORDER TO FACILITATE THE MOVEMENTS OF HER HELMSMAN SOON CARRIED NOTHING BUT HER TWO TOPSAILS BRIGANTINE AND JIB 
-5105-28240-0010 CAPTAIN SERVADAC HASTENED TOWARDS HIM -5105-28240-0011 I LEFT YOU ON A CONTINENT AND HERE I HAVE THE HONOR OF FINDING YOU ON AN ISLAND -5105-28240-0012 NEVER MIND NOW INTERPOSED THE CAPTAIN WE WILL TALK OF THAT BY AND BY -5105-28240-0013 NOTHING MORE THAN YOU KNOW YOURSELF -5105-28240-0014 ARE YOU CERTAIN THAT THIS IS THE MEDITERRANEAN -5105-28240-0015 FOR SOME MOMENTS HE SEEMED PERFECTLY STUPEFIED THEN RECOVERING HIMSELF HE BEGAN TO OVERWHELM THE COUNT WITH A TORRENT OF QUESTIONS -5105-28240-0016 TO ALL THESE INQUIRIES THE COUNT RESPONDED IN THE AFFIRMATIVE -5105-28240-0017 SOME MYSTERIOUS FORCE SEEMED TO HAVE BROUGHT ABOUT A CONVULSION OF THE ELEMENTS -5105-28240-0018 YOU WILL TAKE ME ON BOARD COUNT WILL YOU NOT -5105-28240-0019 MY YACHT IS AT YOUR SERVICE SIR EVEN SHOULD YOU REQUIRE TO MAKE A TOUR ROUND THE WORLD -5105-28240-0020 THE COUNT SHOOK HIS HEAD -5105-28240-0021 BEFORE STARTING IT WAS INDISPENSABLE THAT THE ENGINE OF THE DOBRYNA SHOULD BE REPAIRED TO SAIL UNDER CANVAS ONLY WOULD IN CONTRARY WINDS AND ROUGH SEAS BE BOTH TEDIOUS AND DIFFICULT -5105-28240-0022 IT WAS ON THE LAST DAY OF JANUARY THAT THE REPAIRS OF THE SCHOONER WERE COMPLETED -5105-28240-0023 A SLIGHT DIMINUTION IN THE EXCESSIVELY HIGH TEMPERATURE WHICH HAD PREVAILED FOR THE LAST FEW WEEKS WAS THE ONLY APPARENT CHANGE IN THE GENERAL ORDER OF THINGS BUT WHETHER THIS WAS TO BE ATTRIBUTED TO ANY ALTERATION IN THE EARTH'S ORBIT WAS A QUESTION WHICH WOULD STILL REQUIRE SEVERAL DAYS TO DECIDE -5105-28240-0024 DOUBTS NOW AROSE AND SOME DISCUSSION FOLLOWED WHETHER OR NOT IT WAS DESIRABLE FOR BEN ZOOF TO ACCOMPANY HIS MASTER -5105-28241-0000 HER SEA GOING QUALITIES WERE EXCELLENT AND WOULD HAVE AMPLY SUFFICED FOR A CIRCUMNAVIGATION OF THE GLOBE -5105-28241-0001 AFTER AN APPRENTICESHIP ON A MERCHANT SHIP HE HAD ENTERED THE IMPERIAL NAVY AND HAD ALREADY REACHED THE RANK OF LIEUTENANT WHEN THE COUNT APPOINTED HIM TO THE CHARGE OF HIS OWN PRIVATE YACHT IN WHICH HE WAS ACCUSTOMED TO SPEND 
BY FAR THE GREATER PART OF HIS TIME THROUGHOUT THE WINTER GENERALLY CRUISING IN THE MEDITERRANEAN WHILST IN THE SUMMER HE VISITED MORE NORTHERN WATERS -5105-28241-0002 THE LATE ASTOUNDING EVENTS HOWEVER HAD RENDERED PROCOPE MANIFESTLY UNEASY AND NOT THE LESS SO FROM HIS CONSCIOUSNESS THAT THE COUNT SECRETLY PARTOOK OF HIS OWN ANXIETY -5105-28241-0003 STEAM UP AND CANVAS SPREAD THE SCHOONER STARTED EASTWARDS -5105-28241-0004 ALTHOUGH ONLY A MODERATE BREEZE WAS BLOWING THE SEA WAS ROUGH A CIRCUMSTANCE TO BE ACCOUNTED FOR ONLY BY THE DIMINUTION IN THE FORCE OF THE EARTH'S ATTRACTION RENDERING THE LIQUID PARTICLES SO BUOYANT THAT BY THE MERE EFFECT OF OSCILLATION THEY WERE CARRIED TO A HEIGHT THAT WAS QUITE UNPRECEDENTED -5105-28241-0005 FOR A FEW MILES SHE FOLLOWED THE LINE HITHERTO PRESUMABLY OCCUPIED BY THE COAST OF ALGERIA BUT NO LAND APPEARED TO THE SOUTH -5105-28241-0006 THE LOG AND THE COMPASS THEREFORE WERE ABLE TO BE CALLED UPON TO DO THE WORK OF THE SEXTANT WHICH HAD BECOME UTTERLY USELESS -5105-28241-0007 THERE IS NO FEAR OF THAT SIR -5105-28241-0008 THE EARTH HAS UNDOUBTEDLY ENTERED UPON A NEW ORBIT BUT SHE IS NOT INCURRING ANY PROBABLE RISK OF BEING PRECIPITATED ONTO THE SUN -5105-28241-0009 AND WHAT DEMONSTRATION DO YOU OFFER ASKED SERVADAC EAGERLY THAT IT WILL NOT HAPPEN -5105-28241-0010 OCEAN REIGNED SUPREME -5105-28241-0011 ALL THE IMAGES OF HIS PAST LIFE FLOATED UPON HIS MEMORY HIS THOUGHTS SPED AWAY TO HIS NATIVE FRANCE ONLY TO RETURN AGAIN TO WONDER WHETHER THE DEPTHS OF OCEAN WOULD REVEAL ANY TRACES OF THE ALGERIAN METROPOLIS -5105-28241-0012 IS IT NOT IMPOSSIBLE HE MURMURED ALOUD THAT ANY CITY SHOULD DISAPPEAR SO COMPLETELY -5105-28241-0013 WOULD NOT THE LOFTIEST EMINENCES OF THE CITY AT LEAST BE VISIBLE -5105-28241-0014 ANOTHER CIRCUMSTANCE WAS MOST REMARKABLE -5105-28241-0015 TO THE SURPRISE OF ALL AND ESPECIALLY OF LIEUTENANT PROCOPE THE LINE INDICATED A BOTTOM AT A NEARLY UNIFORM DEPTH OF FROM FOUR TO FIVE FATHOMS AND ALTHOUGH THE SOUNDING WAS 
PERSEVERED WITH CONTINUOUSLY FOR MORE THAN TWO HOURS OVER A CONSIDERABLE AREA THE DIFFERENCES OF LEVEL WERE INSIGNIFICANT NOT CORRESPONDING IN ANY DEGREE TO WHAT WOULD BE EXPECTED OVER THE SITE OF A CITY THAT HAD BEEN TERRACED LIKE THE SEATS OF AN AMPHITHEATER -5105-28241-0016 YOU MUST SEE LIEUTENANT I SHOULD THINK THAT WE ARE NOT SO NEAR THE COAST OF ALGERIA AS YOU IMAGINED -5105-28241-0017 AFTER PONDERING AWHILE HE SAID IF WE WERE FARTHER AWAY I SHOULD EXPECT TO FIND A DEPTH OF TWO OR THREE HUNDRED FATHOMS INSTEAD OF FIVE FATHOMS FIVE FATHOMS -5105-28241-0018 ITS DEPTH REMAINED INVARIABLE STILL FOUR OR AT MOST FIVE FATHOMS AND ALTHOUGH ITS BOTTOM WAS ASSIDUOUSLY DREDGED IT WAS ONLY TO PROVE IT BARREN OF MARINE PRODUCTION OF ANY TYPE -5105-28241-0019 NOTHING WAS TO BE DONE BUT TO PUT ABOUT AND RETURN IN DISAPPOINTMENT TOWARDS THE NORTH -5142-33396-0000 AT ANOTHER TIME HARALD ASKED -5142-33396-0001 WHAT IS YOUR COUNTRY OLAF HAVE YOU ALWAYS BEEN A THRALL THE THRALL'S EYES FLASHED -5142-33396-0002 TWO HUNDRED WARRIORS FEASTED IN HIS HALL AND FOLLOWED HIM TO BATTLE -5142-33396-0003 THE REST OF YOU OFF A VIKING HE HAD THREE SHIPS -5142-33396-0004 THESE HE GAVE TO THREE OF MY BROTHERS -5142-33396-0005 BUT I STAYED THAT SPRING AND BUILT ME A BOAT -5142-33396-0006 I MADE HER FOR ONLY TWENTY OARS BECAUSE I THOUGHT FEW MEN WOULD FOLLOW ME FOR I WAS YOUNG FIFTEEN YEARS OLD -5142-33396-0007 AT THE PROW I CARVED THE HEAD WITH OPEN MOUTH AND FORKED TONGUE THRUST OUT -5142-33396-0008 I PAINTED THE EYES RED FOR ANGER -5142-33396-0009 THERE STAND SO I SAID AND GLARE AND HISS AT MY FOES -5142-33396-0010 IN THE STERN I CURVED THE TAIL UP ALMOST AS HIGH AS THE HEAD -5142-33396-0011 THERE SHE SAT ON THE ROLLERS AS FAIR A SHIP AS I EVER SAW -5142-33396-0012 THEN I WILL GET ME A FARM AND WILL WINTER IN THAT LAND NOW WHO WILL FOLLOW ME -5142-33396-0013 HE IS BUT A BOY THE MEN SAID -5142-33396-0014 THIRTY MEN ONE AFTER ANOTHER RAISED THEIR HORNS AND SAID -5142-33396-0015 AS OUR BOAT 
FLASHED DOWN THE ROLLERS INTO THE WATER I MADE THIS SONG AND SANG IT -5142-33396-0016 SO WE HARRIED THE COAST OF NORWAY -5142-33396-0017 WE ATE AT MANY MEN'S TABLES UNINVITED -5142-33396-0018 MY DRAGON'S BELLY IS NEVER FULL AND ON BOARD WENT THE GOLD -5142-33396-0019 OH IT IS BETTER TO LIVE ON THE SEA AND LET OTHER MEN RAISE YOUR CROPS AND COOK YOUR MEALS -5142-33396-0020 A HOUSE SMELLS OF SMOKE A SHIP SMELLS OF FROLIC -5142-33396-0021 UP AND DOWN THE WATER WE WENT TO GET MUCH WEALTH AND MUCH FROLIC -5142-33396-0022 WHAT OF THE FARM OLAF NOT YET I ANSWERED VIKING IS BETTER FOR SUMMER -5142-33396-0023 IT WAS SO DARK THAT I COULD SEE NOTHING BUT A FEW SPARKS ON THE HEARTH -5142-33396-0024 I STOOD WITH MY BACK TO THE WALL FOR I WANTED NO SWORD REACHING OUT OF THE DARK FOR ME -5142-33396-0025 COME COME I CALLED WHEN NO ONE OBEYED A FIRE -5142-33396-0026 MY MEN LAUGHED YES A STINGY HOST -5142-33396-0027 HE ACTS AS THOUGH HE HAD NOT EXPECTED US -5142-33396-0028 ON A BENCH IN A FAR CORNER WERE A DOZEN PEOPLE HUDDLED TOGETHER -5142-33396-0029 BRING IN THE TABLE WE ARE HUNGRY -5142-33396-0030 THE THRALLS WERE BRINGING IN A GREAT POT OF MEAT -5142-33396-0031 THEY SET UP A CRANE OVER THE FIRE AND HUNG THE POT UPON IT AND WE SAT AND WATCHED IT BOIL WHILE WE JOKED AT LAST THE SUPPER BEGAN -5142-33396-0032 THE FARMER SAT GLOOMILY ON THE BENCH AND WOULD NOT EAT AND YOU CANNOT WONDER FOR HE SAW US PUTTING POTFULS OF HIS GOOD BEEF AND BASKET LOADS OF BREAD INTO OUR BIG MOUTHS -5142-33396-0033 YOU WOULD NOT EAT WITH US YOU CANNOT SAY NO TO HALF OF MY ALE I DRINK THIS TO YOUR HEALTH -5142-33396-0034 THEN I DRANK HALF OF THE HORNFUL AND SENT THE REST ACROSS THE FIRE TO THE FARMER HE TOOK IT AND SMILED SAYING -5142-33396-0035 DID YOU EVER HAVE SUCH A LORDLY GUEST BEFORE I WENT ON -5142-33396-0036 SO I WILL GIVE OUT THIS LAW THAT MY MEN SHALL NEVER LEAVE YOU ALONE -5142-33396-0037 HAKON THERE SHALL BE YOUR CONSTANT COMPANION FRIEND FARMER -5142-33396-0038 HE SHALL NOT LEAVE YOU DAY OR 
NIGHT WHETHER YOU ARE WORKING OR PLAYING OR SLEEPING -5142-33396-0039 I NAMED NINE OTHERS AND SAID -5142-33396-0040 AND THESE SHALL FOLLOW YOUR THRALLS IN THE SAME WAY -5142-33396-0041 SO I SET GUARDS OVER EVERY ONE IN THAT HOUSE -5142-33396-0042 SO NO TALES GOT OUT TO THE NEIGHBORS BESIDES IT WAS A LONELY PLACE AND BY GOOD LUCK NO ONE CAME THAT WAY -5142-33396-0043 THEIR EYES DANCED BIG THORLEIF STOOD UP AND STRETCHED HIMSELF -5142-33396-0044 I AM STIFF WITH LONG SITTING HE SAID I ITCH FOR A FIGHT I TURNED TO THE FARMER -5142-33396-0045 THIS IS OUR LAST FEAST WITH YOU I SAID -5142-33396-0046 BY THE BEARD OF ODIN I CRIED YOU HAVE TAKEN OUR JOKE LIKE A MAN -5142-33396-0047 MY MEN POUNDED THE TABLE WITH THEIR FISTS -5142-33396-0048 BY THE HAMMER OF THOR SHOUTED GRIM HERE IS NO STINGY COWARD -5142-33396-0049 HERE FRIEND TAKE IT AND HE THRUST IT INTO THE FARMER'S HAND -5142-33396-0050 MAY YOU DRINK HEART'S EASE FROM IT FOR MANY YEARS -5142-33396-0051 AND WITH IT I LEAVE YOU A NAME SIF THE FRIENDLY I SHALL HOPE TO DRINK WITH YOU SOMETIME IN VALHALLA -5142-33396-0052 HERE IS A RING FOR SIF THE FRIENDLY AND HERE IS A BRACELET A SWORD WOULD NOT BE ASHAMED TO HANG AT YOUR SIDE -5142-33396-0053 I TOOK FIVE GREAT BRACELETS OF GOLD FROM OUR TREASURE CHEST AND GAVE THEM TO HIM -5142-33396-0054 THAT IS THE BEST WAY TO DECIDE FOR THE SPEAR WILL ALWAYS POINT SOMEWHERE AND ONE THING IS AS GOOD AS ANOTHER -5142-33396-0055 THAT TIME IT POINTED US INTO YOUR FATHER'S SHIPS -5142-33396-0056 HERE THEY SAID IS A RASCAL WHO HAS BEEN HARRYING OUR COASTS -5142-33396-0057 WE SUNK HIS SHIP AND MEN BUT HIM WE BROUGHT TO YOU -5142-33396-0058 A ROBBER VIKING SAID THE KING AND SCOWLED AT ME -5142-33396-0059 YES AND WITH ALL YOUR FINGERS IT TOOK YOU A YEAR TO CATCH ME THE KING FROWNED MORE ANGRILY -5142-33396-0060 TAKE HIM OUT THORKEL AND LET HIM TASTE YOUR SWORD -5142-33396-0061 YOUR MOTHER THE QUEEN WAS STANDING BY -5142-33396-0062 NOW SHE PUT HER HAND ON HIS ARM AND SMILED AND SAID 
-5142-33396-0063 AND WOULD HE NOT BE A GOOD GIFT FOR OUR BABY -5142-33396-0064 YOUR FATHER THOUGHT A MOMENT THEN LOOKED AT YOUR MOTHER AND SMILED -5142-33396-0065 SOFT HEART HE SAID GENTLY TO HER THEN TO THORKEL WELL LET HIM GO THORKEL -5142-33396-0066 THEN HE TURNED TO ME AGAIN FROWNING -5142-33396-0067 BUT YOUNG SHARP TONGUE NOW THAT WE HAVE CAUGHT YOU WE WILL PUT YOU INTO A TRAP THAT YOU CANNOT GET OUT OF -5142-33396-0068 SO I LIVED AND NOW AM YOUR TOOTH THRALL WELL IT IS THE LUCK OF WAR -5142-36377-0000 IT WAS ONE OF THE MASTERLY AND CHARMING STORIES OF DUMAS THE ELDER -5142-36377-0001 IN FIVE MINUTES I WAS IN A NEW WORLD AND MY MELANCHOLY ROOM WAS FULL OF THE LIVELIEST FRENCH COMPANY -5142-36377-0002 THE SOUND OF AN IMPERATIVE AND UNCOMPROMISING BELL RECALLED ME IN DUE TIME TO THE REGIONS OF REALITY -5142-36377-0003 AMBROSE MET ME AT THE BOTTOM OF THE STAIRS AND SHOWED ME THE WAY TO THE SUPPER ROOM -5142-36377-0004 SHE SIGNED TO ME WITH A GHOSTLY SOLEMNITY TO TAKE THE VACANT PLACE ON THE LEFT OF HER FATHER -5142-36377-0005 THE DOOR OPENED AGAIN WHILE I WAS STILL STUDYING THE TWO BROTHERS WITHOUT I HONESTLY CONFESS BEING VERY FAVORABLY IMPRESSED BY EITHER OF THEM -5142-36377-0006 A NEW MEMBER OF THE FAMILY CIRCLE WHO INSTANTLY ATTRACTED MY ATTENTION ENTERED THE ROOM -5142-36377-0007 A LITTLE CRACKED THAT IN THE POPULAR PHRASE WAS MY IMPRESSION OF THE STRANGER WHO NOW MADE HIS APPEARANCE IN THE SUPPER ROOM -5142-36377-0008 MISTER MEADOWCROFT THE ELDER HAVING NOT SPOKEN ONE WORD THUS FAR HIMSELF INTRODUCED THE NEWCOMER TO ME WITH A SIDE GLANCE AT HIS SONS WHICH HAD SOMETHING LIKE DEFIANCE IN IT A GLANCE WHICH AS I WAS SORRY TO NOTICE WAS RETURNED WITH THE DEFIANCE ON THEIR SIDE BY THE TWO YOUNG MEN -5142-36377-0009 PHILIP LEFRANK THIS IS MY OVERLOOKER MISTER JAGO SAID THE OLD MAN FORMALLY PRESENTING US -5142-36377-0010 HE IS NOT WELL HE HAS COME OVER THE OCEAN FOR REST AND CHANGE OF SCENE -5142-36377-0011 MISTER JAGO IS AN AMERICAN PHILIP -5142-36377-0012 MAKE 
ACQUAINTANCE WITH MISTER JAGO SIT TOGETHER -5142-36377-0013 THEY POINTEDLY DREW BACK FROM JOHN JAGO AS HE APPROACHED THE EMPTY CHAIR NEXT TO ME AND MOVED ROUND TO THE OPPOSITE SIDE OF THE TABLE -5142-36377-0014 A PRETTY GIRL AND SO FAR AS I COULD JUDGE BY APPEARANCES A GOOD GIRL TOO DESCRIBING HER GENERALLY I MAY SAY THAT SHE HAD A SMALL HEAD WELL CARRIED AND WELL SET ON HER SHOULDERS BRIGHT GRAY EYES THAT LOOKED AT YOU HONESTLY AND MEANT WHAT THEY LOOKED A TRIM SLIGHT LITTLE FIGURE TOO SLIGHT FOR OUR ENGLISH NOTIONS OF BEAUTY A STRONG AMERICAN ACCENT AND A RARE THING IN AMERICA A PLEASANTLY TONED VOICE WHICH MADE THE ACCENT AGREEABLE TO ENGLISH EARS -5142-36377-0015 OUR FIRST IMPRESSIONS OF PEOPLE ARE IN NINE CASES OUT OF TEN THE RIGHT IMPRESSIONS -5142-36377-0016 FOR ONCE IN A WAY I PROVED A TRUE PROPHET -5142-36377-0017 THE ONLY CHEERFUL CONVERSATION WAS THE CONVERSATION ACROSS THE TABLE BETWEEN NAOMI AND ME -5142-36377-0018 HE LOOKED UP AT NAOMI DOUBTINGLY FROM HIS PLATE AND LOOKED DOWN AGAIN SLOWLY WITH A FROWN -5142-36377-0019 WHEN I ADDRESSED HIM HE ANSWERED CONSTRAINEDLY -5142-36377-0020 A MORE DREARY AND MORE DISUNITED FAMILY PARTY I NEVER SAT AT THE TABLE WITH -5142-36377-0021 ENVY HATRED MALICE AND UNCHARITABLENESS ARE NEVER SO ESSENTIALLY DETESTABLE TO MY MIND AS WHEN THEY ARE ANIMATED BY A SENSE OF PROPRIETY AND WORK UNDER THE SURFACE BUT FOR MY INTEREST IN NAOMI AND MY OTHER INTEREST IN THE LITTLE LOVE LOOKS WHICH I NOW AND THEN SURPRISED PASSING BETWEEN HER AND AMBROSE I SHOULD NEVER HAVE SAT THROUGH THAT SUPPER -5142-36377-0022 I WISH YOU GOOD NIGHT SHE LAID HER BONY HANDS ON THE BACK OF MISTER MEADOWCROFT'S INVALID CHAIR CUT HIM SHORT IN HIS FAREWELL SALUTATION TO ME AND WHEELED HIM OUT TO HIS BED AS IF SHE WERE WHEELING HIM OUT TO HIS GRAVE -5142-36377-0023 YOU WERE QUITE RIGHT TO SAY NO AMBROSE BEGAN NEVER SMOKE WITH JOHN JAGO HIS CIGARS WILL POISON YOU -5142-36377-0024 NAOMI SHOOK HER FOREFINGER REPROACHFULLY AT THEM AS IF THE TWO STURDY YOUNG 
FARMERS HAD BEEN TWO CHILDREN -5142-36377-0025 SILAS SLUNK AWAY WITHOUT A WORD OF PROTEST AMBROSE STOOD HIS GROUND EVIDENTLY BENT ON MAKING HIS PEACE WITH NAOMI BEFORE HE LEFT HER SEEING THAT I WAS IN THE WAY I WALKED ASIDE TOWARD A GLASS DOOR AT THE LOWER END OF THE ROOM -5142-36586-0000 IT IS MANIFEST THAT MAN IS NOW SUBJECT TO MUCH VARIABILITY -5142-36586-0001 SO IT IS WITH THE LOWER ANIMALS -5142-36586-0002 THE VARIABILITY OF MULTIPLE PARTS -5142-36586-0003 BUT THIS SUBJECT WILL BE MORE PROPERLY DISCUSSED WHEN WE TREAT OF THE DIFFERENT RACES OF MANKIND -5142-36586-0004 EFFECTS OF THE INCREASED USE AND DISUSE OF PARTS -5142-36600-0000 CHAPTER SEVEN ON THE RACES OF MAN -5142-36600-0001 IN DETERMINING WHETHER TWO OR MORE ALLIED FORMS OUGHT TO BE RANKED AS SPECIES OR VARIETIES NATURALISTS ARE PRACTICALLY GUIDED BY THE FOLLOWING CONSIDERATIONS NAMELY THE AMOUNT OF DIFFERENCE BETWEEN THEM AND WHETHER SUCH DIFFERENCES RELATE TO FEW OR MANY POINTS OF STRUCTURE AND WHETHER THEY ARE OF PHYSIOLOGICAL IMPORTANCE BUT MORE ESPECIALLY WHETHER THEY ARE CONSTANT -5639-40744-0000 ELEVEN O'CLOCK HAD STRUCK IT WAS A FINE CLEAR NIGHT THEY WERE THE ONLY PERSONS ON THE ROAD AND THEY SAUNTERED LEISURELY ALONG TO AVOID PAYING THE PRICE OF FATIGUE FOR THE RECREATION PROVIDED FOR THE TOLEDANS IN THEIR VALLEY OR ON THE BANKS OF THEIR RIVER -5639-40744-0001 SECURE AS HE THOUGHT IN THE CAREFUL ADMINISTRATION OF JUSTICE IN THAT CITY AND THE CHARACTER OF ITS WELL DISPOSED INHABITANTS THE GOOD HIDALGO WAS FAR FROM THINKING THAT ANY DISASTER COULD BEFAL HIS FAMILY -5639-40744-0002 RODOLFO AND HIS COMPANIONS WITH THEIR FACES MUFFLED IN THEIR CLOAKS STARED RUDELY AND INSOLENTLY AT THE MOTHER THE DAUGHTER AND THE SERVANT MAID -5639-40744-0003 IN A MOMENT HE COMMUNICATED HIS THOUGHTS TO HIS COMPANIONS AND IN THE NEXT MOMENT THEY RESOLVED TO TURN BACK AND CARRY HER OFF TO PLEASE RODOLFO FOR THE RICH WHO ARE OPEN HANDED ALWAYS FIND PARASITES READY TO ENCOURAGE THEIR BAD PROPENSITIES AND THUS TO 
CONCEIVE THIS WICKED DESIGN TO COMMUNICATE IT APPROVE IT RESOLVE ON RAVISHING LEOCADIA AND TO CARRY THAT DESIGN INTO EFFECT WAS THE WORK OF A MOMENT -5639-40744-0004 THEY DREW THEIR SWORDS HID THEIR FACES IN THE FLAPS OF THEIR CLOAKS TURNED BACK AND SOON CAME IN FRONT OF THE LITTLE PARTY WHO HAD NOT YET DONE GIVING THANKS TO GOD FOR THEIR ESCAPE FROM THOSE AUDACIOUS MEN -5639-40744-0005 FINALLY THE ONE PARTY WENT OFF EXULTING AND THE OTHER WAS LEFT IN DESOLATION AND WOE -5639-40744-0006 RODOLFO ARRIVED AT HIS OWN HOUSE WITHOUT ANY IMPEDIMENT AND LEOCADIA'S PARENTS REACHED THEIRS HEART BROKEN AND DESPAIRING -5639-40744-0007 MEANWHILE RODOLFO HAD LEOCADIA SAFE IN HIS CUSTODY AND IN HIS OWN APARTMENT -5639-40744-0008 WHO TOUCHES ME AM I IN BED -5639-40744-0009 MOTHER DEAR FATHER DO YOU HEAR ME -5639-40744-0010 IT IS THE ONLY AMENDS I ASK OF YOU FOR THE WRONG YOU HAVE DONE ME -5639-40744-0011 SHE FOUND THE DOOR BUT IT WAS LOCKED OUTSIDE -5639-40744-0012 SHE SUCCEEDED IN OPENING THE WINDOW AND THE MOONLIGHT SHONE IN SO BRIGHTLY THAT SHE COULD DISTINGUISH THE COLOUR OF SOME DAMASK HANGINGS IN THE ROOM -5639-40744-0013 SHE SAW THAT THE BED WAS GILDED AND SO RICH THAT IT SEEMED THAT OF A PRINCE RATHER THAN OF A PRIVATE GENTLEMAN -5639-40744-0014 AMONG OTHER THINGS ON WHICH SHE CAST HER EYES WAS A SMALL CRUCIFIX OF SOLID SILVER STANDING ON A CABINET NEAR THE WINDOW -5639-40744-0015 THIS PERSON WAS RODOLFO WHO THOUGH HE HAD GONE TO LOOK FOR HIS FRIENDS HAD CHANGED HIS MIND IN THAT RESPECT NOT THINKING IT ADVISABLE TO ACQUAINT THEM WITH WHAT HAD PASSED BETWEEN HIM AND THE GIRL -5639-40744-0016 ON THE CONTRARY HE RESOLVED TO TELL THEM THAT REPENTING OF HIS VIOLENCE AND MOVED BY HER TEARS HE HAD ONLY CARRIED HER HALF WAY TOWARDS HIS HOUSE AND THEN LET HER GO -5639-40744-0017 CHOKING WITH EMOTION LEOCADI MADE A SIGN TO HER PARENTS THAT SHE WISHED TO BE ALONE WITH THEM -5639-40744-0018 THAT WOULD BE VERY WELL MY CHILD REPLIED HER FATHER IF YOUR PLAN WERE NOT LIABLE TO BE 
FRUSTRATED BY ORDINARY CUNNING BUT NO DOUBT THIS IMAGE HAS BEEN ALREADY MISSED BY ITS OWNER AND HE WILL HAVE SET IT DOWN FOR CERTAIN THAT IT WAS TAKEN OUT OF THE ROOM BY THE PERSON HE LOCKED UP THERE -5639-40744-0019 WHAT YOU HAD BEST DO MY CHILD IS TO KEEP IT AND PRAY TO IT THAT SINCE IT WAS A WITNESS TO YOUR UNDOING IT WILL DEIGN TO VINDICATE YOUR CAUSE BY ITS RIGHTEOUS JUDGMENT -5639-40744-0020 THUS DID THIS HUMANE AND RIGHT MINDED FATHER COMFORT HIS UNHAPPY DAUGHTER AND HER MOTHER EMBRACING HER AGAIN DID ALL SHE COULD TO SOOTHE HER FEELINGS -5639-40744-0021 SHE MEANWHILE PASSED HER LIFE WITH HER PARENTS IN THE STRICTEST RETIREMENT NEVER LETTING HERSELF BE SEEN BUT SHUNNING EVERY EYE LEST IT SHOULD READ HER MISFORTUNE IN HER FACE -5639-40744-0022 TIME ROLLED ON THE HOUR OF HER DELIVERY ARRIVED IT TOOK PLACE IN THE UTMOST SECRECY HER MOTHER TAKING UPON HER THE OFFICE OF MIDWIFE AND SHE GAVE BIRTH TO A SON ONE OF THE MOST BEAUTIFUL EVER SEEN -5639-40744-0023 WHEN THE BOY WALKED THROUGH THE STREETS BLESSINGS WERE SHOWERED UPON HIM BY ALL WHO SAW HIM BLESSINGS UPON HIS BEAUTY UPON THE MOTHER THAT BORE HIM UPON THE FATHER THAT BEGOT HIM UPON THOSE WHO BROUGHT HIM UP SO WELL -5639-40744-0024 ONE DAY WHEN THE BOY WAS SENT BY HIS GRANDFATHER WITH A MESSAGE TO A RELATION HE PASSED ALONG A STREET IN WHICH THERE WAS A GREAT CONCOURSE OF HORSEMEN -5639-40744-0025 THE BED SHE TOO WELL REMEMBERED WAS THERE AND ABOVE ALL THE CABINET ON WHICH HAD STOOD THE IMAGE SHE HAD TAKEN AWAY WAS STILL ON THE SAME SPOT -5639-40744-0026 LUIS WAS OUT OF DANGER IN A FORTNIGHT IN A MONTH HE ROSE FROM HIS BED AND DURING ALL THAT TIME HE WAS VISITED DAILY BY HIS MOTHER AND GRANDMOTHER AND TREATED BY THE MASTER AND MISTRESS OF THE HOUSE AS IF HE WAS THEIR OWN CHILD -5639-40744-0027 THUS SAYING AND PRESSING THE CRUCIFIX TO HER BREAST SHE FELL FAINTING INTO THE ARMS OF DONA ESTAFANIA WHO AS A GENTLEWOMAN TO WHOSE SEX PITY IS AS NATURAL AS CRUELTY IS TO MAN INSTANTLY PRESSED HER LIPS TO THOSE OF THE 
FAINTING GIRL SHEDDING OVER HER SO MANY TEARS THAT THERE NEEDED NO OTHER SPRINKLING OF WATER TO RECOVER LEOCADIA FROM HER SWOON -5639-40744-0028 I HAVE GREAT THINGS TO TELL YOU SENOR SAID DONA ESTAFANIA TO HER HUSBAND THE CREAM AND SUBSTANCE OF WHICH IS THIS THE FAINTING GIRL BEFORE YOU IS YOUR DAUGHTER AND THAT BOY IS YOUR GRANDSON -5639-40744-0029 THIS TRUTH WHICH I HAVE LEARNED FROM HER LIPS IS CONFIRMED BY HIS FACE IN WHICH WE HAVE BOTH BEHELD THAT OF OUR SON -5639-40744-0030 JUST THEN LEOCADIA CAME TO HERSELF AND EMBRACING THE CROSS SEEMED CHANGED INTO A SEA OF TEARS AND THE GENTLEMAN REMAINED IN UTTER BEWILDERMENT UNTIL HIS WIFE HAD REPEATED TO HIM FROM BEGINNING TO END LEOCADIA'S WHOLE STORY AND HE BELIEVED IT THROUGH THE BLESSED DISPENSATION OF HEAVEN WHICH HAD CONFIRMED IT BY SO MANY CONVINCING TESTIMONIES -5639-40744-0031 SO PERSUASIVE WERE HER ENTREATIES AND SO STRONG HER ASSURANCES THAT NO HARM WHATEVER COULD RESULT TO THEM FROM THE INFORMATION SHE SOUGHT THEY WERE INDUCED TO CONFESS THAT ONE SUMMER'S NIGHT THE SAME SHE HAD MENTIONED THEMSELVES AND ANOTHER FRIEND BEING OUT ON A STROLL WITH RODOLFO THEY HAD BEEN CONCERNED IN THE ABDUCTION OF A GIRL WHOM RODOLFO CARRIED OFF WHILST THE REST OF THEM DETAINED HER FAMILY WHO MADE A GREAT OUTCRY AND WOULD HAVE DEFENDED HER IF THEY COULD -5639-40744-0032 FOR GOD'S SAKE MY LADY MOTHER GIVE ME A WIFE WHO WOULD BE AN AGREEABLE COMPANION NOT ONE WHO WILL DISGUST ME SO THAT WE MAY BOTH BEAR EVENLY AND WITH MUTUAL GOOD WILL THE YOKE IMPOSED ON US BY HEAVEN INSTEAD OF PULLING THIS WAY AND THAT WAY AND FRETTING EACH OTHER TO DEATH -5639-40744-0033 HER BEARING WAS GRACEFUL AND ANIMATED SHE LED HER SON BY THE HAND AND BEFORE HER WALKED TWO MAIDS WITH WAX LIGHTS AND SILVER CANDLESTICKS -5639-40744-0034 ALL ROSE TO DO HER REVERENCE AS IF SOMETHING FROM HEAVEN HAD MIRACULOUSLY APPEARED BEFORE THEM BUT GAZING ON HER ENTRANCED WITH ADMIRATION NOT ONE OF THEM WAS ABLE TO ADDRESS A SINGLE WORD TO HER -5639-40744-0035 SHE 
REFLECTED HOW NEAR SHE STOOD TO THE CRISIS WHICH WAS TO DETERMINE WHETHER SHE WAS TO BE BLESSED OR UNHAPPY FOR EVER AND RACKED BY THE INTENSITY OF HER EMOTIONS SHE SUDDENLY CHANGED COLOUR HER HEAD DROPPED AND SHE FELL FORWARD IN A SWOON INTO THE ARMS OF THE DISMAYED ESTAFANIA -5639-40744-0036 HIS MOTHER HAD LEFT HER TO HIM AS BEING HER DESTINED PROTECTOR BUT WHEN SHE SAW THAT HE TOO WAS INSENSIBLE SHE WAS NEAR MAKING A THIRD AND WOULD HAVE DONE SO HAD HE NOT COME TO HIMSELF -5639-40744-0037 KNOW THEN SON OF MY HEART THAT THIS FAINTING LADY IS YOUR REAL BRIDE I SAY REAL BECAUSE SHE IS THE ONE WHOM YOUR FATHER AND I HAVE CHOSEN FOR YOU AND THE PORTRAIT WAS A PRETENCE -5639-40744-0038 JUST AT THE MOMENT WHEN THE TEARS OF THE PITYING BEHOLDERS FLOWED FASTEST AND THEIR EJACULATIONS WERE MOST EXPRESSIVE OF DESPAIR LEOCADIA GAVE SIGNS OF RECOVERY AND BROUGHT BACK GLADNESS TO THE HEARTS OF ALL -5639-40744-0039 WHEN SHE CAME TO HER SENSES AND BLUSHING TO FIND HERSELF IN RODOLFO'S ARMS WOULD HAVE DISENGAGED HERSELF NO SENORA HE SAID THAT MUST NOT BE STRIVE NOT TO WITHDRAW FROM THE ARMS OF HIM WHO HOLDS YOU IN HIS SOUL -5639-40744-0040 THIS WAS DONE FOR THE EVENT TOOK PLACE AT A TIME WHEN THE CONSENT OF THE PARTIES WAS SUFFICIENT FOR THE CELEBRATION OF A MARRIAGE WITHOUT ANY OF THE PRELIMINARY FORMALITIES WHICH ARE NOW SO PROPERLY REQUIRED -5639-40744-0041 NOR WAS RODOLFO LESS SURPRISED THAN THEY AND THE BETTER TO ASSURE HIMSELF OF SO WONDERFUL A FACT HE BEGGED LEOCADIA TO GIVE HIM SOME TOKEN WHICH SHOULD MAKE PERFECTLY CLEAR TO HIM THAT WHICH INDEED HE DID NOT DOUBT SINCE IT WAS AUTHENTICATED BY HIS PARENTS -5683-32865-0000 YOU KNOW CAPTAIN LAKE -5683-32865-0001 SAID LORD CHELFORD ADDRESSING ME -5683-32865-0002 HE HAD HIS HAND UPON LAKE'S SHOULDER -5683-32865-0003 THEY ARE COUSINS YOU KNOW WE ARE ALL COUSINS -5683-32865-0004 WHATEVER LORD CHELFORD SAID MISS BRANDON RECEIVED IT VERY GRACIOUSLY AND EVEN WITH A MOMENTARY SMILE -5683-32865-0005 BUT HER GREETING TO CAPTAIN LAKE 
WAS MORE THAN USUALLY HAUGHTY AND FROZEN AND HER FEATURES I FANCIED PARTICULARLY PROUD AND PALE -5683-32865-0006 AT DINNER LAKE WAS EASY AND AMUSING -5683-32865-0007 I'M GLAD YOU LIKE IT SAYS WYLDER CHUCKLING BENIGNANTLY ON IT OVER HIS SHOULDER -5683-32865-0008 I BELIEVE I HAVE A LITTLE TASTE THAT WAY THOSE ARE ALL REAL YOU KNOW THOSE JEWELS -5683-32865-0009 AND HE PLACED IT IN THAT GENTLEMAN'S FINGERS WHO NOW TOOK HIS TURN AT THE LAMP AND CONTEMPLATED THE LITTLE PARALLELOGRAM WITH A GLEAM OF SLY AMUSEMENT -5683-32865-0010 I WAS THINKING IT'S VERY LIKE THE ACE OF HEARTS ANSWERED THE CAPTAIN SOFTLY SMILING ON -5683-32865-0011 WHEREUPON LAKE LAUGHED QUIETLY STILL LOOKING ON THE ACE OF HEARTS WITH HIS SLY EYES -5683-32865-0012 AND WYLDER LAUGHED TOO MORE SUDDENLY AND NOISILY THAN THE HUMOUR OF THE JOKE SEEMED QUITE TO CALL FOR AND GLANCED A GRIM LOOK FROM THE CORNERS OF HIS EYES ON LAKE BUT THE GALLANT CAPTAIN DID NOT SEEM TO PERCEIVE IT AND AFTER A FEW SECONDS MORE HE HANDED IT VERY INNOCENTLY BACK TO MISSUS DOROTHY ONLY REMARKING -5683-32865-0013 DO YOU KNOW LAKE OH I REALLY CAN'T TELL BUT HE'LL SOON TIRE OF COUNTRY LIFE -5683-32865-0014 HE'S NOT A MAN FOR COUNTRY QUARTERS -5683-32865-0015 I HAD A HORRID DREAM ABOUT HIM LAST NIGHT THAT -5683-32865-0016 OH I KNOW THAT'S LORNE BRANDON -5683-32865-0017 ALL THE TIME HE WAS TALKING TO ME HIS ANGRY LITTLE EYES WERE FOLLOWING LAKE -5683-32866-0000 MISS LAKE DECLINED THE CARRIAGE TO NIGHT -5683-32866-0001 AND HE ADDED SOMETHING STILL LESS COMPLIMENTARY -5683-32866-0002 BUT DON'T THESE VERY WISE THINGS SOMETIMES TURN OUT VERY FOOLISHLY -5683-32866-0003 IN THE MEANTIME I HAD FORMED A NEW IDEA OF HER -5683-32866-0004 BY THIS TIME LORD CHELFORD AND WYLDER RETURNED AND DISGUSTED RATHER WITH MYSELF I RUMINATED ON MY WANT OF GENERAL SHIP -5683-32866-0005 AND HE MADE A LITTLE DIP OF HIS CANE TOWARDS BRANDON HALL OVER HIS SHOULDER -5683-32866-0006 YES SO THEY SAID BUT THAT WOULD I THINK HAVE BEEN WORSE -5683-32866-0007 IF A FELLOW'S 
BEEN A LITTLE BIT WILD HE'S BEELZEBUB AT ONCE -5683-32866-0008 BRACTON'S A VERY GOOD FELLOW I CAN ASSURE YOU -5683-32866-0009 I DON'T KNOW AND CAN'T SAY HOW YOU FINE GENTLEMEN DEFINE WICKEDNESS ONLY AS AN OBSCURE FEMALE I SPEAK ACCORDING TO MY LIGHTS AND HE IS GENERALLY THOUGHT THE WICKEDEST MAN IN THIS COUNTY -5683-32866-0010 WELL YOU KNOW RADIE WOMEN LIKE WICKED FELLOWS IT IS CONTRAST I SUPPOSE BUT THEY DO AND I'M SURE FROM WHAT BRACTON HAS SAID TO ME I KNOW HIM INTIMATELY THAT DORCAS LIKES HIM AND I CAN'T CONCEIVE WHY THEY ARE NOT MARRIED -5683-32866-0011 THEIR WALK CONTINUED SILENT FOR THE GREATER PART NEITHER WAS QUITE SATISFIED WITH THE OTHER BUT RACHEL AT LAST SAID -5683-32866-0012 NOW THAT'S IMPOSSIBLE RADIE FOR I REALLY DON'T THINK I ONCE THOUGHT OF HIM ALL THIS EVENING EXCEPT JUST WHILE WE WERE TALKING -5683-32866-0013 THERE WAS A BRIGHT MOONLIGHT BROKEN BY THE SHADOWS OF OVERHANGING BOUGHS AND WITHERED LEAVES AND THE MOTTLED LIGHTS AND SHADOWS GLIDED ODDLY ACROSS HIS PALE FEATURES -5683-32866-0014 DON'T INSULT ME STANLEY BY TALKING AGAIN AS YOU DID THIS MORNING -5683-32866-0015 WHAT I SAY IS ALTOGETHER ON YOUR OWN ACCOUNT -5683-32866-0016 MARK MY WORDS YOU'LL FIND HIM TOO STRONG FOR YOU AYE AND TOO DEEP -5683-32866-0017 I AM VERY UNEASY ABOUT IT WHATEVER IT IS I CAN'T HELP IT -5683-32866-0018 TO MY MIND THERE HAS ALWAYS BEEN SOMETHING INEXPRESSIBLY AWFUL IN FAMILY FEUDS -5683-32866-0019 THE MYSTERY OF THEIR ORIGIN THEIR CAPACITY FOR EVOLVING LATENT FACULTIES OF CRIME AND THE STEADY VITALITY WITH WHICH THEY SURVIVE THE HEARSE AND SPEAK THEIR DEEP MOUTHED MALIGNITIES IN EVERY NEW BORN GENERATION HAVE ASSOCIATED THEM SOMEHOW IN MY MIND WITH A SPELL OF LIFE EXCEEDING AND DISTINCT FROM HUMAN AND A SPECIAL SATANIC ACTION -5683-32866-0020 THE FLOOR MORE THAN ANYTHING ELSE SHOWED THE GREAT AGE OF THE ROOM -5683-32866-0021 MY BED WAS UNEXCEPTIONABLY COMFORTABLE BUT IN MY THEN MOOD I COULD HAVE WISHED IT A GREAT DEAL MORE MODERN -5683-32866-0022 ITS CURTAINS WERE 
OF THICK AND FADED TAPESTRY -5683-32866-0023 ALL THE FURNITURE BELONGED TO OTHER TIMES -5683-32866-0024 I SHAN'T TROUBLE YOU ABOUT MY TRAIN OF THOUGHTS OR FANCIES BUT I BEGAN TO FEEL VERY LIKE A GENTLEMAN IN A GHOST STORY WATCHING EXPERIMENTALLY IN A HAUNTED CHAMBER -5683-32866-0025 I DID NOT EVEN TAKE THE PRECAUTION OF SMOKING UP THE CHIMNEY -5683-32866-0026 I BOLDLY LIGHTED MY CHEROOT -5683-32866-0027 A COLD BRIGHT MOON WAS SHINING WITH CLEAR SHARP LIGHTS AND SHADOWS -5683-32866-0028 THE SOMBRE OLD TREES LIKE GIGANTIC HEARSE PLUMES BLACK AND AWFUL -5683-32866-0029 SOMEHOW I HAD GROWN NERVOUS -5683-32866-0030 A LITTLE BIT OF PLASTER TUMBLED DOWN THE CHIMNEY AND STARTLED ME CONFOUNDEDLY -5683-32879-0000 IT WAS NOT VERY MUCH PAST ELEVEN THAT MORNING WHEN THE PONY CARRIAGE FROM BRANDON DREW UP BEFORE THE LITTLE GARDEN WICKET OF REDMAN'S FARM -5683-32879-0001 WELL SHE WAS BETTER THOUGH SHE HAD HAD A BAD NIGHT -5683-32879-0002 SO THERE CAME A STEP AND A LITTLE RUSTLING OF FEMININE DRAPERIES THE SMALL DOOR OPENED AND RACHEL ENTERED WITH HER HAND EXTENDED AND A PALE SMILE OF WELCOME -5683-32879-0003 WOMEN CAN HIDE THEIR PAIN BETTER THAN WE MEN AND BEAR IT BETTER TOO EXCEPT WHEN SHAME DROPS FIRE INTO THE DREADFUL CHALICE -5683-32879-0004 BUT POOR RACHEL LAKE HAD MORE THAN THAT STOICAL HYPOCRISY WHICH ENABLES THE TORTURED SPIRITS OF HER SEX TO LIFT A PALE FACE THROUGH THE FLAMES AND SMILE -5683-32879-0005 THIS TRANSIENT SPRING AND LIGHTING UP ARE BEAUTIFUL A GLAMOUR BEGUILING OUR SENSES -5683-32879-0006 THERE WAS SOMETHING OF SWEETNESS AND FONDNESS IN HER TONES AND MANNER WHICH WAS NEW TO RACHEL AND COMFORTING AND SHE RETURNED THE GREETING AS KINDLY AND FELT MORE LIKE HER FORMER SELF -5683-32879-0007 RACHEL'S PALE AND SHARPENED FEATURES AND DILATED EYE STRUCK HER WITH A PAINFUL SURPRISE -5683-32879-0008 YOU HAVE BEEN SO ILL MY POOR RACHEL -5683-32879-0009 ILL AND TROUBLED DEAR TROUBLED IN MIND AND MISERABLY NERVOUS -5683-32879-0010 POOR RACHEL HER NATURE RECOILED FROM 
DECEIT AND SHE TOLD AT ALL EVENTS AS MUCH OF THE TRUTH AS SHE DARED -5683-32879-0011 SHE SPOKE WITH A SUDDEN ENERGY WHICH PARTOOK OF FEAR AND PASSION AND FLUSHED HER THIN CHEEK AND MADE HER LANGUID EYES FLASH -5683-32879-0012 THANK YOU RACHEL MY COUSIN RACHEL MY ONLY FRIEND -5683-32879-0013 CHELFORD HAD A NOTE FROM MISTER WYLDER THIS MORNING ANOTHER NOTE HIS COMING DELAYED AND SOMETHING OF HIS HAVING TO SEE SOME PERSON WHO IS ABROAD CONTINUED DORCAS AFTER A LITTLE PAUSE -5683-32879-0014 YES SOMETHING EVERYTHING SAID RACHEL HURRIEDLY LOOKING FROWNINGLY AT A FLOWER WHICH SHE WAS TWIRLING IN HER FINGERS -5683-32879-0015 YES SAID RACHEL -5683-32879-0016 AND THE WAN ORACLE HAVING SPOKEN SHE SATE DOWN IN THE SAME SORT OF ABSTRACTION AGAIN BESIDE DORCAS AND SHE LOOKED FULL IN HER COUSIN'S EYES -5683-32879-0017 OF MARK WYLDER I SAY THIS HIS NAME HAS BEEN FOR YEARS HATEFUL TO ME AND RECENTLY IT HAS BECOME FRIGHTFUL AND YOU WILL PROMISE ME SIMPLY THIS THAT YOU WILL NEVER ASK ME TO SPEAK AGAIN ABOUT HIM -5683-32879-0018 IT IS AN ANTIPATHY AN ANTIPATHY I CANNOT GET OVER DEAR DORCAS YOU MAY THINK IT A MADNESS BUT DON'T BLAME ME -5683-32879-0019 I HAVE VERY FEW TO LOVE ME NOW AND I THOUGHT YOU MIGHT LOVE ME AS I HAVE BEGUN TO LOVE YOU -5683-32879-0020 AND SHE THREW HER ARMS ROUND HER COUSIN'S NECK AND BRAVE RACHEL AT LAST BURST INTO TEARS -5683-32879-0021 DORCAS IN HER STRANGE WAY WAS MOVED -5683-32879-0022 I LIKE YOU STILL RACHEL I'M SURE I'LL ALWAYS LIKE YOU -5683-32879-0023 YOU RESEMBLE ME RACHEL YOU ARE FEARLESS AND INFLEXIBLE AND GENEROUS -5683-32879-0024 YES RACHEL I DO LOVE YOU -5683-32879-0025 THANK YOU DORCAS DEAR -61-70968-0001 GIVE NOT SO EARNEST A MIND TO THESE MUMMERIES CHILD -61-70968-0002 A GOLDEN FORTUNE AND A HAPPY LIFE -61-70968-0003 HE WAS LIKE UNTO MY FATHER IN A WAY AND YET WAS NOT MY FATHER -61-70968-0004 ALSO THERE WAS A STRIPLING PAGE WHO TURNED INTO A MAID -61-70968-0005 THIS WAS SO SWEET A LADY SIR AND IN SOME MANNER I DO THINK SHE DIED -61-70968-0006 
BUT THEN THE PICTURE WAS GONE AS QUICKLY AS IT CAME -61-70968-0007 SISTER NELL DO YOU HEAR THESE MARVELS -61-70968-0008 TAKE YOUR PLACE AND LET US SEE WHAT THE CRYSTAL CAN SHOW TO YOU -61-70968-0009 LIKE AS NOT YOUNG MASTER THOUGH I AM AN OLD MAN -61-70968-0010 FORTHWITH ALL RAN TO THE OPENING OF THE TENT TO SEE WHAT MIGHT BE AMISS BUT MASTER WILL WHO PEEPED OUT FIRST NEEDED NO MORE THAN ONE GLANCE -61-70968-0011 HE GAVE WAY TO THE OTHERS VERY READILY AND RETREATED UNPERCEIVED BY THE SQUIRE AND MISTRESS FITZOOTH TO THE REAR OF THE TENT -61-70968-0012 CRIES OF A NOTTINGHAM A NOTTINGHAM -61-70968-0013 BEFORE THEM FLED THE STROLLER AND HIS THREE SONS CAPLESS AND TERRIFIED -61-70968-0014 WHAT IS THE TUMULT AND RIOTING CRIED OUT THE SQUIRE AUTHORITATIVELY AND HE BLEW TWICE ON A SILVER WHISTLE WHICH HUNG AT HIS BELT -61-70968-0015 NAY WE REFUSED THEIR REQUEST MOST POLITELY MOST NOBLE SAID THE LITTLE STROLLER -61-70968-0016 AND THEN THEY BECAME VEXED AND WOULD HAVE SNATCHED YOUR PURSE FROM US -61-70968-0017 I COULD NOT SEE MY BOY INJURED EXCELLENCE FOR BUT DOING HIS DUTY AS ONE OF CUMBERLAND'S SONS -61-70968-0018 SO I DID PUSH THIS FELLOW -61-70968-0019 IT IS ENOUGH SAID GEORGE GAMEWELL SHARPLY AND HE TURNED UPON THE CROWD -61-70968-0020 SHAME ON YOU CITIZENS CRIED HE I BLUSH FOR MY FELLOWS OF NOTTINGHAM -61-70968-0021 SURELY WE CAN SUBMIT WITH GOOD GRACE -61-70968-0022 TIS FINE FOR YOU TO TALK OLD MAN ANSWERED THE LEAN SULLEN APPRENTICE -61-70968-0023 BUT I WRESTLED WITH THIS FELLOW AND DO KNOW THAT HE PLAYED UNFAIRLY IN THE SECOND BOUT -61-70968-0024 SPOKE THE SQUIRE LOSING ALL PATIENCE AND IT WAS TO YOU THAT I GAVE ANOTHER PURSE IN CONSOLATION -61-70968-0025 COME TO ME MEN HERE HERE HE RAISED HIS VOICE STILL LOUDER -61-70968-0026 THE STROLLERS TOOK THEIR PART IN IT WITH HEARTY ZEST NOW THAT THEY HAD SOME CHANCE OF BEATING OFF THEIR FOES -61-70968-0027 ROBIN AND THE LITTLE TUMBLER BETWEEN THEM TRIED TO FORCE THE SQUIRE TO STAND BACK AND VERY VALIANTLY DID THESE TWO 
COMPORT THEMSELVES -61-70968-0028 THE HEAD AND CHIEF OF THE RIOT THE NOTTINGHAM APPRENTICE WITH CLENCHED FISTS THREATENED MONTFICHET -61-70968-0029 THE SQUIRE HELPED TO THRUST THEM ALL IN AND ENTERED SWIFTLY HIMSELF -61-70968-0030 NOW BE SILENT ON YOUR LIVES HE BEGAN BUT THE CAPTURED APPRENTICE SET UP AN INSTANT SHOUT -61-70968-0031 SILENCE YOU KNAVE CRIED MONTFICHET -61-70968-0032 HE FELT FOR AND FOUND THE WIZARD'S BLACK CLOTH THE SQUIRE WAS QUITE OUT OF BREATH -61-70968-0033 THRUSTING OPEN THE PROPER ENTRANCE OF THE TENT ROBIN SUDDENLY RUSHED FORTH WITH HIS BURDEN WITH A GREAT SHOUT -61-70968-0034 A MONTFICHET A MONTFICHET GAMEWELL TO THE RESCUE -61-70968-0035 TAKING ADVANTAGE OF THIS THE SQUIRE'S FEW MEN REDOUBLED THEIR EFFORTS AND ENCOURAGED BY ROBIN'S AND THE LITTLE STROLLER'S CRIES FOUGHT THEIR WAY TO HIM -61-70968-0036 GEORGE MONTFICHET WILL NEVER FORGET THIS DAY -61-70968-0037 WHAT IS YOUR NAME LORDING ASKED THE LITTLE STROLLER PRESENTLY -61-70968-0038 ROBIN FITZOOTH -61-70968-0039 AND MINE IS WILL STUTELEY SHALL WE BE COMRADES -61-70968-0040 RIGHT WILLINGLY FOR BETWEEN US WE HAVE WON THE BATTLE ANSWERED ROBIN -61-70968-0041 I LIKE YOU WILL YOU ARE THE SECOND WILL THAT I HAVE MET AND LIKED WITHIN TWO DAYS IS THERE A SIGN IN THAT -61-70968-0042 MONTFICHET CALLED OUT FOR ROBIN TO GIVE HIM AN ARM -61-70968-0043 FRIENDS SAID MONTFICHET FAINTLY TO THE WRESTLERS BEAR US ESCORT SO FAR AS THE SHERIFF'S HOUSE -61-70968-0044 IT WILL NOT BE SAFE FOR YOU TO STAY HERE NOW -61-70968-0045 PRAY FOLLOW US WITH MINE AND MY LORD SHERIFF'S MEN -61-70968-0046 NOTTINGHAM CASTLE WAS REACHED AND ADMITTANCE WAS DEMANDED -61-70968-0047 MASTER MONCEUX THE SHERIFF OF NOTTINGHAM WAS MIGHTILY PUT ABOUT WHEN TOLD OF THE RIOTING -61-70968-0048 AND HENRY MIGHT RETURN TO ENGLAND AT ANY MOMENT -61-70968-0049 HAVE YOUR WILL CHILD IF THE BOY ALSO WILLS IT MONTFICHET ANSWERED FEELING TOO ILL TO OPPOSE ANYTHING VERY STRONGLY JUST THEN -61-70968-0050 HE MADE AN EFFORT TO HIDE HIS CONDITION FROM 
THEM ALL AND ROBIN FELT HIS FINGERS TIGHTEN UPON HIS ARM -61-70968-0051 BEG ME A ROOM OF THE SHERIFF CHILD QUICKLY -61-70968-0052 BUT WHO IS THIS FELLOW PLUCKING AT YOUR SLEEVE -61-70968-0053 HE IS MY ESQUIRE EXCELLENCY RETURNED ROBIN WITH DIGNITY -61-70968-0054 MISTRESS FITZOOTH HAD BEEN CARRIED OFF BY THE SHERIFF'S DAUGHTER AND HER MAIDS AS SOON AS THEY HAD ENTERED THE HOUSE SO THAT ROBIN ALONE HAD THE CARE OF MONTFICHET -61-70968-0055 ROBIN WAS GLAD WHEN AT LENGTH THEY WERE LEFT TO THEIR OWN DEVICES -61-70968-0056 THE WINE DID CERTAINLY BRING BACK THE COLOR TO THE SQUIRE'S CHEEKS -61-70968-0057 THESE ESCAPADES ARE NOT FOR OLD GAMEWELL LAD HIS DAY HAS COME TO TWILIGHT -61-70968-0058 WILL YOU FORGIVE ME NOW -61-70968-0059 IT WILL BE NO DISAPPOINTMENT TO ME -61-70968-0060 NO THANKS I AM GLAD TO GIVE YOU SUCH EASY HAPPINESS -61-70968-0061 YOU ARE A WORTHY LEECH WILL PRESENTLY WHISPERED ROBIN THE WINE HAS WORKED A MARVEL -61-70968-0062 AY AND SHOW YOU SOME PRETTY TRICKS -61-70970-0000 YOUNG FITZOOTH HAD BEEN COMMANDED TO HIS MOTHER'S CHAMBER SO SOON AS HE HAD COME OUT FROM HIS CONVERSE WITH THE SQUIRE -61-70970-0001 THERE BEFELL AN ANXIOUS INTERVIEW MISTRESS FITZOOTH ARGUING FOR AND AGAINST THE SQUIRE'S PROJECT IN A BREATH -61-70970-0002 MOST OF ALL ROBIN THOUGHT OF HIS FATHER WHAT WOULD HE COUNSEL -61-70970-0003 IF FOR A WHIM YOU BEGGAR YOURSELF I CANNOT STAY YOU -61-70970-0004 BUT TAKE IT WHILST I LIVE AND WEAR MONTFICHET'S SHIELD IN THE DAYS WHEN MY EYES CAN BE REJOICED BY SO BRAVE A SIGHT FOR YOU WILL NE'ER DISGRACE OUR SCUTCHEON I WARRANT ME -61-70970-0005 THE LAD HAD CHECKED HIM THEN -61-70970-0006 NEVER THAT SIR HE HAD SAID -61-70970-0007 HE WAS IN DEEP CONVERSE WITH THE CLERK AND ENTERED THE HALL HOLDING HIM BY THE ARM -61-70970-0008 NOW TO BED BOY -61-70970-0009 TIS LATE AND I GO MYSELF WITHIN A SHORT SPACE -61-70970-0010 DISMISS YOUR SQUIRE ROBIN AND BID ME GOOD E E N -61-70970-0011 AS ANY IN ENGLAND I WOULD SAY SAID GAMEWELL PROUDLY THAT IS IN HIS DAY 
-61-70970-0012 YET HE WILL TEACH YOU A FEW TRICKS WHEN MORNING IS COME -61-70970-0013 THERE WAS NO CHANCE TO ALTER HIS SLEEPING ROOM TO ONE NEARER TO GAMEWELL'S CHAMBER -61-70970-0014 PRESENTLY HE CROSSED THE FLOOR OF HIS ROOM WITH DECIDED STEP -61-70970-0015 WILL CRIED HE SOFTLY AND STUTELEY WHO HAD CHOSEN HIS COUCH ACROSS THE DOOR OF HIS YOUNG MASTER'S CHAMBER SPRANG UP AT ONCE IN ANSWER -61-70970-0016 WE WILL GO OUT TOGETHER TO THE BOWER THERE IS A WAY DOWN TO THE COURT FROM MY WINDOW -61-70970-0017 REST AND BE STILL UNTIL I WARN YOU -61-70970-0018 THE HOURS PASSED WEARILY BY AND MOVEMENT COULD YET BE HEARD ABOUT THE HALL -61-70970-0019 AT LAST ALL WAS QUIET AND BLACK IN THE COURTYARD OF GAMEWELL -61-70970-0020 WILL WHISPERED ROBIN OPENING HIS DOOR AS HE SPOKE ARE YOU READY -61-70970-0021 THEY THEN RENEWED THEIR JOURNEY AND UNDER THE BETTER LIGHT MADE A SAFE CROSSING OF THE STABLE ROOFS -61-70970-0022 ROBIN ENTERED THE HUT DRAGGING THE UNWILLING ESQUIRE AFTER HIM -61-70970-0023 BE NOT SO FOOLISH FRIEND SAID FITZOOTH CROSSLY -61-70970-0024 THEY MOVED THEREAFTER CAUTIOUSLY ABOUT THE HUT GROPING BEFORE AND ABOUT THEM TO FIND SOMETHING TO SHOW THAT WARRENTON HAD FULFILLED HIS MISSION -61-70970-0025 THEY WERE UPON THE VERGE OF AN OPEN TRAP IN THE FAR CORNER OF THE HUT AND STUTELEY HAD TRIPPED OVER THE EDGE OF THE REVERSED FLAP MOUTH OF THIS PIT -61-70970-0026 FITZOOTH'S HAND RESTED AT LAST UPON THE TOP RUNG OF A LADDER AND SLOWLY THE TRUTH CAME TO HIM -61-70970-0027 ROBIN CAREFULLY DESCENDED THE LADDER AND FOUND HIMSELF SOON UPON FIRM ROCKY GROUND -61-70970-0028 STUTELEY WAS BY HIS SIDE IN A FLASH AND THEN THEY BOTH BEGAN FEELING ABOUT THEM TO ASCERTAIN THE SHAPE AND CHARACTER OF THIS VAULT -61-70970-0029 FROM THE BLACKNESS BEHIND THE LIGHT THEY HEARD A VOICE WARRENTON'S -61-70970-0030 SAVE ME MASTERS BUT YOU STARTLED ME RARELY -61-70970-0031 CRIED HE WAVING THE LANTHORN BEFORE HIM TO MAKE SURE THAT THESE WERE NO GHOSTS IN FRONT OF HIM -61-70970-0032 ENQUIRED ROBIN 
WITH HIS SUSPICIONS STILL UPON HIM -61-70970-0033 TRULY SUCH A HORSE SHOULD BE WORTH MUCH IN NOTTINGHAM FAIR -61-70970-0034 NAY NAY LORDING ANSWERED WARRENTON WITH A HALF LAUGH -61-70970-0035 WARRENTON SPOKE THUS WITH SIGNIFICANCE TO SHOW ROBIN THAT HE WAS NOT TO THINK GEOFFREY'S CLAIMS TO THE ESTATE WOULD BE PASSED BY -61-70970-0036 ROBIN FITZOOTH SAW THAT HIS DOUBTS OF WARRENTON HAD BEEN UNFAIR AND HE BECAME ASHAMED OF HIMSELF FOR HARBORING THEM -61-70970-0037 HIS TONES RANG PLEASANTLY ON WARRENTON'S EARS AND FORTHWITH A GOOD FELLOWSHIP WAS HERALDED BETWEEN THEM -61-70970-0038 THE OLD SERVANT TOLD HIM QUIETLY AS THEY CREPT BACK TO GAMEWELL THAT THIS PASSAGE WAY LED FROM THE HUT IN THE PLEASANCE TO SHERWOOD AND THAT GEOFFREY FOR THE TIME WAS HIDING WITH THE OUTLAWS IN THE FOREST -61-70970-0039 HE IMPLORES US TO BE DISCREET AS THE GRAVE IN THIS MATTER FOR IN SOOTH HIS LIFE IS IN THE HOLLOW OF OUR HANDS -61-70970-0040 THEY REGAINED THEIR APARTMENT APPARENTLY WITHOUT DISTURBING THE HOUSEHOLD OF GAMEWELL -672-122797-0000 OUT IN THE WOODS STOOD A NICE LITTLE FIR TREE -672-122797-0001 THE PLACE HE HAD WAS A VERY GOOD ONE THE SUN SHONE ON HIM AS TO FRESH AIR THERE WAS ENOUGH OF THAT AND ROUND HIM GREW MANY LARGE SIZED COMRADES PINES AS WELL AS FIRS -672-122797-0002 HE DID NOT THINK OF THE WARM SUN AND OF THE FRESH AIR HE DID NOT CARE FOR THE LITTLE COTTAGE CHILDREN THAT RAN ABOUT AND PRATTLED WHEN THEY WERE IN THE WOODS LOOKING FOR WILD STRAWBERRIES -672-122797-0003 BUT THIS WAS WHAT THE TREE COULD NOT BEAR TO HEAR -672-122797-0004 IN WINTER WHEN THE SNOW LAY GLITTERING ON THE GROUND A HARE WOULD OFTEN COME LEAPING ALONG AND JUMP RIGHT OVER THE LITTLE TREE -672-122797-0005 OH THAT MADE HIM SO ANGRY -672-122797-0006 TO GROW AND GROW TO GET OLDER AND BE TALL THOUGHT THE TREE THAT AFTER ALL IS THE MOST DELIGHTFUL THING IN THE WORLD -672-122797-0007 IN AUTUMN THE WOOD CUTTERS ALWAYS CAME AND FELLED SOME OF THE LARGEST TREES -672-122797-0008 THIS HAPPENED EVERY YEAR AND THE 
YOUNG FIR TREE THAT HAD NOW GROWN TO A VERY COMELY SIZE TREMBLED AT THE SIGHT FOR THE MAGNIFICENT GREAT TREES FELL TO THE EARTH WITH NOISE AND CRACKING THE BRANCHES WERE LOPPED OFF AND THE TREES LOOKED LONG AND BARE THEY WERE HARDLY TO BE RECOGNISED AND THEN THEY WERE LAID IN CARTS AND THE HORSES DRAGGED THEM OUT OF THE WOOD -672-122797-0009 HAVE YOU NOT MET THEM ANYWHERE -672-122797-0010 REJOICE IN THY GROWTH SAID THE SUNBEAMS -672-122797-0011 AND THEN WHAT HAPPENS THEN -672-122797-0012 I WOULD FAIN KNOW IF I AM DESTINED FOR SO GLORIOUS A CAREER CRIED THE TREE REJOICING -672-122797-0013 I AM NOW TALL AND MY BRANCHES SPREAD LIKE THE OTHERS THAT WERE CARRIED OFF LAST YEAR OH -672-122797-0014 WERE I BUT ALREADY ON THE CART -672-122797-0015 WERE I IN THE WARM ROOM WITH ALL THE SPLENDOR AND MAGNIFICENCE -672-122797-0016 YES THEN SOMETHING BETTER SOMETHING STILL GRANDER WILL SURELY FOLLOW OR WHEREFORE SHOULD THEY THUS ORNAMENT ME -672-122797-0017 SOMETHING BETTER SOMETHING STILL GRANDER MUST FOLLOW BUT WHAT -672-122797-0018 REJOICE IN OUR PRESENCE SAID THE AIR AND THE SUNLIGHT -672-122797-0019 REJOICE IN THY OWN FRESH YOUTH -672-122797-0020 BUT THE TREE DID NOT REJOICE AT ALL HE GREW AND GREW AND WAS GREEN BOTH WINTER AND SUMMER -672-122797-0021 AND TOWARDS CHRISTMAS HE WAS ONE OF THE FIRST THAT WAS CUT DOWN -672-122797-0022 THE AXE STRUCK DEEP INTO THE VERY PITH THE TREE FELL TO THE EARTH WITH A SIGH HE FELT A PANG IT WAS LIKE A SWOON HE COULD NOT THINK OF HAPPINESS FOR HE WAS SORROWFUL AT BEING SEPARATED FROM HIS HOME FROM THE PLACE WHERE HE HAD SPRUNG UP -672-122797-0023 HE WELL KNEW THAT HE SHOULD NEVER SEE HIS DEAR OLD COMRADES THE LITTLE BUSHES AND FLOWERS AROUND HIM ANYMORE PERHAPS NOT EVEN THE BIRDS -672-122797-0024 THE DEPARTURE WAS NOT AT ALL AGREEABLE -672-122797-0025 THE TREE ONLY CAME TO HIMSELF WHEN HE WAS UNLOADED IN A COURT YARD WITH THE OTHER TREES AND HEARD A MAN SAY THAT ONE IS SPLENDID WE DON'T WANT THE OTHERS -672-122797-0026 THERE TOO WERE LARGE 
EASY CHAIRS SILKEN SOFAS LARGE TABLES FULL OF PICTURE BOOKS AND FULL OF TOYS WORTH HUNDREDS AND HUNDREDS OF CROWNS AT LEAST THE CHILDREN SAID SO -672-122797-0027 THE SERVANTS AS WELL AS THE YOUNG LADIES DECORATED IT -672-122797-0028 THIS EVENING THEY ALL SAID -672-122797-0029 HOW IT WILL SHINE THIS EVENING -672-122797-0030 PERHAPS THE OTHER TREES FROM THE FOREST WILL COME TO LOOK AT ME -672-122797-0031 IT BLAZED UP FAMOUSLY HELP HELP -672-122797-0032 CRIED THE YOUNG LADIES AND THEY QUICKLY PUT OUT THE FIRE -672-122797-0033 A STORY -672-122797-0034 A STORY CRIED THE CHILDREN DRAWING A LITTLE FAT MAN TOWARDS THE TREE -672-122797-0035 BUT I SHALL TELL ONLY ONE STORY -672-122797-0036 HUMPY DUMPY FELL DOWNSTAIRS AND YET HE MARRIED THE PRINCESS -672-122797-0037 THAT'S THE WAY OF THE WORLD -672-122797-0038 THOUGHT THE FIR TREE AND BELIEVED IT ALL BECAUSE THE MAN WHO TOLD THE STORY WAS SO GOOD LOOKING WELL WELL -672-122797-0039 I WON'T TREMBLE TO MORROW THOUGHT THE FIR TREE -672-122797-0040 AND THE WHOLE NIGHT THE TREE STOOD STILL AND IN DEEP THOUGHT -672-122797-0041 IN THE MORNING THE SERVANT AND THE HOUSEMAID CAME IN -672-122797-0042 BUT THEY DRAGGED HIM OUT OF THE ROOM AND UP THE STAIRS INTO THE LOFT AND HERE IN A DARK CORNER WHERE NO DAYLIGHT COULD ENTER THEY LEFT HIM -672-122797-0043 WHAT'S THE MEANING OF THIS THOUGHT THE TREE -672-122797-0044 AND HE LEANED AGAINST THE WALL LOST IN REVERIE -672-122797-0045 TIME ENOUGH HAD HE TOO FOR HIS REFLECTIONS FOR DAYS AND NIGHTS PASSED ON AND NOBODY CAME UP AND WHEN AT LAST SOMEBODY DID COME IT WAS ONLY TO PUT SOME GREAT TRUNKS IN A CORNER OUT OF THE WAY -672-122797-0046 TIS NOW WINTER OUT OF DOORS THOUGHT THE TREE -672-122797-0047 HOW KIND MAN IS AFTER ALL -672-122797-0048 IF IT ONLY WERE NOT SO DARK HERE AND SO TERRIBLY LONELY -672-122797-0049 SQUEAK SQUEAK -672-122797-0050 THEY SNUFFED ABOUT THE FIR TREE AND RUSTLED AMONG THE BRANCHES -672-122797-0051 I AM BY NO MEANS OLD SAID THE FIR TREE -672-122797-0052 THERE'S MANY A ONE 
CONSIDERABLY OLDER THAN I AM -672-122797-0053 THEY WERE SO EXTREMELY CURIOUS -672-122797-0054 I KNOW NO SUCH PLACE SAID THE TREE -672-122797-0055 AND THEN HE TOLD ALL ABOUT HIS YOUTH AND THE LITTLE MICE HAD NEVER HEARD THE LIKE BEFORE AND THEY LISTENED AND SAID -672-122797-0056 SAID THE FIR TREE THINKING OVER WHAT HE HAD HIMSELF RELATED -672-122797-0057 YES IN REALITY THOSE WERE HAPPY TIMES -672-122797-0058 WHO IS HUMPY DUMPY ASKED THE MICE -672-122797-0059 ONLY THAT ONE ANSWERED THE TREE -672-122797-0060 IT IS A VERY STUPID STORY -672-122797-0061 DON'T YOU KNOW ONE ABOUT BACON AND TALLOW CANDLES CAN'T YOU TELL ANY LARDER STORIES -672-122797-0062 NO SAID THE TREE -672-122797-0063 THEN GOOD BYE SAID THE RATS AND THEY WENT HOME -672-122797-0064 AT LAST THE LITTLE MICE STAYED AWAY ALSO AND THE TREE SIGHED AFTER ALL IT WAS VERY PLEASANT WHEN THE SLEEK LITTLE MICE SAT ROUND ME AND LISTENED TO WHAT I TOLD THEM -672-122797-0065 NOW THAT TOO IS OVER -672-122797-0066 WHY ONE MORNING THERE CAME A QUANTITY OF PEOPLE AND SET TO WORK IN THE LOFT -672-122797-0067 THE TRUNKS WERE MOVED THE TREE WAS PULLED OUT AND THROWN RATHER HARD IT IS TRUE DOWN ON THE FLOOR BUT A MAN DREW HIM TOWARDS THE STAIRS WHERE THE DAYLIGHT SHONE -672-122797-0068 BUT IT WAS NOT THE FIR TREE THAT THEY MEANT -672-122797-0069 IT WAS IN A CORNER THAT HE LAY AMONG WEEDS AND NETTLES -672-122797-0070 THE GOLDEN STAR OF TINSEL WAS STILL ON THE TOP OF THE TREE AND GLITTERED IN THE SUNSHINE -672-122797-0071 IN THE COURT YARD SOME OF THE MERRY CHILDREN WERE PLAYING WHO HAD DANCED AT CHRISTMAS ROUND THE FIR TREE AND WERE SO GLAD AT THE SIGHT OF HIM -672-122797-0072 AND THE GARDENER'S BOY CHOPPED THE TREE INTO SMALL PIECES THERE WAS A WHOLE HEAP LYING THERE -672-122797-0073 THE WOOD FLAMED UP SPLENDIDLY UNDER THE LARGE BREWING COPPER AND IT SIGHED SO DEEPLY -672-122797-0074 HOWEVER THAT WAS OVER NOW THE TREE GONE THE STORY AT AN END -6829-68769-0000 KENNETH AND BETH REFRAINED FROM TELLING THE OTHER GIRLS OR UNCLE 
JOHN OF OLD WILL ROGERS'S VISIT BUT THEY GOT MISTER WATSON IN THE LIBRARY AND QUESTIONED HIM CLOSELY ABOUT THE PENALTY FOR FORGING A CHECK -6829-68769-0001 IT WAS A SERIOUS CRIME INDEED MISTER WATSON TOLD THEM AND TOM GATES BADE FAIR TO SERVE A LENGTHY TERM IN STATE'S PRISON AS A CONSEQUENCE OF HIS RASH ACT -6829-68769-0002 I CAN'T SEE IT IN THAT LIGHT SAID THE OLD LAWYER -6829-68769-0003 IT WAS A DELIBERATE THEFT FROM HIS EMPLOYERS TO PROTECT A GIRL HE LOVED -6829-68769-0004 BUT THEY COULD NOT HAVE PROVEN A CASE AGAINST LUCY IF SHE WAS INNOCENT AND ALL THEIR THREATS OF ARRESTING HER WERE PROBABLY MERE BLUFF -6829-68769-0005 HE WAS SOFT HEARTED AND IMPETUOUS SAID BETH AND BEING IN LOVE HE DIDN'T STOP TO COUNT THE COST -6829-68769-0006 IF THE PROSECUTION WERE WITHDRAWN AND THE CASE SETTLED WITH THE VICTIM OF THE FORGED CHECK THEN THE YOUNG MAN WOULD BE ALLOWED HIS FREEDOM -6829-68769-0007 BUT UNDER THE CIRCUMSTANCES I DOUBT IF SUCH AN ARRANGEMENT COULD BE MADE -6829-68769-0008 FAIRVIEW WAS TWELVE MILES AWAY BUT BY TEN O'CLOCK THEY DREW UP AT THE COUNTY JAIL -6829-68769-0009 THEY WERE RECEIVED IN THE LITTLE OFFICE BY A MAN NAMED MARKHAM WHO WAS THE JAILER -6829-68769-0010 WE WISH TO TALK WITH HIM ANSWERED KENNETH TALK -6829-68769-0011 I'M RUNNING FOR REPRESENTATIVE ON THE REPUBLICAN TICKET SAID KENNETH QUIETLY -6829-68769-0012 OH SAY THAT'S DIFFERENT OBSERVED MARKHAM ALTERING HIS DEMEANOR -6829-68769-0013 MAY WE SEE GATES AT ONCE ASKED KENNETH -6829-68769-0014 THEY FOLLOWED THE JAILER ALONG A SUCCESSION OF PASSAGES -6829-68769-0015 SOMETIMES I'M THAT YEARNING FOR A SMOKE I'M NEARLY CRAZY AN I DUNNO WHICH IS WORST DYIN ONE WAY OR ANOTHER -6829-68769-0016 HE UNLOCKED THE DOOR AND CALLED HERE'S VISITORS TOM -6829-68769-0017 WORSE TOM WORSE N EVER REPLIED THE JAILER GLOOMILY -6829-68769-0018 MISS DE GRAF SAID KENNETH NOTICING THE BOY'S FACE CRITICALLY AS HE STOOD WHERE THE LIGHT FROM THE PASSAGE FELL UPON IT -6829-68769-0019 SORRY WE HAVEN'T ANY RECEPTION ROOM IN THE 
JAIL -6829-68769-0020 SIT DOWN PLEASE SAID GATES IN A CHEERFUL AND PLEASANT VOICE THERE'S A BENCH HERE -6829-68769-0021 A FRESH WHOLESOME LOOKING BOY WAS TOM GATES WITH STEADY GRAY EYES AN INTELLIGENT FOREHEAD BUT A SENSITIVE RATHER WEAK MOUTH -6829-68769-0022 WE HAVE HEARD SOMETHING OF YOUR STORY SAID KENNETH AND ARE INTERESTED IN IT -6829-68769-0023 I DIDN'T STOP TO THINK WHETHER IT WAS FOOLISH OR NOT I DID IT AND I'M GLAD I DID -6829-68769-0024 OLD WILL IS A FINE FELLOW BUT POOR AND HELPLESS SINCE MISSUS ROGERS HAD HER ACCIDENT -6829-68769-0025 THEN ROGERS WOULDN'T DO ANYTHING BUT LEAD HER AROUND AND WAIT UPON HER AND THE PLACE WENT TO RACK AND RUIN -6829-68769-0026 HE SPOKE SIMPLY BUT PACED UP AND DOWN THE NARROW CELL IN FRONT OF THEM -6829-68769-0027 WHOSE NAME DID YOU SIGN TO THE CHECK ASKED KENNETH -6829-68769-0028 HE IS SUPPOSED TO SIGN ALL THE CHECKS OF THE CONCERN -6829-68769-0029 IT'S A STOCK COMPANY AND RICH -6829-68769-0030 I WAS BOOKKEEPER SO IT WAS EASY TO GET A BLANK CHECK AND FORGE THE SIGNATURE -6829-68769-0031 AS REGARDS MY ROBBING THE COMPANY I'LL SAY THAT I SAVED THEM A HEAVY LOSS ONE DAY -6829-68769-0032 I DISCOVERED AND PUT OUT A FIRE THAT WOULD HAVE DESTROYED THE WHOLE PLANT BUT MARSHALL NEVER EVEN THANKED ME -6829-68769-0033 IT WAS BETTER FOR HIM TO THINK THE GIRL UNFEELING THAN TO KNOW THE TRUTH -6829-68769-0034 I'M GOING TO SEE MISTER MARSHALL SAID KENNETH AND DISCOVER WHAT I CAN DO TO ASSIST YOU THANK YOU SIR -6829-68769-0035 IT WON'T BE MUCH BUT I'M GRATEFUL TO FIND A FRIEND -6829-68769-0036 THEY LEFT HIM THEN FOR THE JAILER ARRIVED TO UNLOCK THE DOOR AND ESCORT THEM TO THE OFFICE -6829-68769-0037 I'VE SEEN LOTS OF THAT KIND IN MY DAY -6829-68769-0038 AND IT RUINS A MAN'S DISPOSITION -6829-68769-0039 HE LOOKED UP RATHER UNGRACIOUSLY BUT MOTIONED THEM TO BE SEATED -6829-68769-0040 SOME GIRL HAS BEEN HERE TWICE TO INTERVIEW MY MEN AND I HAVE REFUSED TO ADMIT HER -6829-68769-0041 I'M NOT ELECTIONEERING JUST NOW -6829-68769-0042 OH WELL SIR 
WHAT ABOUT HIM -6829-68769-0043 AND HE DESERVES A TERM IN STATE'S PRISON -6829-68769-0044 IT HAS COST ME TWICE SIXTY DOLLARS IN ANNOYANCE -6829-68769-0045 I'LL PAY ALL THE COSTS BESIDES -6829-68769-0046 YOU'RE FOOLISH WHY SHOULD YOU DO ALL THIS -6829-68769-0047 I HAVE MY OWN REASONS MISTER MARSHALL -6829-68769-0048 GIVE ME A CHECK FOR A HUNDRED AND FIFTY AND I'LL TURN OVER TO YOU THE FORGED CHECK AND QUASH FURTHER PROCEEDINGS -6829-68769-0049 HE DETESTED THE GRASPING DISPOSITION THAT WOULD ENDEAVOR TO TAKE ADVANTAGE OF HIS EVIDENT DESIRE TO HELP YOUNG GATES -6829-68769-0050 BETH UNEASY AT HIS SILENCE NUDGED HIM -6829-68769-0051 THERE WAS A GRIM SMILE OF AMUSEMENT ON HIS SHREWD FACE -6829-68769-0052 HE MIGHT HAVE HAD THAT FORGED CHECK FOR THE FACE OF IT IF HE'D BEEN SHARP -6829-68769-0053 AND TO THINK WE CAN SAVE ALL THAT MISERY AND DESPAIR BY THE PAYMENT OF A HUNDRED AND FIFTY DOLLARS -6829-68771-0000 SO TO THE SURPRISE OF THE DEMOCRATIC COMMITTEE AND ALL HIS FRIENDS MISTER HOPKINS ANNOUNCED THAT HE WOULD OPPOSE FORBES'S AGGRESSIVE CAMPAIGN WITH AN EQUAL AGGRESSIVENESS AND SPEND AS MANY DOLLARS IN DOING SO AS MIGHT BE NECESSARY -6829-68771-0001 ONE OF MISTER HOPKINS'S FIRST TASKS AFTER CALLING HIS FAITHFUL HENCHMEN AROUND HIM WAS TO MAKE A CAREFUL CANVASS OF THE VOTERS OF HIS DISTRICT TO SEE WHAT WAS STILL TO BE ACCOMPLISHED -6829-68771-0002 THE WEAK KNEED CONTINGENCY MUST BE STRENGTHENED AND FORTIFIED AND A COUPLE OF HUNDRED VOTES IN ONE WAY OR ANOTHER SECURED FROM THE OPPOSITION -6829-68771-0003 THE DEMOCRATIC COMMITTEE FIGURED OUT A WAY TO DO THIS -6829-68771-0004 UNDER ORDINARY CONDITIONS REYNOLDS WAS SURE TO BE ELECTED BUT THE COMMITTEE PROPOSED TO SACRIFICE HIM IN ORDER TO ELECT HOPKINS -6829-68771-0005 THE ONLY THING NECESSARY WAS TO FIX SETH REYNOLDS AND THIS HOPKINS ARRANGED PERSONALLY -6829-68771-0006 AND THIS WAS WHY KENNETH AND BETH DISCOVERED HIM CONVERSING WITH THE YOUNG WOMAN IN THE BUGGY -6829-68771-0007 THE DESCRIPTION SHE GAVE OF THE COMING 
RECEPTION TO THE WOMAN'S POLITICAL LEAGUE WAS SO HUMOROUS AND DIVERTING THAT THEY WERE BOTH LAUGHING HEARTILY OVER THE THING WHEN THE YOUNG PEOPLE PASSED THEM AND THUS MISTER HOPKINS FAILED TO NOTICE WHO THE OCCUPANTS OF THE OTHER VEHICLE WERE -6829-68771-0008 THESE WOMEN WERE FLATTERED BY THE ATTENTION OF THE YOUNG LADY AND HAD PROMISED TO ASSIST IN ELECTING MISTER FORBES -6829-68771-0009 LOUISE HOPED FOR EXCELLENT RESULTS FROM THIS ORGANIZATION AND WISHED THE ENTERTAINMENT TO BE SO EFFECTIVE IN WINNING THEIR GOOD WILL THAT THEY WOULD WORK EARNESTLY FOR THE CAUSE IN WHICH THEY WERE ENLISTED -6829-68771-0010 THE FAIRVIEW BAND WAS ENGAGED TO DISCOURSE AS MUCH HARMONY AS IT COULD PRODUCE AND THE RESOURCES OF THE GREAT HOUSE WERE TAXED TO ENTERTAIN THE GUESTS -6829-68771-0011 TABLES WERE SPREAD ON THE LAWN AND A DAINTY BUT SUBSTANTIAL REPAST WAS TO BE SERVED -6829-68771-0012 THIS WAS THE FIRST OCCASION WITHIN A GENERATION WHEN SUCH AN ENTERTAINMENT HAD BEEN GIVEN AT ELMHURST AND THE ONLY ONE WITHIN THE MEMORY OF MAN WHERE THE NEIGHBORS AND COUNTRY PEOPLE HAD BEEN INVITED GUESTS -6829-68771-0013 THE ATTENDANCE WAS UNEXPECTEDLY LARGE AND THE GIRLS WERE DELIGHTED FORESEEING GREAT SUCCESS FOR THEIR FETE -6829-68771-0014 WE OUGHT TO HAVE MORE ATTENDANTS BETH SAID LOUISE APPROACHING HER COUSIN -6829-68771-0015 WON'T YOU RUN INTO THE HOUSE AND SEE IF MARTHA CAN'T SPARE ONE OR TWO MORE MAIDS -6829-68771-0016 SHE WAS VERY FOND OF THE YOUNG LADIES WHOM SHE HAD KNOWN WHEN AUNT JANE WAS THE MISTRESS HERE AND BETH WAS HER ESPECIAL FAVORITE -6829-68771-0017 THE HOUSEKEEPER LED THE WAY AND BETH FOLLOWED -6829-68771-0018 FOR A MOMENT BETH STOOD STARING WHILE THE NEW MAID REGARDED HER WITH COMPOSURE AND A SLIGHT SMILE UPON HER BEAUTIFUL FACE -6829-68771-0019 SHE WAS DRESSED IN THE REGULATION COSTUME OF THE MAIDS AT ELMHURST A PLAIN BLACK GOWN WITH WHITE APRON AND CAP -6829-68771-0020 THEN SHE GAVE A LITTLE LAUGH AND REPLIED NO MISS BETH I'M ELIZABETH PARSONS -6829-68771-0021 BUT IT 
CAN'T BE PROTESTED THE GIRL -6829-68771-0022 I ATTEND TO THE HOUSEHOLD MENDING YOU KNOW AND CARE FOR THE LINEN -6829-68771-0023 YOU SPEAK LIKE AN EDUCATED PERSON SAID BETH WONDERINGLY WHERE IS YOUR HOME -6829-68771-0024 FOR THE FIRST TIME THE MAID SEEMED A LITTLE CONFUSED AND HER GAZE WANDERED FROM THE FACE OF HER VISITOR -6829-68771-0025 SHE SAT DOWN IN A ROCKING CHAIR AND CLASPING HER HANDS IN HER LAP ROCKED SLOWLY BACK AND FORTH I'M SORRY SAID BETH -6829-68771-0026 ELIZA PARSONS SHOOK HER HEAD -6829-68771-0027 THEY THEY EXCITE ME IN SOME WAY AND I I CAN'T BEAR THEM YOU MUST EXCUSE ME -6829-68771-0028 SHE EVEN SEEMED MILDLY AMUSED AT THE ATTENTION SHE ATTRACTED -6829-68771-0029 BETH WAS A BEAUTIFUL GIRL THE HANDSOMEST OF THE THREE COUSINS BY FAR YET ELIZA SURPASSED HER IN NATURAL CHARM AND SEEMED WELL AWARE OF THE FACT -6829-68771-0030 HER MANNER WAS NEITHER INDEPENDENT NOR ASSERTIVE BUT RATHER ONE OF WELL BRED COMPOSURE AND CALM RELIANCE -6829-68771-0031 HER EYES WANDERED TO THE MAID'S HANDS -6829-68771-0032 HOWEVER HER FEATURES AND FORM MIGHT REPRESS ANY EVIDENCE OF NERVOUSNESS THESE HANDS TOLD A DIFFERENT STORY -6829-68771-0033 SHE ROSE QUICKLY TO HER FEET WITH AN IMPETUOUS GESTURE THAT MADE HER VISITOR CATCH HER BREATH -6829-68771-0034 I WISH I KNEW MYSELF SHE CRIED FIERCELY -6829-68771-0035 WILL YOU LEAVE ME ALONE IN MY OWN ROOM OR MUST I GO AWAY TO ESCAPE YOU -6829-68771-0036 ELIZA CLOSED THE DOOR BEHIND HER WITH A DECIDED SLAM AND A KEY CLICKED IN THE LOCK -6930-75918-0000 CONCORD RETURNED TO ITS PLACE AMIDST THE TENTS -6930-75918-0001 THE ENGLISH FORWARDED TO THE FRENCH BASKETS OF FLOWERS OF WHICH THEY HAD MADE A PLENTIFUL PROVISION TO GREET THE ARRIVAL OF THE YOUNG PRINCESS THE FRENCH IN RETURN INVITED THE ENGLISH TO A SUPPER WHICH WAS TO BE GIVEN THE NEXT DAY -6930-75918-0002 CONGRATULATIONS WERE POURED IN UPON THE PRINCESS EVERYWHERE DURING HER JOURNEY -6930-75918-0003 FROM THE RESPECT PAID HER ON ALL SIDES SHE SEEMED LIKE A QUEEN AND FROM THE 
ADORATION WITH WHICH SHE WAS TREATED BY TWO OR THREE SHE APPEARED AN OBJECT OF WORSHIP THE QUEEN MOTHER GAVE THE FRENCH THE MOST AFFECTIONATE RECEPTION FRANCE WAS HER NATIVE COUNTRY AND SHE HAD SUFFERED TOO MUCH UNHAPPINESS IN ENGLAND FOR ENGLAND TO HAVE MADE HER FORGET FRANCE -6930-75918-0004 SHE TAUGHT HER DAUGHTER THEN BY HER OWN AFFECTION FOR IT THAT LOVE FOR A COUNTRY WHERE THEY HAD BOTH BEEN HOSPITABLY RECEIVED AND WHERE A BRILLIANT FUTURE OPENED BEFORE THEM -6930-75918-0005 THE COUNT HAD THROWN HIMSELF BACK ON HIS SEAT LEANING HIS SHOULDERS AGAINST THE PARTITION OF THE TENT AND REMAINED THUS HIS FACE BURIED IN HIS HANDS WITH HEAVING CHEST AND RESTLESS LIMBS -6930-75918-0006 THIS HAS INDEED BEEN A HARASSING DAY CONTINUED THE YOUNG MAN HIS EYES FIXED UPON HIS FRIEND -6930-75918-0007 YOU WILL BE FRANK WITH ME I ALWAYS AM -6930-75918-0008 CAN YOU IMAGINE WHY BUCKINGHAM HAS BEEN SO VIOLENT I SUSPECT -6930-75918-0009 IT IS YOU WHO ARE MISTAKEN RAOUL I HAVE READ HIS DISTRESS IN HIS EYES IN HIS EVERY GESTURE AND ACTION THE WHOLE DAY -6930-75918-0010 I CAN PERCEIVE LOVE CLEARLY ENOUGH -6930-75918-0011 I AM CONVINCED OF WHAT I SAY SAID THE COUNT -6930-75918-0012 IT IS ANNOYANCE THEN -6930-75918-0013 IN THOSE VERY TERMS I EVEN ADDED MORE -6930-75918-0014 BUT CONTINUED RAOUL NOT INTERRUPTED BY THIS MOVEMENT OF HIS FRIEND HEAVEN BE PRAISED THE FRENCH WHO ARE PRONOUNCED TO BE THOUGHTLESS AND INDISCREET RECKLESS EVEN ARE CAPABLE OF BRINGING A CALM AND SOUND JUDGMENT TO BEAR ON MATTERS OF SUCH HIGH IMPORTANCE -6930-75918-0015 THUS IT IS THAT THE HONOR OF THREE IS SAVED OUR COUNTRY'S OUR MASTER'S AND OUR OWN -6930-75918-0016 YES I NEED REPOSE MANY THINGS HAVE AGITATED ME TO DAY BOTH IN MIND AND BODY WHEN YOU RETURN TO MORROW I SHALL NO LONGER BE THE SAME MAN -6930-75918-0017 BUT IN THIS FRIENDLY PRESSURE RAOUL COULD DETECT THE NERVOUS AGITATION OF A GREAT INTERNAL CONFLICT -6930-75918-0018 THE NIGHT WAS CLEAR STARLIT AND SPLENDID THE TEMPEST HAD PASSED AWAY AND THE SWEET 
INFLUENCES OF THE EVENING HAD RESTORED LIFE PEACE AND SECURITY EVERYWHERE -6930-75918-0019 UPON THE LARGE SQUARE IN FRONT OF THE HOTEL THE SHADOWS OF THE TENTS INTERSECTED BY THE GOLDEN MOONBEAMS FORMED AS IT WERE A HUGE MOSAIC OF JET AND YELLOW FLAGSTONES -6930-75918-0020 BRAGELONNE WATCHED FOR SOME TIME THE CONDUCT OF THE TWO LOVERS LISTENED TO THE LOUD AND UNCIVIL SLUMBERS OF MANICAMP WHO SNORED AS IMPERIOUSLY AS THOUGH HE WAS WEARING HIS BLUE AND GOLD INSTEAD OF HIS VIOLET SUIT -6930-76324-0000 GOLIATH MAKES ANOTHER DISCOVERY -6930-76324-0001 THEY WERE CERTAINLY NO NEARER THE SOLUTION OF THEIR PROBLEM -6930-76324-0002 THE POOR LITTLE THINGS CRIED CYNTHIA THINK OF THEM HAVING BEEN TURNED TO THE WALL ALL THESE YEARS -6930-76324-0003 NOW WHAT WAS THE SENSE OF IT TWO INNOCENT BABIES LIKE THAT -6930-76324-0004 BUT JOYCE HAD NOT BEEN LISTENING ALL AT ONCE SHE PUT DOWN HER CANDLE ON THE TABLE AND FACED HER COMPANION -6930-76324-0005 THE TWIN BROTHER DID SOMETHING SHE DIDN'T LIKE AND SHE TURNED HIS PICTURE TO THE WALL -6930-76324-0006 HERS HAPPENED TO BE IN THE SAME FRAME TOO BUT SHE EVIDENTLY DIDN'T CARE ABOUT THAT -6930-76324-0007 NOW WHAT HAVE YOU TO SAY CYNTHIA SPRAGUE -6930-76324-0008 I THOUGHT WE WERE STUMPED AGAIN WHEN I FIRST SAW THAT PICTURE BUT IT'S BEEN OF SOME USE AFTER ALL -6930-76324-0009 DO YOU SUPPOSE THE MINIATURE WAS A COPY OF THE SAME THING -6930-76324-0010 WHAT IN THE WORLD IS THAT QUERIED JOYCE -6930-76324-0011 THEY WORRY ME TERRIBLY AND BESIDES I'D LIKE TO SEE WHAT THIS LOVELY FURNITURE LOOKS LIKE WITHOUT SUCH QUANTITIES OF DUST ALL OVER IT GOOD SCHEME CYN -6930-76324-0012 WE'LL COME IN HERE THIS AFTERNOON WITH OLD CLOTHES ON AND HAVE A REGULAR HOUSE CLEANING -6930-76324-0013 IT CAN'T HURT ANYTHING I'M SURE FOR WE WON'T DISTURB THINGS AT ALL -6930-76324-0014 THIS THOUGHT HOWEVER DID NOT ENTER THE HEADS OF THE ENTHUSIASTIC PAIR -6930-76324-0015 SMUGGLING THE HOUSE CLEANING PARAPHERNALIA INTO THE CELLAR WINDOW UNOBSERVED THAT AFTERNOON PROVED NO 
EASY TASK FOR CYNTHIA HAD ADDED A WHISK BROOM AND DUST PAN TO THE OUTFIT -6930-76324-0016 THE LURE PROVED TOO MUCH FOR HIM AND HE CAME SPORTING AFTER IT AS FRISKILY AS A YOUNG KITTEN MUCH TO CYNTHIA'S DELIGHT WHEN SHE CAUGHT SIGHT OF HIM -6930-76324-0017 OH LET HIM COME ALONG SHE URGED I DO LOVE TO SEE HIM ABOUT THAT OLD HOUSE -6930-76324-0018 HE MAKES IT SORT OF COZIER -6930-76324-0019 NOW LET'S DUST THE FURNITURE AND PICTURES -6930-76324-0020 YET LITTLE AS IT WAS IT HAD ALREADY MADE A VAST DIFFERENCE IN THE ASPECT OF THE ROOM -6930-76324-0021 SURFACE DUST AT LEAST HAD BEEN REMOVED AND THE FINE OLD FURNITURE GAVE A HINT OF ITS REAL ELEGANCE AND POLISH -6930-76324-0022 THEN SHE SUDDENLY REMARKED -6930-76324-0023 AND MY POCKET MONEY IS GETTING LOW AGAIN AND YOU HAVEN'T ANY LEFT AS USUAL -6930-76324-0024 THEY SAY ILLUMINATION BY CANDLE LIGHT IS THE PRETTIEST IN THE WORLD -6930-76324-0025 WHY IT'S GOLIATH AS USUAL THEY BOTH CRIED PEERING IN -6930-76324-0026 ISN'T HE THE GREATEST FOR GETTING INTO ODD CORNERS -6930-76324-0027 FORGETTING ALL THEIR WEARINESS THEY SEIZED THEIR CANDLES AND SCURRIED THROUGH THE HOUSE FINDING AN OCCASIONAL PAPER TUCKED AWAY IN SOME ODD CORNER -6930-76324-0028 WELL I'M CONVINCED THAT THE BOARDED UP HOUSE MYSTERY HAPPENED NOT EARLIER THAN APRIL SIXTEENTH EIGHTEEN SIXTY ONE AND PROBABLY NOT MUCH LATER -6930-81414-0000 NO WORDS WERE SPOKEN NO LANGUAGE WAS UTTERED SAVE THAT OF WAILING AND HISSING AND THAT SOMEHOW WAS INDISTINCT AS IF IT EXISTED IN FANCY AND NOT IN REALITY -6930-81414-0001 I HEARD A NOISE BEHIND I TURNED AND SAW KAFFAR HIS BLACK EYES SHINING WHILE IN HIS HAND HE HELD A GLEAMING KNIFE HE LIFTED IT ABOVE HIS HEAD AS IF TO STRIKE BUT I HAD THE STRENGTH OF TEN MEN AND I HURLED HIM FROM ME -6930-81414-0002 ONWARD SAID A DISTANT VOICE -6930-81414-0003 NO SOUND BROKE THE STILLNESS OF THE NIGHT -6930-81414-0004 THE STORY OF ITS EVIL INFLUENCE CAME BACK TO ME AND IN MY BEWILDERED CONDITION I WONDERED WHETHER THERE WAS NOT SOME TRUTH IN WHAT 
HAD BEEN SAID -6930-81414-0005 WHAT WAS THAT -6930-81414-0006 WHAT THEN A HUMAN HAND LARGE AND SHAPELY APPEARED DISTINCTLY ON THE SURFACE OF THE POND -6930-81414-0007 NOTHING MORE NOT EVEN THE WRIST TO WHICH IT MIGHT BE ATTACHED -6930-81414-0008 IT DID NOT BECKON OR INDEED MOVE AT ALL IT WAS AS STILL AS THE HAND OF DEATH -6930-81414-0009 I AWOKE TO CONSCIOUSNESS FIGHTING AT FIRST IT SEEMED AS IF I WAS FIGHTING WITH A PHANTOM BUT GRADUALLY MY OPPONENT BECAME MORE REAL TO ME IT WAS KAFFAR -6930-81414-0010 A SOUND OF VOICES A FLASH OF LIGHT -6930-81414-0011 A FEELING OF FREEDOM AND I WAS AWAKE WHERE -6930-81414-0012 SAID ANOTHER VOICE WHICH I RECOGNIZED AS VOLTAIRE'S KAFFAR -6930-81414-0013 I HAD SCARCELY KNOWN WHAT I HAD BEEN SAYING OR DOING UP TO THIS TIME BUT AS HE SPOKE I LOOKED AT MY HAND -6930-81414-0014 IN THE LIGHT OF THE MOON I SAW A KNIFE RED WITH BLOOD AND MY HAND TOO WAS ALSO DISCOLOURED -6930-81414-0015 I DO NOT KNOW I AM DAZED BEWILDERED -6930-81414-0016 BUT THAT IS KAFFAR'S KNIFE -6930-81414-0017 I KNOW HE HAD IT THIS VERY EVENING -6930-81414-0018 I REMEMBER SAYING HAVE WE BEEN TOGETHER -6930-81414-0019 VOLTAIRE PICKED UP SOMETHING FROM THE GROUND AND LOOKED AT IT -6930-81414-0020 I SAY YOU DO KNOW WHAT THIS MEANS AND YOU MUST TELL US -6930-81414-0021 A TERRIBLE THOUGHT FLASHED INTO MY MIND -6930-81414-0022 I HAD AGAIN BEEN ACTING UNDER THE INFLUENCE OF THIS MAN'S POWER -6930-81414-0023 PERCHANCE TOO KAFFAR'S DEATH MIGHT SERVE HIM IN GOOD STEAD -6930-81414-0024 MY TONGUE REFUSED TO ARTICULATE MY POWER OF SPEECH LEFT ME -6930-81414-0025 MY POSITION WAS TOO TERRIBLE -6930-81414-0026 MY OVERWROUGHT NERVES YIELDED AT LAST -6930-81414-0027 FOR SOME TIME AFTER THAT I REMEMBERED NOTHING DISTINCTLY -7021-79730-0000 THE THREE MODES OF MANAGEMENT -7021-79730-0001 TO SUPPOSE THAT THE OBJECT OF THIS WORK IS TO AID IN EFFECTING SUCH A SUBSTITUTION AS THAT IS ENTIRELY TO MISTAKE ITS NATURE AND DESIGN -7021-79730-0002 BY REASON AND AFFECTION -7021-79730-0003 AS THE 
CHAISE DRIVES AWAY MARY STANDS BEWILDERED AND PERPLEXED ON THE DOOR STEP HER MIND IN A TUMULT OF EXCITEMENT IN WHICH HATRED OF THE DOCTOR DISTRUST AND SUSPICION OF HER MOTHER DISAPPOINTMENT VEXATION AND ILL HUMOR SURGE AND SWELL AMONG THOSE DELICATE ORGANIZATIONS ON WHICH THE STRUCTURE AND DEVELOPMENT OF THE SOUL SO CLOSELY DEPEND DOING PERHAPS AN IRREPARABLE INJURY -7021-79730-0004 THE MOTHER AS SOON AS THE CHAISE IS SO FAR TURNED THAT MARY CAN NO LONGER WATCH THE EXPRESSION OF HER COUNTENANCE GOES AWAY FROM THE DOOR WITH A SMILE OF COMPLACENCY AND SATISFACTION UPON HER FACE AT THE INGENUITY AND SUCCESS OF HER LITTLE ARTIFICE -7021-79730-0005 SO YOU WILL BE A GOOD GIRL I KNOW AND NOT MAKE ANY TROUBLE BUT WILL STAY AT HOME CONTENTEDLY WON'T YOU -7021-79730-0006 THE MOTHER IN MANAGING THE CASE IN THIS WAY RELIES PARTLY ON CONVINCING THE REASON OF THE CHILD AND PARTLY ON AN APPEAL TO HER AFFECTION -7021-79730-0007 IF YOU SHOULD NOT BE A GOOD GIRL BUT SHOULD SHOW SIGNS OF MAKING US ANY TROUBLE I SHALL HAVE TO SEND YOU OUT SOMEWHERE TO THE BACK PART OF THE HOUSE UNTIL WE ARE GONE -7021-79730-0008 BUT THIS LAST SUPPOSITION IS ALMOST ALWAYS UNNECESSARY FOR IF MARY HAS BEEN HABITUALLY MANAGED ON THIS PRINCIPLE SHE WILL NOT MAKE ANY TROUBLE -7021-79730-0009 IT IS INDEED TRUE THAT THE IMPORTANCE OF TACT AND SKILL IN THE TRAINING OF THE YOUNG AND OF CULTIVATING THEIR REASON AND SECURING THEIR AFFECTION CAN NOT BE OVERRATED -7021-79740-0000 TO SUCH PERSONS THESE INDIRECT MODES OF TRAINING CHILDREN IN HABITS OF SUBORDINATION TO THEIR WILL OR RATHER OF YIELDING TO THEIR INFLUENCE ARE SPECIALLY USEFUL -7021-79740-0001 DELLA HAD A YOUNG SISTER NAMED MARIA AND A COUSIN WHOSE NAME WAS JANE -7021-79740-0002 NOW DELIA CONTRIVED TO OBTAIN A GREAT INFLUENCE AND ASCENDENCY OVER THE MINDS OF THE CHILDREN BY MEANS OF THESE DOLLS -7021-79740-0003 TO GIVE AN IDEA OF THESE CONVERSATIONS I WILL REPORT ONE OF THEM IN FULL -7021-79740-0004 YOU HAVE COME ANDELLA ANDELLA WAS THE NAME OF JANE'S 
DOLL TO MAKE ROSALIE A VISIT -7021-79740-0005 I AM VERY GLAD -7021-79740-0006 I EXPECT YOU HAVE BEEN A VERY GOOD GIRL ANDELLA SINCE YOU WERE HERE LAST -7021-79740-0007 THEN TURNING TO JANE SHE ASKED IN A SOMEWHAT ALTERED TONE HAS SHE BEEN A GOOD GIRL JANE -7021-79740-0008 FOR INSTANCE ONE DAY THE CHILDREN HAD BEEN PLAYING UPON THE PIAZZA WITH BLOCKS AND OTHER PLAYTHINGS AND FINALLY HAD GONE INTO THE HOUSE LEAVING ALL THE THINGS ON THE FLOOR OF THE PIAZZA INSTEAD OF PUTTING THEM AWAY IN THEIR PLACES AS THEY OUGHT TO HAVE DONE -7021-79740-0009 THEY WERE NOW PLAYING WITH THEIR DOLLS IN THE PARLOR -7021-79740-0010 DELIA CAME TO THE PARLOR AND WITH AN AIR OF GREAT MYSTERY BECKONED THE CHILDREN ASIDE AND SAID TO THEM IN A WHISPER LEAVE ANDELLA AND ROSALIE HERE AND DON'T SAY A WORD TO THEM -7021-79740-0011 SO SAYING SHE LED THE WAY ON TIPTOE FOLLOWED BY THE CHILDREN OUT OF THE ROOM AND ROUND BY A CIRCUITOUS ROUTE TO THE PIAZZA THERE -7021-79740-0012 SAID SHE POINTING TO THE PLAYTHINGS SEE -7021-79740-0013 PUT THESE PLAYTHINGS ALL AWAY QUICK AND CAREFULLY AND WE WILL NOT LET THEM KNOW ANY THING ABOUT YOUR LEAVING THEM OUT -7021-79740-0014 AND THIS METHOD OF TREATING THE CASE WAS MUCH MORE EFFECTUAL IN MAKING THEM DISPOSED TO AVOID COMMITTING A SIMILAR FAULT ANOTHER TIME THAN ANY DIRECT REBUKES OR EXPRESSIONS OF DISPLEASURE ADDRESSED PERSONALLY TO THEM WOULD HAVE BEEN -7021-79759-0000 NATURE OF THE EFFECT PRODUCED BY EARLY IMPRESSIONS -7021-79759-0001 THAT IS COMPARATIVELY NOTHING -7021-79759-0002 THEY ARE CHIEFLY FORMED FROM COMBINATIONS OF THE IMPRESSIONS MADE IN CHILDHOOD -7021-79759-0003 VAST IMPORTANCE AND INFLUENCE OF THIS MENTAL FURNISHING -7021-79759-0004 WITHOUT GOING TO ANY SUCH EXTREME AS THIS WE CAN EASILY SEE ON REFLECTION HOW VAST AN INFLUENCE ON THE IDEAS AND CONCEPTIONS AS WELL AS ON THE PRINCIPLES OF ACTION IN MATURE YEARS MUST BE EXERTED BY THE NATURE AND CHARACTER OF THE IMAGES WHICH THE PERIOD OF INFANCY AND CHILDHOOD IMPRESSES UPON THE MIND 
-7021-79759-0005 THE PAIN PRODUCED BY AN ACT OF HASTY AND ANGRY VIOLENCE TO WHICH A FATHER SUBJECTS HIS SON MAY SOON PASS AWAY BUT THE MEMORY OF IT DOES NOT PASS AWAY WITH THE PAIN -7021-85628-0000 BUT ANDERS CARED NOTHING ABOUT THAT -7021-85628-0001 HE MADE A BOW SO DEEP THAT HIS BACK CAME NEAR BREAKING AND HE WAS DUMBFOUNDED I CAN TELL YOU WHEN HE SAW IT WAS NOBODY BUT ANDERS -7021-85628-0002 HE WAS SUCH A BIG BOY THAT HE WORE HIGH BOOTS AND CARRIED A JACK KNIFE -7021-85628-0003 NOW THIS KNIFE WAS A SPLENDID ONE THOUGH HALF THE BLADE WAS GONE AND THE HANDLE WAS A LITTLE CRACKED AND ANDERS KNEW THAT ONE IS ALMOST A MAN AS SOON AS ONE HAS A JACK KNIFE -7021-85628-0004 YES WHY NOT THOUGHT ANDERS -7021-85628-0005 SEEING THAT I AM SO FINE I MAY AS WELL GO AND VISIT THE KING -7021-85628-0006 I AM GOING TO THE COURT BALL ANSWERED ANDERS -7021-85628-0007 AND SHE TOOK ANDERS HAND AND WALKED WITH HIM UP THE BROAD MARBLE STAIRS WHERE SOLDIERS WERE POSTED AT EVERY THIRD STEP AND THROUGH THE MAGNIFICENT HALLS WHERE COURTIERS IN SILK AND VELVET STOOD BOWING WHEREVER HE WENT -7021-85628-0008 FOR LIKE AS NOT THEY MUST HAVE THOUGHT HIM A PRINCE WHEN THEY SAW HIS FINE CAP -7021-85628-0009 AT THE FARTHER END OF THE LARGEST HALL A TABLE WAS SET WITH GOLDEN CUPS AND GOLDEN PLATES IN LONG ROWS -7021-85628-0010 ON HUGE SILVER PLATTERS WERE PYRAMIDS OF TARTS AND CAKES AND RED WINE SPARKLED IN GLITTERING DECANTERS -7021-85628-0011 THE PRINCESS SAT DOWN UNDER A BLUE CANOPY WITH BOUQUETS OF ROSES AND SHE LET ANDERS SIT IN A GOLDEN CHAIR BY HER SIDE -7021-85628-0012 BUT YOU MUST NOT EAT WITH YOUR CAP ON YOUR HEAD SHE SAID AND WAS GOING TO TAKE IT OFF -7021-85628-0013 THE PRINCESS CERTAINLY WAS BEAUTIFUL AND HE WOULD HAVE DEARLY LIKED TO BE KISSED BY HER BUT THE CAP WHICH HIS MOTHER HAD MADE HE WOULD NOT GIVE UP ON ANY CONDITION -7021-85628-0014 HE ONLY SHOOK HIS HEAD -7021-85628-0015 WELL BUT NOW SAID THE PRINCESS AND SHE FILLED HIS POCKETS WITH CAKES AND PUT HER OWN HEAVY GOLD CHAIN AROUND 
HIS NECK AND BENT DOWN AND KISSED HIM -7021-85628-0016 THAT IS A VERY FINE CAP YOU HAVE HE SAID -7021-85628-0017 SO IT IS SAID ANDERS -7021-85628-0018 AND IT IS MADE OF MOTHER'S BEST YARN AND SHE KNITTED IT HERSELF AND EVERYBODY WANTS TO GET IT AWAY FROM ME -7021-85628-0019 WITH ONE JUMP ANDERS GOT OUT OF HIS CHAIR -7021-85628-0020 HE DARTED LIKE AN ARROW THROUGH ALL THE HALLS DOWN ALL THE STAIRS AND ACROSS THE YARD -7021-85628-0021 HE STILL HELD ON TO IT WITH BOTH HANDS AS HE RUSHED INTO HIS MOTHER'S COTTAGE -7021-85628-0022 AND ALL HIS BROTHERS AND SISTERS STOOD ROUND AND LISTENED WITH THEIR MOUTHS OPEN -7021-85628-0023 BUT WHEN HIS BIG BROTHER HEARD THAT HE HAD REFUSED TO GIVE HIS CAP FOR A KING'S GOLDEN CROWN HE SAID THAT ANDERS WAS A STUPID -7021-85628-0024 ANDERS FACE GREW RED -7021-85628-0025 BUT HIS MOTHER HUGGED HIM CLOSE -7021-85628-0026 NO MY LITTLE SON SHE SAID -7021-85628-0027 IF YOU DRESSED IN SILK AND GOLD FROM TOP TO TOE YOU COULD NOT LOOK ANY NICER THAN IN YOUR LITTLE RED CAP -7127-75946-0000 AT THE CONCLUSION OF THE BANQUET WHICH WAS SERVED AT FIVE O'CLOCK THE KING ENTERED HIS CABINET WHERE HIS TAILORS WERE AWAITING HIM FOR THE PURPOSE OF TRYING ON THE CELEBRATED COSTUME REPRESENTING SPRING WHICH WAS THE RESULT OF SO MUCH IMAGINATION AND HAD COST SO MANY EFFORTS OF THOUGHT TO THE DESIGNERS AND ORNAMENT WORKERS OF THE COURT -7127-75946-0001 AH VERY WELL -7127-75946-0002 LET HIM COME IN THEN SAID THE KING AND AS IF COLBERT HAD BEEN LISTENING AT THE DOOR FOR THE PURPOSE OF KEEPING HIMSELF AU COURANT WITH THE CONVERSATION HE ENTERED AS SOON AS THE KING HAD PRONOUNCED HIS NAME TO THE TWO COURTIERS -7127-75946-0003 GENTLEMEN TO YOUR POSTS WHEREUPON SAINT AIGNAN AND VILLEROY TOOK THEIR LEAVE -7127-75946-0004 CERTAINLY SIRE BUT I MUST HAVE MONEY TO DO THAT WHAT -7127-75946-0005 WHAT DO YOU MEAN INQUIRED LOUIS -7127-75946-0006 HE HAS GIVEN THEM WITH TOO MUCH GRACE NOT TO HAVE OTHERS STILL TO GIVE IF THEY ARE REQUIRED WHICH IS THE CASE AT THE PRESENT MOMENT 
-7127-75946-0007 IT IS NECESSARY THEREFORE THAT HE SHOULD COMPLY THE KING FROWNED -7127-75946-0008 DOES YOUR MAJESTY THEN NO LONGER BELIEVE THE DISLOYAL ATTEMPT -7127-75946-0009 NOT AT ALL YOU ARE ON THE CONTRARY MOST AGREEABLE TO ME -7127-75946-0010 YOUR MAJESTY'S PLAN THEN IN THIS AFFAIR IS -7127-75946-0011 YOU WILL TAKE THEM FROM MY PRIVATE TREASURE -7127-75946-0012 THE NEWS CIRCULATED WITH THE RAPIDITY OF LIGHTNING DURING ITS PROGRESS IT KINDLED EVERY VARIETY OF COQUETRY DESIRE AND WILD AMBITION -7127-75946-0013 THE KING HAD COMPLETED HIS TOILETTE BY NINE O'CLOCK HE APPEARED IN AN OPEN CARRIAGE DECORATED WITH BRANCHES OF TREES AND FLOWERS -7127-75946-0014 THE QUEENS HAD TAKEN THEIR SEATS UPON A MAGNIFICENT DIAS OR PLATFORM ERECTED UPON THE BORDERS OF THE LAKE IN A THEATER OF WONDERFUL ELEGANCE OF CONSTRUCTION -7127-75946-0015 SUDDENLY FOR THE PURPOSE OF RESTORING PEACE AND ORDER SPRING ACCOMPANIED BY HIS WHOLE COURT MADE HIS APPEARANCE -7127-75946-0016 THE SEASONS ALLIES OF SPRING FOLLOWED HIM CLOSELY TO FORM A QUADRILLE WHICH AFTER MANY WORDS OF MORE OR LESS FLATTERING IMPORT WAS THE COMMENCEMENT OF THE DANCE -7127-75946-0017 HIS LEGS THE BEST SHAPED AT COURT WERE DISPLAYED TO GREAT ADVANTAGE IN FLESH COLORED SILKEN HOSE OF SILK SO FINE AND SO TRANSPARENT THAT IT SEEMED ALMOST LIKE FLESH ITSELF -7127-75946-0018 THERE WAS SOMETHING IN HIS CARRIAGE WHICH RESEMBLED THE BUOYANT MOVEMENTS OF AN IMMORTAL AND HE DID NOT DANCE SO MUCH AS SEEM TO SOAR ALONG -7127-75946-0019 YES IT IS SUPPRESSED -7127-75946-0020 FAR FROM IT SIRE YOUR MAJESTY HAVING GIVEN NO DIRECTIONS ABOUT IT THE MUSICIANS HAVE RETAINED IT -7127-75946-0021 YES SIRE AND READY DRESSED FOR THE BALLET -7127-75946-0022 SIRE HE SAID YOUR MAJESTY'S MOST DEVOTED SERVANT APPROACHES TO PERFORM A SERVICE ON THIS OCCASION WITH SIMILAR ZEAL THAT HE HAS ALREADY SHOWN ON THE FIELD OF BATTLE -7127-75946-0023 THE KING SEEMED ONLY PLEASED WITH EVERY ONE PRESENT -7127-75946-0024 MONSIEUR WAS THE ONLY ONE WHO DID NOT 
UNDERSTAND ANYTHING ABOUT THE MATTER -7127-75946-0025 THE BALLET BEGAN THE EFFECT WAS MORE THAN BEAUTIFUL -7127-75946-0026 WHEN THE MUSIC BY ITS BURSTS OF MELODY CARRIED AWAY THESE ILLUSTRIOUS DANCERS WHEN THE SIMPLE UNTUTORED PANTOMIME OF THAT PERIOD ONLY THE MORE NATURAL ON ACCOUNT OF THE VERY INDIFFERENT ACTING OF THE AUGUST ACTORS HAD REACHED ITS CULMINATING POINT OF TRIUMPH THE THEATER SHOOK WITH TUMULTUOUS APPLAUSE -7127-75946-0027 DISDAINFUL OF A SUCCESS OF WHICH MADAME SHOWED NO ACKNOWLEDGEMENT HE THOUGHT OF NOTHING BUT BOLDLY REGAINING THE MARKED PREFERENCE OF THE PRINCESS -7127-75946-0028 BY DEGREES ALL HIS HAPPINESS ALL HIS BRILLIANCY SUBSIDED INTO REGRET AND UNEASINESS SO THAT HIS LIMBS LOST THEIR POWER HIS ARMS HUNG HEAVILY BY HIS SIDES AND HIS HEAD DROOPED AS THOUGH HE WAS STUPEFIED -7127-75946-0029 THE KING WHO HAD FROM THIS MOMENT BECOME IN REALITY THE PRINCIPAL DANCER IN THE QUADRILLE CAST A LOOK UPON HIS VANQUISHED RIVAL -7127-75947-0000 EVERY ONE COULD OBSERVE HIS AGITATION AND PROSTRATION A PROSTRATION WHICH WAS INDEED THE MORE REMARKABLE SINCE PEOPLE WERE NOT ACCUSTOMED TO SEE HIM WITH HIS ARMS HANGING LISTLESSLY BY HIS SIDE HIS HEAD BEWILDERED AND HIS EYES WITH ALL THEIR BRIGHT INTELLIGENCE BEDIMMED -7127-75947-0001 UPON THIS MADAME DEIGNED TO TURN HER EYES LANGUISHINGLY TOWARDS THE COMTE OBSERVING -7127-75947-0002 DO YOU THINK SO SHE REPLIED WITH INDIFFERENCE -7127-75947-0003 YES THE CHARACTER WHICH YOUR ROYAL HIGHNESS ASSUMED IS IN PERFECT HARMONY WITH YOUR OWN -7127-75947-0004 EXPLAIN YOURSELF -7127-75947-0005 I ALLUDE TO THE GODDESS -7127-75947-0006 THE PRINCESS INQUIRED NO -7127-75947-0007 SHE THEN ROSE HUMMING THE AIR TO WHICH SHE WAS PRESENTLY GOING TO DANCE -7127-75947-0008 THE ARROW PIERCED HIS HEART AND WOUNDED HIM MORTALLY -7127-75947-0009 A QUARTER OF AN HOUR AFTERWARDS HE RETURNED TO THE THEATER BUT IT WILL BE READILY BELIEVED THAT IT WAS ONLY A POWERFUL EFFORT OF REASON OVER HIS GREAT EXCITEMENT THAT ENABLED HIM TO GO BACK OR 
PERHAPS FOR LOVE IS THUS STRANGELY CONSTITUTED HE FOUND IT IMPOSSIBLE EVEN TO REMAIN MUCH LONGER SEPARATED FROM THE PRESENCE OF ONE WHO HAD BROKEN HIS HEART -7127-75947-0010 WHEN SHE PERCEIVED THE YOUNG MAN SHE ROSE LIKE A WOMAN SURPRISED IN THE MIDST OF IDEAS SHE WAS DESIROUS OF CONCEALING FROM HERSELF -7127-75947-0011 REMAIN I IMPLORE YOU THE EVENING IS MOST LOVELY -7127-75947-0012 INDEED AH -7127-75947-0013 I REMEMBER NOW AND I CONGRATULATE MYSELF DO YOU LOVE ANY ONE -7127-75947-0014 FORGIVE ME I HARDLY KNOW WHAT I AM SAYING A THOUSAND TIMES FORGIVE ME MADAME WAS RIGHT QUITE RIGHT THIS BRUTAL EXILE HAS COMPLETELY TURNED MY BRAIN -7127-75947-0015 THERE CANNOT BE A DOUBT HE RECEIVED YOU KINDLY FOR IN FACT YOU RETURNED WITHOUT HIS PERMISSION -7127-75947-0016 OH MADEMOISELLE WHY HAVE I NOT A DEVOTED SISTER OR A TRUE FRIEND SUCH AS YOURSELF -7127-75947-0017 WHAT ALREADY HERE THEY SAID TO HER -7127-75947-0018 I HAVE BEEN HERE THIS QUARTER OF AN HOUR REPLIED LA VALLIERE -7127-75947-0019 DID NOT THE DANCING AMUSE YOU NO -7127-75947-0020 NO MORE THAN THE DANCING -7127-75947-0021 LA VALLIERE IS QUITE A POETESS SAID TONNAY CHARENTE -7127-75947-0022 I AM A WOMAN AND THERE ARE FEW LIKE ME WHOEVER LOVES ME FLATTERS ME WHOEVER FLATTERS ME PLEASES ME AND WHOEVER PLEASES WELL SAID MONTALAIS YOU DO NOT FINISH -7127-75947-0023 IT IS TOO DIFFICULT REPLIED MADEMOISELLE DE TONNAY CHARENTE LAUGHING LOUDLY -7127-75947-0024 LOOK YONDER DO YOU NOT SEE THE MOON SLOWLY RISING SILVERING THE TOPMOST BRANCHES OF THE CHESTNUTS AND THE OAKS -7127-75947-0025 EXQUISITE SOFT TURF OF THE WOODS THE HAPPINESS WHICH YOUR FRIENDSHIP CONFERS UPON ME -7127-75947-0026 WELL SAID MADEMOISELLE DE TONNAY CHARENTE I ALSO THINK A GOOD DEAL BUT I TAKE CARE -7127-75947-0027 TO SAY NOTHING SAID MONTALAIS SO THAT WHEN MADEMOISELLE DE TONNAY CHARENTE THINKS ATHENAIS IS THE ONLY ONE WHO KNOWS IT -7127-75947-0028 QUICK QUICK THEN AMONG THE HIGH REED GRASS SAID MONTALAIS STOOP ATHENAIS YOU ARE SO TALL -7127-75947-0029 
THE YOUNG GIRLS HAD INDEED MADE THEMSELVES SMALL INDEED INVISIBLE -7127-75947-0030 SHE WAS HERE JUST NOW SAID THE COUNT -7127-75947-0031 YOU ARE POSITIVE THEN -7127-75947-0032 YES BUT PERHAPS I FRIGHTENED HER IN WHAT WAY -7127-75947-0033 HOW IS IT LA VALLIERE SAID MADEMOISELLE DE TONNAY CHARENTE THAT THE VICOMTE DE BRAGELONNE SPOKE OF YOU AS LOUISE -7127-75947-0034 IT SEEMS THE KING WILL NOT CONSENT TO IT -7127-75947-0035 GOOD GRACIOUS HAS THE KING ANY RIGHT TO INTERFERE IN MATTERS OF THAT KIND -7127-75947-0036 I GIVE MY CONSENT -7127-75947-0037 OH I AM SPEAKING SERIOUSLY REPLIED MONTALAIS AND MY OPINION IN THIS CASE IS QUITE AS GOOD AS THE KING'S I SUPPOSE IS IT NOT LOUISE -7127-75947-0038 LET US RUN THEN SAID ALL THREE AND GRACEFULLY LIFTING UP THE LONG SKIRTS OF THEIR SILK DRESSES THEY LIGHTLY RAN ACROSS THE OPEN SPACE BETWEEN THE LAKE AND THE THICKEST COVERT OF THE PARK -7127-75947-0039 IN FACT THE SOUND OF MADAME'S AND THE QUEEN'S CARRIAGES COULD BE HEARD IN THE DISTANCE UPON THE HARD DRY GROUND OF THE ROADS FOLLOWED BY THE MOUNTED CAVALIERS -7127-75947-0040 IN THIS WAY THE FETE OF THE WHOLE COURT WAS A FETE ALSO FOR THE MYSTERIOUS INHABITANTS OF THE FOREST FOR CERTAINLY THE DEER IN THE BRAKE THE PHEASANT ON THE BRANCH THE FOX IN ITS HOLE WERE ALL LISTENING -7176-88083-0000 ALL ABOUT HIM WAS A TUMULT OF BRIGHT AND BROKEN COLOR SCATTERED IN BROAD SPLASHES -7176-88083-0001 THE MERGANSER HAD A CRESTED HEAD OF IRIDESCENT GREEN BLACK A BROAD COLLAR OF LUSTROUS WHITE BLACK BACK BLACK AND WHITE WINGS WHITE BELLY SIDES FINELY PENCILLED IN BLACK AND WHITE AND A BREAST OF RICH CHESTNUT RED STREAKED WITH BLACK -7176-88083-0002 HIS FEET WERE RED HIS LONG NARROW BEAK WITH ITS SAW TOOTHED EDGES AND SHARP HOOKED TIP WAS BRIGHT RED -7176-88083-0003 BUT HERE HE WAS AT A TERRIBLE DISADVANTAGE AS COMPARED WITH THE OWLS HAWKS AND EAGLES HE HAD NO RENDING CLAWS -7176-88083-0004 BUT SUDDENLY STRAIGHT AND SWIFT AS A DIVING CORMORANT HE SHOT DOWN INTO THE TORRENT AND DISAPPEARED 
BENEATH THE SURFACE -7176-88083-0005 ONCE FAIRLY A WING HOWEVER HE WHEELED AND MADE BACK HURRIEDLY FOR HIS PERCH -7176-88083-0006 IT MIGHT HAVE SEEMED THAT A TROUT OF THIS SIZE WAS A FAIRLY SUBSTANTIAL MEAL -7176-88083-0007 BUT SUCH WAS HIS KEENNESS THAT EVEN WHILE THE WIDE FLUKES OF HIS ENGORGED VICTIM WERE STILL STICKING OUT AT THE CORNERS OF HIS BEAK HIS FIERCE RED EYES WERE ONCE MORE PEERING DOWNWARD INTO THE TORRENT IN SEARCH OF FRESH PREY -7176-88083-0008 IN DESPAIR HE HURLED HIMSELF DOWNWARD TOO SOON -7176-88083-0009 THE GREAT HAWK FOLLOWED HURRIEDLY TO RETRIEVE HIS PREY FROM THE GROUND -7176-88083-0010 THE CAT GROWLED SOFTLY PICKED UP THE PRIZE IN HER JAWS AND TROTTED INTO THE BUSHES TO DEVOUR IT -7176-88083-0011 IN FACT HE HAD JUST FINISHED IT THE LAST OF THE TROUT'S TAIL HAD JUST VANISHED WITH A SPASM DOWN HIS STRAINED GULLET WHEN THE BAFFLED HAWK CAUGHT SIGHT OF HIM AND SWOOPED -7176-88083-0012 THE HAWK ALIGHTED ON THE DEAD BRANCH AND SAT UPRIGHT MOTIONLESS AS IF SURPRISED -7176-88083-0013 LIKE HIS UNFORTUNATE LITTLE COUSIN THE TEAL HE TOO HAD FELT THE FEAR OF DEATH SMITTEN INTO HIS HEART AND WAS HEADING DESPERATELY FOR THE REFUGE OF SOME DARK OVERHANGING BANK DEEP FRINGED WITH WEEDS WHERE THE DREADFUL EYE OF THE HAWK SHOULD NOT DISCERN HIM -7176-88083-0014 THE HAWK SAT UPON THE BRANCH AND WATCHED HIS QUARRY SWIMMING BENEATH THE SURFACE -7176-88083-0015 ALMOST INSTANTLY HE WAS FORCED TO THE TOP -7176-88083-0016 STRAIGHTWAY THE HAWK GLIDED FROM HIS PERCH AND DARTED AFTER HIM -7176-88083-0017 BUT AT THIS POINT IN THE RAPIDS IT WAS IMPOSSIBLE FOR HIM TO STAY DOWN -7176-88083-0018 BUT THIS FREQUENTER OF THE HEIGHTS OF AIR FOR ALL HIS SAVAGE VALOR WAS TROUBLED AT THE LEAPING WAVES AND THE TOSSING FOAM OF THESE MAD RAPIDS HE DID NOT UNDERSTAND THEM -7176-88083-0019 AS HE FLEW HIS DOWN REACHING CLUTCHING TALONS WERE NOT HALF A YARD ABOVE THE FUGITIVE'S HEAD -7176-88083-0020 WHERE THE WAVES FOR AN INSTANT SANK THEY CAME CLOSER BUT NOT QUITE WITHIN GRASPING REACH 
-7176-88083-0021 BUT AS BEFORE THE LEAPING WAVES OF THE RAPIDS WERE TOO MUCH FOR HIS PURSUER AND HE WAS ABLE TO FLAP HIS WAY ONWARD IN A CLOUD OF FOAM WHILE DOOM HUNG LOW ABOVE HIS HEAD YET HESITATED TO STRIKE -7176-88083-0022 THE HAWK EMBITTERED BY THE LOSS OF HIS FIRST QUARRY HAD BECOME AS DOGGED IN PURSUIT AS A WEASEL NOT TO BE SHAKEN OFF OR EVADED OR DECEIVED -7176-88083-0023 HE HAD A LOT OF LINE OUT AND THE PLACE WAS NONE TOO FREE FOR A LONG CAST BUT HE WAS IMPATIENT TO DROP HIS FLIES AGAIN ON THE SPOT WHERE THE BIG FISH WAS FEEDING -7176-88083-0024 THE LAST DROP FLY AS LUCK WOULD HAVE IT CAUGHT JUST IN THE CORNER OF THE HAWK'S ANGRILY OPEN BEAK HOOKING ITSELF FIRMLY -7176-88083-0025 AT THE SUDDEN SHARP STING OF IT THE GREAT BIRD TURNED HIS HEAD AND NOTICED FOR THE FIRST TIME THE FISHERMAN STANDING ON THE BANK -7176-88083-0026 THE DRAG UPON HIS BEAK AND THE LIGHT CHECK UPON HIS WINGS WERE INEXPLICABLE TO HIM AND APPALLING -7176-88083-0027 THEN THE LEADER PARTED FROM THE LINE -7176-92135-0000 HE IS A WELCOME FIGURE AT THE GARDEN PARTIES OF THE ELECT WHO ARE ALWAYS READY TO ENCOURAGE HIM BY ACCEPTING FREE SEATS FOR HIS PLAY ACTOR MANAGERS NOD TO HIM EDITORS ALLOW HIM TO CONTRIBUTE WITHOUT CHARGE TO A SYMPOSIUM ON THE PRICE OF GOLF BALLS -7176-92135-0001 IN SHORT HE BECOMES A PROMINENT FIGURE IN LONDON SOCIETY AND IF HE IS NOT CAREFUL SOMEBODY WILL SAY SO -7176-92135-0002 BUT EVEN THE UNSUCCESSFUL DRAMATIST HAS HIS MOMENTS -7176-92135-0003 YOUR PLAY MUST BE NOT MERELY A GOOD PLAY BUT A SUCCESSFUL ONE -7176-92135-0004 FRANKLY I CANNOT ALWAYS SAY -7176-92135-0005 BUT SUPPOSE YOU SAID I'M FOND OF WRITING MY PEOPLE ALWAYS SAY MY LETTERS HOME ARE GOOD ENOUGH FOR PUNCH -7176-92135-0006 I'VE GOT A LITTLE IDEA FOR A PLAY ABOUT A MAN AND A WOMAN AND ANOTHER WOMAN AND BUT PERHAPS I'D BETTER KEEP THE PLOT A SECRET FOR THE MOMENT -7176-92135-0007 ANYHOW IT'S JOLLY EXCITING AND I CAN DO THE DIALOGUE ALL RIGHT -7176-92135-0008 LEND ME YOUR EAR FOR TEN MINUTES AND YOU SHALL 
LEARN JUST WHAT STAGECRAFT IS -7176-92135-0009 AND I SHOULD BEGIN WITH A SHORT HOMILY ON SOLILOQUY -7176-92135-0010 HAM TO BE OR NOT TO BE -7176-92135-0011 NOW THE OBJECT OF THIS SOLILOQUY IS PLAIN -7176-92135-0012 INDEED IRRESOLUTION BEING THE KEYNOTE OF HAMLET'S SOLILOQUY A CLEVER PLAYER COULD TO SOME EXTENT INDICATE THE WHOLE THIRTY LINES BY A SILENT WORKING OF THE JAW BUT AT THE SAME TIME IT WOULD BE IDLE TO DENY THAT HE WOULD MISS THE FINER SHADES OF THE DRAMATIST'S MEANING -7176-92135-0013 WE MODERNS HOWEVER SEE THE ABSURDITY OF IT -7176-92135-0014 IF IT BE GRANTED FIRST THAT THE THOUGHTS OF A CERTAIN CHARACTER SHOULD BE KNOWN TO THE AUDIENCE AND SECONDLY THAT SOLILOQUY OR THE HABIT OF THINKING ALOUD IS IN OPPOSITION TO MODERN STAGE TECHNIQUE HOW SHALL A SOLILOQUY BE AVOIDED WITHOUT DAMAGE TO THE PLAY -7176-92135-0015 AND SO ON TILL YOU GET TO THE END WHEN OPHELIA MIGHT SAY AH YES OR SOMETHING NON COMMITTAL OF THAT SORT -7176-92135-0016 THIS WOULD BE AN EASY WAY OF DOING IT BUT IT WOULD NOT BE THE BEST WAY FOR THE REASON THAT IT IS TOO EASY TO CALL ATTENTION TO ITSELF -7176-92135-0017 IN THE OLD BADLY MADE PLAY IT WAS FREQUENTLY NECESSARY FOR ONE OF THE CHARACTERS TO TAKE THE AUDIENCE INTO HIS CONFIDENCE -7176-92135-0018 IN THE MODERN WELL CONSTRUCTED PLAY HE SIMPLY RINGS UP AN IMAGINARY CONFEDERATE AND TELLS HIM WHAT HE IS GOING TO DO COULD ANYTHING BE MORE NATURAL -7176-92135-0019 I WANT DOUBLE NINE HAL LO -7176-92135-0020 DOUBLE NINE TWO THREE ELSINORE DOUBLE NINE YES HALLO IS THAT YOU HORATIO HAMLET SPEAKING -7176-92135-0021 I SAY I'VE BEEN WONDERING ABOUT THIS BUSINESS -7176-92135-0022 TO BE OR NOT TO BE THAT IS THE QUESTION WHETHER TIS NOBLER IN THE MIND TO SUFFER THE SLINGS AND ARROWS WHAT NO HAMLET SPEAKING -7176-92135-0023 YOU GAVE ME DOUBLE FIVE I WANT DOUBLE NINE HALLO IS THAT YOU HORATIO HAMLET SPEAKING -7176-92135-0024 TO BE OR NOT TO BE THAT IS THE QUESTION WHETHER TIS NOBLER -7176-92135-0025 IT IS TO LET HAMLET IF THAT HAPPEN TO BE THE NAME OF 
YOUR CHARACTER ENTER WITH A SMALL DOG PET FALCON MONGOOSE TAME BEAR OR WHATEVER ANIMAL IS MOST IN KEEPING WITH THE PART AND CONFIDE IN THIS ANIMAL SUCH SORROWS HOPES OR SECRET HISTORY AS THE AUDIENCE HAS GOT TO KNOW -7176-92135-0026 ENTER HAMLET WITH HIS FAVOURITE BOAR HOUND -7176-92135-0027 LADY LARKSPUR STARTS SUDDENLY AND TURNS TOWARDS HIM -7176-92135-0028 LARKSPUR BIT ME AGAIN THIS MORNING FOR THE THIRD TIME -7176-92135-0029 I WANT TO GET AWAY FROM IT ALL SWOONS -7176-92135-0030 ENTER LORD ARTHUR FLUFFINOSE -7176-92135-0031 AND THERE YOU ARE YOU WILL OF COURSE APPRECIATE THAT THE UNFINISHED SENTENCES NOT ONLY SAVE TIME BUT ALSO MAKE THE MANOEUVRING VERY MUCH MORE NATURAL -7176-92135-0032 HOW YOU MAY BE WONDERING ARE YOU TO BEGIN YOUR MASTERPIECE -7176-92135-0033 RELAPSES INTO SILENCE FOR THE REST OF THE EVENING -7176-92135-0034 THE DUCHESS OF SOUTHBRIDGE TO LORD REGGIE OH REGGIE WHAT DID YOU SAY -7176-92135-0035 THEN LORD TUPPENY WELL WHAT ABOUT AUCTION -7176-92135-0036 THE CROWD DRIFTS OFF LEAVING THE HERO AND HEROINE ALONE IN THE MIDDLE OF THE STAGE AND THEN YOU CAN BEGIN -7176-92135-0037 THEN IS THE TIME TO INTRODUCE A MEAL ON THE STAGE -7176-92135-0038 A STAGE MEAL IS POPULAR BECAUSE IT PROVES TO THE AUDIENCE THAT THE ACTORS EVEN WHEN CALLED CHARLES HAWTREY OR OWEN NARES ARE REAL PEOPLE JUST LIKE YOU AND ME -7176-92135-0039 TEA PLEASE MATTHEWS BUTLER IMPASSIVELY -7176-92135-0040 HOSTESS REPLACES LUMP AND INCLINES EMPTY TEAPOT OVER TRAY FOR A MOMENT THEN HANDS HIM A CUP PAINTED BROWN INSIDE THUS DECEIVING THE GENTLEMAN WITH THE TELESCOPE IN THE UPPER CIRCLE -7176-92135-0041 RE ENTER BUTLER AND THREE FOOTMEN WHO REMOVE THE TEA THINGS HOSTESS TO GUEST -7176-92135-0042 IN NOVELS THE HERO HAS OFTEN PUSHED HIS MEALS AWAY UNTASTED BUT NO STAGE HERO WOULD DO ANYTHING SO UNNATURAL AS THIS -7176-92135-0043 TWO BITES ARE MADE AND THE BREAD IS CRUMBLED WITH AN AIR OF GREAT EAGERNESS INDEED ONE FEELS THAT IN REAL LIFE THE GUEST WOULD CLUTCH HOLD OF THE FOOTMAN AND SAY 
HALF A MO OLD CHAP I HAVEN'T NEARLY FINISHED BUT THE ACTOR IS BETTER SCHOOLED THAN THIS -7176-92135-0044 BUT IT IS THE CIGARETTE WHICH CHIEFLY HAS BROUGHT THE MODERN DRAMA TO ITS PRESENT STATE OF PERFECTION -7176-92135-0045 LORD JOHN TAKING OUT GOLD CIGARETTE CASE FROM HIS LEFT HAND UPPER WAISTCOAT POCKET -7729-102255-0000 THE BOGUS LEGISLATURE NUMBERED THIRTY SIX MEMBERS -7729-102255-0001 THIS WAS AT THE MARCH ELECTION EIGHTEEN FIFTY FIVE -7729-102255-0002 THAT SUMMER'S EMIGRATION HOWEVER BEING MAINLY FROM THE FREE STATES GREATLY CHANGED THE RELATIVE STRENGTH OF THE TWO PARTIES -7729-102255-0003 FOR GENERAL SERVICE THEREFORE REQUIRING NO SPECIAL EFFORT THE NUMERICAL STRENGTH OF THE FACTIONS WAS ABOUT EQUAL WHILE ON EXTRAORDINARY OCCASIONS THE TWO THOUSAND BORDER RUFFIAN RESERVE LYING A LITTLE FARTHER BACK FROM THE STATE LINE COULD AT ANY TIME EASILY TURN THE SCALE -7729-102255-0004 THE FREE STATE MEN HAD ONLY THEIR CONVICTIONS THEIR INTELLIGENCE THEIR COURAGE AND THE MORAL SUPPORT OF THE NORTH THE CONSPIRACY HAD ITS SECRET COMBINATION THE TERRITORIAL OFFICIALS THE LEGISLATURE THE BOGUS LAWS THE COURTS THE MILITIA OFFICERS THE PRESIDENT AND THE ARMY -7729-102255-0005 THIS WAS A FORMIDABLE ARRAY OF ADVANTAGES SLAVERY WAS PLAYING WITH LOADED DICE -7729-102255-0006 COMING BY WAY OF THE MISSOURI RIVER TOWNS HE FELL FIRST AMONG BORDER RUFFIAN COMPANIONSHIP AND INFLUENCES AND PERHAPS HAVING HIS INCLINATIONS ALREADY MOLDED BY HIS WASHINGTON INSTRUCTIONS HIS EARLY IMPRESSIONS WERE DECIDEDLY ADVERSE TO THE FREE STATE CAUSE -7729-102255-0007 HIS RECEPTION SPEECH AT WESTPORT IN WHICH HE MAINTAINED THE LEGALITY OF THE LEGISLATURE AND HIS DETERMINATION TO ENFORCE THEIR LAWS DELIGHTED HIS PRO SLAVERY AUDITORS -7729-102255-0008 ALL THE TERRITORIAL DIGNITARIES WERE PRESENT GOVERNOR SHANNON PRESIDED JOHN CALHOUN THE SURVEYOR GENERAL MADE THE PRINCIPAL SPEECH A DENUNCIATION OF THE ABOLITIONISTS SUPPORTING THE TOPEKA MOVEMENT CHIEF JUSTICE LECOMPTE DIGNIFIED THE OCCASION WITH 
APPROVING REMARKS -7729-102255-0009 ALL DISSENT ALL NON COMPLIANCE ALL HESITATION ALL MERE SILENCE EVEN WERE IN THEIR STRONGHOLD TOWNS LIKE LEAVENWORTH BRANDED AS ABOLITIONISM DECLARED TO BE HOSTILITY TO THE PUBLIC WELFARE AND PUNISHED WITH PROSCRIPTION PERSONAL VIOLENCE EXPULSION AND FREQUENTLY DEATH -7729-102255-0010 OF THE LYNCHINGS THE MOBS AND THE MURDERS IT WOULD BE IMPOSSIBLE EXCEPT IN A VERY EXTENDED WORK TO NOTE THE FREQUENT AND ATROCIOUS DETAILS -7729-102255-0011 THE PRESENT CHAPTERS CAN ONLY TOUCH UPON THE MORE SALIENT MOVEMENTS OF THE CIVIL WAR IN KANSAS WHICH HAPPILY WERE NOT SANGUINARY IF HOWEVER THE INDIVIDUAL AND MORE ISOLATED CASES OF BLOODSHED COULD BE DESCRIBED THEY WOULD SHOW A STARTLING AGGREGATE OF BARBARITY AND LOSS OF LIFE FOR OPINION'S SAKE -7729-102255-0012 SEVERAL HUNDRED FREE STATE MEN PROMPTLY RESPONDED TO THE SUMMONS -7729-102255-0013 IT WAS IN FACT THE BEST WEAPON OF ITS DAY -7729-102255-0014 THE LEADERS OF THE CONSPIRACY BECAME DISTRUSTFUL OF THEIR POWER TO CRUSH THE TOWN -7729-102255-0015 ONE OF HIS MILITIA GENERALS SUGGESTED THAT THE GOVERNOR SHOULD REQUIRE THE OUTLAWS AT LAWRENCE AND ELSEWHERE TO SURRENDER THE SHARPS RIFLES ANOTHER WROTE ASKING HIM TO CALL OUT THE GOVERNMENT TROOPS AT FORT LEAVENWORTH -7729-102255-0016 THE GOVERNOR ON HIS PART BECOMING DOUBTFUL OF THE LEGALITY OF EMPLOYING MISSOURI MILITIA TO ENFORCE KANSAS LAWS WAS ALSO EAGER TO SECURE THE HELP OF FEDERAL TROOPS -7729-102255-0017 SHERIFF JONES HAD HIS POCKETS ALWAYS FULL OF WRITS ISSUED IN THE SPIRIT OF PERSECUTION BUT WAS OFTEN BAFFLED BY THE SHARP WITS AND READY RESOURCES OF THE FREE STATE PEOPLE AND SOMETIMES DEFIED OUTRIGHT -7729-102255-0018 LITTLE BY LITTLE HOWEVER THE LATTER BECAME HEMMED AND BOUND IN THE MESHES OF THE VARIOUS DEVICES AND PROCEEDINGS WHICH THE TERRITORIAL OFFICIALS EVOLVED FROM THE BOGUS LAWS -7729-102255-0019 TO EMBARRASS THIS DAMAGING EXPOSURE JUDGE LECOMPTE ISSUED A WRIT AGAINST THE EX GOVERNOR ON A FRIVOLOUS CHARGE OF CONTEMPT 
-7729-102255-0020 THE INCIDENT WAS NOT VIOLENT NOR EVEN DRAMATIC NO POSSE WAS SUMMONED NO FURTHER EFFORT MADE AND REEDER FEARING PERSONAL VIOLENCE SOON FLED IN DISGUISE -7729-102255-0021 BUT THE AFFAIR WAS MAGNIFIED AS A CROWNING PROOF THAT THE FREE STATE MEN WERE INSURRECTIONISTS AND OUTLAWS -7729-102255-0022 FROM THESE AGAIN SPRANG BARRICADED AND FORTIFIED DWELLINGS CAMPS AND SCOUTING PARTIES FINALLY CULMINATING IN ROVING GUERRILLA BANDS HALF PARTISAN HALF PREDATORY -7729-102255-0023 THEIR DISTINCTIVE CHARACTERS HOWEVER DISPLAY ONE BROAD AND UNFAILING DIFFERENCE -7729-102255-0024 THE FREE STATE MEN CLUNG TO THEIR PRAIRIE TOWNS AND PRAIRIE RAVINES WITH ALL THE OBSTINACY AND COURAGE OF TRUE DEFENDERS OF THEIR HOMES AND FIRESIDES -7729-102255-0025 THEIR ASSUMED CHARACTER CHANGED WITH THEIR CHANGING OPPORTUNITIES OR NECESSITIES -7729-102255-0026 IN THE SHOOTING OF SHERIFF JONES IN LAWRENCE AND IN THE REFUSAL OF EX GOVERNOR BEEDER TO ALLOW THE DEPUTY MARSHAL TO ARREST HIM THEY DISCOVERED GRAVE OFFENSES AGAINST THE TERRITORIAL AND UNITED STATES LAWS -7729-102255-0027 FOOTNOTE SUMNER TO SHANNON MAY TWELFTH EIGHTEEN FIFTY SIX -7729-102255-0028 PRIVATE PERSONS WHO HAD LEASED THE FREE STATE HOTEL VAINLY BESOUGHT THE VARIOUS AUTHORITIES TO PREVENT THE DESTRUCTION OF THEIR PROPERTY -7729-102255-0029 TEN DAYS WERE CONSUMED IN THESE NEGOTIATIONS BUT THE SPIRIT OF VENGEANCE REFUSED TO YIELD -7729-102255-0030 HE SUMMONED HALF A DOZEN CITIZENS TO JOIN HIS POSSE WHO FOLLOWED OBEYED AND ASSISTED HIM -7729-102255-0031 HE CONTINUED HIS PRETENDED SEARCH AND TO GIVE COLOR TO HIS ERRAND MADE TWO ARRESTS -7729-102255-0032 THE FREE STATE HOTEL A STONE BUILDING IN DIMENSIONS FIFTY BY SEVENTY FEET THREE STORIES HIGH AND HANDSOMELY FURNISHED PREVIOUSLY OCCUPIED ONLY FOR LODGING ROOMS ON THAT DAY FOR THE FIRST TIME OPENED ITS TABLE ACCOMMODATIONS TO THE PUBLIC AND PROVIDED A FREE DINNER IN HONOR OF THE OCCASION -7729-102255-0033 AS HE HAD PROMISED TO PROTECT THE HOTEL THE REASSURED CITIZENS 
BEGAN TO LAUGH AT THEIR OWN FEARS -7729-102255-0034 TO THEIR SORROW THEY WERE SOON UNDECEIVED -7729-102255-0035 THE MILITARY FORCE PARTLY RABBLE PARTLY ORGANIZED HAD MEANWHILE MOVED INTO THE TOWN -7729-102255-0036 HE PLANTED A COMPANY BEFORE THE HOTEL AND DEMANDED A SURRENDER OF THE ARMS BELONGING TO THE FREE STATE MILITARY COMPANIES -7729-102255-0037 HALF AN HOUR LATER TURNING A DEAF EAR TO ALL REMONSTRANCE HE GAVE THE PROPRIETORS UNTIL FIVE O'CLOCK TO REMOVE THEIR FAMILIES AND PERSONAL PROPERTY FROM THE FREE STATE HOTEL -7729-102255-0038 ATCHISON WHO HAD BEEN HARANGUING THE MOB PLANTED HIS TWO GUNS BEFORE THE BUILDING AND TRAINED THEM UPON IT -7729-102255-0039 THE INMATES BEING REMOVED AT THE APPOINTED HOUR A FEW CANNON BALLS WERE FIRED THROUGH THE STONE WALLS -7729-102255-0040 IN THIS INCIDENT CONTRASTING THE CREATIVE AND THE DESTRUCTIVE SPIRIT OF THE FACTIONS THE EMIGRANT AID SOCIETY OF MASSACHUSETTS FINDS ITS MOST HONORABLE AND TRIUMPHANT VINDICATION -7729-102255-0041 THE WHOLE PROCEEDING WAS SO CHILDISH THE MISERABLE PLOT SO TRANSPARENT THE OUTRAGE SO GROSS AS TO BRING DISGUST TO THE BETTER CLASS OF BORDER RUFFIANS WHO WERE WITNESSES AND ACCESSORIES -7729-102255-0042 RELOCATED FOOTNOTE GOVERNOR ROBINSON BEING ON HIS WAY EAST THE STEAMBOAT ON WHICH HE WAS TRAVELING STOPPED AT LEXINGTON MISSOURI -7729-102255-0043 IN A FEW DAYS AN OFFICER CAME WITH A REQUISITION FROM GOVERNOR SHANNON AND TOOK THE PRISONER BY LAND TO WESTPORT AND AFTERWARDS FROM THERE TO KANSAS CITY AND LEAVENWORTH -7729-102255-0044 HERE HE WAS PLACED IN THE CUSTODY OF CAPTAIN MARTIN OF THE KICKAPOO RANGERS WHO PROVED A KIND JAILER AND MATERIALLY ASSISTED IN PROTECTING HIM FROM THE DANGEROUS INTENTIONS OF THE MOB WHICH AT THAT TIME HELD LEAVENWORTH UNDER A REIGN OF TERROR -7729-102255-0045 CAPTAIN MARTIN SAID I SHALL GIVE YOU A PISTOL TO HELP PROTECT YOURSELF IF WORSE COMES TO WORST -7729-102255-0046 IN THE EARLY MORNING OF THE NEXT DAY MAY TWENTY NINTH A COMPANY OF DRAGOONS WITH ONE EMPTY SADDLE 
CAME DOWN FROM THE FORT AND WHILE THE PRO SLAVERY MEN STILL SLEPT THE PRISONER AND HIS ESCORT WERE ON THEIR WAY ACROSS THE PRAIRIES TO LECOMPTON IN THE CHARGE OF OFFICERS OF THE UNITED STATES ARMY -8224-274381-0000 THOUGH THROWN INTO PRISON FOR THIS ENTERPRISE AND DETAINED SOME TIME HE WAS NOT DISCOURAGED BUT STILL CONTINUED BY HIS COUNTENANCE AND PROTECTION TO INFUSE SPIRIT INTO THE DISTRESSED ROYALISTS -8224-274381-0001 AMONG OTHER PERSONS OF DISTINCTION WHO UNITED THEMSELVES TO HIM WAS LORD NAPIER OF MERCHISTON SON OF THE FAMOUS INVENTOR OF THE LOGARITHMS THE PERSON TO WHOM THE TITLE OF A GREAT MAN IS MORE JUSTLY DUE THAN TO ANY OTHER WHOM HIS COUNTRY EVER PRODUCED -8224-274381-0002 WHILE THE FORMER FORETOLD THAT THE SCOTTISH COVENANTERS WERE SECRETLY FORMING A UNION WITH THE ENGLISH PARLIAMENT AND INCULCATED THE NECESSITY OF PREVENTING THEM BY SOME VIGOROUS UNDERTAKING THE LATTER STILL INSISTED THAT EVERY SUCH ATTEMPT WOULD PRECIPITATE THEM INTO MEASURES TO WHICH OTHERWISE THEY WERE NOT PERHAPS INCLINED -8224-274381-0003 THE KING'S EARS WERE NOW OPEN TO MONTROSE'S COUNSELS WHO PROPOSED NONE BUT THE BOLDEST AND MOST DARING AGREEABLY TO THE DESPERATE STATE OF THE ROYAL CAUSE IN SCOTLAND -8224-274381-0004 FIVE HUNDRED MEN MORE WHO HAD BEEN LEVIED BY THE COVENANTERS WERE PERSUADED TO EMBRACE THE ROYAL CAUSE AND WITH THIS COMBINED FORCE HE HASTENED TO ATTACK LORD ELCHO WHO LAY AT PERTH WITH AN ARMY OF SIX THOUSAND MEN ASSEMBLED UPON THE FIRST NEWS OF THE IRISH INVASION -8224-274381-0005 DREADING THE SUPERIOR POWER OF ARGYLE WHO HAVING JOINED HIS VASSALS TO A FORCE LEVIED BY THE PUBLIC WAS APPROACHING WITH A CONSIDERABLE ARMY MONTROSE HASTENED NORTHWARDS IN ORDER TO ROUSE AGAIN THE MARQUIS OF HUNTLEY AND THE GORDONS WHO HAVING BEFORE HASTILY TAKEN ARMS HAD BEEN INSTANTLY SUPPRESSED BY THE COVENANTERS -8224-274381-0006 THIS NOBLEMAN'S CHARACTER THOUGH CELEBRATED FOR POLITICAL COURAGE AND CONDUCT WAS VERY LOW FOR MILITARY PROWESS AND AFTER SOME SKIRMISHES IN WHICH HE 
WAS WORSTED HE HERE ALLOWED MONTROSE TO ESCAPE HIM -8224-274381-0007 BY QUICK MARCHES THROUGH THESE INACCESSIBLE MOUNTAINS THAT GENERAL FREED HIMSELF FROM THE SUPERIOR FORCES OF THE COVENANTERS -8224-274381-0008 WITH THESE AND SOME REENFORCEMENTS OF THE ATHOLEMEN AND MACDONALDS WHOM HE HAD RECALLED MONTROSE FELL SUDDENLY UPON ARGYLE'S COUNTRY AND LET LOOSE UPON IT ALL THE RAGE OF WAR CARRYING OFF THE CATTLE BURNING THE HOUSES AND PUTTING THE INHABITANTS TO THE SWORD -8224-274381-0009 THIS SEVERITY BY WHICH MONTROSE SULLIED HIS VICTORIES WAS THE RESULT OF PRIVATE ANIMOSITY AGAINST THE CHIEFTAIN AS MUCH AS OF ZEAL FOR THE PUBLIC CAUSE ARGYLE COLLECTING THREE THOUSAND MEN MARCHED IN QUEST OF THE ENEMY WHO HAD RETIRED WITH THEIR PLUNDER AND HE LAY AT INNERLOCHY SUPPOSING HIMSELF STILL AT A CONSIDERABLE DISTANCE FROM THEM -8224-274381-0010 BY A QUICK AND UNEXPECTED MARCH MONTROSE HASTENED TO INNERLOCHY AND PRESENTED HIMSELF IN ORDER OF BATTLE BEFORE THE SURPRISED BUT NOT AFFRIGHTENED COVENANTERS -8224-274381-0011 HIS CONDUCT AND PRESENCE OF MIND IN THIS EMERGENCE APPEARED CONSPICUOUS -8224-274381-0012 MONTROSE WEAK IN CAVALRY HERE LINED HIS TROOPS OF HORSE WITH INFANTRY AND AFTER PUTTING THE ENEMY'S HORSE TO ROUT FELL WITH UNITED FORCE UPON THEIR FOOT WHO WERE ENTIRELY CUT IN PIECES THOUGH WITH THE LOSS OF THE GALLANT LORD GORDON ON THE PART OF THE ROYALISTS -8224-274381-0013 FROM THE SAME MEN NEW REGIMENTS AND NEW COMPANIES WERE FORMED DIFFERENT OFFICERS APPOINTED AND THE WHOLE MILITARY FORCE PUT INTO SUCH HANDS AS THE INDEPENDENTS COULD RELY ON -8224-274381-0014 BESIDES MEMBERS OF PARLIAMENT WHO WERE EXCLUDED MANY OFFICERS UNWILLING TO SERVE UNDER THE NEW GENERALS THREW UP THEIR COMMISSIONS AND UNWARILY FACILITATED THE PROJECT OF PUTTING THE ARMY ENTIRELY INTO THE HANDS OF THAT FACTION -8224-274381-0015 THOUGH THE DISCIPLINE OF THE FORMER PARLIAMENTARY ARMY WAS NOT CONTEMPTIBLE A MORE EXACT PLAN WAS INTRODUCED AND RIGOROUSLY EXECUTED BY THESE NEW COMMANDERS 
-8224-274381-0016 VALOR INDEED WAS VERY GENERALLY DIFFUSED OVER THE ONE PARTY AS WELL AS THE OTHER DURING THIS PERIOD DISCIPLINE ALSO WAS ATTAINED BY THE FORCES OF THE PARLIAMENT BUT THE PERFECTION OF THE MILITARY ART IN CONCERTING THE GENERAL PLANS OF ACTION AND THE OPERATIONS OF THE FIELD SEEMS STILL ON BOTH SIDES TO HAVE BEEN IN A GREAT MEASURE WANTING -8224-274381-0017 HISTORIANS AT LEAST PERHAPS FROM THEIR OWN IGNORANCE AND INEXPERIENCE HAVE NOT REMARKED ANY THING BUT A HEADLONG IMPETUOUS CONDUCT EACH PARTY HURRYING TO A BATTLE WHERE VALOR AND FORTUNE CHIEFLY DETERMINED THE SUCCESS -8224-274384-0000 HE PASSED THROUGH HENLEY SAINT ALBANS AND CAME SO NEAR TO LONDON AS HARROW ON THE HILL -8224-274384-0001 THE SCOTTISH GENERALS AND COMMISSIONERS AFFECTED GREAT SURPRISE ON THE APPEARANCE OF THE KING AND THOUGH THEY PAID HIM ALL THE EXTERIOR RESPECT DUE TO HIS DIGNITY THEY INSTANTLY SET A GUARD UPON HIM UNDER COLOR OF PROTECTION AND MADE HIM IN REALITY A PRISONER -8224-274384-0002 THEY INFORMED THE ENGLISH PARLIAMENT OF THIS UNEXPECTED INCIDENT AND ASSURED THEM THAT THEY HAD ENTERED INTO NO PRIVATE TREATY WITH THE KING -8224-274384-0003 OR HATH HE GIVEN US ANY GIFT -8224-274384-0004 AND THE MEN OF ISRAEL ANSWERED THE MEN OF JUDAH AND SAID WE HAVE TEN PARTS IN THE KING AND WE HAVE ALSO MORE RIGHT IN DAVID THAN YE WHY THEN DID YE DESPISE US THAT OUR ADVICE SHOULD NOT BE FIRST HAD IN BRINGING BACK OUR KING -8224-274384-0005 ANOTHER PREACHER AFTER REPROACHING HIM TO HIS FACE WITH HIS MISGOVERNMENT ORDERED THIS PSALM TO BE SUNG -8224-274384-0006 THE KING STOOD UP AND CALLED FOR THAT PSALM WHICH BEGINS WITH THESE WORDS -8224-274384-0007 HAVE MERCY LORD ON ME I PRAY FOR MEN WOULD ME DEVOUR -8224-274384-0008 THE GOOD NATURED AUDIENCE IN PITY TO FALLEN MAJESTY SHOWED FOR ONCE GREATER DEFERENCE TO THE KING THAN TO THE MINISTER AND SUNG THE PSALM WHICH THE FORMER HAD CALLED FOR -8224-274384-0009 THE PARLIAMENT AND THE SCOTS LAID THEIR PROPOSALS BEFORE THE KING 
-8224-274384-0010 BEFORE THE SETTLEMENT OF TERMS THE ADMINISTRATION MUST BE POSSESSED ENTIRELY BY THE PARLIAMENTS OF BOTH KINGDOMS AND HOW INCOMPATIBLE THAT SCHEME WITH THE LIBERTY OF THE KING IS EASILY IMAGINED -8224-274384-0011 THE ENGLISH IT IS EVIDENT HAD THEY NOT BEEN PREVIOUSLY ASSURED OF RECEIVING THE KING WOULD NEVER HAVE PARTED WITH SO CONSIDERABLE A SUM AND WHILE THEY WEAKENED THEMSELVES BY THE SAME MEASURE HAVE STRENGTHENED A PEOPLE WITH WHOM THEY MUST AFTERWARDS HAVE SO MATERIAL AN INTEREST TO DISCUSS -8224-274384-0012 IF ANY STILL RETAINED RANCOR AGAINST HIM IN HIS PRESENT CONDITION THEY PASSED IN SILENCE WHILE HIS WELL WISHERS MORE GENEROUS THAN PRUDENT ACCOMPANIED HIS MARCH WITH TEARS WITH ACCLAMATIONS AND WITH PRAYERS FOR HIS SAFETY -8224-274384-0013 HIS DEATH IN THIS CONJUNCTURE WAS A PUBLIC MISFORTUNE -8230-279154-0000 THE ANALYSIS OF KNOWLEDGE WILL OCCUPY US UNTIL THE END OF THE THIRTEENTH LECTURE AND IS THE MOST DIFFICULT PART OF OUR WHOLE ENTERPRISE -8230-279154-0001 WHAT IS CALLED PERCEPTION DIFFERS FROM SENSATION BY THE FACT THAT THE SENSATIONAL INGREDIENTS BRING UP HABITUAL ASSOCIATES IMAGES AND EXPECTATIONS OF THEIR USUAL CORRELATES ALL OF WHICH ARE SUBJECTIVELY INDISTINGUISHABLE FROM THE SENSATION -8230-279154-0002 WHETHER OR NOT THIS PRINCIPLE IS LIABLE TO EXCEPTIONS EVERYONE WOULD AGREE THAT IS HAS A BROAD MEASURE OF TRUTH THOUGH THE WORD EXACTLY MIGHT SEEM AN OVERSTATEMENT AND IT MIGHT SEEM MORE CORRECT TO SAY THAT IDEAS APPROXIMATELY REPRESENT IMPRESSIONS -8230-279154-0003 AND WHAT SORT OF EVIDENCE IS LOGICALLY POSSIBLE -8230-279154-0004 THERE IS NO LOGICAL IMPOSSIBILITY IN THE HYPOTHESIS THAT THE WORLD SPRANG INTO BEING FIVE MINUTES AGO EXACTLY AS IT THEN WAS WITH A POPULATION THAT REMEMBERED A WHOLLY UNREAL PAST -8230-279154-0005 ALL THAT I AM DOING IS TO USE ITS LOGICAL TENABILITY AS A HELP IN THE ANALYSIS OF WHAT OCCURS WHEN WE REMEMBER -8230-279154-0006 THE BEHAVIOURIST WHO ATTEMPTS TO MAKE PSYCHOLOGY A RECORD OF BEHAVIOUR HAS TO 
TRUST HIS MEMORY IN MAKING THE RECORD -8230-279154-0007 HABIT IS A CONCEPT INVOLVING THE OCCURRENCE OF SIMILAR EVENTS AT DIFFERENT TIMES IF THE BEHAVIOURIST FEELS CONFIDENT THAT THERE IS SUCH A PHENOMENON AS HABIT THAT CAN ONLY BE BECAUSE HE TRUSTS HIS MEMORY WHEN IT ASSURES HIM THAT THERE HAVE BEEN OTHER TIMES -8230-279154-0008 BUT I DO NOT THINK SUCH AN INFERENCE IS WARRANTED -8230-279154-0009 OUR CONFIDENCE OR LACK OF CONFIDENCE IN THE ACCURACY OF A MEMORY IMAGE MUST IN FUNDAMENTAL CASES BE BASED UPON A CHARACTERISTIC OF THE IMAGE ITSELF SINCE WE CANNOT EVOKE THE PAST BODILY AND COMPARE IT WITH THE PRESENT IMAGE -8230-279154-0010 WE SOMETIMES HAVE IMAGES THAT ARE BY NO MEANS PECULIARLY VAGUE WHICH YET WE DO NOT TRUST FOR EXAMPLE UNDER THE INFLUENCE OF FATIGUE WE MAY SEE A FRIEND'S FACE VIVIDLY AND CLEARLY BUT HORRIBLY DISTORTED -8230-279154-0011 SOME IMAGES LIKE SOME SENSATIONS FEEL VERY FAMILIAR WHILE OTHERS FEEL STRANGE -8230-279154-0012 FAMILIARITY IS A FEELING CAPABLE OF DEGREES -8230-279154-0013 IN AN IMAGE OF A WELL KNOWN FACE FOR EXAMPLE SOME PARTS MAY FEEL MORE FAMILIAR THAN OTHERS WHEN THIS HAPPENS WE HAVE MORE BELIEF IN THE ACCURACY OF THE FAMILIAR PARTS THAN IN THAT OF THE UNFAMILIAR PARTS -8230-279154-0014 I COME NOW TO THE OTHER CHARACTERISTIC WHICH MEMORY IMAGES MUST HAVE IN ORDER TO ACCOUNT FOR OUR KNOWLEDGE OF THE PAST -8230-279154-0015 THEY MUST HAVE SOME CHARACTERISTIC WHICH MAKES US REGARD THEM AS REFERRING TO MORE OR LESS REMOTE PORTIONS OF THE PAST -8230-279154-0016 IN ACTUAL FACT THERE ARE DOUBTLESS VARIOUS FACTORS THAT CONCUR IN GIVING US THE FEELING OF GREATER OR LESS REMOTENESS IN SOME REMEMBERED EVENT -8230-279154-0017 THERE MAY BE A SPECIFIC FEELING WHICH COULD BE CALLED THE FEELING OF PASTNESS ESPECIALLY WHERE IMMEDIATE MEMORY IS CONCERNED -8230-279154-0018 THERE IS OF COURSE A DIFFERENCE BETWEEN KNOWING THE TEMPORAL RELATION OF A REMEMBERED EVENT TO THE PRESENT AND KNOWING THE TIME ORDER OF TWO REMEMBERED EVENTS -8230-279154-0019 IT 
WOULD SEEM THAT ONLY RATHER RECENT EVENTS CAN BE PLACED AT ALL ACCURATELY BY MEANS OF FEELINGS GIVING THEIR TEMPORAL RELATION TO THE PRESENT BUT IT IS CLEAR THAT SUCH FEELINGS MUST PLAY AN ESSENTIAL PART IN THE PROCESS OF DATING REMEMBERED EVENTS -8230-279154-0020 IF WE HAD RETAINED THE SUBJECT OR ACT IN KNOWLEDGE THE WHOLE PROBLEM OF MEMORY WOULD HAVE BEEN COMPARATIVELY SIMPLE -8230-279154-0021 REMEMBERING HAS TO BE A PRESENT OCCURRENCE IN SOME WAY RESEMBLING OR RELATED TO WHAT IS REMEMBERED -8230-279154-0022 SOME POINTS MAY BE TAKEN AS FIXED AND SUCH AS ANY THEORY OF MEMORY MUST ARRIVE AT -8230-279154-0023 IN THIS CASE AS IN MOST OTHERS WHAT MAY BE TAKEN AS CERTAIN IN ADVANCE IS RATHER VAGUE -8230-279154-0024 THE FIRST OF OUR VAGUE BUT INDUBITABLE DATA IS THAT THERE IS KNOWLEDGE OF THE PAST -8230-279154-0025 WE MIGHT PROVISIONALLY THOUGH PERHAPS NOT QUITE CORRECTLY DEFINE MEMORY AS THAT WAY OF KNOWING ABOUT THE PAST WHICH HAS NO ANALOGUE IN OUR KNOWLEDGE OF THE FUTURE SUCH A DEFINITION WOULD AT LEAST SERVE TO MARK THE PROBLEM WITH WHICH WE ARE CONCERNED THOUGH SOME EXPECTATIONS MAY DESERVE TO RANK WITH MEMORY AS REGARDS IMMEDIACY -8230-279154-0026 THIS DISTINCTION IS VITAL TO THE UNDERSTANDING OF MEMORY BUT IT IS NOT SO EASY TO CARRY OUT IN PRACTICE AS IT IS TO DRAW IN THEORY -8230-279154-0027 A GRAMOPHONE BY THE HELP OF SUITABLE RECORDS MIGHT RELATE TO US THE INCIDENTS OF ITS PAST AND PEOPLE ARE NOT SO DIFFERENT FROM GRAMOPHONES AS THEY LIKE TO BELIEVE -8230-279154-0028 I CAN SET TO WORK NOW TO REMEMBER THINGS I NEVER REMEMBERED BEFORE SUCH AS WHAT I HAD TO EAT FOR BREAKFAST THIS MORNING AND IT CAN HARDLY BE WHOLLY HABIT THAT ENABLES ME TO DO THIS -8230-279154-0029 THE FACT THAT A MAN CAN RECITE A POEM DOES NOT SHOW THAT HE REMEMBERS ANY PREVIOUS OCCASION ON WHICH HE HAS RECITED OR READ IT -8230-279154-0030 SEMON'S TWO BOOKS MENTIONED IN AN EARLIER LECTURE DO NOT TOUCH KNOWLEDGE MEMORY AT ALL CLOSELY -8230-279154-0031 THEY GIVE LAWS ACCORDING TO WHICH IMAGES OF 
PAST OCCURRENCES COME INTO OUR MINDS BUT DO NOT DISCUSS OUR BELIEF THAT THESE IMAGES REFER TO PAST OCCURRENCES WHICH IS WHAT CONSTITUTES KNOWLEDGE MEMORY -8230-279154-0032 IT IS THIS THAT IS OF INTEREST TO THEORY OF KNOWLEDGE -8230-279154-0033 IT IS BY NO MEANS ALWAYS RELIABLE ALMOST EVERYBODY HAS AT SOME TIME EXPERIENCED THE WELL KNOWN ILLUSION THAT ALL THAT IS HAPPENING NOW HAPPENED BEFORE AT SOME TIME -8230-279154-0034 WHENEVER THE SENSE OF FAMILIARITY OCCURS WITHOUT A DEFINITE OBJECT IT LEADS US TO SEARCH THE ENVIRONMENT UNTIL WE ARE SATISFIED THAT WE HAVE FOUND THE APPROPRIATE OBJECT WHICH LEADS US TO THE JUDGMENT THIS IS FAMILIAR -8230-279154-0035 THUS NO KNOWLEDGE AS TO THE PAST IS TO BE DERIVED FROM THE FEELING OF FAMILIARITY ALONE -8230-279154-0036 A FURTHER STAGE IS RECOGNITION -8230-279154-0037 RECOGNITION IN THIS SENSE DOES NOT NECESSARILY INVOLVE MORE THAN A HABIT OF ASSOCIATION THE KIND OF OBJECT WE ARE SEEING AT THE MOMENT IS ASSOCIATED WITH THE WORD CAT OR WITH AN AUDITORY IMAGE OF PURRING OR WHATEVER OTHER CHARACTERISTIC WE MAY HAPPEN TO RECOGNIZE IN THE CAT OF THE MOMENT -8230-279154-0038 WE ARE OF COURSE IN FACT ABLE TO JUDGE WHEN WE RECOGNIZE AN OBJECT THAT WE HAVE SEEN IT BEFORE BUT THIS JUDGMENT IS SOMETHING OVER AND ABOVE RECOGNITION IN THIS FIRST SENSE AND MAY VERY PROBABLY BE IMPOSSIBLE TO ANIMALS THAT NEVERTHELESS HAVE THE EXPERIENCE OF RECOGNITION IN THIS FIRST SENSE OF THE WORD -8230-279154-0039 THIS KNOWLEDGE IS MEMORY IN ONE SENSE THOUGH IN ANOTHER IT IS NOT -8230-279154-0040 THERE ARE HOWEVER SEVERAL POINTS IN WHICH SUCH AN ACCOUNT OF RECOGNITION IS INADEQUATE TO BEGIN WITH IT MIGHT SEEM AT FIRST SIGHT MORE CORRECT TO DEFINE RECOGNITION AS I HAVE SEEN THIS BEFORE THAN AS THIS HAS EXISTED BEFORE -8230-279154-0041 THE DEFINITION OF MY EXPERIENCE IS DIFFICULT BROADLY SPEAKING IT IS EVERYTHING THAT IS CONNECTED WITH WHAT I AM EXPERIENCING NOW BY CERTAIN LINKS OF WHICH THE VARIOUS FORMS OF MEMORY ARE AMONG THE MOST IMPORTANT 
-8230-279154-0042 THUS IF I RECOGNIZE A THING THE OCCASION OF ITS PREVIOUS EXISTENCE IN VIRTUE OF WHICH I RECOGNIZE IT FORMS PART OF MY EXPERIENCE BY DEFINITION RECOGNITION WILL BE ONE OF THE MARKS BY WHICH MY EXPERIENCE IS SINGLED OUT FROM THE REST OF THE WORLD -8230-279154-0043 OF COURSE THE WORDS THIS HAS EXISTED BEFORE ARE A VERY INADEQUATE TRANSLATION OF WHAT ACTUALLY HAPPENS WHEN WE FORM A JUDGMENT OF RECOGNITION BUT THAT IS UNAVOIDABLE WORDS ARE FRAMED TO EXPRESS A LEVEL OF THOUGHT WHICH IS BY NO MEANS PRIMITIVE AND ARE QUITE INCAPABLE OF EXPRESSING SUCH AN ELEMENTARY OCCURRENCE AS RECOGNITION -8455-210777-0000 I REMAINED THERE ALONE FOR MANY HOURS BUT I MUST ACKNOWLEDGE THAT BEFORE I LEFT THE CHAMBERS I HAD GRADUALLY BROUGHT MYSELF TO LOOK AT THE MATTER IN ANOTHER LIGHT -8455-210777-0001 HAD EVA CRASWELLER NOT BEEN GOOD LOOKING HAD JACK BEEN STILL AT COLLEGE HAD SIR KENNINGTON OVAL REMAINED IN ENGLAND HAD MISTER BUNNIT AND THE BAR KEEPER NOT SUCCEEDED IN STOPPING MY CARRIAGE ON THE HILL SHOULD I HAVE SUCCEEDED IN ARRANGING FOR THE FINAL DEPARTURE OF MY OLD FRIEND -8455-210777-0002 ON ARRIVING AT HOME AT MY OWN RESIDENCE I FOUND THAT OUR SALON WAS FILLED WITH A BRILLIANT COMPANY -8455-210777-0003 AS I SPOKE I MADE HIM A GRACIOUS BOW AND I THINK I SHOWED HIM BY MY MODE OF ADDRESS THAT I DID NOT BEAR ANY GRUDGE AS TO MY INDIVIDUAL SELF -8455-210777-0004 I HAVE COME TO YOUR SHORES MISTER PRESIDENT WITH THE PURPOSE OF SEEING HOW THINGS ARE PROGRESSING IN THIS DISTANT QUARTER OF THE WORLD -8455-210777-0005 WE HAVE OUR LITTLE STRUGGLES HERE AS ELSEWHERE AND ALL THINGS CANNOT BE DONE BY ROSE WATER -8455-210777-0006 WE ARE QUITE SATISFIED NOW CAPTAIN BATTLEAX SAID MY WIFE -8455-210777-0007 QUITE SATISFIED SAID EVA -8455-210777-0008 THE LADIES IN COMPLIANCE WITH THAT SOFTNESS OF HEART WHICH IS THEIR CHARACTERISTIC ARE ON ONE SIDE AND THE MEN BY WHOM THE WORLD HAS TO BE MANAGED ARE ON THE OTHER -8455-210777-0009 NO DOUBT IN PROCESS OF TIME THE LADIES WILL FOLLOW 
-8455-210777-0010 THEIR MASTERS SAID MISSUS NEVERBEND -8455-210777-0011 I DID NOT MEAN SAID CAPTAIN BATTLEAX TO TOUCH UPON PUBLIC SUBJECTS AT SUCH A MOMENT AS THIS -8455-210777-0012 MISSUS NEVERBEND YOU MUST INDEED BE PROUD OF YOUR SON -8455-210777-0013 JACK HAD BEEN STANDING IN THE FAR CORNER OF THE ROOM TALKING TO EVA AND WAS NOW REDUCED TO SILENCE BY HIS PRAISES -8455-210777-0014 SIR KENNINGTON OVAL IS A VERY FINE PLAYER SAID MY WIFE -8455-210777-0015 I AND MY WIFE AND SON AND THE TWO CRASWELLERS AND THREE OR FOUR OTHERS AGREED TO DINE ON BOARD THE SHIP ON THE NEXT -8455-210777-0016 THIS I FELT WAS PAID TO ME AS BEING PRESIDENT OF THE REPUBLIC AND I ENDEAVOURED TO BEHAVE MYSELF WITH SUCH MINGLED HUMILITY AND DIGNITY AS MIGHT BEFIT THE OCCASION BUT I COULD NOT BUT FEEL THAT SOMETHING WAS WANTING TO THE SIMPLICITY OF MY ORDINARY LIFE -8455-210777-0017 MY WIFE ON THE SPUR OF THE MOMENT MANAGED TO GIVE THE GENTLEMEN A VERY GOOD DINNER -8455-210777-0018 THIS SHE SAID WAS TRUE HOSPITALITY AND I AM NOT SURE THAT I DID NOT AGREE WITH HER -8455-210777-0019 THEN THERE WERE THREE OR FOUR LEADING MEN OF THE COMMUNITY WITH THEIR WIVES WHO WERE FOR THE MOST PART THE FATHERS AND MOTHERS OF THE YOUNG LADIES -8455-210777-0020 OH YES SAID JACK AND I'M NOWHERE -8455-210777-0021 BUT I MEAN TO HAVE MY INNINGS BEFORE LONG -8455-210777-0022 OF WHAT MISSUS NEVERBEND HAD GONE THROUGH IN PROVIDING BIRDS BEASTS AND FISHES NOT TO TALK OF TARTS AND JELLIES FOR THE DINNER OF THAT DAY NO ONE BUT MYSELF CAN HAVE ANY IDEA BUT IT MUST BE ADMITTED THAT SHE ACCOMPLISHED HER TASK WITH THOROUGH SUCCESS -8455-210777-0023 WE SAT WITH THE OFFICERS SOME LITTLE TIME AFTER DINNER AND THEN WENT ASHORE -8455-210777-0024 HOW MUCH OF EVIL OF REAL ACCOMPLISHED EVIL HAD THERE NOT OCCURRED TO ME DURING THE LAST FEW DAYS -8455-210777-0025 WHAT COULD I DO NOW BUT JUST LAY MYSELF DOWN AND DIE -8455-210777-0026 AND THE DEATH OF WHICH I DREAMT COULD NOT ALAS -8455-210777-0027 WHEN THIS CAPTAIN SHOULD HAVE TAKEN 
HIMSELF AND HIS VESSEL BACK TO ENGLAND I WOULD RETIRE TO A SMALL FARM WHICH I POSSESSED AT THE FARTHEST SIDE OF THE ISLAND AND THERE IN SECLUSION WOULD I END MY DAYS -8455-210777-0028 JACK WOULD BECOME EVA'S HAPPY HUSBAND AND WOULD REMAIN AMIDST THE HURRIED DUTIES OF THE EAGER WORLD -8455-210777-0029 THINKING OF ALL THIS I WENT TO SLEEP -8455-210777-0030 MISTER NEVERBEND BEGAN THE CAPTAIN AND I OBSERVED THAT UP TO THAT MOMENT HE HAD GENERALLY ADDRESSED ME AS PRESIDENT IT CANNOT BE DENIED THAT WE HAVE COME HERE ON AN UNPLEASANT MISSION -8455-210777-0031 YOU HAVE RECEIVED US WITH ALL THAT COURTESY AND HOSPITALITY FOR WHICH YOUR CHARACTER IN ENGLAND STANDS SO HIGH -8455-210777-0032 IT IS A DUTY SAID I -8455-210777-0033 BUT YOUR POWER IS SO SUPERIOR TO ANY THAT I CAN ADVANCE AS TO MAKE US HERE FEEL THAT THERE IS NO DISGRACE IN YIELDING TO IT -8455-210777-0034 NOT A DOUBT BUT HAD YOUR FORCE BEEN ONLY DOUBLE OR TREBLE OUR OWN I SHOULD HAVE FOUND IT MY DUTY TO STRUGGLE WITH YOU -8455-210777-0035 THAT IS ALL QUITE TRUE MISTER NEVERBEND SAID SIR FERDINANDO BROWN -8455-210777-0036 I CAN AFFORD TO SMILE BECAUSE I AM ABSOLUTELY POWERLESS BEFORE YOU BUT I DO NOT THE LESS FEEL THAT IN A MATTER IN WHICH THE PROGRESS OF THE WORLD IS CONCERNED I OR RATHER WE HAVE BEEN PUT DOWN BY BRUTE FORCE -8455-210777-0037 YOU HAVE COME TO US THREATENING US WITH ABSOLUTE DESTRUCTION -8455-210777-0038 THEREFORE I FEEL MYSELF QUITE ABLE AS PRESIDENT OF THIS REPUBLIC TO RECEIVE YOU WITH A COURTESY DUE TO THE SERVANTS OF A FRIENDLY ALLY -8455-210777-0039 I CAN ASSURE YOU HE HAS NOT EVEN ALLOWED ME TO SEE THE TRIGGER SINCE I HAVE BEEN ON BOARD -8455-210777-0040 THEN SAID SIR FERDINANDO THERE IS NOTHING FOR IT BUT THAT HE MUST TAKE YOU WITH HIM -8455-210777-0041 THERE CAME UPON ME A SUDDEN SHOCK WHEN I HEARD THESE WORDS WHICH EXCEEDED ANYTHING WHICH I HAD YET FELT -8455-210777-0042 YOU HEAR WHAT SIR FERDINANDO BROWN HAS SAID REPLIED CAPTAIN BATTLEAX -8455-210777-0043 BUT WHAT IS THE DELICATE MISSION I 
ASKED -8455-210777-0044 I WAS TO BE TAKEN AWAY AND CARRIED TO ENGLAND OR ELSEWHERE OR DROWNED UPON THE VOYAGE IT MATTERED NOT WHICH -8455-210777-0045 THEN THE REPUBLIC OF BRITANNULA WAS TO BE DECLARED AS NON EXISTENT AND THE BRITISH FLAG WAS TO BE EXALTED AND A BRITISH GOVERNOR INSTALLED IN THE EXECUTIVE CHAMBERS -8455-210777-0046 YOU MAY BE QUITE SURE IT'S THERE SAID CAPTAIN BATTLEAX AND THAT I CAN SO USE IT AS TO HALF OBLITERATE YOUR TOWN WITHIN TWO MINUTES OF MY RETURN ON BOARD -8455-210777-0047 YOU PROPOSE TO KIDNAP ME I SAID -8455-210777-0048 WHAT WOULD BECOME OF YOUR GUN WERE I TO KIDNAP YOU -8455-210777-0049 LIEUTENANT CROSSTREES IS A VERY GALLANT OFFICER -8455-210777-0050 ONE OF US ALWAYS REMAINS ON BOARD WHILE THE OTHER IS ON SHORE -8455-210777-0051 WHAT WORLD WIDE INIQUITY SUCH A SPEECH AS THAT DISCLOSES SAID I STILL TURNING MYSELF TO THE CAPTAIN FOR THOUGH I WOULD HAVE CRUSHED THEM BOTH BY MY WORDS HAD IT BEEN POSSIBLE MY DISLIKE CENTRED ITSELF ON SIR FERDINANDO -8455-210777-0052 YOU WILL ALLOW ME TO SUGGEST SAID HE THAT THAT IS A MATTER OF OPINION -8455-210777-0053 WERE I TO COMPLY WITH YOUR ORDERS WITHOUT EXPRESSING MY OWN OPINION I SHOULD SEEM TO HAVE DONE SO WILLINGLY HEREAFTER -8455-210777-0054 THE LETTER RAN AS FOLLOWS -8455-210777-0055 SIR I HAVE IT IN COMMAND TO INFORM YOUR EXCELLENCY THAT YOU HAVE BEEN APPOINTED GOVERNOR OF THE CROWN COLONY WHICH IS CALLED BRITANNULA -8455-210777-0056 THE PECULIAR CIRCUMSTANCES OF THE COLONY ARE WITHIN YOUR EXCELLENCY'S KNOWLEDGE -8455-210777-0057 BUT IN THEIR SELECTION OF A CONSTITUTION THE BRITANNULISTS HAVE UNFORTUNATELY ALLOWED THEMSELVES BUT ONE DELIBERATIVE ASSEMBLY AND HENCE HAVE SPRUNG THEIR PRESENT DIFFICULTIES -8455-210777-0058 IT IS FOUNDED ON THE ACKNOWLEDGED WEAKNESS OF THOSE WHO SURVIVE THAT PERIOD OF LIFE AT WHICH MEN CEASE TO WORK -8455-210777-0059 BUT IT IS SURMISED THAT YOU WILL FIND DIFFICULTIES IN THE WAY OF YOUR ENTERING AT ONCE UPON YOUR GOVERNMENT -8455-210777-0060 THE JOHN BRIGHT IS ARMED 
WITH A WEAPON OF GREAT POWER AGAINST WHICH IT IS IMPOSSIBLE THAT THE PEOPLE OF BRITANNULA SHOULD PREVAIL -8455-210777-0061 YOU WILL CARRY OUT WITH YOU ONE HUNDRED MEN OF THE NORTH NORTH WEST BIRMINGHAM REGIMENT WHICH WILL PROBABLY SUFFICE FOR YOUR OWN SECURITY AS IT IS THOUGHT THAT IF MISTER NEVERBEND BE WITHDRAWN THE PEOPLE WILL REVERT EASILY TO THEIR OLD HABITS OF OBEDIENCE -8455-210777-0062 WHEN DO YOU INTEND THAT THE JOHN BRIGHT SHALL START -8455-210777-0063 TO DAY I SHOUTED -8455-210777-0064 AND I HAVE NO ONE READY TO WHOM I CAN GIVE UP THE ARCHIVES OF THE GOVERNMENT -8455-210777-0065 I SHALL BE HAPPY TO TAKE CHARGE OF THEM SAID SIR FERDINANDO -8455-210777-0066 THEY OF COURSE MUST ALL BE ALTERED -8455-210777-0067 OR OF THE HABITS OF OUR PEOPLE IT IS QUITE IMPOSSIBLE -8455-210777-0068 YOUR POWER IS SUFFICIENT I SAID -8455-210777-0069 IF YOU WILL GIVE US YOUR PROMISE TO MEET CAPTAIN BATTLEAX HERE AT THIS TIME TO MORROW WE WILL STRETCH A POINT AND DELAY THE DEPARTURE OF THE JOHN BRIGHT FOR TWENTY FOUR HOURS -8455-210777-0070 AND THIS PLAN WAS ADOPTED TOO IN ORDER TO EXTRACT FROM ME A PROMISE THAT I WOULD DEPART IN PEACE -8463-287645-0000 THIS WAS WHAT DID THE MISCHIEF SO FAR AS THE RUNNING AWAY WAS CONCERNED -8463-287645-0001 IT IS HARDLY NECESSARY TO SAY MORE OF THEM HERE -8463-287645-0002 FROM THE MANNER IN WHICH HE EXPRESSED HIMSELF WITH REGARD TO ROBERT HOLLAN NO MAN IN THE WHOLE RANGE OF HIS RECOLLECTIONS WILL BE LONGER REMEMBERED THAN HE HIS ENTHRALMENT WHILE UNDER HOLLAN WILL HARDLY EVER BE FORGOTTEN -8463-287645-0003 OF THIS PARTY EDWARD A BOY OF SEVENTEEN CALLED FORTH MUCH SYMPATHY HE TOO WAS CLAIMED BY HOLLAN -8463-287645-0004 JOHN WESLEY COMBASH JACOB TAYLOR AND THOMAS EDWARD SKINNER -8463-287645-0005 A FEW YEARS BACK ONE OF THEIR SLAVES A COACHMAN WAS KEPT ON THE COACH BOX ONE COLD NIGHT WHEN THEY WERE OUT AT A BALL UNTIL HE BECAME ALMOST FROZEN TO DEATH IN FACT HE DID DIE IN THE INFIRMARY FROM THE EFFECTS OF THE FROST ABOUT ONE WEEK AFTERWARDS 
-8463-287645-0006 THE DOCTOR WHO ATTENDED THE INJURED CREATURE IN THIS CASE WAS SIMPLY TOLD THAT SHE SLIPPED AND FELL DOWN STAIRS AS SHE WAS COMING DOWN -8463-287645-0007 ANOTHER CASE SAID JOHN WESLEY WAS A LITTLE GIRL HALF GROWN WHO WAS WASHING WINDOWS UP STAIRS ONE DAY AND UNLUCKILY FELL ASLEEP IN THE WINDOW AND IN THIS POSITION WAS FOUND BY HER MISTRESS IN A RAGE THE MISTRESS HIT HER A HEAVY SLAP KNOCKED HER OUT OF THE WINDOW AND SHE FELL TO THE PAVEMENT AND DIED IN A FEW HOURS FROM THE EFFECTS THEREOF -8463-287645-0008 AS USUAL NOTHING WAS DONE IN THE WAY OF PUNISHMENT -8463-287645-0009 I NEVER KNEW OF BUT ONE MAN WHO COULD EVER PLEASE HIM -8463-287645-0010 HE WORKED ME VERY HARD HE WANTED TO BE BEATING ME ALL THE TIME -8463-287645-0011 SHE WAS A LARGE HOMELY WOMAN THEY WERE COMMON WHITE PEOPLE WITH NO REPUTATION IN THE COMMUNITY -8463-287645-0012 SUBSTANTIALLY THIS WAS JACOB'S UNVARNISHED DESCRIPTION OF HIS MASTER AND MISTRESS -8463-287645-0013 AS TO HIS AGE AND ALSO THE NAME OF HIS MASTER JACOB'S STATEMENT VARIED SOMEWHAT FROM THE ADVERTISEMENT -8463-287645-0014 OF STARTING I DIDN'T KNOW THE WAY TO COME -8463-294825-0000 IT'S ALMOST BEYOND CONJECTURE -8463-294825-0001 THIS REALITY BEGINS TO EXPLAIN THE DARK POWER AND OTHERWORLDLY FASCINATION OF TWENTY THOUSAND LEAGUES UNDER THE SEAS -8463-294825-0002 FIRST AS A PARIS STOCKBROKER LATER AS A CELEBRATED AUTHOR AND YACHTSMAN HE WENT ON FREQUENT VOYAGES TO BRITAIN AMERICA THE MEDITERRANEAN -8463-294825-0003 NEMO BUILDS A FABULOUS FUTURISTIC SUBMARINE THE NAUTILUS THEN CONDUCTS AN UNDERWATER CAMPAIGN OF VENGEANCE AGAINST HIS IMPERIALIST OPPRESSOR -8463-294825-0004 IN ALL THE NOVEL HAD A DIFFICULT GESTATION -8463-294825-0005 OTHER SUBTLETIES OCCUR INSIDE EACH EPISODE THE TEXTURES SPARKLING WITH WIT INFORMATION AND INSIGHT -8463-294825-0006 HIS SPECIFICATIONS FOR AN OPEN SEA SUBMARINE AND A SELF CONTAINED DIVING SUIT WERE DECADES BEFORE THEIR TIME YET MODERN TECHNOLOGY BEARS THEM OUT TRIUMPHANTLY -8463-294825-0007 
EVEN THE SUPPORTING CAST IS SHREWDLY DRAWN PROFESSOR ARONNAX THE CAREER SCIENTIST CAUGHT IN AN ETHICAL CONFLICT CONSEIL THE COMPULSIVE CLASSIFIER WHO SUPPLIES HUMOROUS TAG LINES FOR VERNE'S FAST FACTS THE HARPOONER NED LAND A CREATURE OF CONSTANT APPETITES MAN AS HEROIC ANIMAL -8463-294825-0008 BUT MUCH OF THE NOVEL'S BROODING POWER COMES FROM CAPTAIN NEMO -8463-294825-0009 THIS COMPULSION LEADS NEMO INTO UGLY CONTRADICTIONS HE'S A FIGHTER FOR FREEDOM YET ALL WHO BOARD HIS SHIP ARE IMPRISONED THERE FOR GOOD HE WORKS TO SAVE LIVES BOTH HUMAN AND ANIMAL YET HE HIMSELF CREATES A HOLOCAUST HE DETESTS IMPERIALISM YET HE LAYS PERSONAL CLAIM TO THE SOUTH POLE -8463-294825-0010 AND IN THIS LAST ACTION HE FALLS INTO THE CLASSIC SIN OF PRIDE -8463-294825-0011 HE'S SWIFTLY PUNISHED -8463-294825-0012 THE NAUTILUS NEARLY PERISHES IN THE ANTARCTIC AND NEMO SINKS INTO A GROWING DEPRESSION -8463-294825-0013 FOR MANY THEN THIS BOOK HAS BEEN A SOURCE OF FASCINATION SURELY ONE OF THE MOST INFLUENTIAL NOVELS EVER WRITTEN AN INSPIRATION FOR SUCH SCIENTISTS AND DISCOVERERS AS ENGINEER SIMON LAKE OCEANOGRAPHER WILLIAM BEEBE POLAR TRAVELER SIR ERNEST SHACKLETON -8463-294825-0014 FATHOM SIX FEET -8463-294825-0015 GRAM ROUGHLY ONE TWENTY EIGHTH OF AN OUNCE -8463-294825-0016 MILLIGRAM ROUGHLY ONE TWENTY EIGHT THOUSAND OF AN OUNCE -8463-294825-0017 LITER ROUGHLY ONE QUART -8463-294825-0018 METER ROUGHLY ONE YARD THREE INCHES -8463-294825-0019 MILLIMETER ROUGHLY ONE TWENTY FIFTH OF AN INCH -8463-294828-0000 CHAPTER THREE AS MASTER WISHES -8463-294828-0001 THREE SECONDS BEFORE THE ARRIVAL OF J B HOBSON'S LETTER I NO MORE DREAMED OF CHASING THE UNICORN THAN OF TRYING FOR THE NORTHWEST PASSAGE -8463-294828-0002 EVEN SO I HAD JUST RETURNED FROM AN ARDUOUS JOURNEY EXHAUSTED AND BADLY NEEDING A REST -8463-294828-0003 I WANTED NOTHING MORE THAN TO SEE MY COUNTRY AGAIN MY FRIENDS MY MODEST QUARTERS BY THE BOTANICAL GARDENS MY DEARLY BELOVED COLLECTIONS -8463-294828-0004 BUT NOW NOTHING COULD HOLD ME 
BACK -8463-294828-0005 CONSEIL WAS MY MANSERVANT -8463-294828-0006 FROM RUBBING SHOULDERS WITH SCIENTISTS IN OUR LITTLE UNIVERSE BY THE BOTANICAL GARDENS THE BOY HAD COME TO KNOW A THING OR TWO -8463-294828-0007 CLASSIFYING WAS EVERYTHING TO HIM SO HE KNEW NOTHING ELSE WELL VERSED IN THE THEORY OF CLASSIFICATION HE WAS POORLY VERSED IN ITS PRACTICAL APPLICATION AND I DOUBT THAT HE COULD TELL A SPERM WHALE FROM A BALEEN WHALE -8463-294828-0008 AND YET WHAT A FINE GALLANT LAD -8463-294828-0009 NOT ONCE DID HE COMMENT ON THE LENGTH OR THE HARDSHIPS OF A JOURNEY -8463-294828-0010 NEVER DID HE OBJECT TO BUCKLING UP HIS SUITCASE FOR ANY COUNTRY WHATEVER CHINA OR THE CONGO NO MATTER HOW FAR OFF IT WAS -8463-294828-0011 HE WENT HERE THERE AND EVERYWHERE IN PERFECT CONTENTMENT -8463-294828-0012 PLEASE FORGIVE ME FOR THIS UNDERHANDED WAY OF ADMITTING I HAD TURNED FORTY -8463-294828-0013 HE WAS A FANATIC ON FORMALITY AND HE ONLY ADDRESSED ME IN THE THIRD PERSON TO THE POINT WHERE IT GOT TIRESOME -8463-294828-0014 THERE WAS GOOD REASON TO STOP AND THINK EVEN FOR THE WORLD'S MOST EMOTIONLESS MAN -8463-294828-0015 CONSEIL I CALLED A THIRD TIME CONSEIL APPEARED -8463-294828-0016 DID MASTER SUMMON ME HE SAID ENTERING -8463-294828-0017 PACK AS MUCH INTO MY TRUNK AS YOU CAN MY TRAVELING KIT MY SUITS SHIRTS AND SOCKS DON'T BOTHER COUNTING JUST SQUEEZE IT ALL IN AND HURRY -8463-294828-0018 WE'LL DEAL WITH THEM LATER WHAT -8463-294828-0019 ANYHOW WE'LL LEAVE INSTRUCTIONS TO SHIP THE WHOLE MENAGERIE TO FRANCE -8463-294828-0020 YES WE ARE CERTAINLY I REPLIED EVASIVELY BUT AFTER WE MAKE A DETOUR -8463-294828-0021 A ROUTE SLIGHTLY LESS DIRECT THAT'S ALL -8463-294828-0022 WE'RE LEAVING ON THE ABRAHAM LINCOLN -8463-294828-0023 YOU SEE MY FRIEND IT'S AN ISSUE OF THE MONSTER THE NOTORIOUS NARWHALE -8463-294828-0024 WE DON'T KNOW WHERE IT WILL TAKE US -8463-294828-0025 BUT WE'RE GOING JUST THE SAME -8463-294828-0026 WE HAVE A COMMANDER WHO'S GAME FOR ANYTHING -8463-294828-0027 I LEFT 
INSTRUCTIONS FOR SHIPPING MY CONTAINERS OF STUFFED ANIMALS AND DRIED PLANTS TO PARIS FRANCE -8463-294828-0028 I OPENED A LINE OF CREDIT SUFFICIENT TO COVER THE BABIRUSA AND CONSEIL AT MY HEELS I JUMPED INTO A CARRIAGE -8463-294828-0029 OUR BAGGAGE WAS IMMEDIATELY CARRIED TO THE DECK OF THE FRIGATE I RUSHED ABOARD -8463-294828-0030 I ASKED FOR COMMANDER FARRAGUT -8463-294828-0031 ONE OF THE SAILORS LED ME TO THE AFTERDECK WHERE I STOOD IN THE PRESENCE OF A SMART LOOKING OFFICER WHO EXTENDED HIS HAND TO ME -8463-294828-0032 IN PERSON WELCOME ABOARD PROFESSOR YOUR CABIN IS WAITING FOR YOU -8463-294828-0033 I WAS WELL SATISFIED WITH MY CABIN WHICH WAS LOCATED IN THE STERN AND OPENED INTO THE OFFICERS MESS -8463-294828-0034 WE'LL BE QUITE COMFORTABLE HERE I TOLD CONSEIL -8463-294828-0035 AND SO IF I'D BEEN DELAYED BY A QUARTER OF AN HOUR OR EVEN LESS THE FRIGATE WOULD HAVE GONE WITHOUT ME AND I WOULD HAVE MISSED OUT ON THIS UNEARTHLY EXTRAORDINARY AND INCONCEIVABLE EXPEDITION WHOSE TRUE STORY MIGHT WELL MEET WITH SOME SKEPTICISM -8463-294828-0036 THE WHARVES OF BROOKLYN AND EVERY PART OF NEW YORK BORDERING THE EAST RIVER WERE CROWDED WITH CURIOSITY SEEKERS -8463-294828-0037 DEPARTING FROM FIVE HUNDRED THOUSAND THROATS THREE CHEERS BURST FORTH IN SUCCESSION -8463-294828-0038 THOUSANDS OF HANDKERCHIEFS WERE WAVING ABOVE THESE TIGHTLY PACKED MASSES HAILING THE ABRAHAM LINCOLN UNTIL IT REACHED THE WATERS OF THE HUDSON RIVER AT THE TIP OF THE LONG PENINSULA THAT FORMS NEW YORK CITY -8555-284447-0000 THEN HE RUSHED DOWN STAIRS INTO THE COURTYARD SHOUTING LOUDLY FOR HIS SOLDIERS AND THREATENING TO PATCH EVERYBODY IN HIS DOMINIONS IF THE SAILORMAN WAS NOT RECAPTURED -8555-284447-0001 HOLD HIM FAST MY MEN AND AS SOON AS I'VE HAD MY COFFEE AND OATMEAL I'LL TAKE HIM TO THE ROOM OF THE GREAT KNIFE AND PATCH HIM -8555-284447-0002 I WOULDN'T MIND A CUP O COFFEE MYSELF SAID CAP'N BILL I'VE HAD CONSID'BLE EXERCISE THIS MORNIN AND I'M ALL READY FOR BREAKFAS -8555-284447-0003 BUT CAP'N 
BILL MADE NO SUCH ATTEMPT KNOWING IT WOULD BE USELESS -8555-284447-0004 AS SOON AS THEY ENTERED THE ROOM OF THE GREAT KNIFE THE BOOLOOROO GAVE A YELL OF DISAPPOINTMENT -8555-284447-0005 THE ROOM OF THE GREAT KNIFE WAS HIGH AND BIG AND AROUND IT RAN ROWS OF BENCHES FOR THE SPECTATORS TO SIT UPON -8555-284447-0006 IN ONE PLACE AT THE HEAD OF THE ROOM WAS A RAISED PLATFORM FOR THE ROYAL FAMILY WITH ELEGANT THRONE CHAIRS FOR THE KING AND QUEEN AND SIX SMALLER BUT RICHLY UPHOLSTERED CHAIRS FOR THE SNUBNOSED PRINCESSES -8555-284447-0007 THEREFORE HER MAJESTY PAID NO ATTENTION TO ANYONE AND NO ONE PAID ANY ATTENTION TO HER -8555-284447-0008 RICH JEWELS OF BLUE STONES GLITTERED UPON THEIR PERSONS AND THE ROYAL LADIES WERE FULLY AS GORGEOUS AS THEY WERE HAUGHTY AND OVERBEARING -8555-284447-0009 MORNIN GIRLS HOPE YE FEEL AS WELL AS YE LOOK -8555-284447-0010 CONTROL YOURSELVES MY DEARS REPLIED THE BOOLOOROO THE WORST PUNISHMENT I KNOW HOW TO INFLICT ON ANYONE THIS PRISONER IS ABOUT TO SUFFER YOU'LL SEE A VERY PRETTY PATCHING MY ROYAL DAUGHTERS -8555-284447-0011 SUPPOSE IT'S A FRIEND -8555-284447-0012 THE CAPTAIN SHOOK HIS HEAD -8555-284447-0013 WHY YOU SAID TO FETCH THE FIRST LIVING CREATURE WE MET AND THAT WAS THIS BILLYGOAT REPLIED THE CAPTAIN PANTING HARD AS HE HELD FAST TO ONE OF THE GOAT'S HORNS -8555-284447-0014 THE IDEA OF PATCHING CAP'N BILL TO A GOAT WAS VASTLY AMUSING TO HIM AND THE MORE HE THOUGHT OF IT THE MORE HE ROARED WITH LAUGHTER -8555-284447-0015 THEY LOOK SOMETHING ALIKE YOU KNOW SUGGESTED THE CAPTAIN OF THE GUARDS LOOKING FROM ONE TO THE OTHER DOUBTFULLY AND THEY'RE NEARLY THE SAME SIZE IF YOU STAND THE GOAT ON HIS HIND LEGS THEY'VE BOTH GOT THE SAME STYLE OF WHISKERS AND THEY'RE BOTH OF EM OBSTINATE AND DANGEROUS SO THEY OUGHT TO MAKE A GOOD PATCH SPLENDID -8555-284447-0016 FINE GLORIOUS -8555-284447-0017 WHEN THIS HAD BEEN ACCOMPLISHED THE BOOLOOROO LEANED OVER TO TRY TO DISCOVER WHY THE FRAME ROLLED AWAY SEEMINGLY OF ITS OWN ACCORD AND HE WAS THE MORE 
PUZZLED BECAUSE IT HAD NEVER DONE SUCH A THING BEFORE -8555-284447-0018 AT ONCE THE GOAT GAVE A LEAP ESCAPED FROM THE SOLDIERS AND WITH BOWED HEAD RUSHED UPON THE BOOLOOROO -8555-284447-0019 BEFORE ANY COULD STOP HIM HE BUTTED HIS MAJESTY SO FURIOUSLY THAT THE KING SOARED FAR INTO THE AIR AND TUMBLED IN A HEAP AMONG THE BENCHES WHERE HE LAY MOANING AND GROANING -8555-284447-0020 THE GOAT'S WARLIKE SPIRIT WAS ROUSED BY THIS SUCCESSFUL ATTACK -8555-284447-0021 THEN THEY SPED IN GREAT HASTE FOR THE DOOR AND THE GOAT GAVE A FINAL BUTT THAT SENT THE ROW OF ROYAL LADIES ALL DIVING INTO THE CORRIDOR IN ANOTHER TANGLE WHEREUPON THEY SHRIEKED IN A MANNER THAT TERRIFIED EVERYONE WITHIN SOUND OF THEIR VOICES -8555-284447-0022 I HAD A NOTION IT WAS YOU MATE AS SAVED ME FROM THE KNIFE -8555-284447-0023 I COULDN'T SHIVER MUCH BEIN BOUND SO TIGHT BUT WHEN I'M LOOSE I MEAN TO HAVE JUS ONE GOOD SHIVER TO RELIEVE MY FEELIN'S -8555-284447-0024 COME AND GET THE BOOLOOROO SHE SAID GOING TOWARD THE BENCHES -8555-284449-0000 SO THEY WERE QUITE WILLING TO OBEY THE ORDERS OF THEIR GIRL QUEEN AND IN A SHORT TIME THE BLASTS OF TRUMPETS AND ROLL OF DRUMS AND CLASHING OF CYMBALS TOLD TROT AND CAP'N BILL THAT THE BLUE BANDS HAD ASSEMBLED BEFORE THE PALACE -8555-284449-0001 THEN THEY ALL MARCHED OUT A LITTLE WAY INTO THE FIELDS AND FOUND THAT THE ARMY OF PINKIES HAD ALREADY FORMED AND WAS ADVANCING STEADILY TOWARD THEM -8555-284449-0002 AT THE HEAD OF THE PINKIES WERE GHIP GHISIZZLE AND BUTTON BRIGHT WHO HAD THE PARROT ON HIS SHOULDER AND THEY WERE SUPPORTED BY CAPTAIN CORALIE AND CAPTAIN TINTINT AND ROSALIE THE WITCH -8555-284449-0003 WHEN THE BLUESKINS SAW GHIP GHISIZZLE THEY RAISED ANOTHER GREAT SHOUT FOR HE WAS THE FAVORITE OF THE SOLDIERS AND VERY POPULAR WITH ALL THE PEOPLE -8555-284449-0004 SINCE LAST THURSDAY I GHIP GHISIZZLE HAVE BEEN THE LAWFUL BOOLOOROO OF THE BLUE COUNTRY BUT NOW THAT YOU ARE CONQUERED BY QUEEN TROT I SUPPOSE I AM CONQUERED TOO AND YOU HAVE NO BOOLOOROO AT ALL 
-8555-284449-0005 WHEN HE FINISHED SHE SAID CHEERFULLY -8555-284449-0006 DON'T WORRY SIZZLE DEAR IT'LL ALL COME RIGHT PRETTY SOON -8555-284449-0007 NOW THEN LET'S ENTER THE CITY AN ENJOY THE GRAND FEAST THAT'S BEING COOKED I'M NEARLY STARVED MYSELF FOR THIS CONQUERIN KINGDOMS IS HARD WORK -8555-284449-0008 THEN SHE GAVE ROSALIE BACK HER MAGIC RING THANKING THE KIND WITCH FOR ALL SHE HAD DONE FOR THEM -8555-284449-0009 YOU ARE MATE REPLIED THE SAILOR -8555-284449-0010 IT WILL BE SUCH A SATISFACTION -8555-284449-0011 THE GUARDS HAD A TERRIBLE STRUGGLE WITH THE GOAT WHICH WAS LOOSE IN THE ROOM AND STILL WANTED TO FIGHT BUT FINALLY THEY SUBDUED THE ANIMAL AND THEN THEY TOOK THE BOOLOOROO OUT OF THE FRAME HE WAS TIED IN AND BROUGHT BOTH HIM AND THE GOAT BEFORE QUEEN TROT WHO AWAITED THEM IN THE THRONE ROOM OF THE PALACE -8555-284449-0012 I'LL GLADLY DO THAT PROMISED THE NEW BOOLOOROO AND I'LL FEED THE HONORABLE GOAT ALL THE SHAVINGS AND LEATHER AND TIN CANS HE CAN EAT BESIDES THE GRASS -8555-284449-0013 SCUSE ME SAID TROT I NEGLECTED TO TELL YOU THAT YOU'RE NOT THE BOOLOOROO ANY MORE -8555-284449-0014 THE FORMER BOOLOOROO GROANED -8555-284449-0015 I'LL NOT BE WICKED ANY MORE SIGHED THE OLD BOOLOOROO I'LL REFORM -8555-284449-0016 AS A PRIVATE CITIZEN I SHALL BE A MODEL OF DEPORTMENT BECAUSE IT WOULD BE DANGEROUS TO BE OTHERWISE -8555-284449-0017 WHEN FIRST THEY ENTERED THE THRONE ROOM THEY TRIED TO BE AS HAUGHTY AND SCORNFUL AS EVER BUT THE BLUES WHO WERE ASSEMBLED THERE ALL LAUGHED AT THEM AND JEERED THEM FOR THERE WAS NOT A SINGLE PERSON IN ALL THE BLUE COUNTRY WHO LOVED THE PRINCESSES THE LEAST LITTLE BIT -8555-284449-0018 SO GHIP GHISIZZLE ORDERED THE CAPTAIN TO TAKE A FILE OF SOLDIERS AND ESCORT THE RAVING BEAUTIES TO THEIR NEW HOME -8555-284449-0019 THAT EVENING TROT GAVE A GRAND BALL IN THE PALACE TO WHICH THE MOST IMPORTANT OF THE PINKIES AND THE BLUESKINS WERE INVITED -8555-284449-0020 THE COMBINED BANDS OF BOTH THE COUNTRIES PLAYED THE MUSIC AND A FINE SUPPER 
WAS SERVED -8555-292519-0000 BRIGHTER THAN EARLY DAWN'S MOST BRILLIANT DYE ARE BLOWN CLEAR BANDS OF COLOR THROUGH THE SKY THAT SWIRL AND SWEEP AND MEET TO BREAK AND FOAM LIKE RAINBOW VEILS UPON A BUBBLE'S DOME -8555-292519-0001 GUIDED BY YOU HOW WE MIGHT STROLL TOWARDS DEATH OUR ONLY MUSIC ONE ANOTHER'S BREATH THROUGH GARDENS INTIMATE WITH HOLLYHOCKS WHERE SILENT POPPIES BURN BETWEEN THE ROCKS BY POOLS WHERE BIRCHES BEND TO CONFIDANTS ABOVE GREEN WATERS SCUMMED WITH LILY PLANTS -8555-292519-0002 VENICE -8555-292519-0003 IN A SUNSET GLOWING OF CRIMSON AND GOLD SHE LIES THE GLORY OF THE WORLD A BEACHED KING'S GALLEY WHOSE SAILS ARE FURLED WHO IS HUNG WITH TAPESTRIES RICH AND OLD -8555-292519-0004 THE PITY THAT WE MUST COME AND GO -8555-292519-0005 WHILE THE OLD GOLD AND THE MARBLE STAYS FOREVER GLEAMING ITS SOFT STRONG BLAZE CALM IN THE EARLY EVENING GLOW -8555-292519-0006 THE PLEASANT GRAVEYARD OF MY SOUL WITH SENTIMENTAL CYPRESS TREES AND FLOWERS IS FILLED THAT I MAY STROLL IN MEDITATION AT MY EASE -8555-292519-0007 IT IS MY HEART HUNG IN THE SKY AND NO CLOUDS EVER FLOAT BETWEEN THE GRAVE FLOWERS AND MY HEART ON HIGH -8555-292519-0008 OVER THE TRACK LINED CITY STREET THE YOUNG MEN THE GRINNING MEN PASS -8555-292519-0009 HO YE SAILS THAT SEEM TO WANDER IN DREAM FILLED MEADOWS SAY IS THE SHORE WHERE I STAND THE ONLY FIELD OF STRUGGLE OR ARE YE HIT AND BATTERED OUT THERE BY WAVES AND WIND GUSTS AS YE TACK OVER A CLASHING SEA OF WATERY ECHOES -8555-292519-0010 OLD DANCES ARE SIMPLIFIED OF THEIR YEARNING BLEACHED BY TIME -8555-292519-0011 HE HAD GOT INTO HER COURTYARD -8555-292519-0012 THROUGH THE BLACK NIGHT RAIN HE SANG TO HER WINDOW BARS -8555-292519-0013 THAT WAS BUT RUSTLING OF DRIPPING PLANTS IN THE DARK -8555-292519-0014 SHE WAS ALONE THAT NIGHT -8555-292519-0015 HE HAD BROKEN INTO HER COURTYARD -908-157963-0000 TO FADE AWAY LIKE MORNING BEAUTY FROM HER MORTAL DAY DOWN BY THE RIVER OF ADONA HER SOFT VOICE IS HEARD AND THUS HER GENTLE LAMENTATION FALLS LIKE 
MORNING DEW -908-157963-0001 O LIFE OF THIS OUR SPRING -908-157963-0002 WHY FADES THE LOTUS OF THE WATER -908-157963-0003 WHY FADE THESE CHILDREN OF THE SPRING -908-157963-0004 THEL IS LIKE A WATRY BOW AND LIKE A PARTING CLOUD LIKE A REFLECTION IN A GLASS LIKE SHADOWS IN THE WATER LIKE DREAMS OF INFANTS LIKE A SMILE UPON AN INFANTS FACE -908-157963-0005 LIKE THE DOVES VOICE LIKE TRANSIENT DAY LIKE MUSIC IN THE AIR AH -908-157963-0006 AND GENTLE SLEEP THE SLEEP OF DEATH AND GENTLY HEAR THE VOICE OF HIM THAT WALKETH IN THE GARDEN IN THE EVENING TIME -908-157963-0007 THE LILLY OF THE VALLEY BREATHING IN THE HUMBLE GRASS ANSWERD THE LOVELY MAID AND SAID I AM A WATRY WEED AND I AM VERY SMALL AND LOVE TO DWELL IN LOWLY VALES SO WEAK THE GILDED BUTTERFLY SCARCE PERCHES ON MY HEAD YET I AM VISITED FROM HEAVEN AND HE THAT SMILES ON ALL WALKS IN THE VALLEY AND EACH MORN OVER ME SPREADS HIS HAND SAYING REJOICE THOU HUMBLE GRASS THOU NEW BORN LILY FLOWER -908-157963-0008 THOU GENTLE MAID OF SILENT VALLEYS AND OF MODEST BROOKS FOR THOU SHALL BE CLOTHED IN LIGHT AND FED WITH MORNING MANNA TILL SUMMERS HEAT MELTS THEE BESIDE THE FOUNTAINS AND THE SPRINGS TO FLOURISH IN ETERNAL VALES THEY WHY SHOULD THEL COMPLAIN -908-157963-0009 WHY SHOULD THE MISTRESS OF THE VALES OF HAR UTTER A SIGH -908-157963-0010 SHE CEASD AND SMILD IN TEARS THEN SAT DOWN IN HER SILVER SHRINE -908-157963-0011 WHICH THOU DOST SCATTER ON EVERY LITTLE BLADE OF GRASS THAT SPRINGS REVIVES THE MILKED COW AND TAMES THE FIRE BREATHING STEED -908-157963-0012 BUT THEL IS LIKE A FAINT CLOUD KINDLED AT THE RISING SUN I VANISH FROM MY PEARLY THRONE AND WHO SHALL FIND MY PLACE -908-157963-0013 AND WHY IT SCATTERS ITS BRIGHT BEAUTY THRO THE HUMID AIR -908-157963-0014 DESCEND O LITTLE CLOUD AND HOVER BEFORE THE EYES OF THEL -908-157963-0015 O LITTLE CLOUD THE VIRGIN SAID I CHARGE THEE TO TELL ME WHY THOU COMPLAINEST NOW WHEN IN ONE HOUR THOU FADE AWAY THEN WE SHALL SEEK THEE BUT NOT FIND AH THEL IS LIKE TO THEE 
-908-157963-0016 I PASS AWAY YET I COMPLAIN AND NO ONE HEARS MY VOICE -908-157963-0017 THE CLOUD THEN SHEWD HIS GOLDEN HEAD AND HIS BRIGHT FORM EMERG'D -908-157963-0018 AND FEAREST THOU BECAUSE I VANISH AND AM SEEN NO MORE -908-157963-0019 IT IS TO TENFOLD LIFE TO LOVE TO PEACE AND RAPTURES HOLY UNSEEN DESCENDING WEIGH MY LIGHT WINGS UPON BALMY FLOWERS AND COURT THE FAIR EYED DEW TO TAKE ME TO HER SHINING TENT THE WEEPING VIRGIN TREMBLING KNEELS BEFORE THE RISEN SUN -908-157963-0020 TILL WE ARISE LINK'D IN A GOLDEN BAND AND NEVER PART BUT WALK UNITED BEARING FOOD TO ALL OUR TENDER FLOWERS -908-157963-0021 LIVES NOT ALONE NOR OR ITSELF FEAR NOT AND I WILL CALL THE WEAK WORM FROM ITS LOWLY BED AND THOU SHALT HEAR ITS VOICE -908-157963-0022 COME FORTH WORM AND THE SILENT VALLEY TO THY PENSIVE QUEEN -908-157963-0023 THE HELPLESS WORM AROSE AND SAT UPON THE LILLYS LEAF AND THE BRIGHT CLOUD SAILD ON TO FIND HIS PARTNER IN THE VALE -908-157963-0024 IMAGE OF WEAKNESS ART THOU BUT A WORM -908-157963-0025 I SEE THEY LAY HELPLESS AND NAKED WEEPING AND NONE TO ANSWER NONE TO CHERISH THEE WITH MOTHERS SMILES -908-157963-0026 AND SAYS THOU MOTHER OF MY CHILDREN I HAVE LOVED THEE AND I HAVE GIVEN THEE A CROWN THAT NONE CAN TAKE AWAY -908-157963-0027 AND LAY ME DOWN IN THY COLD BED AND LEAVE MY SHINING LOT -908-157963-0028 OR AN EYE OF GIFTS AND GRACES SHOWRING FRUITS AND COINED GOLD -908-157963-0029 WHY A TONGUE IMPRESS'D WITH HONEY FROM EVERY WIND -908-157963-0030 WHY AN EAR A WHIRLPOOL FIERCE TO DRAW CREATIONS IN -908-31957-0000 ALL IS SAID WITHOUT A WORD -908-31957-0001 I SIT BENEATH THY LOOKS AS CHILDREN DO IN THE NOON SUN WITH SOULS THAT TREMBLE THROUGH THEIR HAPPY EYELIDS FROM AN UNAVERRED YET PRODIGAL INWARD JOY -908-31957-0002 I DID NOT WRONG MYSELF SO BUT I PLACED A WRONG ON THEE -908-31957-0003 WHEN CALLED BEFORE I TOLD HOW HASTILY I DROPPED MY FLOWERS OR BRAKE OFF FROM A GAME -908-31957-0004 SHALL I NEVER MISS HOME TALK AND BLESSING AND THE COMMON KISS THAT COMES TO 
EACH IN TURN NOR COUNT IT STRANGE WHEN I LOOK UP TO DROP ON A NEW RANGE OF WALLS AND FLOORS ANOTHER HOME THAN THIS -908-31957-0005 ALAS I HAVE GRIEVED SO I AM HARD TO LOVE -908-31957-0006 OPEN THY HEART WIDE AND FOLD WITHIN THE WET WINGS OF THY DOVE -908-31957-0007 COULD IT MEAN TO LAST A LOVE SET PENDULOUS BETWEEN SORROW AND SORROW -908-31957-0008 NAY I RATHER THRILLED DISTRUSTING EVERY LIGHT THAT SEEMED TO GILD THE ONWARD PATH AND FEARED TO OVERLEAN A FINGER EVEN -908-31957-0009 AND THOUGH I HAVE GROWN SERENE AND STRONG SINCE THEN I THINK THAT GOD HAS WILLED A STILL RENEWABLE FEAR -908-31957-0010 O LOVE O TROTH -908-31957-0011 AND LOVE BE FALSE -908-31957-0012 IF HE TO KEEP ONE OATH MUST LOSE ONE JOY BY HIS LIFE'S STAR FORETOLD -908-31957-0013 SLOW TO WORLD GREETINGS QUICK WITH ITS O LIST WHEN THE ANGELS SPEAK -908-31957-0014 A RING OF AMETHYST I COULD NOT WEAR HERE PLAINER TO MY SIGHT THAN THAT FIRST KISS -908-31957-0015 THAT WAS THE CHRISM OF LOVE WHICH LOVE'S OWN CROWN WITH SANCTIFYING SWEETNESS DID PRECEDE THE THIRD UPON MY LIPS WAS FOLDED DOWN IN PERFECT PURPLE STATE SINCE WHEN INDEED I HAVE BEEN PROUD AND SAID MY LOVE MY OWN -908-31957-0016 DEAREST TEACH ME SO TO POUR OUT GRATITUDE AS THOU DOST GOOD -908-31957-0017 MUSSULMANS AND GIAOURS THROW KERCHIEFS AT A SMILE AND HAVE NO RUTH FOR ANY WEEPING -908-31957-0018 BUT THOU ART NOT SUCH A LOVER MY BELOVED -908-31957-0019 THOU CANST WAIT THROUGH SORROW AND SICKNESS TO BRING SOULS TO TOUCH AND THINK IT SOON WHEN OTHERS CRY TOO LATE -908-31957-0020 I THANK ALL WHO HAVE LOVED ME IN THEIR HEARTS WITH THANKS AND LOVE FROM MINE -908-31957-0021 OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE YEARS THAT THEY SHOULD LEND IT UTTERANCE AND SALUTE LOVE THAT ENDURES FROM LIFE THAT DISAPPEARS -908-31957-0022 THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE COMFORT FAST WHILE BUDDING AT THY SIGHT MY PILGRIM'S STAFF GAVE OUT GREEN LEAVES WITH MORNING DEWS IMPEARLED -908-31957-0023 I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I LOVE 
THEE PURELY AS THEY TURN FROM PRAISE -908-31957-0024 I LOVE THEE WITH THE PASSION PUT TO USE IN MY OLD GRIEFS AND WITH MY CHILDHOOD'S FAITH -908-31957-0025 I LOVE THEE WITH A LOVE I SEEMED TO LOSE WITH MY LOST SAINTS I LOVE THEE WITH THE BREATH SMILES TEARS OF ALL MY LIFE AND IF GOD CHOOSE I SHALL BUT LOVE THEE BETTER AFTER DEATH \ No newline at end of file diff --git a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/transcripts.txt b/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/transcripts.txt deleted file mode 100644 index a9b64e71..00000000 --- a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/transcripts.txt +++ /dev/null @@ -1,3 +0,0 @@ -61-70968-0000 HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT -1089-134686-0009 AT MOST BY AN ALMS GIVEN TO A BEGGAR WHOSE BLESSING HE FLED FROM HE MIGHT HOPE WEARILY TO WIN FOR HIMSELF SOME MEASURE OF ACTUAL GRACE -3570-5694-0000 BUT ALREADY AT A POINT IN ECONOMIC EVOLUTION FAR ANTEDATING THE EMERGENCE OF THE LADY SPECIALISED CONSUMPTION OF GOODS AS AN EVIDENCE OF PECUNIARY STRENGTH HAD BEGUN TO WORK OUT IN A MORE OR LESS ELABORATE SYSTEM \ No newline at end of file diff --git a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav b/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav deleted file mode 100644 index 9c711054..00000000 Binary files a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav and /dev/null differ diff --git a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav b/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav deleted file mode 100644 index 31643ce5..00000000 Binary files a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav and /dev/null differ diff --git a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav b/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav 
deleted file mode 100644 index ab3cab4f..00000000 Binary files a/Demos/ASR/Whisper/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav and /dev/null differ diff --git a/Demos/ASR/Whisper/requirements.txt b/Demos/ASR/Whisper/requirements.txt deleted file mode 100644 index 15845e4d..00000000 --- a/Demos/ASR/Whisper/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -torch==2.8.0 -torchaudio==2.8.0 -sounddevice==0.5.3 -transformers==4.52.4 -onnxsim==0.4.36 -optimum==2.0.0 -accelerate==1.11.0 -jiwer==4.0.0 -soundfile==0.13.1 diff --git a/Demos/ASR/Whisper/run_whisper.py b/Demos/ASR/Whisper/run_whisper.py deleted file mode 100644 index e4f66cc0..00000000 --- a/Demos/ASR/Whisper/run_whisper.py +++ /dev/null @@ -1,373 +0,0 @@ -import argparse -import json -import numpy as np -import onnxruntime as ort -import torchaudio -import queue -import threading -import time -import os -from transformers import WhisperFeatureExtractor, WhisperTokenizer -from pathlib import Path -from jiwer import wer, cer -from huggingface_hub import snapshot_download - -SAMPLE_RATE = 16000 -CHUNK_SIZE = 1600 # 0.1 sec chunks - - -class WhisperONNX: - def __init__(self, encoder_path, decoder_path, - model_type, encoder_providers=None, decoder_providers=None, language=None): - - self.encoder = ort.InferenceSession(encoder_path, providers=encoder_providers) - self.decoder = ort.InferenceSession(decoder_path, providers=decoder_providers) - - self.feature_extractor = WhisperFeatureExtractor.from_pretrained(f"openai/{model_type}") - self.tokenizer = WhisperTokenizer.from_pretrained(f"openai/{model_type}") - self.decoder_start_token = self.sot_token = self.tokenizer.convert_tokens_to_ids("<|startoftranscript|>") - self.eos_token = self.tokenizer.eos_token_id - self.max_length = min(448, self.decoder.get_inputs()[0].shape[1]) - if not isinstance(self.max_length, int): - raise ValueError("Invalid/Dynamic input shapes") - - self.language = language - if self.language: - 
self.tokenizer.set_prefix_tokens(language=self.language, task="transcribe") - self.initial_tokens = list(self.tokenizer.prefix_tokens) - else: - self.initial_tokens = [self.decoder_start_token] - - def preprocess(self, audio): - """ - Convert raw audio to Whisper log-mel spectrogram - """ - inputs = self.feature_extractor(audio, sampling_rate=SAMPLE_RATE, return_tensors="np") - return inputs["input_features"] - - def encode(self, input_features): - """ - Run encoder ONNX model - """ - input_name = self.encoder.get_inputs()[0].name - return self.encoder.run(None, {input_name: input_features})[0] - - def decode(self, encoder_out): - """ - Greedy decode with fixed-length input_ids - """ - tokens = list(self.initial_tokens) - first_token_delay = None - decode_start = time.time() - - # Get decoder input names - decoder_inputs = self.decoder.get_inputs() - input_ids_name = decoder_inputs[0].name - encoder_out_name = decoder_inputs[1].name - - # Distinguish inputs by data type if the order is not guaranteed - if decoder_inputs[0].type != 'tensor(int64)': - input_ids_name, encoder_out_name = encoder_out_name, input_ids_name - - for _ in range(len(tokens), self.max_length): - decoder_input = np.full((1, self.max_length), self.eos_token, dtype=np.int64) - decoder_input[0, :len(tokens)] = tokens - - outputs = self.decoder.run(None, { - input_ids_name: decoder_input, - encoder_out_name: encoder_out - }) - logits = outputs[0] - next_token = int(np.argmax(logits[0, len(tokens) - 1])) - - if next_token == self.eos_token: - break - tokens.append(next_token) - if first_token_delay is None: - first_token_delay = time.time() - decode_start - return tokens, first_token_delay - - def transcribe(self, audio, chunk_length_s=30, is_mic=False): - """ - Full encode-decode pipeline with support for long-form transcription using chunking. 
- """ - chunk_size = SAMPLE_RATE * chunk_length_s - total_samples = len(audio) - transcription = [] - chunk_idx = 0 - total_start_time = time.time() - - overlap = SAMPLE_RATE * 1 # Tune this - for start in range(0, total_samples, chunk_size - overlap): - end = min(start + chunk_size, total_samples) - audio_chunk = audio[start:end] - - input_features = self.preprocess(audio_chunk) - encoder_out = self.encode(input_features) - tokens, first_token_delay = self.decode(encoder_out) - decoded_text = self.tokenizer.decode( - tokens[len(self.initial_tokens):], - skip_special_tokens=True - ).strip() - transcription.append(decoded_text) - chunk_idx += 1 - if not is_mic: - if first_token_delay is not None: - print(f"\nPerformance Metric (Chunk {chunk_idx}):") - print(f" Time to First Token for this chunk: {first_token_delay:.2f} seconds") - else: - print(f"\nPerformance Metric (Chunk {chunk_idx}):") - print(" Time to First Token for this chunk: n/a (no token before EOS)") - - total_end_time = time.time() - input_audio_duration = total_samples / SAMPLE_RATE - rtf = (total_end_time - total_start_time) / input_audio_duration - if not is_mic: - print(f" RTF: {rtf:.2f}") - - return " ".join(transcription), rtf - - -def evaluate(model, dataset_dir, results_dir): - dataset_name = Path(dataset_dir).name - wav_dir = Path(dataset_dir) / "wav" - transcript_file = Path(dataset_dir) / "transcripts.txt" - - if not transcript_file.exists() or not wav_dir.exists(): - print(f"Missing transcripts.txt or wav folder in {dataset_dir}") - return - - with open(transcript_file, "r", encoding="utf-8") as f: - references = {line.split()[0]: " ".join(line.strip().split()[1:]) for line in f.readlines()} - - output_dir = Path(results_dir) / dataset_name - output_dir.mkdir(parents=True, exist_ok=True) - result_file = output_dir / "results.txt" - - total_wer, total_cer, total_rtf, count = 0, 0, 0, 0 - - with result_file.open("w", encoding="utf-8") as out_f: - for wav_path in sorted(wav_dir.glob("*.wav")): 
- key = wav_path.stem - if key not in references: - print(f"Reference for {key} not found in transcripts.txt") - continue - reference = references[key].lower() - waveform, sr = torchaudio.load(str(wav_path)) - if sr != SAMPLE_RATE: - waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)(waveform) - audio = waveform.squeeze(0).numpy() - predicted, rtf = model.transcribe(audio) - - sample_wer = wer(reference, predicted) - sample_cer = cer(reference, predicted) - total_wer += sample_wer - total_cer += sample_cer - total_rtf += rtf - count += 1 - - out_f.write(f"{key}\n") - out_f.write(f"Reference: {reference}\n") - out_f.write(f"Predicted: {predicted}\n") - out_f.write(f"WER: {sample_wer:.3f}, CER: {sample_cer:.3f}, RTF: {rtf:.3f}\n\n") - - if count: - avg_wer = total_wer / count - avg_cer = total_cer / count - avg_rtf = total_rtf / count - print(f"Evaluation completed for {count} files.") - print(f"Average WER: {avg_wer:.3f}, Average CER: {avg_cer:.3f}, Average RTF: {avg_rtf:.3f}") - out_f.write(f"Summary:\nAverage WER: {avg_wer:.3f}\nAverage CER: {avg_cer:.3f}\nAverage RTF: {avg_rtf:.3f}\n") - else: - print("No valid audio-transcript pairs found.") - - -def load_provider_options(config, model_name, device): - model_key = model_name.replace("whisper-", "") - if model_key not in config["whisper"]: - raise ValueError(f"Model type '{model_key}' not found in config") - - if device not in config["whisper"][model_key]: - raise ValueError(f"Device '{device}' not found in config for model type '{model_key}'") - - model_config = config["whisper"][model_key][device] - encoder_opts = model_config["encoder"] - decoder_opts = model_config["decoder"] - - def build_provider_opts(opts): - if opts.get("config_file"): - return [ - ( - "VitisAIExecutionProvider", - { - "config_file": opts["config_file"], - "cache_dir": opts.get("cache_dir", ""), - "cache_key": opts.get("cache_key", "") - } - ) - ] - else: - return ["CPUExecutionProvider"] - - print("Selected 
Provider Options: ") - print("Decoder: ", build_provider_opts(decoder_opts)) - print("Encoder: ", build_provider_opts(encoder_opts)) - return build_provider_opts(encoder_opts), build_provider_opts(decoder_opts) - - -def mic_stream(model, duration=0, silence_threshold=0.01, silence_duration=5.0): - q_audio = queue.Queue() - stop_flag = threading.Event() - - def audio_callback(indata, frames, time, status): - if status: - print(status, flush=True) - q_audio.put(indata.copy()) - - def feeder(): - try: - with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, dtype='float32', - blocksize=CHUNK_SIZE, callback=audio_callback): - if duration > 0: - sd.sleep(int(duration * 1000)) - stop_flag.set() - else: - while not stop_flag.is_set(): - sd.sleep(100) - except sd.PortAudioError as e: - print(f"\n Microphone error: {e}") - print("⚠️ Could not initialize microphone. Please check your audio device settings.") - stop_flag.set() - - threading.Thread(target=feeder, daemon=True).start() - - buffer = np.zeros((0,), dtype=np.float32) - silence_start = None - print("\n🎤 Real-time Transcription. Start Speaking ..\n") - while not stop_flag.is_set(): - try: - chunk = q_audio.get(timeout=0.1).squeeze() - buffer = np.concatenate((buffer, chunk)) - - rms = np.sqrt(np.mean(chunk ** 2)) - if rms < silence_threshold: - if silence_start is None: - silence_start = time.time() - elif time.time() - silence_start >= silence_duration: - print("\n🔕 Silence detected. Stopping transcription.") - stop_flag.set() - break - else: - silence_start = None - - if len(buffer) >= SAMPLE_RATE * 2: - text, _ = model.transcribe(buffer, is_mic=True) - print(text) - buffer = np.zeros((0,), dtype=np.float32) - except queue.Empty: - continue - - -def download_whisper_onnx(model_type: str): - """ - Download Whisper ONNX encoder/decoder from Hugging Face if not already present. - Returns paths to encoder and decoder model files. 
- """ - hf_model_map = { - "whisper-small": "amd/whisper-small-onnx-npu", - "whisper-medium": "amd/whisper-medium-onnx-npu", - "whisper-large-v3-turbo": "amd/whisper-large-turbo-onnx-npu" - } - - repo_id = hf_model_map.get(model_type) - if repo_id is None: - raise ValueError(f"Unsupported model_type '{model_type}' for ONNX auto-download.") - - local_dir = snapshot_download( - repo_id=repo_id, - ) - - # Construct paths to encoder/decoder ONNX files - encoder_path = os.path.join(local_dir, "encoder_model.onnx") - decoder_path = os.path.join(local_dir, "decoder_model.onnx") - - if not (os.path.exists(encoder_path) and os.path.exists(decoder_path)): - raise FileNotFoundError(f"Could not find encoder/decoder in {local_dir}") - - - return encoder_path, decoder_path - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--input", help="WAV file path or 'mic'") - parser.add_argument("--encoder", help="Path to Whisper encoder ONNX model (optional, auto-download if not provided)") - parser.add_argument("--decoder", help="Path to Whisper decoder ONNX model (optional, auto-download if not provided)") - parser.add_argument("--model-type", required=True, default="whisper-base", - choices=["whisper-tiny", "whisper-base", "whisper-small", - "whisper-medium", "whisper-large-v3-turbo"], - help="Whisper model name") - parser.add_argument("--eval-dir", help="Dataset directory with wavs/ and transcripts.txt") - parser.add_argument("--results-dir", default="results", help="Directory to store evaluation results") - parser.add_argument("--config-file", default="./config/model_config.json", help="Path to Model provider configs") - parser.add_argument("--device", choices=['cpu', 'npu'], default='cpu') - parser.add_argument("--duration", type=int, default=0, help="Mic duration in seconds (0 = unlimited)") - parser.add_argument("--language", help="Force decoder language (e.g., 'zh', 'ja', 'en')") - args = parser.parse_args() - - if Path(args.config_file).exists(): - with 
open(args.config_file) as f: - model_config = json.load(f) - else: - raise FileNotFoundError(f"Config file {args.config_file} not found") - - encoder_providers, decoder_providers = load_provider_options( - model_config, args.model_type, args.device - ) - - # Auto-download ONNX models if not provided - if args.encoder is None or args.decoder is None: - print(f"Downloading ONNX models for {args.model_type} from Hugging Face ...") - encoder_path, decoder_path = download_whisper_onnx(args.model_type) - else: - encoder_path, decoder_path = args.encoder, args.decoder - - model = WhisperONNX( - encoder_path, - decoder_path, - args.model_type, - encoder_providers=encoder_providers, - decoder_providers=decoder_providers, - language=args.language - ) - - if args.eval_dir: - evaluate(model, args.eval_dir, args.results_dir) - return - - if not args.input and not args.eval_dir: - print("Error: You must provide --input (wav or mic) or --eval-dir.") - return - - if args.input and args.input.lower() not in ['mic'] and not Path(args.input).suffix == '.wav': - print("Error: --input must be 'mic' or path to a .wav file.") - return - - if args.input.lower() == 'mic': - import sounddevice as sd - try: - mic_stream(model, args.duration) - except sd.PortAudioError as e: - print("Fix your device or try using a .wav file instead of mic. 
Exiting") - return - else: - waveform, sr = torchaudio.load(args.input) - if sr != SAMPLE_RATE: - waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)(waveform) - audio = waveform.squeeze(0).numpy() - text, _ = model.transcribe(audio, chunk_length_s=30) - print("\nTranscription:", text) - - -if __name__ == "__main__": - main() diff --git a/Demos/NPU-GPU-Pipeline/data/__init__.py b/Demos/NPU-GPU-Pipeline/data/__init__.py deleted file mode 100644 index 8df8bbad..00000000 --- a/Demos/NPU-GPU-Pipeline/data/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -from importlib import import_module -#from dataloader import MSDataLoader -from torch.utils.data import dataloader -from torch.utils.data import ConcatDataset -import torch -import random -# This is a simple wrapper function for ConcatDataset -class MyConcatDataset(ConcatDataset): - def __init__(self, datasets): - super(MyConcatDataset, self).__init__(datasets) - - - def set_scale(self, idx_scale): - for d in self.datasets: - if hasattr(d, 'set_scale'): d.set_scale(idx_scale) - -class Data: - def __init__(self, args): - self.loader_train = None - self.loader_test = [] - for d in args.data_test: - if d in ['Set5', 'Set14', 'B100', 'Urban100']: - m = import_module('data.benchmark') - testset = getattr(m, 'Benchmark')(args, name=d) - else: - assert NotImplementedError - - self.loader_test.append( - dataloader.DataLoader( - testset, - batch_size=1, - shuffle=False, - pin_memory=False, - num_workers=args.n_threads, - ) - ) diff --git a/Demos/NPU-GPU-Pipeline/data/anchors.npy b/Demos/NPU-GPU-Pipeline/data/anchors.npy deleted file mode 100644 index 4177c12f..00000000 Binary files a/Demos/NPU-GPU-Pipeline/data/anchors.npy and /dev/null differ diff --git a/Demos/NPU-GPU-Pipeline/data/benchmark.py b/Demos/NPU-GPU-Pipeline/data/benchmark.py deleted file mode 100644 index b3d9a10a..00000000 --- a/Demos/NPU-GPU-Pipeline/data/benchmark.py +++ /dev/null @@ -1,23 +0,0 @@ -import os - -#from data import common -from 
data import srdata -import numpy as np -import torch -import torch.utils.data as data - -class Benchmark(srdata.SRData): - def __init__(self, args, name='', benchmark=True): - super(Benchmark, self).__init__( - args, name=name, benchmark=True - ) - - def _set_filesystem(self, dir_data): - self.apath = os.path.join(dir_data, 'benchmark', self.name) - self.dir_hr = os.path.join(self.apath, 'HR') - if self.input_large: - self.dir_lr = os.path.join(self.apath, 'LR_bicubicL') - else: - self.dir_lr = os.path.join(self.apath, 'LR_bicubic') - self.ext = ('', '.png') - diff --git a/Demos/NPU-GPU-Pipeline/data/common.py b/Demos/NPU-GPU-Pipeline/data/common.py deleted file mode 100644 index 5953cd85..00000000 --- a/Demos/NPU-GPU-Pipeline/data/common.py +++ /dev/null @@ -1,34 +0,0 @@ -import random - -import numpy as np -import skimage.color as sc - -import torch - -def set_channel(*args, n_channels=3): - def _set_channel(img): - if img.ndim == 2: - img = np.expand_dims(img, axis=2) - - c = img.shape[2] - if n_channels == 1 and c == 3: - img = np.expand_dims(sc.rgb2ycbcr(img)[:, :, 0], 2) - elif n_channels == 3 and c == 1: - img = np.concatenate([img] * n_channels, 2) - - return img - - return [_set_channel(a) for a in args] - -def np2Tensor(*args, rgb_range=255): - def _np2Tensor(img): - np_transpose = np.ascontiguousarray(img.transpose((2, 0, 1))) - tensor = torch.from_numpy(np_transpose).float() - tensor.mul_(rgb_range / 255) - - return tensor - - return [_np2Tensor(a) for a in args] - - - diff --git a/Demos/NPU-GPU-Pipeline/data/data_tiling.py b/Demos/NPU-GPU-Pipeline/data/data_tiling.py deleted file mode 100644 index d77ab517..00000000 --- a/Demos/NPU-GPU-Pipeline/data/data_tiling.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import onnxruntime -import numpy as np -import math - - -def tiling_inference(session, lr, overlapping, patch_size): - _, _, h, w = lr.shape - sr = np.zeros((1, 3, 2*h, 2*w)) - n_h = math.ceil(h / float(patch_size[0] - overlapping)) - n_w = 
math.ceil(w / float(patch_size[1] - overlapping)) - #every tilling input has same size of patch_size - for ih in range(n_h): - h_idx = ih * (patch_size[0] - overlapping) - h_idx = h_idx if h_idx + patch_size[0] <= h else h - patch_size[0] - for iw in range(n_w): - w_idx = iw * (patch_size[1] - overlapping) - w_idx = w_idx if w_idx + patch_size[1] <= w else w - patch_size[1] - - tilling_lr = lr[..., h_idx: h_idx+patch_size[0], w_idx: w_idx+patch_size[1]] - sr_tiling = session.run(None, {session.get_inputs()[0].name: tilling_lr.transpose(0,2,3,1)})[0].transpose(0,3,1,2) - - left, right, top, bottom = 0, patch_size[1], 0, patch_size[0] - left += overlapping//2 - right -= overlapping//2 - top += overlapping//2 - bottom -= overlapping//2 - #processing edge pixels - if w_idx == 0: - left -= overlapping//2 - if h_idx == 0: - top -= overlapping//2 - if h_idx+patch_size[0]>=h: - bottom += overlapping//2 - if w_idx+patch_size[1]>=w: - right += overlapping//2 - - #get preditions - sr[... , 2*(h_idx+top): 2*(h_idx+bottom), 2*(w_idx+left): 2*(w_idx+right)] = sr_tiling[..., 2*top:2*bottom, 2*left:2*right] - return sr \ No newline at end of file diff --git a/Demos/NPU-GPU-Pipeline/data/srdata.py b/Demos/NPU-GPU-Pipeline/data/srdata.py deleted file mode 100644 index 90fa5ad7..00000000 --- a/Demos/NPU-GPU-Pipeline/data/srdata.py +++ /dev/null @@ -1,85 +0,0 @@ -import os -import glob -import random -import pickle - -from data import common - -import numpy as np -import imageio -import torch -import torch.utils.data as data - -class SRData(data.Dataset): - def __init__(self, args, name='', benchmark=False): - self.args = args - self.name = name - self.split = 'test' - self.do_eval = True - self.benchmark = benchmark - self.input_large = False - self.scale = args.scale - self.idx_scale = 0 - self._set_filesystem(args.dir_data) - list_hr, list_lr = self._scan() - self.images_hr, self.images_lr = list_hr, list_lr - - # Below functions as used to prepare images - def _scan(self): - 
names_hr = sorted( - glob.glob(os.path.join(self.dir_hr, '*' + self.ext[0])) - ) - names_lr = [[] for _ in self.scale] - for f in names_hr: - filename, _ = os.path.splitext(os.path.basename(f)) - for si, s in enumerate(self.scale): - names_lr[si].append(os.path.join( - self.dir_lr, 'X{}/{}x{}{}'.format( - s, filename, s, self.ext[1] - ) - )) - - return names_hr, names_lr - - def _set_filesystem(self, dir_data): - self.apath = os.path.join(dir_data, self.name) - self.dir_hr = os.path.join(self.apath, 'HR') - self.dir_lr = os.path.join(self.apath, 'LR_bicubic') - self.ext = ('.png', '.png') - - def __getitem__(self, idx): - lr, hr, filename = self._load_file(idx) - pair = self.get_patch(lr, hr) - pair = common.set_channel(*pair, n_channels=3) - pair_t = common.np2Tensor(*pair, rgb_range=255) - - return pair_t[0], pair_t[1], filename - - def __len__(self): - return len(self.images_hr) - - def _get_index(self, idx): - return idx - - def _load_file(self, idx): - idx = self._get_index(idx) - f_hr = self.images_hr[idx] - f_lr = self.images_lr[self.idx_scale][idx] - - filename, _ = os.path.splitext(os.path.basename(f_hr)) - hr = imageio.imread(f_hr) - lr = imageio.imread(f_lr) - return lr, hr, filename - - def get_patch(self, lr, hr): - scale = self.scale[self.idx_scale] - ih, iw = lr.shape[:2] - hr = hr[0:ih * scale, 0:iw * scale] - return lr, hr - - def set_scale(self, idx_scale): - if not self.input_large: - self.idx_scale = idx_scale - else: - self.idx_scale = random.randint(0, len(self.scale) - 1) - diff --git a/Demos/NPU-GPU-Pipeline/data/strides.npy b/Demos/NPU-GPU-Pipeline/data/strides.npy deleted file mode 100644 index 5526912a..00000000 Binary files a/Demos/NPU-GPU-Pipeline/data/strides.npy and /dev/null differ diff --git a/Demos/NPU-GPU-Pipeline/npu_gpu_utils.py b/Demos/NPU-GPU-Pipeline/npu_gpu_utils.py deleted file mode 100644 index 8a424828..00000000 --- a/Demos/NPU-GPU-Pipeline/npu_gpu_utils.py +++ /dev/null @@ -1,19 +0,0 @@ - -# return current working 
Machine hardware -import os -import subprocess - -def get_apu_info(): - # Run pnputil as a subprocess to enumerate PCI devices - command = r'pnputil /enum-devices /bus PCI /deviceids ' - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - # Check for supported Hardware IDs - apu_type = '' - if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): apu_type = 'PHX/HPT' - if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): apu_type = 'STX' - if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): apu_type = 'KRK' - return apu_type - \ No newline at end of file diff --git a/Demos/NPU-GPU-Pipeline/pipeline.py b/Demos/NPU-GPU-Pipeline/pipeline.py deleted file mode 100644 index f79f4c5d..00000000 --- a/Demos/NPU-GPU-Pipeline/pipeline.py +++ /dev/null @@ -1,546 +0,0 @@ -# -# Copyright © 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -import torch -import torch.nn as nn -import onnxruntime -import numpy as np -import argparse -from utils_img2img import ( - LoadImages, - non_max_suppression, - plot_images, - output_to_target, - preprocess, post_process -) -import sys -import pathlib - -CURRENT_DIR = pathlib.Path(__file__).parent -sys.path.append(str(CURRENT_DIR)) -from data.data_tiling import tiling_inference -import os -import threading -import queue -import cv2 -import time - -sys.path.append("./stable_diffusion") -from pathlib import Path -from typing import Dict -import config -from packaging import version -from olive.common.utils import set_tempdir -from olive.workflows import run as olive_run -import tkinter as tk -from tkinter import font -from PIL import Image, ImageTk -from power_utils_filtered import * -import random -from ort_util_img2img import get_ort_pipeline_sd -import collections -from npu_gpu_utils import get_apu_info - -def make_parser(): - parser = argparse.ArgumentParser("onnxruntime inference sample") - parser.add_argument( - "-i", - "--image_path", - type=str, - default="./test.mp4", - help="path to your input image or video.", - ) - parser.add_argument( - "-o", - "--output_path", - type=str, - default="./demo_infer.jpg", - help="path to your output directory.", - ) - parser.add_argument( - "--npu", action="store_true", help="flag to enable off-loading CNNs to NPU" - ) - parser.add_argument( - "--power", action="store_true", help="flag to enable power measurements" - ) - parser.add_argument( - "--igpu", - action="store_true", - help="flag to enable off-loading Stable Diffusion to iGPU", - ) - parser.add_argument( - "--provider_config", default="", type=str, help="provider config for ryzen ai" - ) - return parser - - -classnames = [ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - 
"cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "wine glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "dining table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush", -] -names = {k: classnames[k] for k in range(80)} -imgsz = [640, 640] -detected_objects_queue = queue.Queue() -stop_event_sres = threading.Event() -stop_event_detect = threading.Event() -stop_event_sd = threading.Event() -sd_input = set() -sd_input_queue = queue.Queue() -sd_input_set = set() -detected_objects_dict = {} - -### Stable Diffusion Parameters ### -sd_model_id = "runwayml/stable-diffusion-v1-5" -sd_num_images = 1 -sd_batch_size = 1 -sd_image_size = 384 -sd_num_inference_steps = 50 -sd_guidance_scale = 7.5 -sd_strength = 1.0 -sd_provider = "cpu" -sd_count = 0 -generated_images = [] -generated_prompts = [] -sr_images = [] -sr_input_dict = collections.defaultdict(int) -od_output_set = set() -### Stable Diffusion Parameters ### - - -def run_pipeline_sd( - pipeline, - prompt, - negative_prompt, - init_image, - num_images, - batch_size, - image_size, - num_inference_steps, - guidance_scale, - strength: float, - provider: str, - image_callback=None, - step_callback=None, -): - print(f"\nInference Batch Start (batch size = {batch_size}).") - kwargs = {} - result = pipeline( - [prompt] * batch_size, - negative_prompt=[negative_prompt], - image=init_image, - num_inference_steps=num_inference_steps, 
- callback=None, - guidance_scale=guidance_scale, - generator=np.random.RandomState(45), - **kwargs, - ) - return result.images[0] - - -def display_images_sd(generated_images, prompt): - generated_images = [Image.fromarray(np.uint8(img)) for img in generated_images] - total_width = sum(img.width for img in generated_images) - max_height = max(img.height for img in generated_images) - text_height = 50 - - root = tk.Toplevel() - root.title("Image generated by Stable Diffusion, enhanced by RCAN (super res)") - canvas = tk.Canvas(root, width=total_width, height=max_height + text_height) - canvas.pack() - my_font = font.Font(weight="bold", size=12) - - tk_images = [ImageTk.PhotoImage(img) for img in generated_images] - x_offset = 0 - for tk_image in tk_images: - canvas.create_image(x_offset, 0, anchor="nw", image=tk_image) - x_offset += tk_image.width() - canvas.create_text( - 0, - tk_images[0].height() + 8, - anchor="nw", - text="style transfer: " + prompt, - font=my_font, - fill="black", - width=tk_images[0].width(), - ) - root.mainloop() - - -def display_images_sr(sr_images): - sr_images = [ - Image.fromarray(np.uint8(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) - for img in sr_images - ] - images_per_row = 5 - total_widths = [sum(img.width for img in sr_images[i:i+images_per_row]) for i in range(0, len(sr_images), images_per_row)] - total_heights = [max(img.height for img in sr_images[i:i+images_per_row]) for i in range(0, len(sr_images), images_per_row)] - max_width = max(total_widths) - total_height = sum(total_heights) - root = tk.Tk() - root.title("Detected objects after super resolution") - canvas = tk.Canvas(root, width=max_width, height=total_height) - canvas.pack() - - tk_images = [ImageTk.PhotoImage(img) for img in sr_images] - x_offset = 0 - y_offset = 0 - for i, tk_image in enumerate(tk_images): - canvas.create_image(x_offset, y_offset, anchor="nw", image=tk_image) - x_offset += tk_image.width() - if (i + 1) % images_per_row == 0: - x_offset = 0 - y_offset += 
total_heights[i // images_per_row] - root.mainloop() - - -def display_video(source, dataset, video_filename): - cap = cv2.VideoCapture(source) - source_fps = cap.get(cv2.CAP_PROP_FPS) - cap.release() - out_video = cv2.VideoWriter( - video_filename, - cv2.VideoWriter_fourcc("m", "p", "4", "v"), - source_fps, - (640, 640), - ) - for i in range(dataset.frame): - img = detected_objects_dict[i] - out_video.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)) - out_video.release() - - wait_time = int(1000 / (1.2 * source_fps)) - cap = cv2.VideoCapture(video_filename) - while cap.isOpened(): - ret, frame = cap.read() - if ret: - cv2.imshow("Video", frame) - if cv2.waitKey(wait_time) & 0xFF == ord("q"): - break - else: - break - - cap.release() - - -def object_detection_worker(task_queue, onnx_runtime_session_1): - while not stop_event_detect.is_set(): - try: - batch_no, im = task_queue.get(block=False) - if batch_no == 0: - print("Processing frames for object detection") - - except queue.Empty: - stop_event_detect.set() - detected_objects_queue.put(None) - print("Task queue empty") - break - im = preprocess(im) - if len(im.shape) == 3: - im = im[None] - outputs = onnx_runtime_session_1.run( - None, - { - onnx_runtime_session_1.get_inputs()[0] - .name: im.permute(0, 2, 3, 1) - .cpu() - .numpy() - }, - ) - outputs = [torch.tensor(item).permute(0, 3, 1, 2) for item in outputs] - - preds = post_process(outputs) - preds = non_max_suppression( - preds, 0.25, 0.7, agnostic=False, max_det=300, classes=None - ) - res, boxes, cropped_imgs, op_labels = plot_images( - im, - *output_to_target(preds, max_det=15), - source, - names=names, - ) - detected_objects_dict[batch_no] = res - if batch_no%5==0: - detected_objects_queue.put( - (res, boxes, batch_no, cropped_imgs, op_labels) - ) - if int(batch_no) == 0: - sd_input_queue.put(op_labels) - print("Object detection worker is done.") - - -def super_resolution_worker(onnx_runtime_session_2): - while not stop_event_sres.is_set(): - try: - 
item = detected_objects_queue.get() - except queue.Empty: - stop_event_sres.set() - print("Detection queue empty") - break - if item is None: - stop_event_sres.set() - break - res, boxes, batch_no, cropped_imgs, op_labels = item - labels = [] - for img, label in cropped_imgs: - lr = img[np.newaxis, :, :, :].transpose((0, 3, 1, 2)).astype(np.float32) - if sr_input_dict[label] >= 5: - continue - try: - sr = tiling_inference(onnx_runtime_session_2, lr, 8, (56, 56)) - sr = np.clip(sr, 0, 255) - sr = sr.squeeze().transpose((1, 2, 0)).astype(np.uint8) - sr = cv2.cvtColor(sr, cv2.COLOR_BGR2RGB) - sr_input_dict[label]+=1 - processed_folder = "./results/sr_images/" + label + "/" - if not os.path.exists(processed_folder): - os.makedirs(processed_folder) - filename = os.path.join(processed_folder, str(sr_input_dict[label]) + ".png") - cv2.imwrite(filename, sr) - sr_images.append(sr) - print("Finished super resolution on a frame.") - except Exception as e: - # print(f"Error processing {filename}: {e}") - pass - detected_objects_queue.task_done() - print("Super resolution worker is done.") - - -def final_sr(onnx_runtime_session_2, image_path): - img = cv2.imread(image_path) - lr = img[np.newaxis, :, :, :].transpose((0, 3, 1, 2)).astype(np.float32) - try: - sr = tiling_inference(onnx_runtime_session_2, lr, 8, (56, 56)) - sr = np.clip(sr, 0, 255) - sr = sr.squeeze().transpose((1, 2, 0)).astype(np.uint8) - sr = cv2.cvtColor(sr, cv2.COLOR_BGR2RGB) - filename = os.path.join("./results/sd_img2img/enhanced_sd.png") - cv2.imwrite(filename, cv2.cvtColor(sr, cv2.COLOR_BGR2RGB)) - return sr - except: - pass - - -def stable_diffusion_worker(sd_pipeline, init_image): - while not stop_event_sd.is_set(): - try: - objects = sd_input_queue.get(timeout=10) - start_prompt = "" - for obj in objects: - start_prompt += obj + " " - except queue.Empty: - print("Prompt queue empty") - break - if len(sd_input) == 1: - stop_event_sd.set() - break - prompt = ( - start_prompt - + "in a beach, sand, water, 
high definition, realistic, detailed and intricate" - ) - negative_prompt = "animated" - img = run_pipeline_sd( - sd_pipeline, - prompt, - negative_prompt, - init_image, - sd_num_images, - sd_batch_size, - sd_image_size, - sd_num_inference_steps, - sd_guidance_scale, - sd_strength, - provider=sd_provider, - ) - generated_images.append(img) - generated_prompts.append(prompt) - res_folder = "./results/sd_img2img/" - if not os.path.exists(res_folder): - os.makedirs(res_folder) - op_path = res_folder + "sd_result.png" - img.save(op_path) - print("Finished generating image for a prompt.") - sd_input.add(prompt) - print("Stable Diffusion worker is done.") - - -if __name__ == "__main__": - random.seed(45) - np.random.seed(45) - torch.manual_seed(45) - args = make_parser().parse_args() - num_threads = 4 if args.npu else 1 - os.environ["NUM_OF_DPU_RUNNERS"] = str(num_threads) - source = args.image_path - dataset = LoadImages( - source, imgsz=imgsz, stride=32, auto=False, transforms=None, vid_stride=1 - ) - task_queue = queue.Queue() - - script_dir = Path(__file__).resolve().parent - if args.igpu: - model_dir_name = f"optimized-dml" - sd_provider = "dml" - else: - model_dir_name = f"unoptimized" - sd_model_dir = ( - script_dir / "stable_diffusion" / "models" / model_dir_name / sd_model_id - ) - - for batch_no, batch in enumerate(dataset): - path, im, im0s, vid_cap, s = batch - task_queue.put((batch_no, im)) - if batch_no == 0: - im_pil = Image.fromarray(np.transpose(im, (1, 2, 0))) - im_pil_resized = im_pil.resize((sd_image_size, sd_image_size)) - im_pil_resized.save("init_image.png") - - print("Number of frames = ", dataset.frames) - if args.npu: - npu_device = get_apu_info() - print('RYZEN_AI_INSTALLATION_PATH:', os.environ["RYZEN_AI_INSTALLATION_PATH"]) - provider_options = [{}] - if npu_device == 'PHX': - xclbin_path = '{}\\voe-4.0-win_amd64\\xclbins\\phoenix\\4x4.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - provider_options = [{ - "target": "X1", - 
"xclbin": xclbin_path - }] - print('XCLBIN_PATH:', xclbin_path) - - providers = ["VitisAIExecutionProvider"] - #cache_dir = Path(__file__).parent.resolve() - - - else: - providers = ["CPUExecutionProvider"] - provider_options = [{}] - - onnx_model = onnxruntime.InferenceSession( - "yolov8m.onnx", providers=providers, provider_options=provider_options - ) - ort_session_rcan = onnxruntime.InferenceSession( - "RCAN_int8_NHWC.onnx", providers=providers, provider_options=provider_options - ) - thread_pool = [] - - pipeline_pool = [] - pipeline_pool.append( - get_ort_pipeline_sd( - sd_model_dir, sd_batch_size, sd_image_size, sd_provider, sd_guidance_scale - ) - ) - for i in range(len(pipeline_pool)): - thread_pool.append( - threading.Thread( - target=stable_diffusion_worker, args=(pipeline_pool[i], im_pil_resized) - ) - ) - for i in range(num_threads): - thread_pool.append( - threading.Thread( - target=object_detection_worker, args=(task_queue, onnx_model) - ) - ) - thread_pool.append( - threading.Thread(target=super_resolution_worker, args=(ort_session_rcan,)) - ) - - start = time.perf_counter() - if args.power: - powerfilename = StartPowerMeas() - for thread in thread_pool: - thread.start() - for thread in thread_pool: - thread.join() - generated_images_enhanced = final_sr( - ort_session_rcan, "./results/sd_img2img/sd_result.png" - ) - generated_images.append(generated_images_enhanced) - end = time.perf_counter() - print("total inference time = ", (end - start), " seconds") - if args.power: - StopPowerMeas() - median_powers = med_pow(powerfilename) - print("Power stats: ", median_powers) - - display_video(source, dataset, "detection_results_giraffe_zebra.mp4") - display_images_sr(sr_images) - display_images_sd(generated_images, generated_prompts[0]) diff --git a/Demos/NPU-GPU-Pipeline/power_utils_filtered.py b/Demos/NPU-GPU-Pipeline/power_utils_filtered.py deleted file mode 100644 index 3b1cee24..00000000 --- a/Demos/NPU-GPU-Pipeline/power_utils_filtered.py +++ 
/dev/null @@ -1,72 +0,0 @@ -# -# Copyright © 2023 Advanced Micro Devices, Inc. All rights reserved. -# -import pandas as pd -import numpy as np -import subprocess -import time -import shlex - -def StartPowerMeas(): - timestamp = time.strftime("%Y%m%d%H%M%S") - filename = "power_" + timestamp + ".csv" - command = f'"C:\Program Files\AMD Graphics Manager\AMDGraphicsManager.exe" -unilog=PM,CLK -unilogsetup=unilogsetup.cfg -unilogperiod=50 -unilogstopcheck -unilogoutput="{filename}"' - cmds = shlex.split(command) - process = subprocess.Popen( - cmds, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - return filename - -def StopPowerMeas(): - file_name = "terminate.txt" - try: - with open(file_name, "w") as file: - pass # The "pass" statement does nothing, but it's needed to create an empty file - print( - f"Stopping the power measurement" - ) - except IOError as e: - print(f"An error occurred while creating the file: {e}") - -def med_pow(filename): - # real name and short name - cols_names = { - "CPU0 Power Correlation VDDCR_VDD Power": "CPU", - "CPU0 Power Correlation VDDCR_SOC Power": "NPU_SOC", - "CPU0 Power Correlation VDD MEM rail Power": "MEM_PHY", - "CPU0 Power Correlation SOCKET Power": "APU", - } - relevant_cols = ["Time Stamp"] + list(cols_names.keys()) - return medians(filename, relevant_cols, cols_names, ylabel="[W]") - -def time_to_seconds(time_str): - h, m, s = map(float, time_str.split(":")) - return h * 3600 + m * 60 + s - -def str_to_sec(date_string): - date_string = date_string.strip("--[ ]").strip() - # - # date_format = "%d.%m.%Y %H:%M:%S.%f" - time_str = date_string.split(" ")[1] - seconds = time_to_seconds(time_str) - return seconds - -def medians(filename, relevant_cols, short_names, ylabel): - # remove first and last samples affected by AGM - df = pd.read_csv(filename) - df_selected = df[relevant_cols] - df_selected = df_selected.rename(columns=short_names) - - df_selected["Time Stamp"] = df_selected["Time Stamp"].apply(str_to_sec) 
- df_selected["Time Stamp"] = ( - df_selected["Time Stamp"] - df_selected["Time Stamp"].min() - ) - - medians = {} - for key, value in short_names.items(): - # Calculate the median (more robust respect to average without utliers) - median_value = np.median(df_selected[value]) - # print(f"{value}: {median_value} {ylabel}") - medians[value] = median_value - - return medians diff --git a/Demos/NPU-GPU-Pipeline/requirements.txt b/Demos/NPU-GPU-Pipeline/requirements.txt deleted file mode 100644 index 0e43d253..00000000 --- a/Demos/NPU-GPU-Pipeline/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Ultralytics requirements -# Usage: pip install -r requirements.txt - -# Base ---------------------------------------- -matplotlib>=3.2.2 -numpy>=1.18.5 -opencv-python>=4.6.0 -Pillow>=7.1.2 -PyYAML>=5.3.1 -requests>=2.23.0 -scipy>=1.4.1 -torch>=2.8.0 -torchvision>=0.23.0 -tqdm>=4.64.0 -onnxscript -# Logging ------------------------------------- -tensorboard>=2.4.1 -# clearml -# comet - -# Plotting ------------------------------------ -pandas>=1.1.4 -seaborn>=0.11.0 - - -# Extras -------------------------------------- -ipython # interactive notebook -psutil # system utilization -thop>=0.1.1 # FLOPs computation -# albumentations>=1.0.3 -pycocotools>=2.0.6 # COCO mAP -# roboflow diff --git a/Demos/NPU-GPU-Pipeline/run_with_timeout.py b/Demos/NPU-GPU-Pipeline/run_with_timeout.py deleted file mode 100644 index 928cd522..00000000 --- a/Demos/NPU-GPU-Pipeline/run_with_timeout.py +++ /dev/null @@ -1,25 +0,0 @@ -import subprocess -import time -import signal -import os - -# Start the process -process = subprocess.Popen( - ["python", "pipeline.py", "-i", "test/test_img2img.mp4", "--npu", "--provider_config", "vaip_config.json", "--igpu"] -) - -# Wait for 2 minutes -try: - time.sleep(240) -except KeyboardInterrupt: - pass - -# Terminate the process -process.terminate() - -# Wait for the process to terminate -try: - process.wait(timeout=10) -except subprocess.TimeoutExpired: - 
process.kill() - diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config.py b/Demos/NPU-GPU-Pipeline/stable_diffusion/config.py deleted file mode 100644 index f8cfccd4..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config.py +++ /dev/null @@ -1,8 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- - -vae_sample_size = 512 -unet_sample_size = 64 -cross_attention_dim = 768 diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_safety_checker.json b/Demos/NPU-GPU-Pipeline/stable_diffusion/config_safety_checker.json deleted file mode 100644 index 8252d621..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_safety_checker.json +++ /dev/null @@ -1,118 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - "config": { - "model_path": "runwayml/stable-diffusion-v1-5", - "model_loader": "safety_checker_load", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "clip_input", "images" ], - "output_names": [ "out_images", "has_nsfw_concepts" ], - "dynamic_axes": { - "clip_input": { "0": "batch", "1": "channels", "2": "height", "3": "width" }, - "images": { "0": "batch", "1": "height", "2": "width", "3": "channels" } - } - }, - "dummy_inputs_func": "safety_checker_conversion_inputs" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [{"name": "avg"}], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "safety_checker_data_loader", - "batch_size": 1 - } - } - ] - } - }, - "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, - "ov_convert": { - "type": 
"OpenVINOConversion", - "config": { - "user_script": "user_script.py", - "example_input_func": "safety_checker_conversion_inputs", - "output_model": "safety_checker" - } - }, - "optimize": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "unet", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { - "GroupNorm": [0, 1, 2] - } - } - }, - "optimize_cuda": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "unet", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false - } - } - }, - "pass_flows": [ - ["convert", "optimize"] - ], - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": "local_system", - "target": "local_system", - "cache_dir": "cache", - "output_name": "safety_checker", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] - } -} diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_text_encoder.json b/Demos/NPU-GPU-Pipeline/stable_diffusion/config_text_encoder.json deleted file mode 100644 index bbdcf428..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_text_encoder.json +++ /dev/null @@ -1,115 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - 
"config": { - "model_path": "runwayml/stable-diffusion-v1-5", - "model_loader": "text_encoder_load", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "input_ids" ], - "output_names": [ "last_hidden_state", "pooler_output" ], - "dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } } - }, - "dummy_inputs_func": "text_encoder_conversion_inputs" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [{"name": "avg"}], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "text_encoder_data_loader", - "batch_size": 1 - } - } - ] - } - }, - "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, - "ov_convert": { - "type": "OpenVINOConversion", - "config": { - "user_script": "user_script.py", - "example_input_func": "text_encoder_conversion_inputs", - "output_model": "text_encoder" - } - }, - "optimize": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { - "GroupNorm": [0, 1, 2] - } - } - 
}, - "optimize_cuda": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false - } - } - }, - "pass_flows": [ - ["convert", "optimize"] - ], - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": "local_system", - "target": "local_system", - "cache_dir": "cache", - "output_name": "text_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] - } -} diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_unet.json b/Demos/NPU-GPU-Pipeline/stable_diffusion/config_unet.json deleted file mode 100644 index b0f5a241..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_unet.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - "config": { - "model_path": "runwayml/stable-diffusion-v1-5", - "model_loader": "unet_load", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "sample", "timestep", "encoder_hidden_states", "return_dict" ], - "output_names": [ "out_sample" ], - "dynamic_axes": { - "sample": {"0": "unet_sample_batch", "1": "unet_sample_channels", "2": "unet_sample_height", "3": "unet_sample_width"}, - "timestep": {"0": "unet_time_batch"}, - "encoder_hidden_states": {"0": "unet_hidden_batch", "1": "unet_hidden_sequence"} - } - }, - "dummy_inputs_func": "unet_conversion_inputs" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [{"name": "avg"}], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "unet_data_loader", - "batch_size": 2 - } - } - ] - } - }, - "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14, - "save_as_external_data": true, - 
"all_tensors_to_one_file": true, - "external_data_name": "weights.pb" - } - }, - "ov_convert": { - "type": "OpenVINOConversion", - "config": { - "user_script": "user_script.py", - "example_input_func": "get_unet_ov_example_input", - "output_model": "unet" - } - }, - "optimize": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "unet", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { - "GroupNorm": [0, 1, 2] - } - } - }, - "optimize_cuda": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "unet", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false - } - } - }, - "pass_flows": [ - ["convert", "optimize"] - ], - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": "local_system", - "target": "local_system", - "cache_dir": "cache", - "output_name": "unet", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] - } -} diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_decoder.json b/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_decoder.json deleted file mode 100644 index 95050605..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_decoder.json +++ 
/dev/null @@ -1,115 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - "config": { - "model_path": "runwayml/stable-diffusion-v1-5", - "model_loader": "vae_decoder_load", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "latent_sample", "return_dict" ], - "output_names": [ "sample" ], - "dynamic_axes": { "latent_sample": { "0": "batch", "1": "channels", "2": "height", "3": "width" } } - }, - "dummy_inputs_func": "vae_decoder_conversion_inputs" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [{"name": "avg"}], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "vae_decoder_data_loader", - "batch_size": 1 - } - } - ] - } - }, - "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, - "ov_convert": { - "type": "OpenVINOConversion", - "config": { - "user_script": "user_script.py", - "example_input_func": "vae_decoder_conversion_inputs", - "output_model": "vae_decoder" - } - }, - "optimize": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "vae", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - 
"force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { - "GroupNorm": [0, 1, 2] - } - } - }, - "optimize_cuda": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "vae", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false - } - } - }, - "pass_flows": [ - ["convert", "optimize"] - ], - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": "local_system", - "target": "local_system", - "cache_dir": "cache", - "output_name": "vae_decoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] - } -} diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_encoder.json b/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_encoder.json deleted file mode 100644 index 489e83d1..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/config_vae_encoder.json +++ /dev/null @@ -1,115 +0,0 @@ -{ - "input_model": { - "type": "PyTorchModel", - "config": { - "model_path": "runwayml/stable-diffusion-v1-5", - "model_loader": "vae_encoder_load", - "model_script": "user_script.py", - "io_config": { - "input_names": [ "sample", "return_dict" ], - "output_names": [ "latent_sample" ], - "dynamic_axes": { "sample": { "0": "batch", "1": "channels", "2": "height", "3": "width" } } - }, - "dummy_inputs_func": "vae_encoder_conversion_inputs" - } - }, - "systems": { - "local_system": { - "type": "LocalSystem", - "config": { - "accelerators": ["gpu"] - } - } - }, - "evaluators": { - "common_evaluator": { - "metrics": [ - { - "name": "latency", - "type": "latency", - "sub_types": [{"name": "avg"}], - "user_config": { - "user_script": "user_script.py", - "dataloader_func": "vae_encoder_data_loader", - "batch_size": 1 - } - } - ] - } - }, - "passes": { - "convert": { - "type": "OnnxConversion", - "config": { - "target_opset": 14 - } - }, - "ov_convert": { - "type": "OpenVINOConversion", - "config": { - "user_script": "user_script.py", - 
"example_input_func": "vae_encoder_conversion_inputs", - "output_model": "vae_encoder" - } - }, - "optimize": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "vae", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "use_multi_head_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": true, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": true, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": true, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "group_norm_channels_last": false - }, - "force_fp32_ops": ["RandomNormalLike"], - "force_fp16_inputs": { - "GroupNorm": [0, 1, 2] - } - } - }, - "optimize_cuda": { - "type": "OrtTransformersOptimization", - "config": { - "model_type": "vae", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false - } - } - }, - "pass_flows": [ - ["convert", "optimize"] - ], - "engine": { - "log_severity_level": 0, - "evaluator": "common_evaluator", - "evaluate_input_model": false, - "host": "local_system", - "target": "local_system", - "cache_dir": "cache", - "output_name": "vae_encoder", - "output_dir": "footprints", - "execution_providers": ["DmlExecutionProvider"] - } -} diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/ort_util_img2img.py b/Demos/NPU-GPU-Pipeline/stable_diffusion/ort_util_img2img.py deleted file mode 100644 index e9a186dd..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/ort_util_img2img.py +++ /dev/null @@ -1,192 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. 
-# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import json -import shutil -import sys -from pathlib import Path -from typing import Dict - -import onnxruntime as ort -from diffusers import OnnxRuntimeModel, OnnxStableDiffusionImg2ImgPipeline -from onnxruntime import __version__ as OrtVersion -from packaging import version - -from olive.model import ONNXModelHandler - -# ruff: noqa: TID252, T201 - - -def update_cuda_config(config: Dict): - if version.parse(OrtVersion) < version.parse("1.17.0"): - # disable skip_group_norm fusion since there is a shape inference bug which leads to invalid models - config["passes"]["optimize_cuda"]["config"]["optimization_options"] = {"enable_skip_group_norm": False} - config["pass_flows"] = [["convert", "optimize_cuda"]] - config["engine"]["execution_providers"] = ["CUDAExecutionProvider"] - return config - - -def validate_args(args, provider): - ort.set_default_logger_severity(4) - if args.static_dims: - print( - "WARNING: the --static_dims option is deprecated, and static shape optimization is enabled by default. " - "Use --dynamic_dims to disable static shape optimization." - ) - - validate_ort_version(provider) - - -def validate_ort_version(provider: str): - if provider == "dml" and version.parse(OrtVersion) < version.parse("1.16.0"): - print("This script requires onnxruntime-directml 1.16.0 or newer") - sys.exit(1) - elif provider == "cuda" and version.parse(OrtVersion) < version.parse("1.17.0"): - if version.parse(OrtVersion) < version.parse("1.16.2"): - print("This script requires onnxruntime-gpu 1.16.2 or newer") - sys.exit(1) - print( - f"WARNING: onnxruntime {OrtVersion} has known issues with shape inference for SkipGroupNorm. Will disable" - " skip_group_norm fusion. onnxruntime-gpu 1.17.0 or newer is strongly recommended!" 
- ) - - -def save_optimized_onnx_submodel(submodel_name, provider, model_info): - footprints_file_path = ( - Path(__file__).resolve().parent / "footprints" / f"{submodel_name}_gpu-{provider}_footprints.json" - ) - with footprints_file_path.open("r") as footprint_file: - footprints = json.load(footprint_file) - - conversion_footprint = None - optimizer_footprint = None - for footprint in footprints.values(): - if footprint["from_pass"] == "OnnxConversion": - conversion_footprint = footprint - elif footprint["from_pass"] == "OrtTransformersOptimization": - optimizer_footprint = footprint - - assert conversion_footprint - assert optimizer_footprint - - unoptimized_olive_model = ONNXModelHandler(**conversion_footprint["model_config"]["config"]) - optimized_olive_model = ONNXModelHandler(**optimizer_footprint["model_config"]["config"]) - - model_info[submodel_name] = { - "unoptimized": { - "path": Path(unoptimized_olive_model.model_path), - }, - "optimized": { - "path": Path(optimized_olive_model.model_path), - }, - } - - print(f"Unoptimized Model : {model_info[submodel_name]['unoptimized']['path']}") - print(f"Optimized Model : {model_info[submodel_name]['optimized']['path']}") - - -def save_onnx_pipeline( - has_safety_checker, model_info, optimized_model_dir, unoptimized_model_dir, pipeline, submodel_names -): - # Save the unoptimized models in a directory structure that the diffusers library can load and run. - # This is optional, and the optimized models can be used directly in a custom pipeline if desired. 
- print("\nCreating ONNX pipeline...") - - if has_safety_checker: - safety_checker = OnnxRuntimeModel.from_pretrained(model_info["safety_checker"]["unoptimized"]["path"].parent) - else: - safety_checker = None - - onnx_pipeline = OnnxStableDiffusionImg2ImgPipeline( - vae_encoder=OnnxRuntimeModel.from_pretrained(model_info["vae_encoder"]["unoptimized"]["path"].parent), - vae_decoder=OnnxRuntimeModel.from_pretrained(model_info["vae_decoder"]["unoptimized"]["path"].parent), - text_encoder=OnnxRuntimeModel.from_pretrained(model_info["text_encoder"]["unoptimized"]["path"].parent), - tokenizer=pipeline.tokenizer, - unet=OnnxRuntimeModel.from_pretrained(model_info["unet"]["unoptimized"]["path"].parent), - scheduler=pipeline.scheduler, - safety_checker=safety_checker, - feature_extractor=pipeline.feature_extractor, - requires_safety_checker=True, - ) - - print("Saving unoptimized models...") - onnx_pipeline.save_pretrained(unoptimized_model_dir) - - # Create a copy of the unoptimized model directory, then overwrite with optimized models from the olive cache. - print("Copying optimized models...") - shutil.copytree(unoptimized_model_dir, optimized_model_dir, ignore=shutil.ignore_patterns("weights.pb")) - for submodel_name in submodel_names: - src_path = model_info[submodel_name]["optimized"]["path"] - dst_path = optimized_model_dir / submodel_name / "model.onnx" - shutil.copyfile(src_path, dst_path) - - print(f"The optimized pipeline is located here: {optimized_model_dir}") - -def get_ort_pipeline_sd(model_dir, batch_size, image_size, provider, guidance_scale): - ort.set_default_logger_severity(3) - print("Loading models into ORT session...") - sess_options = ort.SessionOptions() - sess_options.enable_mem_pattern = False - - static_dims = True - - if static_dims: - hidden_batch_size = batch_size if (guidance_scale == 0.0) else batch_size * 2 - # Not necessary, but helps DML EP further optimize runtime performance. 
- # batch_size is doubled for sample & hidden state because of classifier free guidance: - # https://github.com/huggingface/diffusers/blob/46c52f9b9607e6ecb29c782c052aea313e6487b7/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L672 - sess_options.add_free_dimension_override_by_name("unet_sample_batch", hidden_batch_size) - sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4) - sess_options.add_free_dimension_override_by_name("unet_sample_height", image_size // 8) - sess_options.add_free_dimension_override_by_name("unet_sample_width", image_size // 8) - sess_options.add_free_dimension_override_by_name("unet_time_batch", 1) - sess_options.add_free_dimension_override_by_name("unet_hidden_batch", hidden_batch_size) - sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77) - - provider_map = { - "dml": ('DmlExecutionProvider', { - 'device_id': 0, - }), - "cpu": "CPUExecutionProvider", - } - assert provider in provider_map, f"Unsupported provider: {provider}" - return OnnxStableDiffusionImg2ImgPipeline.from_pretrained( - model_dir, provider=provider_map[provider], sess_options=sess_options - ) - -def get_ort_pipeline(model_dir, common_args, ort_args, guidance_scale): - ort.set_default_logger_severity(3) - - print("Loading models into ORT session...") - sess_options = ort.SessionOptions() - sess_options.enable_mem_pattern = False - - static_dims = not ort_args.dynamic_dims - batch_size = common_args.batch_size - image_size = common_args.image_size - provider = common_args.provider - - if static_dims: - hidden_batch_size = batch_size if (guidance_scale == 0.0) else batch_size * 2 - # Not necessary, but helps DML EP further optimize runtime performance. 
- # batch_size is doubled for sample & hidden state because of classifier free guidance: - # https://github.com/huggingface/diffusers/blob/46c52f9b9607e6ecb29c782c052aea313e6487b7/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L672 - sess_options.add_free_dimension_override_by_name("unet_sample_batch", hidden_batch_size) - sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4) - sess_options.add_free_dimension_override_by_name("unet_sample_height", image_size // 8) - sess_options.add_free_dimension_override_by_name("unet_sample_width", image_size // 8) - sess_options.add_free_dimension_override_by_name("unet_time_batch", 1) - sess_options.add_free_dimension_override_by_name("unet_hidden_batch", hidden_batch_size) - sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77) - - provider_map = { - "dml": ('DmlExecutionProvider', { - 'device_id': 1, - }), - "cuda": "CUDAExecutionProvider", - } - assert provider in provider_map, f"Unsupported provider: {provider}" - return OnnxStableDiffusionImg2ImgPipeline.from_pretrained( - model_dir, provider=provider_map[provider], sess_options=sess_options - ) \ No newline at end of file diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/requirements-common.txt b/Demos/NPU-GPU-Pipeline/stable_diffusion/requirements-common.txt deleted file mode 100644 index 201235f5..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/requirements-common.txt +++ /dev/null @@ -1,9 +0,0 @@ -olive-ai==0.5.0 -accelerate -diffusers==0.33.1 -pillow -protobuf==3.20.3 # protobuf 4.x aborts with OOM when optimizing unet -tabulate -torch==2.8.0 -transformers<4.43.0 -torchvision==0.23.0 diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/stable_diffusion.py b/Demos/NPU-GPU-Pipeline/stable_diffusion/stable_diffusion.py deleted file mode 100644 index 36830e6e..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/stable_diffusion.py +++ /dev/null @@ -1,193 +0,0 @@ -# 
------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import argparse -import json -import shutil -import sys -import warnings -from pathlib import Path -from typing import Dict - -import config -import torch -from diffusers import StableDiffusionImg2ImgPipeline -from packaging import version -from user_script import get_base_model_name - -from olive.common.utils import set_tempdir -from olive.workflows import run as olive_run - -file_path = str(Path(__file__).resolve().parent) -sys.path.append(file_path) -from PIL import Image - -def update_config_with_provider(config: Dict, provider: str): - print("Printing config:",config) - if provider == "dml": - # DirectML EP is the default, so no need to update config. - return config - else: - raise ValueError(f"Unsupported provider: {provider}") - -def optimize( - model_id: str, - provider: str, - unoptimized_model_dir: Path, - optimized_model_dir: Path, -): - from google.protobuf import __version__ as protobuf_version - - # protobuf 4.x aborts with OOM when optimizing unet - if version.parse(protobuf_version) > version.parse("3.20.3"): - print("This script requires protobuf 3.20.3. Please ensure your package version matches requirements.txt.") - sys.exit(1) - - script_dir = Path(__file__).resolve().parent - - # Clean up previously optimized models, if any. - shutil.rmtree(script_dir / "footprints", ignore_errors=True) - shutil.rmtree(unoptimized_model_dir, ignore_errors=True) - shutil.rmtree(optimized_model_dir, ignore_errors=True) - - # The model_id and base_model_id are identical when optimizing a standard stable diffusion model like - # runwayml/stable-diffusion-v1-5. These variables are only different when optimizing a LoRA variant. 
- base_model_id = get_base_model_name(model_id) - - # Load the entire PyTorch pipeline to ensure all models and their configurations are downloaded and cached. - # This avoids an issue where the non-ONNX components (tokenizer, scheduler, and feature extractor) are not - # automatically cached correctly if individual models are fetched one at a time. - print("Download stable diffusion PyTorch pipeline...") - pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(base_model_id, torch_dtype=torch.float32) - config.vae_sample_size = pipeline.vae.config.sample_size - config.cross_attention_dim = pipeline.unet.config.cross_attention_dim - config.unet_sample_size = pipeline.unet.config.sample_size - - model_info = {} - - submodel_names = ["vae_encoder", "vae_decoder", "unet", "text_encoder"] - - # has_safety_checker = getattr(pipeline, "safety_checker", None) is not None - has_safety_checker = False - - if has_safety_checker: - submodel_names.append("safety_checker") - - for submodel_name in submodel_names: - print(f"\nOptimizing {submodel_name}") - - olive_config = None - with (script_dir / f"config_{submodel_name}.json").open() as fin: - olive_config = json.load(fin) - olive_config = update_config_with_provider(olive_config, provider) - - if submodel_name in ("unet", "text_encoder"): - olive_config["input_model"]["config"]["model_path"] = model_id - else: - # Only the unet & text encoder are affected by LoRA, so it's better to use the base model ID for - # other models: the Olive cache is based on the JSON config, and two LoRA variants with the same - # base model ID should be able to reuse previously optimized copies. 
- olive_config["input_model"]["config"]["model_path"] = base_model_id - - run_res = olive_run(olive_config) - from ort_util_img2img import save_optimized_onnx_submodel - save_optimized_onnx_submodel(submodel_name, provider, model_info) - from ort_util_img2img import save_onnx_pipeline - save_onnx_pipeline( - has_safety_checker, model_info, optimized_model_dir, unoptimized_model_dir, pipeline, submodel_names - ) - - return model_info - - -def parse_common_args(raw_args): - parser = argparse.ArgumentParser("Common arguments") - - parser.add_argument("--model_id", default="runwayml/stable-diffusion-v1-5", type=str) - parser.add_argument( - "--provider", default="dml", type=str, choices=["dml"], help="Execution provider to use" - ) - parser.add_argument("--optimize", action="store_true", help="Runs the optimization step") - parser.add_argument("--clean_cache", action="store_true", help="Deletes the Olive cache") - parser.add_argument("--test_unoptimized", action="store_true", help="Use unoptimized model for inference") - parser.add_argument("--batch_size", default=1, type=int, help="Number of images to generate per batch") - parser.add_argument( - "--prompt", - default=( - "castle surrounded by water and nature, village, volumetric lighting, photorealistic, " - "detailed and intricate, fantasy, epic cinematic shot, mountains, 8k ultra hd" - ), - type=str, - ) - parser.add_argument( - "--guidance_scale", - default=7.5, - type=float, - help="Guidance scale as defined in Classifier-Free Diffusion Guidance", - ) - parser.add_argument("--num_images", default=1, type=int, help="Number of images to generate") - parser.add_argument("--num_inference_steps", default=50, type=int, help="Number of steps in diffusion process") - parser.add_argument("--interactive", action="store_true", help="Run with a GUI") - parser.add_argument("--tempdir", default=None, type=str, help="Root directory for tempfile directories and files") - parser.add_argument("--image_path", default=None, 
type=str, help="Root directory for tempfile directories and files") - parser.add_argument( - "--strength", - default=1.0, - type=float, - help="Value between 0.0 and 1.0, that controls the amount of noise that is added to the input image. " - "Values that approach 1.0 enable lots of variations but will also produce images " - "that are not semantically consistent with the input.", - ) - parser.add_argument("--image_size", default=512, type=int, help="Width and height of the images to generate") - - return parser.parse_known_args(raw_args) - - -def parse_ort_args(raw_args): - parser = argparse.ArgumentParser("ONNX Runtime arguments") - - parser.add_argument( - "--static_dims", - action="store_true", - help="DEPRECATED (now enabled by default). Use --dynamic_dims to disable static_dims.", - ) - parser.add_argument("--dynamic_dims", action="store_true", help="Disable static shape optimization") - - return parser.parse_known_args(raw_args) - - -def main(raw_args=None): - common_args, extra_args = parse_common_args(raw_args) - - provider = common_args.provider - model_id = common_args.model_id - - script_dir = Path(__file__).resolve().parent - unoptimized_model_dir = script_dir / "models" / "unoptimized" / model_id - optimized_dir_name = f"optimized-{provider}" - optimized_model_dir = script_dir / "models" / optimized_dir_name / model_id - - if common_args.clean_cache: - shutil.rmtree(script_dir / "cache", ignore_errors=True) - - guidance_scale = common_args.guidance_scale - - if model_id == "stabilityai/sd-turbo" and guidance_scale > 0: - guidance_scale = 0.0 - print(f"WARNING: Classifier free guidance has been forcefully disabled since {model_id} doesn't support it.") - - ort_args, extra_args = parse_ort_args(extra_args) - - if common_args.optimize or not optimized_model_dir.exists(): - set_tempdir(common_args.tempdir) - - # TODO(jstoecker): clean up warning filter (mostly during conversion from torch to ONNX) - with warnings.catch_warnings(): - 
warnings.simplefilter("ignore") - from ort_util_img2img import validate_args - validate_args(ort_args, common_args.provider) - optimize(common_args.model_id, common_args.provider, unoptimized_model_dir, optimized_model_dir) - -if __name__ == "__main__": - main() diff --git a/Demos/NPU-GPU-Pipeline/stable_diffusion/user_script.py b/Demos/NPU-GPU-Pipeline/stable_diffusion/user_script.py deleted file mode 100644 index 89cd0d88..00000000 --- a/Demos/NPU-GPU-Pipeline/stable_diffusion/user_script.py +++ /dev/null @@ -1,275 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -import config -import torch -from diffusers import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker -from huggingface_hub import model_info -from transformers.models.clip.modeling_clip import CLIPTextModel - - -# Helper latency-only dataloader that creates random tensors with no label -class RandomDataLoader: - def __init__(self, create_inputs_func, batchsize, torch_dtype): - self.create_input_func = create_inputs_func - self.batchsize = batchsize - self.torch_dtype = torch_dtype - - def __getitem__(self, idx): - label = None - return self.create_input_func(self.batchsize, self.torch_dtype), label - - -def get_base_model_name(model_name): - return model_info(model_name).cardData.get("base_model") or model_name - - -def is_lora_model(model_name): - # TODO(jstoecker): might be a better way to detect (e.g. 
presence of LORA weights file) - return model_name != get_base_model_name(model_name) - - -# Merges LoRA weights into the layers of a base model -def merge_lora_weights(base_model, lora_model_id, submodel_name="unet", scale=1.0): - import inspect - from collections import defaultdict - from functools import reduce - - try: - from diffusers.loaders import LORA_WEIGHT_NAME - except ImportError: - # moved in version 0.24.0 - from diffusers.loaders.lora import LORA_WEIGHT_NAME - from diffusers.models.attention_processor import LoRAAttnProcessor - from diffusers.utils.hub_utils import _get_model_file - - parameters = inspect.signature(_get_model_file).parameters - - kwargs = {} - if "use_auth_token" in parameters: - kwargs["use_auth_token"] = None - elif "token" in parameters: - kwargs["token"] = None - - # Load LoRA weights - model_file = _get_model_file( - lora_model_id, - weights_name=LORA_WEIGHT_NAME, - cache_dir=None, - force_download=False, - resume_download=False, - proxies=None, - local_files_only=False, - revision=None, - subfolder=None, - user_agent={ - "file_type": "attn_procs_weights", - "framework": "pytorch", - }, - **kwargs, - ) - lora_state_dict = torch.load(model_file, map_location="cpu") - - # All keys in the LoRA state dictionary should have 'lora' somewhere in the string. - keys = list(lora_state_dict.keys()) - assert all("lora" in k for k in keys) - - if all(key.startswith(submodel_name) for key in keys): - # New format (https://github.com/huggingface/diffusers/pull/2918) supports LoRA weights in both the - # unet and text encoder where keys are prefixed with 'unet' or 'text_encoder', respectively. - submodel_state_dict = {k: v for k, v in lora_state_dict.items() if k.startswith(submodel_name)} - else: - # Old format. Keys will not have any prefix. This only applies to unet, so exit early if this is - # optimizing the text encoder. 
- if submodel_name != "unet": - return - submodel_state_dict = lora_state_dict - - # Group LoRA weights into attention processors - attn_processors = {} - lora_grouped_dict = defaultdict(dict) - for key, value in submodel_state_dict.items(): - attn_processor_key, sub_key = ".".join(key.split(".")[:-3]), ".".join(key.split(".")[-3:]) - lora_grouped_dict[attn_processor_key][sub_key] = value - - for key, value_dict in lora_grouped_dict.items(): - rank = value_dict["to_k_lora.down.weight"].shape[0] - cross_attention_dim = value_dict["to_k_lora.down.weight"].shape[1] - hidden_size = value_dict["to_k_lora.up.weight"].shape[0] - - attn_processors[key] = LoRAAttnProcessor( - hidden_size=hidden_size, cross_attention_dim=cross_attention_dim, rank=rank - ) - attn_processors[key].load_state_dict(value_dict) - - # Merge LoRA attention processor weights into existing Q/K/V/Out weights - for name, proc in attn_processors.items(): - attention_name = name[: -len(".processor")] - attention = reduce(getattr, attention_name.split(sep="."), base_model) - attention.to_q.weight.data += scale * torch.mm(proc.to_q_lora.up.weight, proc.to_q_lora.down.weight) - attention.to_k.weight.data += scale * torch.mm(proc.to_k_lora.up.weight, proc.to_k_lora.down.weight) - attention.to_v.weight.data += scale * torch.mm(proc.to_v_lora.up.weight, proc.to_v_lora.down.weight) - attention.to_out[0].weight.data += scale * torch.mm(proc.to_out_lora.up.weight, proc.to_out_lora.down.weight) - - -# ----------------------------------------------------------------------------- -# TEXT ENCODER -# ----------------------------------------------------------------------------- - - -def text_encoder_inputs(batchsize, torch_dtype): - return torch.zeros((batchsize, 77), dtype=torch_dtype) - - -def text_encoder_load(model_name): - base_model_id = get_base_model_name(model_name) - model = CLIPTextModel.from_pretrained(base_model_id, subfolder="text_encoder") - if is_lora_model(model_name): - merge_lora_weights(model, 
model_name, "text_encoder") - return model - - -def text_encoder_conversion_inputs(model=None): - return text_encoder_inputs(1, torch.int32) - - -def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs): - return RandomDataLoader(text_encoder_inputs, batchsize, torch.int32) - - -# ----------------------------------------------------------------------------- -# UNET -# ----------------------------------------------------------------------------- - - -def unet_inputs(batchsize, torch_dtype, is_conversion_inputs=False): - # TODO(jstoecker): Rename onnx::Concat_4 to text_embeds and onnx::Shape_5 to time_ids - inputs = { - "sample": torch.rand((batchsize, 4, config.unet_sample_size, config.unet_sample_size), dtype=torch_dtype), - "timestep": torch.rand((batchsize,), dtype=torch_dtype), - "encoder_hidden_states": torch.rand((batchsize, 77, config.cross_attention_dim), dtype=torch_dtype), - } - - # use as kwargs since they won't be in the correct position if passed along with the tuple of inputs - kwargs = { - "return_dict": False, - } - if is_conversion_inputs: - inputs["additional_inputs"] = { - **kwargs, - "added_cond_kwargs": { - "text_embeds": torch.rand((1, 1280), dtype=torch_dtype), - "time_ids": torch.rand((1, 5), dtype=torch_dtype), - }, - } - else: - inputs.update(kwargs) - inputs["onnx::Concat_4"] = torch.rand((1, 1280), dtype=torch_dtype) - inputs["onnx::Shape_5"] = torch.rand((1, 5), dtype=torch_dtype) - - return inputs - - -def get_unet_ov_example_input(): - import numpy as np - - encoder_hidden_state = torch.ones((2, 77, 768)) - latents_shape = (2, 4, 512 // 8, 512 // 8) - latents = torch.randn(latents_shape) - t = torch.from_numpy(np.array(1, dtype=float)) - return (latents, t, encoder_hidden_state) - - -def unet_load(model_name): - base_model_id = get_base_model_name(model_name) - model = UNet2DConditionModel.from_pretrained(base_model_id, subfolder="unet") - if is_lora_model(model_name): - merge_lora_weights(model, model_name, "unet") - return 
model - - -def unet_conversion_inputs(model=None): - return tuple(unet_inputs(1, torch.float32, True).values()) - - -def unet_data_loader(data_dir, batchsize, *args, **kwargs): - return RandomDataLoader(unet_inputs, batchsize, torch.float16) - - -# ----------------------------------------------------------------------------- -# VAE ENCODER -# ----------------------------------------------------------------------------- - - -def vae_encoder_inputs(batchsize, torch_dtype): - return {"sample": torch.rand((batchsize, 3, config.vae_sample_size, config.vae_sample_size), dtype=torch_dtype)} - - -def vae_encoder_load(model_name): - base_model_id = get_base_model_name(model_name) - model = AutoencoderKL.from_pretrained(base_model_id, subfolder="vae") - model.forward = lambda sample: model.encode(sample)[0].sample() - return model - - -def vae_encoder_conversion_inputs(model=None): - return tuple(vae_encoder_inputs(1, torch.float32).values()) - - -def vae_encoder_data_loader(data_dir, batchsize, *args, **kwargs): - return RandomDataLoader(vae_encoder_inputs, batchsize, torch.float16) - - -# ----------------------------------------------------------------------------- -# VAE DECODER -# ----------------------------------------------------------------------------- - - -def vae_decoder_inputs(batchsize, torch_dtype): - return { - "latent_sample": torch.rand((batchsize, 4, config.unet_sample_size, config.unet_sample_size), dtype=torch_dtype) - } - - -def vae_decoder_load(model_name): - base_model_id = get_base_model_name(model_name) - model = AutoencoderKL.from_pretrained(base_model_id, subfolder="vae") - model.forward = model.decode - return model - - -def vae_decoder_conversion_inputs(model=None): - return tuple(vae_decoder_inputs(1, torch.float32).values()) - - -def vae_decoder_data_loader(data_dir, batchsize, *args, **kwargs): - return RandomDataLoader(vae_decoder_inputs, batchsize, torch.float16) - - -# 
----------------------------------------------------------------------------- -# SAFETY CHECKER -# ----------------------------------------------------------------------------- - - -def safety_checker_inputs(batchsize, torch_dtype): - return { - "clip_input": torch.rand((batchsize, 3, 224, 224), dtype=torch_dtype), - "images": torch.rand((batchsize, config.vae_sample_size, config.vae_sample_size, 3), dtype=torch_dtype), - } - - -def safety_checker_load(model_name): - base_model_id = get_base_model_name(model_name) - model = StableDiffusionSafetyChecker.from_pretrained(base_model_id, subfolder="safety_checker") - model.forward = model.forward_onnx - return model - - -def safety_checker_conversion_inputs(model=None): - return tuple(safety_checker_inputs(1, torch.float32).values()) - - -def safety_checker_data_loader(data_dir, batchsize, *args, **kwargs): - return RandomDataLoader(safety_checker_inputs, batchsize, torch.float16) diff --git a/Demos/NPU-GPU-Pipeline/test/test_img2img.mp4 b/Demos/NPU-GPU-Pipeline/test/test_img2img.mp4 deleted file mode 100644 index 5c1f60a7..00000000 Binary files a/Demos/NPU-GPU-Pipeline/test/test_img2img.mp4 and /dev/null differ diff --git a/Demos/NPU-GPU-Pipeline/unilogsetup.cfg b/Demos/NPU-GPU-Pipeline/unilogsetup.cfg deleted file mode 100644 index 0b784815..00000000 --- a/Demos/NPU-GPU-Pipeline/unilogsetup.cfg +++ /dev/null @@ -1,16 +0,0 @@ -unilog.csv -4 -1 -root/CPU0/Frequencies = enable, expand, visible -root/CPU0/DPM Frequencies = enable, expand, visible -root/CPU0/DPM Residencies = enable, expand, visible -root/CPU0/VDDCR_SOC Voltage = enable, expand, visible -root/CPU0/MISC = enable, expand, visible -root/CPU0/CORES = enable, expand, visible -root/CPU0/PSI3 = enable, expand, visible -root/CPU0/Power Correlation = enable, expand, visible -root/CPU0/DPM Activity Monitors = enable, expand, visible -root/CPU0/DF Pstate Table = enable, expand, visible -root/CPU0/SVI3 = enable, expand, visible -root/CPU0/Activity Monitors = enable, 
expand, visible -root/CPU0/DF Bandwidth = enable, expand, visible \ No newline at end of file diff --git a/Demos/NPU-GPU-Pipeline/utils_img2img.py b/Demos/NPU-GPU-Pipeline/utils_img2img.py deleted file mode 100644 index 833664c8..00000000 --- a/Demos/NPU-GPU-Pipeline/utils_img2img.py +++ /dev/null @@ -1,2176 +0,0 @@ -import threading -import os -import contextlib -import torch -import torch.nn as nn -from PIL import Image, ImageDraw, ImageFont, ExifTags -from PIL import __version__ as pil_version -from multiprocessing.pool import ThreadPool -import numpy as np -from itertools import repeat -import glob -import cv2 -import tempfile -import hashlib -from pathlib import Path -import time -import torchvision -import math -import re -from typing import List, Union, Dict -import pkg_resources as pkg -from types import SimpleNamespace -from torch.utils.data import Dataset, DataLoader -from tqdm import tqdm -import random -import yaml -import logging.config -import sys -import pathlib -CURRENT_DIR = pathlib.Path(__file__).parent -sys.path.append(str(CURRENT_DIR)) - -LOGGING_NAME = 'ultralytics' -LOGGER = logging.getLogger(LOGGING_NAME) -for fn in LOGGER.info, LOGGER.warning: - setattr(LOGGER, fn.__name__, lambda x: fn(x)) -IMG_FORMATS = "bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm" # include image suffixes -VID_FORMATS = "asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv" # include video suffixes -TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}' # tqdm bar format -NUM_THREADS = min(8, os.cpu_count()) -PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true" # global pin_memory for dataloaders -_formats = ["xyxy", "xywh", "ltwh"] -CFG_FLOAT_KEYS = {'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'} -CFG_FRACTION_KEYS = { - 'dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr', 'fl_gamma', - 'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 
'flipud', 'fliplr', 'mosaic', - 'mixup', 'copy_paste', 'conf', 'iou'} -CFG_INT_KEYS = { - 'epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride', - 'line_thickness', 'workspace', 'nbs'} -CFG_BOOL_KEYS = { - 'save', 'exist_ok', 'pretrained', 'verbose', 'deterministic', 'single_cls', 'image_weights', 'rect', 'cos_lr', - 'overlap_mask', 'val', 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', - 'save_crop', 'hide_labels', 'hide_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks', 'boxes', 'keras', - 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'v5loader'} -# Get orientation exif tag -for orientation in ExifTags.TAGS.keys(): - if ExifTags.TAGS[orientation] == 'Orientation': - break - - -def preprocess(img): - img = torch.from_numpy(img) - img = img.float() # uint8 to fp16/32 - img /= 255 # 0 - 255 to 0.0 - 1.0 - return img - -def segments2boxes(segments): - """ - It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) - - Args: - segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates - - Returns: - (np.ndarray): the xywh coordinates of the bounding boxes. - """ - boxes = [] - for s in segments: - x, y = s.T # segment xy - boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy - return xyxy2xywh(np.array(boxes)) # cls, xywh - - -def check_version( - current: str = "0.0.0", - minimum: str = "0.0.0", - name: str = "version ", - pinned: bool = False, - hard: bool = False, - verbose: bool = False, -) -> bool: - """ - Check current version against the required minimum version. - - Args: - current (str): Current version. - minimum (str): Required minimum version. - name (str): Name to be used in warning message. - pinned (bool): If True, versions must match exactly. If False, minimum version must be satisfied. 
- hard (bool): If True, raise an AssertionError if the minimum version is not met. - verbose (bool): If True, print warning message if minimum version is not met. - - Returns: - bool: True if minimum version is met, False otherwise. - """ - current, minimum = (pkg.parse_version(x) for x in (current, minimum)) - result = (current == minimum) if pinned else (current >= minimum) # bool - warning_message = f"WARNING ⚠️ {name}{minimum} is required by YOLOv8, but {name}{current} is currently installed" - if verbose and not result: - LOGGER.warning(warning_message) - return result - - -TORCH_1_9 = check_version(torch.__version__, '1.9.0') - - -def smart_inference_mode(): - # Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator - def decorate(fn): - return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn) - - return decorate - - -def box_iou(box1, box2, eps=1e-7): - # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. - Arguments: - box1 (Tensor[N, 4]) - box2 (Tensor[M, 4]) - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) - inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) - - # IoU = inter / (area1 + area2 - inter) - return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps) - - -class LoadImages: - # YOLOv8 image/video dataloader, i.e. 
`yolo predict source=image.jpg/vid.mp4` - def __init__( - self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1 - ): - # *.txt file with img/vid/dir on each line - if isinstance(path, str) and Path(path).suffix == ".txt": - path = Path(path).read_text().rsplit() - files = [] - for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: - p = str(Path(p).resolve()) - if "*" in p: - files.extend(sorted(glob.glob(p, recursive=True))) # glob - elif os.path.isdir(p): - files.extend(sorted(glob.glob(os.path.join(p, "*.*")))) # dir - elif os.path.isfile(p): - files.append(p) # files - else: - raise FileNotFoundError(f"{p} does not exist") - # include image suffixes - images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS] - videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS] - ni, nv = len(images), len(videos) - - self.imgsz = imgsz - self.stride = stride - self.files = images + videos - self.nf = ni + nv # number of files - self.video_flag = [False] * ni + [True] * nv - self.mode = "image" - self.auto = auto - self.transforms = transforms # optional - self.vid_stride = vid_stride # video frame-rate stride - self.bs = 1 - if any(videos): - self.orientation = None # rotation degrees - self._new_video(videos[0]) # new video - else: - self.cap = None - if self.nf == 0: - raise FileNotFoundError( - f"No images or videos found in {p}. 
" - f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}" - ) - - def __iter__(self): - self.count = 0 - return self - - def __next__(self): - if self.count == self.nf: - raise StopIteration - path = self.files[self.count] - - if self.video_flag[self.count]: - # Read video - self.mode = "video" - for _ in range(self.vid_stride): - self.cap.grab() - success, im0 = self.cap.retrieve() - while not success: - self.count += 1 - self.cap.release() - if self.count == self.nf: # last video - raise StopIteration - path = self.files[self.count] - self._new_video(path) - success, im0 = self.cap.read() - - self.frame += 1 - s = f"video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: " - - else: - # Read image - self.count += 1 - im0 = cv2.imread(path) # BGR - if im0 is None: - raise FileNotFoundError(f"Image Not Found {path}") - s = f"image {self.count}/{self.nf} {path}: " - - if self.transforms: - im = self.transforms(im0) # transforms - else: - im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0) - im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - im = np.ascontiguousarray(im) # contiguous - - return path, im, im0, self.cap, s - - def _new_video(self, path): - # Create a new video capture object - self.frame = 0 - self.cap = cv2.VideoCapture(path) - self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - if hasattr(cv2, "CAP_PROP_ORIENTATION_META"): # cv2<4.6.0 compatibility - self.orientation = int( - self.cap.get(cv2.CAP_PROP_ORIENTATION_META) - ) # rotation degrees - # Disable auto-orientation due to known issues in https://github.com/ultralytics/yolov5/issues/8493 - # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) - - def _cv2_rotate(self, im): - # Rotate a cv2 video manually - if self.orientation == 0: - return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) - elif self.orientation == 180: - return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) - elif self.orientation == 90: - return 
cv2.rotate(im, cv2.ROTATE_180) - return im - - def __len__(self): - return self.nf # number of files - - -class LetterBox: - """Resize image and padding for detection, instance segmentation, pose""" - - def __init__( - self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32 - ): - self.new_shape = new_shape - self.auto = auto - self.scaleFill = scaleFill - self.scaleup = scaleup - self.stride = stride - - def __call__(self, labels=None, image=None): - if labels is None: - labels = {} - img = labels.get("img") if image is None else image - shape = img.shape[:2] # current shape [height, width] - new_shape = labels.pop("rect_shape", self.new_shape) - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - # only scale down, do not scale up (for better val mAP) - if not self.scaleup: - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if self.auto: # minimum rectangle - dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding - elif self.scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = ( - new_shape[1] / shape[1], - new_shape[0] / shape[0], - ) # width, height ratios - - dw /= 2 # divide padding into 2 sides - dh /= 2 - if labels.get("ratio_pad"): - labels["ratio_pad"] = (labels["ratio_pad"], (dw, dh)) # for evaluation - - if shape[::-1] != new_unpad: # resize - img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - img = cv2.copyMakeBorder( - img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) - ) # add border - - if len(labels): - labels = 
self._update_labels(labels, ratio, dw, dh) - labels["img"] = img - labels["resized_shape"] = new_shape - return labels - else: - return img - - def _update_labels(self, labels, ratio, padw, padh): - """Update labels""" - labels["instances"].convert_bbox(format="xyxy") - labels["instances"].denormalize(*labels["img"].shape[:2][::-1]) - labels["instances"].scale(*ratio) - labels["instances"].add_padding(padw, padh) - return labels - - -class Annotator: - # YOLOv8 Annotator for train/val mosaics and jpgs and detect/hub inference annotations - def __init__( - self, - im, - line_width=None, - font_size=None, - font="Arial.ttf", - pil=False, - example="abc", - ): - assert ( - im.data.contiguous - ), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." - # non-latin labels, i.e. asian, arabic, cyrillic - non_ascii = not is_ascii(example) - self.pil = pil or non_ascii - if self.pil: # use PIL - self.pil_9_2_0_check = check_version( - pil_version, "9.2.0" - ) # deprecation check - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - self.font = ImageFont.load_default() - else: # use cv2 - self.im = im - self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width - self.original_image = self.im.copy() - - def box_label( - self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255) - ): - # Add one xyxy box to image with label - if isinstance(box, torch.Tensor): - box = box.tolist() - if self.pil or not is_ascii(label): - self.draw.rectangle(box, width=self.lw, outline=color) # box - if label: - if self.pil_9_2_0_check: - _, _, w, h = self.font.getbbox(label) # text width, height (New) - else: - w, h = self.font.getsize( - label - ) # text width, height (Old, deprecated in 9.2.0) - outside = box[1] - h >= 0 # label fits outside box - self.draw.rectangle( - ( - box[0], - box[1] - h if outside else box[1], - box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 
1, - ), - fill=color, - ) - # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 - self.draw.text( - (box[0], box[1] - h if outside else box[1]), - label, - fill=txt_color, - font=self.font, - ) - else: # cv2 - print("cv2") - p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle( - self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA - ) - if label: - tf = max(self.lw - 1, 1) # font thickness - # text width, height - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] - outside = p1[1] - h >= 3 - p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 - cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - self.im, - label, - (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), - 0, - self.lw / 3, - txt_color, - thickness=tf, - lineType=cv2.LINE_AA, - ) - - def rectangle(self, xy, fill=None, outline=None, width=1): - # Add rectangle to image (PIL-only) - self.draw.rectangle(xy, fill, outline, width) - - def text(self, xy, text, txt_color=(255, 255, 255), anchor="top"): - # Add text to image (PIL-only) - if anchor == "bottom": # start y from font bottom - w, h = self.font.getsize(text) # text width, height - xy[1] += 1 - h - self.draw.text(xy, text, fill=txt_color, font=self.font) - - def fromarray(self, im): - # Update self.im from a numpy array - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - - def result(self): - # Return annotated image as array - return np.asarray(self.im) - - def crop_object(self, box, save_path=None): - """ - Crop an object based on the bounding box and optionally save it to a file. - Args: - box (tuple): The bounding box with (x1, y1, x2, y2) format. - save_path (str, optional): Path to save the cropped image. If None, the image is not saved. - - Returns: - numpy.ndarray: The cropped image array. 
- """ - if isinstance(self.original_image, Image.Image): # If using PIL - crop_img = self.original_image.crop(box) # PIL uses (left, upper, right, lower) box format - if save_path: - crop_img.save(save_path) - return np.array(crop_img) - else: # If using cv2 - crop_img = self.original_image[box[1]:box[3], box[0]:box[2]] - if save_path: - cv2.imwrite(save_path, crop_img) - return crop_img - - -def non_max_suppression( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - nm=0, # number of masks -): - # Checks - assert ( - 0 <= conf_thres <= 1 - ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert ( - 0 <= iou_thres <= 1 - ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - # YOLOv8 model in validation model, output = (inference_out, loss_out) - if isinstance(prediction, (list, tuple)): - prediction = prediction[0] # select only inference output - device = prediction.device - mps = "mps" in device.type # Apple MPS - if mps: # MPS not fully supported yet, convert tensors to CPU before NMS - prediction = prediction.cpu() - bs = prediction.shape[0] # batch size - nc = prediction.shape[1] - nm - 4 # number of classes - mi = 4 + nc # mask start index - xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates - - # Settings - # min_wh = 2 # (pixels) minimum box width and height - max_wh = 7680 # (pixels) maximum box width and height - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.5 + 0.05 * bs # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - - t = time.time() - output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > 
max_wh)).any(1), 4] = 0 # width-height - x = x.transpose(0, -1)[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - lb = labels[xi] - v = torch.zeros((len(lb), nc + nm + 5), device=x.device) - v[:, :4] = lb[:, 1:5] # box - v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Detections matrix nx6 (xyxy, conf, cls) - box, cls, mask = x.split((4, nc, nm), 1) - # center_x, center_y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(box) - if multi_label: - i, j = (cls > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) - else: # best class only - conf, j = cls.max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - # sort by confidence and remove excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - # boxes (offset by class), scores - boxes, scores = x[:, :4] + c, x[:, 4] - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - i = i[:max_det] # limit detections - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( - 1, keepdim=True - ) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if mps: - output[xi] = output[xi].to(device) - if (time.time() - t) > time_limit: - LOGGER.warning(f"WARNING ⚠️ NMS time limit {time_limit:.3f}s 
exceeded") - break # time limit exceeded - - return output - - -class Colors: - # Ultralytics color palette https://ultralytics.com/ - def __init__(self): - # hex = matplotlib.colors.TABLEAU_COLORS.values() - hexs = ( - "FF3838", - "FF9D97", - "FF701F", - "FFB21D", - "CFD231", - "48F90A", - "92CC17", - "3DDB86", - "1A9334", - "00D4BB", - "2C99A8", - "00C2FF", - "344593", - "6473FF", - "0018EC", - "8438FF", - "520085", - "CB38FF", - "FF95C8", - "FF37C7", - ) - self.palette = [self.hex2rgb(f"#{c}") for c in hexs] - self.n = len(self.palette) - - def __call__(self, i, bgr=False): - c = self.palette[int(i) % self.n] - return (c[2], c[1], c[0]) if bgr else c - - @staticmethod - def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) - - -colors = Colors() # create instance for 'from utils.plots import colors' - - -def threaded(func): - # Multi-threads a target function and returns thread. Usage: @threaded decorator - def wrapper(*args, **kwargs): - thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) - thread.start() - return thread - - return wrapper - - -def plot_images( - images, - batch_idx, - cls, - bboxes, - masks=np.zeros(0, dtype=np.uint8), - paths=None, - names=None, - save_path=None, -): - op_labels = set() - # Plot image grid with labels - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(cls, torch.Tensor): - cls = cls.cpu().numpy() - if isinstance(bboxes, torch.Tensor): - bboxes = bboxes.cpu().numpy() - if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy().astype(int) - if isinstance(batch_idx, torch.Tensor): - batch_idx = batch_idx.cpu().numpy() - - max_size = 1920 # max image size - max_subplots = 16 # max image subplots, i.e. 
4x4 - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs**0.5) # number of subplots (square) - if np.max(images[0]) <= 1: - images *= 255 # de-normalise (optional) - - # Build Image - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, im in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - im = im.transpose(1, 2, 0) - mosaic[y : y + h, x : x + w, :] = im - - # Resize (optional) - scale = max_size / ns / max(h, w) - if scale < 1: - h = math.ceil(scale * h) - w = math.ceil(scale * w) - mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) - - # Annotate - fs = int((h + w) * ns * 0.01) # font size - annotator = Annotator( - mosaic, line_width=2, font_size=fs, pil=True, example=names - ) - for i in range(i + 1): - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - annotator.rectangle( - [x, y, x + w, y + h], None, (255, 255, 255), width=2 - ) # borders - if paths: - annotator.text( - # filenames - (x + 5, y + 5 + h), - text=Path(paths[i]).name[:40], - txt_color=(220, 220, 220), - ) - if len(cls) > 0: - idx = batch_idx == i - boxes = xywh2xyxy(bboxes[idx, :4]).T - classes = cls[idx].astype("int") - labels = bboxes.shape[1] == 4 # labels if no conf column - # check for confidence presence (label vs pred) - conf = None if labels else bboxes[idx, 4] - - if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale < 1: # absolute coords need scale if image scales - boxes *= scale - boxes[[0, 2]] += x - boxes[[1, 3]] += y - cropped_imgs = [] - for j, box in enumerate(boxes.T.tolist()): - c = classes[j] - color = colors(c) - c = names[c] if names else c - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = f"{c}" if labels else f"{c} 
def is_ascii(s=""):
    """Return True when ``str(s)`` consists solely of ASCII characters.

    Args:
        s: Any object; it is converted with ``str()`` first, so lists, tuples,
            ``None`` etc. are accepted.

    Returns:
        bool: True for an empty string or an all-ASCII string, False otherwise.
    """
    # str.isascii() (Python 3.7+) replaces the encode/decode round trip, which
    # allocated two copies and raised UnicodeEncodeError on lone surrogates.
    return str(s).isascii()
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format,
    where xy is the box center, xy1 the top-left corner and xy2 the bottom-right corner.

    The last axis holds the coordinates, so a single box of shape (4,), an (n, 4) matrix and
    batched (..., 4) inputs are all accepted, matching xyxy2xywh. Columns beyond the first four
    are copied through unchanged.

    Args:
        x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray) or (torch.Tensor): A new array/tensor with the coordinates in (x1, y1, x2, y2) format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w  # top left x
    y[..., 1] = x[..., 1] - half_h  # top left y
    y[..., 2] = x[..., 0] + half_w  # bottom right x
    y[..., 3] = x[..., 1] + half_h  # bottom right y
    return y
class IterableSimpleNamespace(SimpleNamespace):
    """
    SimpleNamespace that iterates as (name, value) pairs, so it works with dict() and for loops
    """

    def __iter__(self):
        """Return an iterator over the (attribute name, value) pairs."""
        return iter(self.__dict__.items())

    def __str__(self):
        """Render one 'name=value' line per attribute."""
        lines = []
        for name, value in self.__dict__.items():
            lines.append(f"{name}={value}")
        return '\n'.join(lines)

    def get(self, key, default=None):
        """Return attribute ``key``, or ``default`` when it is not set."""
        try:
            return getattr(self, key)
        except AttributeError:
            return default
def seed_worker(worker_id):
    """Seed NumPy's and Python's global RNGs from torch's initial seed.

    Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader
    ``worker_id`` is part of the signature but is not used in the body.
    """
    seed32 = torch.initial_seed() & 0xFFFFFFFF  # low 32 bits, identical to "% 2 ** 32"
    for seed_rng in (np.random.seed, random.seed):
        seed_rng(seed32)
rank == -1: - sampler = None - if cfg.image_weights or cfg.close_mosaic: - loader = DataLoader - generator = torch.Generator() - generator.manual_seed(6148914691236517205) - return loader(dataset=dataset, - batch_size=batch, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=getattr(dataset, "collate_fn", None), - worker_init_fn=seed_worker, - generator=generator), dataset - - -class BaseDataset(Dataset): - """Base Dataset. - Args: - img_path (str): image path. - pipeline (dict): a dict of image transforms. - label_path (str): label path, this can also be an ann_file or other custom label path. - """ - - def __init__( - self, - img_path, - imgsz=640, - cache=False, - augment=True, - hyp=None, - prefix="", - rect=False, - batch_size=None, - stride=32, - pad=0.5, - single_cls=False, - ): - super().__init__() - self.img_path = img_path - self.imgsz = imgsz - self.augment = augment - self.single_cls = single_cls - self.prefix = prefix - self.im_files = self.get_img_files(self.img_path) - self.labels = self.get_labels() - self.ni = len(self.labels) - - # rect stuff - self.rect = rect - self.batch_size = batch_size - self.stride = stride - self.pad = pad - if self.rect: - assert self.batch_size is not None - self.set_rectangle() - - # cache stuff - self.ims = [None] * self.ni - self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files] - if cache: - self.cache_images(cache) - - # transforms - self.transforms = self.build_transforms(hyp=hyp) - - def get_img_files(self, img_path): - """Read image files.""" - try: - f = [] # image files - for p in img_path if isinstance(img_path, list) else [img_path]: - p = Path(p) # os-agnostic - if p.is_dir(): # dir - f += glob.glob(str(p / "**" / "*.*"), recursive=True) - # f = list(p.rglob('*.*')) # pathlib - elif p.is_file(): # file - with open(p) as t: - t = t.read().strip().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace("./", parent) if 
x.startswith("./") else x for x in t] # local to global path - # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) - else: - raise FileNotFoundError(f"{self.prefix}{p} does not exist") - im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS) - # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib - assert im_files, f"{self.prefix}No images found" - except Exception as e: - raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n") from e - return im_files - - def load_image(self, i): - # Loads 1 image from dataset index 'i', returns (im, resized hw) - im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] - if im is None: # not cached in RAM - if fn.exists(): # load npy - im = np.load(fn) - else: # read image - im = cv2.imread(f) # BGR - if im is None: - raise FileNotFoundError(f"Image Not Found {f}") - h0, w0 = im.shape[:2] # orig hw - r = self.imgsz / max(h0, w0) # ratio - if r != 1: # if sizes are not equal - interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA - im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) - return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized - return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized - - def cache_images(self, cache): - # cache images to memory or disk - gb = 0 # Gigabytes of cached images - self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni - fcn = self.cache_images_to_disk if cache == "disk" else self.load_image - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(fcn, range(self.ni)) - pbar = tqdm(enumerate(results), total=self.ni, bar_format=TQDM_BAR_FORMAT) - for i, x in pbar: - if cache == "disk": - gb += self.npy_files[i].stat().st_size - else: # 'ram' - self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) - gb += self.ims[i].nbytes - 
pbar.desc = f"{self.prefix}Caching images ({gb / 1E9:.1f}GB {cache})" - pbar.close() - - def cache_images_to_disk(self, i): - # Saves an image as an *.npy file for faster loading - f = self.npy_files[i] - if not f.exists(): - np.save(f.as_posix(), cv2.imread(self.im_files[i])) - - def set_rectangle(self): - bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index - nb = bi[-1] + 1 # number of batches - - s = np.array([x.pop("shape") for x in self.labels]) # hw - ar = s[:, 0] / s[:, 1] # aspect ratio - irect = ar.argsort() - self.im_files = [self.im_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * nb - for i in range(nb): - ari = ar[bi == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride - self.batch = bi # batch index of image - - def __getitem__(self, index): - return self.transforms(self.get_label_info(index)) - - def get_label_info(self, index): - label = self.labels[index].copy() - label.pop("shape", None) # shape is for rect, remove it - label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index) - label["ratio_pad"] = ( - label["resized_shape"][0] / label["ori_shape"][0], - label["resized_shape"][1] / label["ori_shape"][1], - ) # for evaluation - if self.rect: - label["rect_shape"] = self.batch_shapes[self.batch[index]] - label = self.update_labels_info(label) - return label - - def __len__(self): - return len(self.labels) - - def update_labels_info(self, label): - """custom your label format here""" - return label - - def build_transforms(self, hyp=None): - """Users can custom augmentations here - like: - if self.augment: - # training transforms - return Compose([]) - else: - # val transforms - return Compose([]) - """ - raise 
def get_hash(paths):
    """Return one MD5 hex digest for a list of paths (files or dirs).

    The digest covers the summed on-disk size of the paths that exist, followed
    by the concatenation of all path strings.
    """
    total_bytes = 0
    for entry in paths:
        if os.path.exists(entry):
            total_bytes += os.path.getsize(entry)
    digest = hashlib.md5(str(total_bytes).encode())  # hash sizes
    digest.update("".join(paths).encode())  # hash paths
    return digest.hexdigest()
class Bboxes:
    """Hold an (N, 4) array of boxes together with the name of its coordinate format.

    Now only numpy is supported.
    """

    def __init__(self, bboxes, format="xyxy") -> None:
        """Store ``bboxes`` (a single 1-D box is promoted to shape (1, 4)) and its ``format``."""
        assert format in _formats
        bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
        assert bboxes.ndim == 2
        assert bboxes.shape[1] == 4
        self.bboxes = bboxes
        self.format = format

    def convert(self, format):
        """Convert the stored boxes to ``format`` in place; a no-op when already in that format.

        Raises:
            NotImplementedError: for any pair other than "xyxy" <-> "xywh".
        """
        assert format in _formats
        if self.format == format:
            return
        if self.format == "xyxy" and format == "xywh":
            bboxes = xyxy2xywh(self.bboxes)
        elif self.format == "xywh" and format == "xyxy":
            bboxes = xywh2xyxy(self.bboxes)
        else:
            # This path used to fall through to an unbound local variable;
            # fail with an explicit message instead.
            raise NotImplementedError(
                f"Bboxes conversion from '{self.format}' to '{format}' is not supported"
            )
        self.bboxes = bboxes
        self.format = format

    def areas(self):
        """Return the per-box areas; converts the stored boxes to "xyxy" as a side effect."""
        self.convert("xyxy")
        return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])

    def mul(self, scale):
        """
        Multiply the four coordinate columns in place.

        Args:
            scale (tuple | List | int | float): one factor per coordinate, or a single
                number applied to all four.
        """
        if isinstance(scale, (int, float)):
            scale = (scale,) * 4  # broadcast a scalar, as the documented signature promises
        assert isinstance(scale, (tuple, list))
        assert len(scale) == 4
        for column, factor in enumerate(scale):
            self.bboxes[:, column] *= factor

    def add(self, offset):
        """
        Add to the four coordinate columns in place.

        Args:
            offset (tuple | List | int | float): one offset per coordinate, or a single
                number applied to all four.
        """
        if isinstance(offset, (int, float)):
            offset = (offset,) * 4  # broadcast a scalar, as the documented signature promises
        assert isinstance(offset, (tuple, list))
        assert len(offset) == 4
        for column, shift in enumerate(offset):
            self.bboxes[:, column] += shift

    def __len__(self):
        """Return the number of boxes."""
        return len(self.bboxes)

    @classmethod
    def concatenate(cls, boxes_list: List["Bboxes"], axis=0) -> "Bboxes":
        """
        Concatenates a list of Bboxes into a single Bboxes

        Arguments:
            boxes_list (list[Bboxes])
            axis (int): axis passed to np.concatenate

        Returns:
            Bboxes: the concatenated Bboxes. An empty list yields an empty (0, 4) Bboxes and a
                single-element list returns that element itself (not a copy).
        """
        assert isinstance(boxes_list, (list, tuple))
        if not boxes_list:
            # np.empty(0) would be promoted to shape (1, 0) and fail the 4-column assertion.
            return cls(np.empty((0, 4)))
        assert all(isinstance(box, Bboxes) for box in boxes_list)

        if len(boxes_list) == 1:
            return boxes_list[0]
        # Keep the format tag of the inputs instead of silently relabelling as "xyxy".
        # NOTE(review): assumes every element shares the first element's format - confirm callers.
        return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis), format=boxes_list[0].format)

    def __getitem__(self, index) -> "Bboxes":
        """
        Args:
            index: int, slice, or a BoolArray

        Returns:
            Bboxes: Create a new :class:`Bboxes` by indexing; the format tag is preserved.
        """
        if isinstance(index, (int, np.integer)):
            # ndarray.view(1, -1) is a dtype reinterpretation and raises on numpy; reshape is intended.
            return Bboxes(self.bboxes[index].reshape(1, -1), format=self.format)
        b = self.bboxes[index]
        assert b.ndim == 2, f"Indexing on Bboxes with {index} failed to return a matrix!"
        return Bboxes(b, format=self.format)
class Instances:
    """Bundle the boxes, segments and keypoints of one image's objects and transform them together."""

    def __init__(self, bboxes, segments=None, keypoints=None, bbox_format="xywh", normalized=True) -> None:
        """
        Args:
            bboxes (ndarray): bboxes with shape [N, 4].
            segments (list | ndarray): segments.
            keypoints (ndarray): keypoints with shape [N, 17, 2].
            bbox_format (str): coordinate format name handed to Bboxes.
            normalized (bool): whether the coordinates are currently normalized.
        """
        if segments is None:
            segments = []
        self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
        self.keypoints = keypoints
        self.normalized = normalized

        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            # NOTE(review): this also re-runs on already-resampled arrays passed in by
            # __getitem__/concatenate - confirm that is intended.
            segments = resample_segments(segments)
            # (N, 1000, 2)
            segments = np.stack(segments, axis=0)
        else:
            segments = np.zeros((0, 1000, 2), dtype=np.float32)
        self.segments = segments

    def convert_bbox(self, format):
        """Convert the boxes to ``format`` in place."""
        self._bboxes.convert(format=format)

    def bbox_areas(self):
        """Return the per-box areas (the result was previously computed and then discarded)."""
        return self._bboxes.areas()

    def scale(self, scale_w, scale_h, bbox_only=False):
        """this might be similar with denormalize func but without normalized sign"""
        self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
        if bbox_only:
            return
        self.segments[..., 0] *= scale_w
        self.segments[..., 1] *= scale_h
        if self.keypoints is not None:
            self.keypoints[..., 0] *= scale_w
            self.keypoints[..., 1] *= scale_h

    def denormalize(self, w, h):
        """Scale coordinates by (w, h) and clear the normalized flag; no-op if not normalized."""
        if not self.normalized:
            return
        self._bboxes.mul(scale=(w, h, w, h))
        self.segments[..., 0] *= w
        self.segments[..., 1] *= h
        if self.keypoints is not None:
            self.keypoints[..., 0] *= w
            self.keypoints[..., 1] *= h
        self.normalized = False

    def normalize(self, w, h):
        """Divide coordinates by (w, h) and set the normalized flag; no-op if already normalized."""
        if self.normalized:
            return
        self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h))
        self.segments[..., 0] /= w
        self.segments[..., 1] /= h
        if self.keypoints is not None:
            self.keypoints[..., 0] /= w
            self.keypoints[..., 1] /= h
        self.normalized = True

    def add_padding(self, padw, padh):
        """Shift all coordinates by (padw, padh); requires absolute (non-normalized) coordinates."""
        # handle rect and mosaic situation
        assert not self.normalized, "you should add padding with absolute coordinates."
        self._bboxes.add(offset=(padw, padh, padw, padh))
        self.segments[..., 0] += padw
        self.segments[..., 1] += padh
        if self.keypoints is not None:
            self.keypoints[..., 0] += padw
            self.keypoints[..., 1] += padh

    def __getitem__(self, index) -> "Instances":
        """
        Args:
            index: int, slice, or a BoolArray

        Returns:
            Instances: Create a new :class:`Instances` by indexing.
        """
        segments = self.segments[index] if len(self.segments) else self.segments
        keypoints = self.keypoints[index] if self.keypoints is not None else None
        bboxes = self.bboxes[index]
        bbox_format = self._bboxes.format
        return Instances(
            bboxes=bboxes,
            segments=segments,
            keypoints=keypoints,
            bbox_format=bbox_format,
            normalized=self.normalized,
        )

    def flipud(self, h):
        """Mirror all coordinates vertically within a height of ``h``."""
        if self._bboxes.format == "xyxy":
            # copy first: y1 and y2 swap roles, so both old values are needed
            y1 = self.bboxes[:, 1].copy()
            y2 = self.bboxes[:, 3].copy()
            self.bboxes[:, 1] = h - y2
            self.bboxes[:, 3] = h - y1
        else:
            self.bboxes[:, 1] = h - self.bboxes[:, 1]
        self.segments[..., 1] = h - self.segments[..., 1]
        if self.keypoints is not None:
            self.keypoints[..., 1] = h - self.keypoints[..., 1]

    def fliplr(self, w):
        """Mirror all coordinates horizontally within a width of ``w``."""
        if self._bboxes.format == "xyxy":
            # copy first: x1 and x2 swap roles, so both old values are needed
            x1 = self.bboxes[:, 0].copy()
            x2 = self.bboxes[:, 2].copy()
            self.bboxes[:, 0] = w - x2
            self.bboxes[:, 2] = w - x1
        else:
            self.bboxes[:, 0] = w - self.bboxes[:, 0]
        self.segments[..., 0] = w - self.segments[..., 0]
        if self.keypoints is not None:
            self.keypoints[..., 0] = w - self.keypoints[..., 0]

    def clip(self, w, h):
        """Clip boxes, segments and keypoints to [0, w] x [0, h], restoring the original box format."""
        ori_format = self._bboxes.format
        self.convert_bbox(format="xyxy")  # corner coordinates are what can be clipped directly
        self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
        self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h)
        if ori_format != "xyxy":
            self.convert_bbox(format=ori_format)
        self.segments[..., 0] = self.segments[..., 0].clip(0, w)
        self.segments[..., 1] = self.segments[..., 1].clip(0, h)
        if self.keypoints is not None:
            self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
            self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)

    def update(self, bboxes, segments=None, keypoints=None):
        """Replace the boxes (keeping the current format tag) and, when given, segments/keypoints."""
        new_bboxes = Bboxes(bboxes, format=self._bboxes.format)
        self._bboxes = new_bboxes
        if segments is not None:
            self.segments = segments
        if keypoints is not None:
            self.keypoints = keypoints

    def __len__(self):
        """Return the number of boxes."""
        return len(self.bboxes)

    @classmethod
    def concatenate(cls, instances_list: List["Instances"], axis=0) -> "Instances":
        """
        Concatenates a list of Instances into a single Instances

        Arguments:
            instances_list (list[Instances])
            axis

        Returns:
            Instances: the concatenated Instances. An empty list yields an empty Instances and a
                single-element list returns that element itself (not a copy). Format, normalized
                flag and keypoint usage are taken from the first element.
        """
        assert isinstance(instances_list, (list, tuple))
        if not instances_list:
            # np.empty(0) would be promoted to shape (1, 0) and fail the 4-column box assertion.
            return cls(np.empty((0, 4)))
        assert all(isinstance(instance, Instances) for instance in instances_list)

        if len(instances_list) == 1:
            return instances_list[0]

        use_keypoint = instances_list[0].keypoints is not None
        bbox_format = instances_list[0]._bboxes.format
        normalized = instances_list[0].normalized

        cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis)
        cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis)
        cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None
        return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized)

    @property
    def bboxes(self):
        """The underlying box array held by the wrapped Bboxes."""
        return self._bboxes.bboxes
- """ - try: - with tempfile.TemporaryFile(dir=dir_path): - pass - return True - except OSError: - return False - - -class YOLODataset(BaseDataset): - cache_version = '1.0.1' # dataset labels *.cache version, >= 1.0.0 for YOLOv8 - rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] - """YOLO Dataset. - Args: - img_path (str): image path. - prefix (str): prefix. - """ - - def __init__(self, - img_path, - imgsz=640, - cache=False, - augment=True, - hyp=None, - prefix="", - rect=False, - batch_size=None, - stride=32, - pad=0.0, - single_cls=False, - use_segments=False, - use_keypoints=False, - names=None): - self.use_segments = use_segments - self.use_keypoints = use_keypoints - self.names = names - assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints." - super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls) - - def cache_labels(self, path=Path("./labels.cache")): - # Cache dataset labels, check images and read shapes - if path.exists(): - path.unlink() # remove *.cache file if exists - x = {"labels": []} - nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages - desc = f"{self.prefix}Scanning {path.parent / path.stem}..." 
- total = len(self.im_files) - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(func=verify_image_label, - iterable=zip(self.im_files, self.label_files, repeat(self.prefix), - repeat(self.use_keypoints), repeat(len(self.names)))) - pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT) - for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar: - nm += nm_f - nf += nf_f - ne += ne_f - nc += nc_f - if im_file: - x["labels"].append( - dict( - im_file=im_file, - shape=shape, - cls=lb[:, 0:1], # n, 1 - bboxes=lb[:, 1:], # n, 4 - segments=segments, - keypoints=keypoint, - normalized=True, - bbox_format="xywh")) - if msg: - msgs.append(msg) - pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt" - pbar.close() - - if msgs: - LOGGER.info("\n".join(msgs)) - x["hash"] = get_hash(self.label_files + self.im_files) - x["results"] = nf, nm, ne, nc, len(self.im_files) - x["msgs"] = msgs # warnings - x["version"] = self.cache_version # cache version - self.im_files = [lb["im_file"] for lb in x["labels"]] # update im_files - if is_dir_writeable(path.parent): - np.save(str(path), x) # save cache for next time - path.with_suffix(".cache.npy").rename(path) # remove .npy suffix - LOGGER.info(f"{self.prefix}New cache created: {path}") - else: - LOGGER.warning(f"{self.prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable") # not writeable - return x - - def get_labels(self): - self.label_files = img2label_paths(self.im_files) - cache_path = Path(self.label_files[0]).parent.with_suffix(".cache") - try: - cache, exists = np.load(str(cache_path), allow_pickle=True).item(), True # load dict - assert cache["version"] == self.cache_version # matches current version - assert cache["hash"] == get_hash(self.label_files + self.im_files) # identical hash - except (FileNotFoundError, AssertionError, AttributeError): - cache, exists = self.cache_labels(cache_path), False # run cache ops - - # Display cache - nf, nm, ne, 
nc, n = cache.pop("results") # found, missing, empty, corrupt, total - if exists: - d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" - tqdm(None, desc=self.prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results - if cache["msgs"]: - LOGGER.info("\n".join(cache["msgs"])) # display warnings - - # Read cache - [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items - labels = cache["labels"] - - # Check if the dataset is all boxes or all segments - len_cls = sum(len(lb["cls"]) for lb in labels) - len_boxes = sum(len(lb["bboxes"]) for lb in labels) - len_segments = sum(len(lb["segments"]) for lb in labels) - if len_segments and len_boxes != len_segments: - LOGGER.warning( - f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, " - f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. " - "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.") - for lb in labels: - lb["segments"] = [] - return labels - - - def build_transforms(self, hyp=None): - transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)]) - transforms.append( - Format(bbox_format="xywh", - normalize=True, - return_mask=self.use_segments, - return_keypoint=self.use_keypoints, - batch_idx=True, - mask_ratio=hyp.mask_ratio, - mask_overlap=hyp.overlap_mask)) - return transforms - - def close_mosaic(self, hyp): - hyp.mosaic = 0.0 # set mosaic ratio=0.0 - hyp.copy_paste = 0.0 # keep the same behavior as previous v8 close-mosaic - hyp.mixup = 0.0 # keep the same behavior as previous v8 close-mosaic - self.transforms = self.build_transforms(hyp) - - def update_labels_info(self, label): - """custom your label format here""" - # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label - # we can make it also support classification and 
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Transform distance(ltrb) to box(xywh or xyxy).

    ``distance`` is split into size-2 chunks along ``dim`` (left-top, right-bottom), which are
    subtracted from / added to ``anchor_points`` to get the two corners.
    """
    left_top, right_bottom = torch.split(distance, 2, dim)
    corner_min = anchor_points - left_top
    corner_max = anchor_points + right_bottom
    if not xywh:
        return torch.cat((corner_min, corner_max), dim)  # xyxy bbox
    center = (corner_min + corner_max) / 2
    size = corner_max - corner_min
    return torch.cat((center, size), dim)  # xywh bbox
"./data/anchors.npy", - allow_pickle=True, - ) - ) - strides = torch.tensor( - np.load( - "./data/strides.npy", - allow_pickle=True, - ) - ) - box, cls = torch.cat([xi.view(x[0].shape[0], 144, -1) for xi in x], 2).split( - (16 * 4, 80), 1 - ) - dbox = dist2bbox(dfl(box), anchors.unsqueeze(0), xywh=True, dim=1) * strides - y = torch.cat((dbox, cls.sigmoid()), 1) - return y, x - - -def smooth(y, f=0.05): - # Box filter of fraction f - nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) - p = np.ones(nf // 2) # ones padding - yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded - return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed - - -def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves - # Arguments - recall: The recall curve (list) - precision: The precision curve (list) - # Returns - Average precision, precision curve, recall curve - """ - - # Append sentinel values to beginning and end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([1.0], precision, [0.0])) - - # Compute the precision envelope - mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) - - # Integrate area under curve - method = 'interp' # methods: 'continuous', 'interp' - if method == 'interp': - x = np.linspace(0, 1, 101) # 101-point interp (COCO) - ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate - else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x-axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve - - return ap, mpre, mrec - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=Path(), names=(), eps=1e-16, prefix=""): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (nparray, nx1 or nx10). - conf: Objectness value from 0-1 (nparray). 
- pred_cls: Predicted object classes (nparray). - target_cls: True object classes (nparray). - plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes, nt = np.unique(target_cls, return_counts=True) - nc = unique_classes.shape[0] # number of classes, number of detections - - # Create Precision-Recall curve and compute AP for each class - px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) - for ci, c in enumerate(unique_classes): - i = pred_cls == c - n_l = nt[ci] # number of labels - n_p = i.sum() # number of predictions - if n_p == 0 or n_l == 0: - continue - - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + eps) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 - - # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + eps) - names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data - names = dict(enumerate(names)) # to dict - - i = smooth(f1.mean(0), 0.1).argmax() # max F1 index - p, r, f1 = p[:, i], r[:, i], f1[:, i] - tp = (r * nt).round() # true positives - fp = (tp / (p + eps) - tp).round() # false positives - return tp, fp, p, r, f1, ap, unique_classes.astype(int) - - -class Metric: - - def 
__init__(self) -> None: - self.p = [] # (nc, ) - self.r = [] # (nc, ) - self.f1 = [] # (nc, ) - self.all_ap = [] # (nc, 10) - self.ap_class_index = [] # (nc, ) - self.nc = 0 - - @property - def ap50(self): - """AP@0.5 of all classes. - Return: - (nc, ) or []. - """ - return self.all_ap[:, 0] if len(self.all_ap) else [] - - @property - def ap(self): - """AP@0.5:0.95 - Return: - (nc, ) or []. - """ - return self.all_ap.mean(1) if len(self.all_ap) else [] - - @property - def mp(self): - """mean precision of all classes. - Return: - float. - """ - return self.p.mean() if len(self.p) else 0.0 - - @property - def mr(self): - """mean recall of all classes. - Return: - float. - """ - return self.r.mean() if len(self.r) else 0.0 - - @property - def map50(self): - """Mean AP@0.5 of all classes. - Return: - float. - """ - return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 - - @property - def map75(self): - """Mean AP@0.75 of all classes. - Return: - float. - """ - return self.all_ap[:, 5].mean() if len(self.all_ap) else 0.0 - - @property - def map(self): - """Mean AP@0.5:0.95 of all classes. - Return: - float. 
- """ - return self.all_ap.mean() if len(self.all_ap) else 0.0 - - def mean_results(self): - """Mean of results, return mp, mr, map50, map""" - return [self.mp, self.mr, self.map50, self.map] - - def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i]""" - return self.p[i], self.r[i], self.ap50[i], self.ap[i] - - @property - def maps(self): - """mAP of each class""" - maps = np.zeros(self.nc) + self.map - for i, c in enumerate(self.ap_class_index): - maps[c] = self.ap[i] - return maps - - def fitness(self): - # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (np.array(self.mean_results()) * w).sum() - - def update(self, results): - """ - Args: - results: tuple(p, r, ap, f1, ap_class) - """ - self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results - - -class DetMetrics: - - def __init__(self, save_dir=Path("."), plot=False, names=()) -> None: - self.save_dir = save_dir - self.plot = plot - self.names = names - self.box = Metric() - - def process(self, tp, conf, pred_cls, target_cls): - results = ap_per_class(tp, conf, pred_cls, target_cls, plot=self.plot, save_dir=self.save_dir, - names=self.names)[2:] - self.box.nc = len(self.names) - self.box.update(results) - - @property - def keys(self): - return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"] - - def mean_results(self): - return self.box.mean_results() - - def class_result(self, i): - return self.box.class_result(i) - - @property - def maps(self): - return self.box.maps - - @property - def fitness(self): - return self.box.fitness() - - @property - def ap_class_index(self): - return self.box.ap_class_index - - @property - def results_dict(self): - return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness])) - - -def increment_path(path, exist_ok=False, sep='', mkdir=False): - """ - Increments a file or directory path, i.e. 
runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. - - If the path exists and exist_ok is not set to True, the path will be incremented by appending a number and sep to - the end of the path. If the path is a file, the file extension will be preserved. If the path is a directory, the - number will be appended directly to the end of the path. If mkdir is set to True, the path will be created as a - directory if it does not already exist. - - Args: - path (str or pathlib.Path): Path to increment. - exist_ok (bool, optional): If True, the path will not be incremented and will be returned as-is. Defaults to False. - sep (str, optional): Separator to use between the path and the incrementation number. Defaults to an empty string. - mkdir (bool, optional): If True, the path will be created as a directory if it does not exist. Defaults to False. - - Returns: - pathlib.Path: Incremented path. - """ - path = Path(path) # os-agnostic - if path.exists() and not exist_ok: - path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') - - # Method 1 - for n in range(2, 9999): - p = f'{path}{sep}{n}{suffix}' # increment path - if not os.path.exists(p): # - break - path = Path(p) - - if mkdir: - path.mkdir(parents=True, exist_ok=True) # make directory - - return path - - -def cfg2dict(cfg): - """ - Convert a configuration object to a dictionary. - - This function converts a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object. - - Inputs: - cfg (str) or (Path) or (SimpleNamespace): Configuration object to be converted to a dictionary. - - Returns: - cfg (dict): Configuration object in dictionary format. 
- """ - if isinstance(cfg, (str, Path)): - cfg = yaml_load(cfg) # load dict - elif isinstance(cfg, SimpleNamespace): - cfg = vars(cfg) # convert to dict - return cfg - - -def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = None, overrides: Dict = None): - """ - Load and merge configuration data from a file or dictionary. - - Args: - cfg (str) or (Path) or (Dict) or (SimpleNamespace): Configuration data. - overrides (str) or (Dict), optional: Overrides in the form of a file name or a dictionary. Default is None. - - Returns: - (SimpleNamespace): Training arguments namespace. - """ - cfg = cfg2dict(cfg) - - # Merge overrides - if overrides: - overrides = cfg2dict(overrides) - cfg = {**cfg, **overrides} # merge cfg and overrides dicts (prefer overrides) - - # Special handling for numeric project/names - for k in 'project', 'name': - if k in cfg and isinstance(cfg[k], (int, float)): - cfg[k] = str(cfg[k]) - - # Type and Value checks - for k, v in cfg.items(): - if v is not None: # None values may be from optional args - if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)): - raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " - f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')") - elif k in CFG_FRACTION_KEYS: - if not isinstance(v, (int, float)): - raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " - f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')") - if not (0.0 <= v <= 1.0): - raise ValueError(f"'{k}={v}' is an invalid value. " - f"Valid '{k}' values are between 0.0 and 1.0.") - elif k in CFG_INT_KEYS and not isinstance(v, int): - raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " - f"'{k}' must be an int (i.e. '{k}=0')") - elif k in CFG_BOOL_KEYS and not isinstance(v, bool): - raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " - f"'{k}' must be a bool (i.e. 
'{k}=True' or '{k}=False')") - - # Return instance - return IterableSimpleNamespace(**cfg) - - -def clip_boxes(boxes, shape): - """ - It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the - shape - - Args: - boxes (torch.Tensor): the bounding boxes to clip - shape (tuple): the shape of the image - """ - if isinstance(boxes, torch.Tensor): # faster individually - boxes[..., 0].clamp_(0, shape[1]) # x1 - boxes[..., 1].clamp_(0, shape[0]) # y1 - boxes[..., 2].clamp_(0, shape[1]) # x2 - boxes[..., 3].clamp_(0, shape[0]) # y2 - else: # np.array (faster grouped) - boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2 - boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 - - -def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): - """ - Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in - (img1_shape) to the shape of a different image (img0_shape). - - Args: - img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width). - boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2) - img0_shape (tuple): the shape of the target image, in the format of (height, width). - ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be - calculated based on the size difference between the two images. 
- - Returns: - boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2) - """ - if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding - else: - gain = ratio_pad[0][0] - pad = ratio_pad[1] - - boxes[..., [0, 2]] -= pad[0] # x padding - boxes[..., [1, 3]] -= pad[1] # y padding - boxes[..., :4] /= gain - clip_boxes(boxes, img0_shape) - return boxes - - -def exif_size(img): - # Returns exif-corrected PIL size - s = img.size # (width, height) - with contextlib.suppress(Exception): - rotation = dict(img._getexif().items())[orientation] - if rotation in [6, 8]: # rotation 270 or 90 - s = (s[1], s[0]) - return s - - -def verify_image_label(args): - # Verify one image-label pair - im_file, lb_file, prefix, keypoint, num_cls = args - # number (missing, found, empty, corrupt), message, segments, keypoints - nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None - try: - # verify images - im = Image.open(im_file) - im.verify() # PIL verify - shape = exif_size(im) # image size - shape = (shape[1], shape[0]) # hw - assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" - assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" - if im.format.lower() in ("jpg", "jpeg"): - with open(im_file, "rb") as f: - f.seek(-2, 2) - - # verify labels - if os.path.isfile(lb_file): - nf = 1 # label found - with open(lb_file) as f: - lb = [x.split() for x in f.read().strip().splitlines() if len(x)] - if any(len(x) > 6 for x in lb) and (not keypoint): # is segment - classes = np.array([x[0] for x in lb], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) 
- lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - lb = np.array(lb, dtype=np.float32) - nl = len(lb) - if nl: - if keypoint: - assert lb.shape[1] == 56, "labels require 56 columns each" - assert (lb[:, 5::3] <= 1).all(), "non-normalized or out of bounds coordinate labels" - assert (lb[:, 6::3] <= 1).all(), "non-normalized or out of bounds coordinate labels" - kpts = np.zeros((lb.shape[0], 39)) - for i in range(len(lb)): - kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3)) # remove occlusion param from GT - kpts[i] = np.hstack((lb[i, :5], kpt)) - lb = kpts - assert lb.shape[1] == 39, "labels require 39 columns each after removing occlusion parameter" - else: - assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected" - assert (lb[:, 1:] <= 1).all(), \ - f"non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}" - # All labels - max_cls = int(lb[:, 0].max()) # max label count - assert max_cls <= num_cls, \ - f'Label class {max_cls} exceeds dataset class count {num_cls}. 
' \ - f'Possible class labels are 0-{num_cls - 1}' - assert (lb >= 0).all(), f"negative label values {lb[lb < 0]}" - _, i = np.unique(lb, axis=0, return_index=True) - if len(i) < nl: # duplicate row check - lb = lb[i] # remove duplicates - if segments: - segments = [segments[x] for x in i] - msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed" - else: - ne = 1 # label empty - lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) - else: - nm = 1 # label missing - lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) - if keypoint: - keypoints = lb[:, 5:].reshape(-1, 17, 2) - lb = lb[:, :5] - return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg - except Exception as e: - nc = 1 - msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}" - return [None, None, None, None, None, nm, nf, ne, nc, msg] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f0352130 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative + Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and do + not modify the License. 
You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an + addendum to the NOTICE text from the Work, provided that such + additional attribution notices cannot be construed as modifying + the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 Advanced Micro Devices, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100755 index a218338f..00000000 --- a/LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/LLM-examples/RAG-OGA/Dataset/NPU.pdf b/LLM-examples/RAG-OGA/Dataset/NPU.pdf deleted file mode 100644 index 4710b0b6..00000000 Binary files a/LLM-examples/RAG-OGA/Dataset/NPU.pdf and /dev/null differ diff --git a/LLM-examples/RAG-OGA/Dataset/Quark_Quantization.pdf b/LLM-examples/RAG-OGA/Dataset/Quark_Quantization.pdf deleted file mode 100644 index 4640f208..00000000 Binary files a/LLM-examples/RAG-OGA/Dataset/Quark_Quantization.pdf and /dev/null differ diff --git a/LLM-examples/RAG-OGA/Dataset/ai_analyzer.rst b/LLM-examples/RAG-OGA/Dataset/ai_analyzer.rst deleted file mode 100644 index 890291ab..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ai_analyzer.rst +++ /dev/null @@ -1,238 +0,0 @@ -########### -AI Analyzer -########### - -AMD AI Analyzer is a tool that supports analysis and visualization of model compilation and inference on Ryzen AI. The primary goal of the tool is to help users better understand how the models are processed by the hardware, and to identify performance bottlenecks that may be present during model inference. Using AI Analyzer, users can visualize graph and operator partitions between the NPU and CPU. - -Installation -~~~~~~~~~~~~ - -If you installed the Ryzen AI software using automatic installer, AI Analyzer is already installed in the conda environment. - -If you manually installed the software, you will need to install the AI Analyzer wheel file in your environment. - - -.. code-block:: - - python -m pip install path\to\RyzenAI\installation\files\aianalyzer-.whl - - -Enabling Profiling and Visualization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Profiling and Visualization can be enabled by passing additional provider options to the ONNXRuntime Inference Session. An example is shown below: - -.. 
code-block:: - - provider_options = [{ - 'config_file': 'vaip_config.json', - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey', - 'ai_analyzer_visualization': True, - 'ai_analyzer_profiling': True, - }] - session = ort.InferenceSession(model.SerializeToString(), providers=providers, - provider_options=provider_options) - - -The ``ai_analyzer_profiling`` flag enables generation of artifacts related to the inference profile. The ``ai_analyzer_visualization`` flag enables generation of artifacts related to graph partitions and operator fusion. These artifacts are generated as .json files in the current run directory. - -AI Analyzer also supports native ONNX Runtime profiling, which can be used to analyze the parts of the session running on the CPU. Users can enable ONNX Runtime profiling through session options and pass it alongside the provider options as shown below: - -.. code-block:: - - # Configure session options for profiling - sess_options = rt.SessionOptions() - sess_options.enable_profiling = True - - provider_options = [{ - 'config_file': 'vaip_config.json', - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey', - 'ai_analyzer_visualization': True, - 'ai_analyzer_profiling': True, - }] - - session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=providers, - provider_options=provider_options) - - -Launching AI Analyzer -~~~~~~~~~~~~~~~~~~~~~ - -Once the artifacts are generated, `aianalyzer` can be invoked through the command line as follows: - - -.. code-block:: - - aianalyzer - - -**Positional Arguments** - -``logdir``: Path to the folder containing generated artifacts - -Additional Options - -``-v``, ``--version``: Show the version info and exit. - -``-b ADDR``, ``--bind ADDR``: Hostname or IP address on which to listen, default is 'localhost'. - -``-p PORT``, ``--port PORT``: TCP port on which to listen, default is '8000'. - -``-n``, ``--no-browser``: Prevent the opening of the default url in the browser. 
- -``-t TOKEN``, ``--token TOKEN``: Token used for authenticating first-time connections to the server. The default is to generate a new, random token. Setting to an empty string disables authentication altogether, which is NOT RECOMMENDED. - - - -Features -~~~~~~~~ - -AI Analyzer provides visibility into how your AI model is compiled and executed on Ryzen AI hardware. Its two main use cases are: - -1. Analyzing how the model was partitioned and mapped onto Ryzen AI's CPU and NPU accelerator -2. Profiling model performance as it executes inferencing workloads - -When launched, the AI Analyzer server will scan the folder specified with the logdir argument and detect and load all files relevant to compilation and/or inferencing per the ai_analyzer_visualization and ai_anlayzer_profiling flags. - -You can instruct the AI Analyzer server to either start a browser on the same host or else return to you a URL that you can then load into a browser on any host. - - -User Interface -~~~~~~~~~~~~~~ - -AI Analyzer has the following three sections as seen in the left-panel navigator - -1. PARTITIONING - A breakdown of your model was assigned to execute inference across CPU and NPU -2. NPU INSIGHTS - A detailed look at the how your model was optimized for inference execution on NPU -3. PERFORMANCE - A breakdown of inference execution through the model - - -These sections are described in more detail below - - - -PARTITIONING -@@@@@@@@@@@@ - -This section is comprised of two pages: Summary and Graph - -**Summary** - -The Summary page gives an overview of how the models operators have been assigned to Ryzen's CPU and NPU along with charts capturing GigaOp (GOP) offloading by operator type . - -There is also table titled "CPU Because" that shows the reasons why certain operators were not offloaded to the NPU. - -**Graph** - -The graph page shows an interactive diagram of the partitioned ONNX model, showing graphically how the layers are assigned to the Ryzen hardware. 
- - - -Toolbar - -- You can choose to show/hide individual NPU partitions, if any, with the "Filter by Partition" button -- A panel that displays properties for selected objects can be shown or hidden via the "Show Properties" toggle button -- The model table can be shown and hidden via the "Show Table" toggle button. -- Settings - - - Show Processor will separate operators that run on CPU and NPU respectively - - Show Partition will separate operators running on the NPU by their respective NPU partition, if any - - Show Instance Name will display the full hierarchical name for the operators in the ONNX model - -All objects in the graph have properties which can be viewed to the right of the graph. - - - -*Model Table* - -This table below the graph lists all objects in the partitioned ONNX model: - -- Processor (NPU or CPU) -- Function (Layer) -- Operator -- Ports -- NPU Partitions - - -NPU INSIGHTS -@@@@@@@@@@@@ - -This section is comprised of three pages: Summary, Original Graph, and Optimized Graph. - - - -**Summary** - -The Summary page gives an overview of how your model was mapped to the AMD Ryzen NPU. Charts are displayed showing statistics on the number of operators and total GMACs that have been mapped to the NPU (and if necessary, back to CPU via the "Failsafe CPU" mechanism). The statistics are shown per operator type and per NPU partition. - - - -**Original Graph** - -This is an interactive graph representing your model lowered to supported NPU primitive operators, and broken up into partitions if necessary. As with the PARTITIONING graph, there is a companion table containing all of the model elements that will cross-probe to the graph view. The objects in the graph and table will also cross-probe to the PARTITIONING graph. 
- -Toolbar - -You can choose to show/hide individual NPU partitions, if any, with the "Filter by Partition" button -A panel that displays properties for selected objects can be shown or hidden via the "Show Properties" toggle button -A code viewer showing the MLIR source code with cross-probing can be shown/hidden via the "Show Code View" button -The table below can be shown and hidden via the "Show Table" toggle button. -Display options for the graph can be accessed with the "Settings" button - - - - -**Optimized Graph** - -This page shows the final model that will be mapped to the NPU after all transformations and optimizations such as fusion and chaining. It will also report the operators that had to be moved back to the CPU via the "Failsafe CPU" mechanism. As usual, there is a companion table below that contains all of the graph's elements, and cross-selection is supported to and from the PARTITIONING graph and the Original Graph. - -Toolbar - -You can choose to show/hide individual NPU partitions, if any, with the "Filter by Partition" button -A panel that displays properties for selected objects can be shown or hidden via the "Show Properties" toggle button -The table below can be shown and hidden via the "Show Table" toggle button. -Display options for the graph can be accessed with the "Settings" button - - -PERFORMANCE -@@@@@@@@@@@ - -This section is used to view the performance of your model on RyzenAI when running one or more inferences. It is comprised of two pages: Summary and Timeline. - - - -**Summary** - -The performance summary page shows several overall statistics on the inference(s) as well as charts breaking down operator runtime by operator. If you run with ONNX runtime profiler enabled, you will see overall inference time including layers that run on the CPU. 
If you have NPU profiling enabled via the ai_analyzer_profiling flag, you will see numerous NPU-based statistics, including GOP and MAC efficiency and a chart of runtime per NPU operator type. - -The clock frequency field shows the assumed NPU clock frequency, but it can be edited. If you change the frequency, all timestamp data that is collected as clock cycles but displayed in time units will be adjusted accordingly. - - -**Timeline** - -The Performance timeline shows a layer-by-layer breakdown of your model's execution. The upper section is a graphical depiction of layer execution across a timeline, while the lower section shows the same information in tabular format. It is important to note that the Timeline page shows one inference at a time, so if you have captured profiling data for two or more inferences, you can choose which one to display with the "Inferences" chooser. - - - -Within each inference, you can examine the overall model execution or the detailed NPU execution data by using the "Partition" chooser. - - - -Toolbar - -A panel that displays properties for selected objects can be shown or hidden via the "Show Properties" toggle button -The table below can be shown and hidden via the "Show Table" toggle button. -The graphical timeline can be downloaded to SVG via the "Export to SVG" button - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. - diff --git a/LLM-examples/RAG-OGA/Dataset/app_development.rst b/LLM-examples/RAG-OGA/Dataset/app_development.rst deleted file mode 100644 index cfcac200..00000000 --- a/LLM-examples/RAG-OGA/Dataset/app_development.rst +++ /dev/null @@ -1,198 +0,0 @@ -.. 
include:: /icons.txt - -####################### -Application Development -####################### - -This page captures requirements and recommendations for developers looking to create, package and distribute applications targeting NPU-enabled AMD processors. - - - -.. _driver-compatibility: - -************************************* -VitisAI EP / NPU Driver Compatibility -************************************* - -The VitisAI EP requires a compatible version of the NPU drivers. For each version of the VitisAI EP, compatible drivers are bounded by a minimum version and a maximum release date. NPU drivers are backward compatible with VitisAI EP released up to 3 years before. The maximum driver release date is therefore set to 3 years after the release date of the corresponding VitisAI EP. - -The table below summarizes the driver requirements for the different versions of the VitisAI EP. - -.. list-table:: - :header-rows: 1 - - * - VitisAI EP version - - Minimum NPU Driver version - - Maximum NPU Driver release date - * - 1.4 - - 32.0.203.257 - - March 25th, 2028 - * - 1.3.1 - - 32.0.203.242 - - January 17th, 2028 - * - 1.3 - - 32.0.203.237 - - November 26th, 2027 - * - 1.2 - - 32.0.201.204 - - July 30th, 2027 - -The application must check that NPU drivers compatible with the version of the Vitis AI EP being used are installed. - -.. _apu-types: - -***************** -APU Types -***************** - -The Ryzen AI Software supports different types of NPU-enabled APUs. These APU types are referred to as PHX, HPT, STX and KRK. - -To programmatically determine the type of the local APU, it is possible to enumerate the PCI devices and check for an instance with a matching Hardware ID. - -.. 
list-table:: - :header-rows: 1 - - * - Vendor - - Device - - Revision - - APU Type - * - 0x1022 - - 0x1502 - - 0x00 - - PHX or HPT - * - 0x1022 - - 0x17F0 - - 0x00 - - STX - * - 0x1022 - - 0x17F0 - - 0x10 - - STX - * - 0x1022 - - 0x17F0 - - 0x11 - - STX - * - 0x1022 - - 0x17F0 - - 0x20 - - KRK - -The application must check that it is running on an AMD processor with an NPU, and that the NPU type is supported by the version of the Vitis AI EP being used. - - - -************************************ -Application Development Requirements -************************************ - -ONNX-RT Session -=============== - -The application should only use the Vitis AI Execution Provider if the following conditions are met: - -- The application is running on an AMD processor with an NPU type supported by the version of the Vitis AI EP being used. See :ref:`list <apu-types>` above on this page. -- NPU drivers compatible with the version of the Vitis AI EP being used are installed. See :ref:`compatibility table <driver-compatibility>` above on this page. - -|memo| **NOTE**: Sample C++ code implementing the compatibility checks to be performed before using the VitisAI EP is provided here: https://github.com/amd/RyzenAI-SW/tree/main/utilities/npu_check - - -VitisAI EP Provider Options -=========================== - -For INT8 models, the application should detect which type of APU is present (PHX/HPT/STX/KRK) and set the ``xclbin`` provider option accordingly. Refer to the section about :ref:`compilation of INT8 models ` for details about this. - -For BF16 models, the application should set the ``config_file`` provider option to use the same file as the one which was used to precompile the BF16 model. Refer to the section about :ref:`compilation of BF16 models ` for details about this. - - -Cache Management -================ - -Cache directories generated by the Vitis AI Execution Provider should not be reused across different versions of the Vitis AI EP or across different versions of the NPU drivers. 
- -The application should check the version of the Vitis AI EP and of the NPU drivers. If the application detects a version change, it should delete the cache, or create a new cache directory with a different name. - - -Pre-Compiled Models -=================== - -The deployment version of the VitisAI Execution Provider (EP) does not support the on-the-fly compilation of BF16 models. Applications utilizing BF16 models must include pre-compiled versions of these models. The VitisAI EP can then load the pre-compiled models and deploy them efficiently on the NPU. - -Although including pre-compiled versions of INT8 models is not mandatory, it is beneficial as it reduces session creation time and enhances the end-user experience. - -| - -********************************** -Application Packaging Requirements -********************************** - -|excl| **IMPORTANT**: A patched version of the ``%RYZEN_AI_INSTALLATION_PATH%\deployment`` folder is available for download at the following link: `Download Here `_. This patched ``deployment`` folder is designed to replace the one included in the official installation of Ryzen AI 1.4. The following instructions assume that the original ``deployment`` folder has been replaced with the updated version. - -A C++ application built on the Ryzen AI ONNX Runtime requires the following components to be included in its distribution package. - -.. 
rubric:: For INT8 models - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - -- NPU Binary files (.xclbin) from the ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins`` folder - -- Recommended but not mandatory: pre-compiled models in the form of :ref:`Vitis AI EP cache folders ` or :ref:`Onnx Runtime EP context models ` - -.. rubric:: For BF16 models - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\flexmlrt\\flexmlrt.dll - -- Pre-compiled models in the form of :ref:`Vitis AI EP cache folders ` - -.. 
rubric:: For Hybrid LLMs - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\onnx_custom_ops.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\onnxruntime-genai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\ryzen_mm.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\ryzenai_onnx_utils.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\DirectML.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - -.. rubric:: For NPU-only LLMs - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\npu-llm\\onnxruntime-genai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitis_ai_custom_ops.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - -- VAIP LLM configuration file: %RYZEN_AI_INSTALLATION_PATH%\\deployment\\npu-llm\\vaip_llm.json - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/examples.rst b/LLM-examples/RAG-OGA/Dataset/examples.rst deleted file mode 100644 index 6cb76f49..00000000 --- a/LLM-examples/RAG-OGA/Dataset/examples.rst +++ /dev/null @@ -1,52 +0,0 @@ -########################## -Examples, Demos, Tutorials -########################## - -This page introduces various demos, examples, and tutorials currently available with the Ryzen™ AI Software. 
- -************************* -Getting Started Tutorials -************************* - -NPU -~~~ - -- The :doc:`Getting Started Tutorial ` deploys a custom ResNet model demonstrating: - - - Pretrained model conversion to ONNX - - Quantization using AMD Quark quantizer - - Deployment using ONNX Runtime C++ and Python code - -- `Hello World Jupyter Notebook Tutorial `_ - -- New BF16 Model examples: - - - `Image Classification `_ - - `Finetuned DistilBERT for Text Classification `_ - - `Text Embedding Model Alibaba-NLP/gte-large-en-v1.5 `_ - -iGPU -~~~~ - -- `ResNet50 on iGPU `_ - - -************************************ -Other examples, demos, and tutorials -************************************ - -- Refer to `RyzenAI-SW repo `_ - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. - - - diff --git a/LLM-examples/RAG-OGA/Dataset/getstartex.rst b/LLM-examples/RAG-OGA/Dataset/getstartex.rst deleted file mode 100644 index 14616b99..00000000 --- a/LLM-examples/RAG-OGA/Dataset/getstartex.rst +++ /dev/null @@ -1,414 +0,0 @@ -:orphan: - -######################## -Getting Started Tutorial -######################## - -This tutorial uses a fine-tuned version of the ResNet model (using the CIFAR-10 dataset) to demonstrate the process of preparing, quantizing, and deploying a model using Ryzen AI Software. The tutorial features deployment using both Python and C++ ONNX runtime code. - -.. note:: - In this documentation, "NPU" is used in descriptions, while "IPU" is retained in some of the tool's language, code, screenshots, and commands. This intentional - distinction aligns with existing tool references and does not affect functionality. Avoid making replacements in the code. - -- The source code files can be downloaded from `this link `_. 
Alternatively, you can clone the RyzenAI-SW repo and change the directory into "tutorial". - -.. code-block:: - - git clone https://github.com/amd/RyzenAI-SW.git - cd tutorial/getting_started_resnet - -| - -The following are the steps and the required files to run the example: - -.. list-table:: - :widths: 20 25 25 - :header-rows: 1 - - * - Steps - - Files Used - - Description - * - Installation - - ``requirements.txt`` - - Install the necessary package for this example. - * - Preparation - - ``prepare_model_data.py``, - ``resnet_utils.py`` - - The script ``prepare_model_data.py`` prepares the model and the data for the rest of the tutorial. - - 1. To prepare the model the script converts pre-trained PyTorch model to ONNX format. - 2. To prepare the necessary data the script downloads and extracts CIFAR-10 dataset. - - * - Pretrained model - - ``models/resnet_trained_for_cifar10.pt`` - - The ResNet model trained using CIFAR-10 is provided in .pt format. - * - Quantization - - ``resnet_quantize.py`` - - Convert the model to the NPU-deployable model by performing Post-Training Quantization flow using AMD Quark Quantization. - * - Deployment - Python - - ``predict.py`` - - Run the Quantized model using the ONNX Runtime code. We demonstrate running the model on both CPU and NPU. - * - Deployment - C++ - - ``cpp/resnet_cifar/.`` - - This folder contains the source code ``resnet_cifar.cpp`` that demonstrates running inference using C++ APIs. We additionally provide the infrastructure (required libraries, CMake files and header files) required by the example. - - -| -| - -************************ -Step 1: Install Packages -************************ - -* Ensure that the Ryzen AI Software is correctly installed. For more details, see the :doc:`installation instructions `. - -* Use the conda environment created during the installation for the rest of the steps. This example requires a couple of additional packages. Run the following command to install them: - - -.. 
code-block:: - - python -m pip install -r requirements.txt - -| -| - - -************************************** -Step 2: Prepare dataset and ONNX model -************************************** - -In this example, we utilize a custom ResNet model finetuned using the CIFAR-10 dataset - -The ``prepare_model_data.py`` script downloads the CIFAR-10 dataset in pickle format (for python) and binary format (for C++). This dataset will be used in the subsequent steps for quantization and inference. The script also exports the provided PyTorch model into ONNX format. The following snippet from the script shows how the ONNX model is exported: - -.. code-block:: - - dummy_inputs = torch.randn(1, 3, 32, 32) - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} - tmp_model_path = str(models_dir / "resnet_trained_for_cifar10.onnx") - torch.onnx.export( - model, - dummy_inputs, - tmp_model_path, - export_params=True, - opset_version=13, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - -Note the following settings for the onnx conversion: - -- Ryzen AI supports a batch size=1, so dummy input is fixed to a batch_size =1 during model conversion -- Recommended ``opset_version`` setting 13 is used. - -Run the following command to prepare the dataset and export the ONNX model: - -.. code-block:: - - python prepare_model_data.py - -* The downloaded CIFAR-10 dataset is saved in the current directory at the following location: ``data/*``. -* The ONNX model is generated at models/resnet_trained_for_cifar10.onnx - -| -| - -************************** -Step 3: Quantize the Model -************************** - -Quantizing AI models from floating-point to 8-bit integers reduces computational power and the memory footprint required for inference. This example utilizes Quark for ONNX quantizer workflow. 
Quark takes the pre-trained float32 model from the previous step (``resnet_trained_for_cifar10.onnx``) and provides a quantized model. - -.. code-block:: - - python resnet_quantize.py - -This generates a quantized model using QDQ quant format and generate Quantized model with default configuration. After the completion of the run, the quantized ONNX model ``resnet_quantized.onnx`` is saved to models/resnet_quantized.onnx - -The :file:`resnet_quantize.py` file has ``ModelQuantizer::quantize_model`` function that applies quantization to the model. - -.. code-block:: - - from quark.onnx.quantization.config import (Config, get_default_config) - from quark.onnx import ModelQuantizer - - # Get quantization configuration - quant_config = get_default_config("XINT8") - config = Config(global_quant_config=quant_config) - - # Create an ONNX quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model - quantizer.quantize_model(input_model_path, output_model_path, dr) - -The parameters of this function are: - -* **input_model_path**: (String) The file path of the model to be quantized. -* **output_model_path**: (String) The file path where the quantized model is saved. -* **dr**: (Object or None) Calibration data reader that enumerates the calibration data and producing inputs for the original model. In this example, CIFAR10 dataset is used for calibration during the quantization process. - - -| -| - -************************ -Step 4: Deploy the Model -************************ - -We demonstrate deploying the quantized model using both Python and C++ APIs. - -* :ref:`Deployment - Python ` -* :ref:`Deployment - C++ ` - -.. note:: - During the Python and C++ deployment, the compiled model artifacts are saved in the cache folder named ``/modelcachekey``. 
Ryzen-AI does not support reusing compiled model artifacts across software versions, so if model artifacts exist from a previous software version, be sure to delete the ``modelcachekey`` folder before the deployment steps. - - -.. _dep-python: - -Deployment - Python -=========================== - -The ``predict.py`` script is used to deploy the model. It extracts the first ten images from the CIFAR-10 test dataset and converts them to the .png format. The script then reads all those ten images and classifies them by running the quantized custom ResNet model on CPU or NPU. - -Deploy the Model on the CPU ----------------------------- - -By default, ``predict.py`` runs the model on CPU. - -.. code-block:: - - python predict.py - -Typical output - -.. code-block:: - - Image 0: Actual Label cat, Predicted Label cat - Image 1: Actual Label ship, Predicted Label ship - Image 2: Actual Label ship, Predicted Label airplane - Image 3: Actual Label airplane, Predicted Label airplane - Image 4: Actual Label frog, Predicted Label frog - Image 5: Actual Label frog, Predicted Label frog - Image 6: Actual Label automobile, Predicted Label automobile - Image 7: Actual Label frog, Predicted Label frog - Image 8: Actual Label cat, Predicted Label cat - Image 9: Actual Label automobile, Predicted Label automobile - - -Deploy the Model on the Ryzen AI NPU ------------------------------------- - -To successfully run the model on the NPU, run the following setup steps: - -- Ensure ``RYZEN_AI_INSTALLATION_PATH`` points to ``path\to\ryzen-ai-sw-\``. If you installed Ryzen-AI software using the MSI installer, this variable should already be set. Ensure that the Ryzen-AI software package has not been moved post installation, in which case ``RYZEN_AI_INSTALLATION_PATH`` will have to be set again. - -- By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. 
However, explicitly passing the xclbin option in provider_options overrides the default setting. - -.. code-block:: - - parser = argparse.ArgumentParser() - parser.add_argument('--ep', type=str, default ='cpu',choices = ['cpu','npu'], help='EP backend selection') - opt = parser.parse_args() - - providers = ['CPUExecutionProvider'] - provider_options = [{}] - - if opt.ep == 'npu': - providers = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{ - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey', - 'xclbin': 'path/to/xclbin' - }] - - session = ort.InferenceSession(model.SerializeToString(), providers=providers, - provider_options=provider_options) - - -Run the ``predict.py`` with the ``--ep npu`` switch to run the custom ResNet model on the Ryzen AI NPU: - - -.. code-block:: - - python predict.py --ep npu - -Typical output - -.. code-block:: - - [Vitis AI EP] No. of Operators : CPU 2 IPU 398 99.50% - [Vitis AI EP] No. of Subgraphs : CPU 1 IPU 1 Actually running on IPU 1 - ... - Image 0: Actual Label cat, Predicted Label cat - Image 1: Actual Label ship, Predicted Label ship - Image 2: Actual Label ship, Predicted Label ship - Image 3: Actual Label airplane, Predicted Label airplane - Image 4: Actual Label frog, Predicted Label frog - Image 5: Actual Label frog, Predicted Label frog - Image 6: Actual Label automobile, Predicted Label truck - Image 7: Actual Label frog, Predicted Label frog - Image 8: Actual Label cat, Predicted Label cat - Image 9: Actual Label automobile, Predicted Label automobile - - -.. _dep-cpp: - -Deployment - C++ -=========================== - -Prerequisites -------------- - -1. Visual Studio 2022 Community edition, ensure "Desktop Development with C++" is installed -2. cmake (version >= 3.26) -3. opencv (version=4.6.0) required for the custom resnet example - -Install OpenCV --------------- - -It is recommended to build OpenCV from the source code and use static build. 
The default installation location is "\install" , the following instruction installs OpenCV in the location "C:\\opencv" as an example. You may first change the directory to where you want to clone the OpenCV repository. - -.. code-block:: bash - - git clone https://github.com/opencv/opencv.git -b 4.6.0 - cd opencv - cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -G "Visual Studio 17 2022" "-DCMAKE_INSTALL_PREFIX=C:\opencv" "-DCMAKE_PREFIX_PATH=C:\opencv" -DCMAKE_BUILD_TYPE=Release -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_WITH_STATIC_CRT=OFF -B build - cmake --build build --config Release - cmake --install build --config Release - -The build files will be written to ``build\``. - -Build and Run Custom Resnet C++ sample --------------------------------------- - -The C++ source files, CMake list files and related artifacts are provided in the ``cpp/resnet_cifar/*`` folder. The source file ``cpp/resnet_cifar/resnet_cifar.cpp`` takes 10 images from the CIFAR-10 test set, converts them to .png format, preprocesses them, and performs model inference. The example has onnxruntime dependencies, that are provided in ``%RYZEN_AI_INSTALLATION_PATH%/onnxruntime/*``. - -Run the following command to build the resnet example. Assign ``-DOpenCV_DIR`` to the OpenCV build directory. - -.. code-block:: bash - - cd getting_started_resnet/cpp - cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -DCMAKE_INSTALL_PREFIX=. -DCMAKE_PREFIX_PATH=. -B build -S resnet_cifar -DOpenCV_DIR="C:/opencv/build" -G "Visual Studio 17 2022" - -This should generate the build directory with the ``resnet_cifar.sln`` solution file along with other project files. Open the solution file using Visual Studio 2022 and build to compile. 
You can also use "Developer Command Prompt for VS 2022" to open the solution file in Visual Studio. - -.. code-block:: bash - - devenv build/resnet_cifar.sln - -Now to deploy our model, we will go back to the parent directory (getting_started_resnet) of this example. After compilation, the executable should be generated in ``cpp/build/Release/resnet_cifar.exe``. We will copy this application over to the parent directory: - -.. code-block:: bash - - cd .. - xcopy cpp\build\Release\resnet_cifar.exe . - -Additionally, we will also need to copy the onnxruntime DLLs from the Vitis AI Execution Provider package to the current directory. The following commands copy the required files in the current directory: - -.. code-block:: bash - - xcopy %RYZEN_AI_INSTALLATION_PATH%\onnxruntime\bin\* /E /I - - -The C++ application that was generated takes 3 arguments: - -#. Path to the quantized ONNX model generated in Step 3 -#. The execution provider of choice (cpu or NPU) -#. vaip_config.json (pass None if running on CPU) - - -Deploy the Model on the CPU -**************************** - -To run the model on the CPU, use the following command: - -.. code-block:: bash - - resnet_cifar.exe models\resnet_quantized.onnx cpu - -Typical output: - -.. 
code-block:: bash - - model name:models\resnet_quantized.onnx - ep:cpu - Input Node Name/Shape (1): - input : -1x3x32x32 - Output Node Name/Shape (1): - output : -1x10 - Final results: - Predicted label is cat and actual label is cat - Predicted label is ship and actual label is ship - Predicted label is ship and actual label is ship - Predicted label is airplane and actual label is airplane - Predicted label is frog and actual label is frog - Predicted label is frog and actual label is frog - Predicted label is truck and actual label is automobile - Predicted label is frog and actual label is frog - Predicted label is cat and actual label is cat - Predicted label is automobile and actual label is automobile - -Deploy the Model on the NPU -**************************** - -To successfully run the model on the NPU: - -- Ensure ``RYZEN_AI_INSTALLATION_PATH`` points to ``path\to\ryzen-ai-sw-\``. If you installed Ryzen-AI software using the MSI installer, this variable should already be set. Ensure that the Ryzen-AI software package has not been moved post installation, in which case ``RYZEN_AI_INSTALLATION_PATH`` will have to be set again. - -- By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. However, explicitly passing the xclbin option in provider_options overrides the default setting. - -The following code block from ``resnet_cifar.cpp`` shows how ONNX Runtime is configured to deploy the model on the Ryzen AI NPU: - -.. 
code-block:: bash - - auto session_options = Ort::SessionOptions(); - - auto cache_dir = std::filesystem::current_path().string(); - - if(ep=="npu") - { - auto options = - std::unordered_map{ {"cacheDir", cache_dir}, {"cacheKey", "modelcachekey"}, {"xclbin", "path/to/xclbin"}}; - session_options.AppendExecutionProvider_VitisAI(options) - } - - auto session = Ort::Session(env, model_name.data(), session_options); - -To run the model on the NPU, we will pass the npu flag and the vaip_config.json file as arguments to the C++ application. Use the following command to run the model on the NPU: - -.. code-block:: bash - - resnet_cifar.exe models\resnet_quantized.onnx npu - -Typical output: - -.. code-block:: - - [Vitis AI EP] No. of Operators : CPU 2 IPU 398 99.50% - [Vitis AI EP] No. of Subgraphs : CPU 1 IPU 1 Actually running on IPU 1 - ... - Final results: - Predicted label is cat and actual label is cat - Predicted label is ship and actual label is ship - Predicted label is ship and actual label is ship - Predicted label is airplane and actual label is airplane - Predicted label is frog and actual label is frog - Predicted label is frog and actual label is frog - Predicted label is truck and actual label is automobile - Predicted label is frog and actual label is frog - Predicted label is cat and actual label is cat - Predicted label is automobile and actual label is automobile -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
diff --git a/LLM-examples/RAG-OGA/Dataset/gpu_ryzenai_gpu - Copy.rst b/LLM-examples/RAG-OGA/Dataset/gpu_ryzenai_gpu - Copy.rst deleted file mode 100644 index 89bbbf0d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/gpu_ryzenai_gpu - Copy.rst +++ /dev/null @@ -1,56 +0,0 @@ -########################### -DirectML Flow -########################### - -************* -Prerequisites -************* - -- DirectX12 capable Windows OS (Windows 11 recommended) -- Latest AMD `GPU device driver `_ installed -- `Microsoft Olive `_ for model conversion and optimization -- Latest `ONNX Runtime DirectML EP `_ - -You can verify the GPU driver and DirectX versions from ``Windows Task Manager`` -> ``Performance`` -> ``GPU`` - -****************************** -Running models on Ryzen AI GPU -****************************** - -Running models on the Ryzen AI GPU is accomplished in two simple steps: - -**Model Conversion and Optimization**: After the model is trained, Microsoft Olive Optimizer can be used to convert the model to ONNX and optimize it for execution on the target hardware. - -For additional information, refer to the `Microsoft Olive Documentation `_ - - -**Deployment**: Once the model is in the ONNX format, the ONNX Runtime DirectML EP (``DmlExecutionProvider``) is used to run the model on the AMD Ryzen AI GPU. 
- -For additional information, refer to the `ONNX Runtime documentation for the DirectML Execution Provider `_ - - -******** -Examples -******** - -- Optimizing and running `ResNet on Ryzen AI GPU `_ - - -******************** -Additional Resources -******************** - - -- Article on how AMD and Black Magic Design worked together to accelerate `Davinci Resolve Studio `_ workload on AMD hardware: - - - `AI Accelerated Video Editing with DaVinci Resolve 18.6 & AMD Radeon Graphics `_ - -| - -- Blog posts on using the Ryzen AI Software for various generative AI workloads on GPU: - - - `Automatic1111 Stable Diffusion WebUI with DirectML Extension on AMD GPUs `_ - - - `Running Optimized Llama2 with Microsoft DirectML on AMD Radeon Graphics `_ - - - `AI-Assisted Mobile Workstation Workflows Powered by AMD Ryzen™ AI `_ diff --git a/LLM-examples/RAG-OGA/Dataset/llm_overview.rst b/LLM-examples/RAG-OGA/Dataset/llm_overview.rst deleted file mode 100644 index 57ead57e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/llm_overview.rst +++ /dev/null @@ -1,211 +0,0 @@ -######## -Overview -######## - -************************************ -OGA-based Flow with Hybrid Execution -************************************ - -Ryzen AI Software supports deploying quantized 4-bit LLMs on Ryzen AI 300-series PCs. This solution uses a hybrid execution mode, which leverages both the NPU and integrated GPU (iGPU), and is built on the OnnxRuntime GenAI (OGA) framework. - -Hybrid execution mode optimally partitions the model such that different operations are scheduled on NPU vs. iGPU. This minimizes time-to-first-token (TTFT) in the prefill-phase and maximizes token generation (tokens per second, TPS) in the decode phase. - -OGA is a multi-vendor generative AI framework from Microsoft that provides a convenient LLM interface for execution backends such as Ryzen AI. 
- -Supported Configurations -======================== - -- Only Ryzen AI 300-series Strix Point (STX) and Krackan Point (KRK) processors support OGA-based hybrid execution. -- Developers with Ryzen AI 7000- and 8000-series processors can get started using the CPU-based examples linked in the :ref:`featured-llms` table. -- Windows 11 is the required operating system. - - -******************************* -Development Interfaces -******************************* - -The Ryzen AI LLM software stack is available through three development interfaces, each suited for specific use cases as outlined in the sections below. All three interfaces are built on top of native OnnxRuntime GenAI (OGA) libraries, as shown in the :ref:`software-stack-table` diagram below. - -The high-level Python APIs, as well as the Server Interface, also leverage the Lemonade SDK, which is multi-vendor open-source software that provides everything necessary for quickly getting started with LLMs on OGA. - -A key benefit of both OGA and Lemonade is that software developed against their interfaces is portable to many other execution backends. - -.. _software-stack-table: - -.. flat-table:: Ryzen AI Software Stack - :header-rows: 1 - :class: center-table - - * - Your Python Application - - Your LLM Stack - - Your Native Application - * - `Lemonade Python API* <#high-level-python-sdk>`_ - - `Lemonade Server Interface* <#server-interface-rest-api>`_ - - :rspan:`1` `OGA C++ Headers <../hybrid_oga.html>`_ - * - :cspan:`1` `OGA Python API* `_ - * - :cspan:`2` `Custom AMD OnnxRuntime GenAI (OGA) `_ - * - :cspan:`2` `AMD Ryzen AI Driver and Hardware `_ - -\* indicates open-source software (OSS). - -High-Level Python SDK -===================== - -The high-level Python SDK, Lemonade, allows you to get started using PyPI installation in approximately 5 minutes. - -This SDK allows you to: - -- Experiment with models in hybrid execution mode on Ryzen AI hardware. -- Validate inference speed and task performance. 
-- Integrate with Python apps using a high-level API. - -To get started in Python, follow these instructions: :doc:`high_level_python`. - - -Server Interface (REST API) -=========================== - -The Server Interface provides a convenient means to integrate with applications that: - -- Already support an LLM server interface, such as the Ollama server or OpenAI API. -- Are written in any language (C++, C#, Javascript, etc.) that supports REST APIs. -- Benefits from process isolation for the LLM backend. - -To get started with the server interface, follow these instructions: :doc:`server_interface`. - -For example applications that have been tested with Lemonade Server, see the `Lemonade Server Examples `_. - - -OGA APIs for C++ Libraries and Python -===================================== - -Native C++ libraries for OGA are available to give full customizability for deployment into native applications. - -The Python bindings for OGA also provide a customizable interface for Python development. - -To get started with the OGA APIs, follow these instructions: :doc:`../hybrid_oga`. - - -.. _featured-llms: - -******************************* -Featured LLMs -******************************* - -The following tables contain a curated list of LLMs that have been validated on Ryzen AI hybrid execution mode. The hybrid examples are built on top of OnnxRuntime GenAI (OGA). - -The comprehensive set of pre-optimized models for hybrid execution used in these examples are available in the `AMD hybrid collection on Hugging Face `_. It is also possible to run fine-tuned versions of the models listed (for example, fine-tuned versions of Llama2 or Llama3). For instructions on how to prepare a fine-tuned OGA model for hybrid execution, refer to :doc:`../oga_model_prepare`. - -.. _ryzen-ai-oga-featured-llms: - -.. 
flat-table:: Ryzen AI OGA Featured LLMs - :header-rows: 2 - :class: llm-table - - * - - - :cspan:`1` CPU Baseline (HF bfloat16) - - :cspan:`3` Ryzen AI Hybrid (OGA int4) - * - Model - - Example - - Validation - - Example - - TTFT Speedup - - Tokens/S Speedup - - Validation - - * - `DeepSeek-R1-Distill-Qwen-7B `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.4x - - 8.4x - - 🟢 - * - `DeepSeek-R1-Distill-Llama-8B `_ - - `Link `__ - - 🟢 - - `Link `__ - - 4.2x - - 7.6x - - 🟢 - * - `Llama-3.2-1B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 1.9x - - 5.1x - - 🟢 - * - `Llama-3.2-3B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 2.8x - - 8.1x - - 🟢 - * - `Phi-3-mini-4k-instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.6x - - 7.8x - - 🟢 - * - `Qwen1.5-7B-Chat `_ - - `Link `__ - - 🟢 - - `Link `__ - - 4.0x - - 7.3x - - 🟢 - * - `Mistral-7B-Instruct-v0.3 `_ - - `Link `__ - - 🟢 - - `Link `__ - - 5.0x - - 8.1x - - 🟢 - * - `Llama-3.1-8B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.9x - - 8.9x - - 🟢 - -The :ref:`ryzen-ai-oga-featured-llms` table was compiled using validation, benchmarking, and accuracy metrics as measured by the `ONNX TurnkeyML v6.1.0 `_ ``lemonade`` commands in each example link. After this table was created, the Lemonade SDK moved to the new location found `here `_. - -Data collection details: - -* All validation, performance, and accuracy metrics are collected on the same system configuration: - - * System: HP OmniBook Ultra Laptop 14z - * Processor: AMD Ryzen AI 9 HX 375 W/ Radeon 890M - * Memory: 32GB of RAM - -* The Hugging Face ``transformers`` framework is used as the baseline implementation for speedup and accuracy comparisons. - - * The baseline checkpoint is the original ``safetensors`` Hugging Face checkpoint linked in each table row, in the ``bfloat16`` data type. 
- -* All speedup numbers are the measured performance of the model with input sequence length (ISL) of ``1024`` and output sequence length (OSL) of ``64``, on the specified backend, divided by the measured performance of the baseline. -* We assign the 🟢 validation score based on this criteria: all commands in the example guide ran successfully. - - -************************************** -OGA-based Flow with NPU-only Execution -************************************** - -The primary OGA-based flow for LLMs employs a hybrid execution mode which leverages both the NPU and iGPU. AMD also provides support for an OGA-based flow where the iGPU is not solicited and where the compute-intensive operations are exclusively offloaded to the NPU. - -The OGA-based NPU-only execution mode is supported on STX and KRK platforms. - -To get started with the OGA-based NPU-only execution mode, follow these instructions :doc:`../npu_oga`. - - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/modelrun.rst b/LLM-examples/RAG-OGA/Dataset/modelrun.rst deleted file mode 100644 index f6bed98d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/modelrun.rst +++ /dev/null @@ -1,380 +0,0 @@ -.. include:: /icons.txt - -################################ -Model Compilation and Deployment -################################ - -***************** -Introduction -***************** - -The Ryzen AI Software supports compiling and deploying quantized model saved in the ONNX format. The ONNX graph is automatically partitioned into multiple subgraphs by the VitisAI Execution Provider (EP). The subgraph(s) containing operators supported by the NPU are executed on the NPU. The remaining subgraph(s) are executed on the CPU. 
This graph partitioning and deployment technique across CPU and NPU is fully automated by the VAI EP and is totally transparent to the end-user. - -|memo| **NOTE**: Models with ONNX opset 17 are recommended. If your model uses a different opset version, consider converting it using the `ONNX Version Converter `_ - -Models are compiled for the NPU by creating an ONNX inference session using the Vitis AI Execution Provider (VAI EP): - -.. code-block:: python - - providers = ['VitisAIExecutionProvider'] - session = ort.InferenceSession( - model, - sess_options = sess_opt, - providers = providers, - provider_options = provider_options - ) - - -The ``provider_options`` parameter allows passing special options to the Vitis AI EP. - -.. list-table:: - :widths: 20 35 - :header-rows: 1 - - * - Provider Options - - Description - * - config_file - - Configuration file to pass certain compile-specific options, used for BF16 compilation. - * - xclbin - - NPU binary file to specify NPU configuration, used for INT8 models. - * - cache_dir - - The path and name of the cache directory. - * - cache_key - - The subfolder in the cache directory where the compiled model is stored. - * - encryptionKey - - Used for generating an encrypted compiled model. - -Detailed usage of these options is discussed in the following sections of this page. - - -.. _compile-bf16: - -************************** -Compiling BF16 models -************************** - -|memo| **NOTE**: For compiling large BF16 models a machine with at least 32GB of memory is recommended. The machine does not need to have an NPU. It is also possible to compile BF16 models on a Linux workstation. More details can be found here: :doc:`rai_linux` - -When compiling BF16 models, a compilation configuration file must be provided through the ``config_file`` provider options. - -.. 
code-block:: python - - providers = ['VitisAIExecutionProvider'] - - provider_options = [{ - 'config_file': 'vai_ep_config.json' - }] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - - -By default, the configuration file for compiling BF16 models should contain the following: - -.. code-block:: json - - { - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": {} - } - ] - } - - -Additional options can be specified in the ``vaiml_config`` section of the configuration file, as described below. - -**Performance Optimization** - -The default compilation optimization level is 1. The optimization level can be changed as follows: - -.. code-block:: json - - "vaiml_config": {"optimize_level": 2} - -Supported values: 1 (default), 2 - - -**Automatic FP32 to BF16 Conversion** - -If a FP32 model is used, the compiler will automatically cast it to BF16 if this option is enabled. For better control over accuracy, it is recommended to quantize the model to BF16 using Quark. - -.. code-block:: json - - "vaiml_config": {"enable_f32_to_bf16_conversion": true} - -Supported values: false (default), true - - -**Optimizations for Transformer-Based Models** - -By default, the compiler vectorizes the data to optimize performance for CNN models. However, transformers perform best with unvectorized data. To better optimize transformer-based models, set: - -.. code-block:: json - - "vaiml_config": {"preferred_data_storage": "unvectorized"} - -Supported values: "vectorized" (default), "unvectorized" - - -.. _compile-int8: - -************************** -Compiling INT8 models -************************** - -When compiling INT8 models, the NPU configuration must be specified through the ``xclbin`` provider option. This option is not required for BF16 models. 
- -There are two types of NPU configurations for INT8 models: standard and benchmark. Setting the NPU configuration involves specifying a specific ``.xclbin`` binary file, which is located in the Ryzen AI Software installation tree. - -Depending on the target processor and binary type (standard/benchmark), the following ``.xclbin`` files should be used: - -**For STX/KRK APUs**: - -- Standard binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_Nx4_Overlay.xclbin`` -- Benchmark binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_4x4_Overlay.xclbin`` - -**For PHX/HPT APUs**: - -- Standard binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\phoenix\1x4.xclbin`` -- Benchmark binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\phoenix\4x4.xclbin`` - -Python example selecting the standard NPU configuration for STX/KRK: - -.. code-block:: python - - providers = ['VitisAIExecutionProvider'] - - provider_options = [{ - 'xclbin': '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_Nx4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - }] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - -| - -By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. However, explicitly passing the xclbin option in the provider options overrides the environment variable. - -.. code-block:: - - > echo %XLNX_VART_FIRMWARE% - C:\Program Files\RyzenAI\1.4.0\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_Nx4_Overlay.xclbin - - - -| - -************************************ -Managing Compiled Models -************************************ - -To avoid the overhead of recompiling models, it is very advantageous to save the compiled models and use these pre-compiled versions in the final application. 
Pre-compiled models can be loaded instantaneously and immediately executed on the NPU. This greatly improves the session creation time and overall end-user experience. - -The RyzenAI Software supports two mechanisms for saving and reloading compiled models: - -- VitisAI EP Cache -- OnnxRuntime EP Context Cache - -.. _vitisai-ep-cache: - -VitisAI EP Cache -================ - -The VitisAI EP includes a built-in caching mechanism. This mechanism is enabled by default. When a model is compiled for the first time, it is automatically saved in the VitisAI EP cache directory. Any subsequent creation of an ONNX Runtime session using the same model will load the precompiled model from the cache directory, thereby reducing session creation time. - -The location of the VitisAI EP cache is specified with the ``cache_dir`` and ``cache_key`` provider options: - -- ``cache_dir`` - Specifies the path and name of the cache directory. -- ``cache_key`` - Specifies the subfolder in the cache directory where the compiled model is stored. - -Python example: - -.. code-block:: python - - from pathlib import Path - - providers = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{'cache_dir': str(cache_dir), - 'cache_key': 'compiled_resnet50'}] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - - -In the example above, the cache directory is set to the absolute path of the folder containing the script being executed. Once the session is created, the compiled model is saved inside a subdirectory named ``compiled_resnet50`` within the specified cache folder. 
- -Default Settings ----------------- -In the current release, if ``cache_dir`` is not set, the default cache location is determined by the type of model: - -- INT8 models - ``C:\temp\%USERNAME%\vaip\.cache`` -- BF16 models - The directory where the script or program is executed - - -Disabling the Cache -------------------- -To ignore cached models and force recompilation, unset the ``XLNX_ENABLE_CACHE`` environment variable before running the application: - -.. code-block:: - - set XLNX_ENABLE_CACHE= - - - -VitisAI EP Cache Encryption ---------------------------- - -The contents of the VitisAI EP cache folder can be encrypted using AES256. Cache encryption is enabled by passing an encryption key through the VAI EP provider options. The same key must be used to decrypt the model when loading it from the cache. The key is a 256-bit value represented as a 64-digit string. - -Python example: - -.. code-block:: python - - session = onnxruntime.InferenceSession( - "resnet50.onnx", - providers=["VitisAIExecutionProvider"], - provider_options=[{ - "config_file":"/path/to/vaip_config.json", - "encryptionKey": "89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2" - }]) - -C++ example: - -.. code-block:: cpp - - auto onnx_model_path = "resnet50.onnx" - Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "resnet50"); - auto session_options = Ort::SessionOptions(); - auto options = std::unorderd_map({}); - options["config_file"] = "/path/to/vaip_config.json"; - options["encryptionKey"] = "89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2"; - - session_options.AppendExecutionProvider("VitisAI", options); - auto session = Ort::Experimental::Session(env, model_name, session_options); - -As a result of encryption, the model generated in the cache directory cannot be opened with Netron. Additionally, dumping is disabled to prevent the leakage of sensitive information about the model. - -.. 
_ort-ep-context-cache: - -OnnxRuntime EP Context Cache -============================ - -The Vitis AI EP supports the ONNX Runtime EP context cache feature. This features allows dumping and reloading a snapshot of the EP context before deployment. Currently, this feature is only available for INT8 models. - -The user can enable dumping of the EP context by setting the ``ep.context_enable`` session option to 1. - -The following options can be used for additional control: - -- ``ep.context_file_path`` – Specifies the output path for the dumped context model. -- ``ep.context_embed_mode`` – Embeds the EP context into the ONNX model when set to 1. - -For further details, refer to the official ONNX Runtime documentation: https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html - - -EP Context Encryption ---------------------- - -By default, the generated context model is unencrypted and can be used directly during inference. If needed, the context model can be encrypted using one of the methods described below. - -User-managed encryption -~~~~~~~~~~~~~~~~~~~~~~~ -After the context model is generated, the developer can encrypt the generated file using a method of choice. At runtime, the encrypted file can be loaded by the application, decrypted in memory and passed as a serialized string to the inference session. This method gives complete control to the developer over the encryption process. - -EP-managed encryption -~~~~~~~~~~~~~~~~~~~~~~~ -The Vitis AI EP encryption mechanism can be used to encrypt the context model. This is enabled by passing an encryption key via the ``encryptionKey`` provider option (discussed in the previous section). The model is encrypted using AES256. At runtime, the same encryption key must be provided to decrypt and load the context model. With this method, encryption and decryption is seamlessly managed by the VitisAI EP. - -Python example: - -.. 
code-block:: python - - # Compilation session - session_options = ort.SessionOptions() - session_options.add_session_config_entry('ep.context_enable', '1') - session_options.add_session_config_entry('ep.context_file_path', 'context_model.onnx') - session_options.add_session_config_entry('ep.context_embed_mode', '1') - session = ort.InferenceSession( - path_or_bytes='resnet50.onnx', - sess_options=session_options, - providers=['VitisAIExecutionProvider'], - provider_options=[{'encryptionKey': '89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2'}] - ) - - # Inference session - session_options = ort.SessionOptions() - session = ort.InferenceSession( - path_or_bytes='context_model.onnx', - sess_options=session_options, - providers=['VitisAIExecutionProvider'], - provider_options=[{'encryptionKey': '89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2'}] - ) - - -**NOTE**: When compiling with encryptionKey, ensure that any existing cache directory (either the default cache directory or the directory specified by the ``cache_dir`` provider option) is deleted before compiling. - -| - -************************** -Operator Assignment Report -************************** - - -Vitis AI EP generates a file named ``vitisai_ep_report.json`` that provides a report on model operator assignments across CPU and NPU. This file is automatically generated in the cache directory if no explicit cache location is specified in the code. This report includes information such as the total number of nodes, the list of operator types in the model, and which nodes and operators runs on the NPU or on the CPU. Additionally, the report includes node statistics, such as input to a node, the applied operation, and output from the node. - - -.. code-block:: - - { - "deviceStat": [ - { - "name": "all", - "nodeNum": 400, - "supportedOpType": [ - "::Add", - "::Conv", - ... 
- ] - }, - { - "name": "CPU", - "nodeNum": 2, - "supportedOpType": [ - "::DequantizeLinear", - "::QuantizeLinear" - ] - }, - { - "name": "NPU", - "nodeNum": 398, - "supportedOpType": [ - "::Add", - "::Conv", - ... - ] - ... - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_onnx_apis.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_onnx_apis.rst deleted file mode 100644 index 627dea8f..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_onnx_apis.rst +++ /dev/null @@ -1,23 +0,0 @@ -AMD Quark APIs for ONNX -======================= - -**User facing APIs:** - -.. toctree:: - :maxdepth: 1 - - Quantization <../autoapi/quark/onnx/quantization/api/index> - Optimization <../autoapi/quark/onnx/optimize/index> - Calibration <../autoapi/quark/onnx/calibrate/index> - ONNX Quantizer <../autoapi/quark/onnx/onnx_quantizer/index> - QDQ Quantizer <../autoapi/quark/onnx/qdq_quantizer/index> - Configuration <../autoapi/quark/onnx/quantization/config/config/index> - Quantization Utilities <../autoapi/quark/onnx/quant_utils/index> - -.. - ------------ - ##################################### - License - ##################################### - - AMD Quark is licensed under MIT License. Refer to the LICENSE file for the full license text and copyright notice. \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_pytorch_apis.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_pytorch_apis.rst deleted file mode 100644 index b0497656..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_pytorch_apis.rst +++ /dev/null @@ -1,23 +0,0 @@ -Quark APIs for PyTorch -====================== - -**User facing APIs:** - - -.. 
toctree:: - :maxdepth: 1 - - Pruning <../autoapi/quark/torch/pruning/api/index.rst> - Quantization <../autoapi/quark/torch/quantization/api/index.rst> - Export <../autoapi/quark/torch/export/api/index.rst> - Pruner Configuration <../autoapi/quark/torch/pruning/config/index.rst> - Quantizer Configuration <../autoapi/quark/torch/quantization/config/config/index.rst> - Exporter Configuration <../autoapi/quark/torch/export/config/config/index.rst> - -.. - ------------ - ##################################### - License - ##################################### - - Quark is licensed under MIT License. Refer to the LICENSE file for the full license text and copyright notice. \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_calibrate_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_calibrate_index.rst deleted file mode 100644 index a4fa692d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_calibrate_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX model calibration -====================== - -.. automodule:: quark.onnx.calibrate - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_onnx_quantizer_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_onnx_quantizer_index.rst deleted file mode 100644 index a3367a8b..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_onnx_quantizer_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX quantizer -============== - -.. 
automodule:: quark.onnx.onnx_quantizer - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_optimize_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_optimize_index.rst deleted file mode 100644 index 5f182561..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_optimize_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX model optimization -======================= - -.. automodule:: quark.onnx.optimize - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_qdq_quantizer_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_qdq_quantizer_index.rst deleted file mode 100644 index e8a3124e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_qdq_quantizer_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -QDQ quantizer -============= - -.. automodule:: quark.onnx.qdq_quantizer - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quant_utils_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quant_utils_index.rst deleted file mode 100644 index 1d24b075..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quant_utils_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX quantization utilities -=========================== - -.. automodule:: quark.onnx.quant_utils - :members: - :member-order: bysource \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_api_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_api_index.rst deleted file mode 100644 index 2eb216e3..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_api_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX model quantization -======================= - -.. 
automodule:: quark.onnx.quantization.api - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_config_config_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_config_config_index.rst deleted file mode 100644 index 0445523f..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_onnx_quantization_config_config_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -ONNX quantization configuration -=============================== - -.. automodule:: quark.onnx.quantization.config.config - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_api_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_api_index.rst deleted file mode 100644 index 92e92aa0..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_api_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -Pruning -======= - -.. automodule:: quark.torch.pruning.api - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_config_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_config_index.rst deleted file mode 100644 index 2add8933..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_pruning_config_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -Pruning configuration -===================== - -.. 
automodule:: quark.torch.pruning.config - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_api_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_api_index.rst deleted file mode 100644 index 837c946c..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_api_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -PyTorch quantization -==================== - -.. automodule:: quark.torch.quantization.api - :members: - :member-order: bysource diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_config_config_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_config_config_index.rst deleted file mode 100644 index 6243b8bc..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_autoapi_quark_torch_quantization_config_config_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -Quantization configuration -========================== - -.. automodule:: quark.torch.quantization.config.config - :members: - :member-order: bysource \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_ada.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_ada.rst deleted file mode 100644 index bf1c90a8..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_ada.rst +++ /dev/null @@ -1,123 +0,0 @@ -Quantization Using AdaQuant and AdaRound -======================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -.. 
note:: - - For information on accessing AMD Quark ONNX examples, refer to :doc:`Accessing ONNX Examples <../onnx_examples>`. - These examples and the relevant files are available at ``/onnx/accuracy_improvement/adaquant`` and ``/onnx/accuracy_improvement/adaround``. - -Fast Finetune -------------- - -Fast finetune improves the quantized model's accuracy by training the output of each layer as close as possible to the floating-point model. It includes two practical algorithms: "AdaRound" and "AdaQuant". Applying fast finetune might achieve better accuracy for some models but takes much longer time than normal PTQ. It is disabled by default to save quantization time but can be turned on if you encounter accuracy issues. If this feature is enabled, `quark.onnx` will require the PyTorch package. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_fast_ft=True, - extra_options={ - 'ActivationSymmetric': True, - 'FastFinetune': { - 'OptimAlgorithm': 'adaround', - 'OptimDevice': 'cpu', - 'BatchSize': 1, - 'NumIterations': 1000, - 'LearningRate': 0.1, - }, - }, - ) - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -Arguments -~~~~~~~~~ - -- **include_fast_ft**: (Boolean) This parameter is a flag that determines whether to optimize the models using Fast Finetune. Set to True to enable fast finetune (default is False). - -- **extra_options**: (Dictionary or None) Contains key-value pairs for various options in different cases. 
Fast finetune related options are packaged within `extra_options` as a member whose key is "FastFinetune" and values are: - - - **OptimAlgorithm**: (String) The specified algorithm for fast finetune. Optional values are "adaround" and "adaquant". "Adaround" adjusts the weight's rounding function, which is relatively stable and might converge faster, while "adaquant" trains the weight directly, potentially offering greater improvement. The default value is "adaround". - - - **OptimDevice**: (String) Specifies the compute device used for PyTorch model training during fast finetuning. Optional values are "cpu" and "cuda:0". The default value is "cpu". - - - **BatchSize**: (Int) Batch size for finetuning. A larger batch size might result in better accuracy but longer training time. The default value is 1. - - - **NumIterations**: (Int) The number of iterations for finetuning. More iterations can lead to better accuracy but also longer training time. The default value is 1000. - - - **LearningRate**: (Float) Learning rate for finetuning. It significantly impacts the improvement of fast finetune, and experimenting with different learning rates might yield better results for your model. The default value is 0.1. - -AdaRound -~~~~~~~~ - -**AdaRound**, short for "Adaptive Rounding," is a post-training quantization technique that aims to minimize the accuracy drop typically associated with quantization. Unlike standard rounding methods, which can be too rigid and cause significant deviations from the original model's behavior, AdaRound uses an adaptive approach to determine the optimal rounding of weights. Here is the `link `__ to the paper. - -AdaQuant -~~~~~~~~ - -**AdaQuant**, short for "Adaptive Quantization," is an advanced quantization technique designed to minimize the accuracy loss typically associated with post-training quantization. 
Unlike traditional static quantization methods, which apply uniform quantization across all layers and weights, AdaQuant dynamically adapts the quantization parameters based on the characteristics of the model and its data. Here is the `link `__ to the paper. - -Benefits of AdaRound and AdaQuant -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -1. **Improved Accuracy**: By minimizing the quantization error, AdaRound helps preserve the model's accuracy closer to its original state. By dynamically adjusting quantization parameters, AdaQuant helps retain a higher level of model accuracy compared to traditional quantization methods. -2. **Flexibility**: AdaRound and AdaQuant can be applied to various layers and types of neural networks, making it a versatile tool for different quantization needs. -3. **Post-Training Application**: AdaRound does not require retraining the model from scratch. It can be applied after the model has been trained, making it a convenient choice for deploying pre-trained models in resource-constrained environments. -4. **Efficiency**: AdaQuant enables the deployment of high-performance models in resource-constrained environments, such as mobile and edge devices, without the need for extensive retraining. - -Upgrades of AdaRound / AdaQuant in AMD Quark for ONNX ------------------------------------------------------ - -Comparing with the original algorithm, AdaRound in AMD Quark for ONNX is modified and upgraded to be more flexible. - -1. **Unified Framework**: These two algorithms were integrated into a unified framework named as "fast finetune". -2. **Quantization Aware Finetuning**: Only the weight and bias (optional) will be updated, the scales and zero points are fixed, which ensures that all the quantizing information and the structure of the quantized model keep unchanged after finetuning. -3. **Flexibility**: AdaRound in Quark for ONNX is compatible with many more graph patterns-matching. -4. 
**More Advanced Options** - - - **Early Stop**: If the average loss of the current batch iterations decreases compared to the previous batch of iterations, the training of the layer will stop early. It will accelerate the finetuning process. - - **Selective Update**: If the end-to-end accuracy does not improve after training a certain layer, discard the finetuning result of that layer. - - **Adjust Learning Rate**: Besides the overall learning rate, you could set up a scheme to adjust learning rate layer-wise. For example, apply a larger learning rate on the layer that has a bigger loss. - -How to Enable AdaRound / AdaQuant in AMD Quark? ------------------------------------------------ - -AdaRound and AdaQuant are provided as options of optimal algorithms for fast finetune. - -Here is a simple example showing how to enable default AdaRound and AdaQuant configuration. - -.. code:: python - - from quark.onnx.quantization.config import Config, QuantizationConfig, get_default_config - # Config of default AdaRound - quant_config = get_default_config("S8S8_AAWS_ADAROUND") - config = Config(global_quant_config=quant_config) - # Config of default AdaQuant - quant_config = get_default_config("S8S8_AAWS_ADAQUANT") - config = Config(global_quant_config=quant_config) - -Examples --------- - -AdaRound -~~~~~~~~ - -This :doc:`example <../example_quark_onnx_adaround>` demonstrates quantizing a mobilenetv2_050.lamb_in1k model using the AMD Quark ONNX quantizer. - -AdaQuant -~~~~~~~~ - -This :doc:`example <../example_quark_onnx_adaquant>` demonstrates quantizing a mobilenetv2_050.lamb_in1k model using the AMD Quark ONNX quantizer. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_cle.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_cle.rst deleted file mode 100644 index aeb2ac03..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_cle.rst +++ /dev/null @@ -1,51 +0,0 @@ -Quantizing Using CrossLayerEqualization (CLE) -============================================= - -CrossLayerEqualization (CLE) can equalize the weights of consecutive convolution layers, making the model weights easier to perform per-tensor quantization. Experiments show that using the CLE technique can improve the PTQ accuracy of some models, especially for models with depthwise_conv layers, such as Mobilenet. Here is a sample showing how to enable CLE using `quark.onnx`. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_cle=True, - extra_options={ - 'ActivationSymmetric': True, - 'ReplaceClip6Relu': True, - 'CLESteps': 1, - 'CLEScaleAppendBias': True, - }, - ) - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -Arguments ---------- - -- **include_cle**: (Boolean) This parameter is a flag that determines whether to optimize the models using CrossLayerEqualization; it can improve the accuracy of some models. The default is True. - -- **extra_options**: (Dictionary or None) Contains key-value pairs for various options in different cases. Options related to CLE are: - - - **ReplaceClip6Relu**: (Boolean) If True, Replace Clip(0,6) with Relu in the model. 
The default value is False. - - - **CLESteps**: (Int) Specifies the steps for CrossLayerEqualization execution when include_cle is set to true. The default is 1. When set to -1, adaptive CrossLayerEqualization steps are conducted. The default value is 1. - - - **CLEScaleAppendBias**: (Boolean) Whether the bias is included when calculating the scale of the weights. The default value is True. - -Example -======= - -.. note:: - - For information on accessing AMD Quark ONNX examples, refer to :doc:`Accessing ONNX Examples <../onnx_examples>`. - This example and the relevant files are available at ``/onnx/accuracy_improvement/cle`` - -This :doc:`example <../example_quark_onnx_cle>` demonstrates quantizing a resnet152 model using the AMD Quark ONNX quantizer. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_quarot.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_quarot.rst deleted file mode 100644 index d452467c..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_quarot.rst +++ /dev/null @@ -1,52 +0,0 @@ -QuaRot -====== - -QuaRot is proposed to harmonize the outliers within the activations before MatMul/Gemm. The main idea for QuaRot is to insert Hadamard transformation pairs into activations, hence projecting activations to the Hadamard domain. This projection can make discrete energy concentrated, or make concentrated energy discrete. Due to the discrete distribution of activation, the distribution after the Hadamard transform becomes more concentrated, thereby mitigating the outlier situation and relieving activation quantization error. Experiments show that using the QuaRot technique can improve the PTQ accuracy of LLMs like Llama-2, especially for models with a large number of outliers in the activation. Here is a sample showing how to enable QuaRot using `quark.onnx`: - -.. 
code-block:: python - - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx import ModelQuantizer, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=QuantFormat.QDQ, - calibrate_method=CalibrationMethod.MinMax, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_transformer=True, - include_rotation=True, - extra_options={ - 'RMatrixDim': 4096, - 'UseRandomHad': False, - 'RConfigPath': "rotation_config.json", - 'ActivationSymmetric': True, - 'CalibMovingAverage': True - }, - ) - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -Arguments ---------- - -- **include_rotation**: (Boolean) This parameter is a flag that determines whether to optimize the models using QuaRot. It can improve the accuracy of LLMs like Llama. RConfigPath must be given if include_rotation is True. The default is False. -- **extra_options**: (Dictionary or None) Contains key-value pairs for various options in different cases. Options related to SQ are: - - - **RMatrixDim**: (Int) Specifies the dimension for constructing rotation matrix. The default value is 4096. - - **UseRandomHad**: (Boolean) If True, the rotation matrix is generated by the random Hadamard scheme. The default is False. - - **RConfigPath**: (String) Sets the path for the rotation config file. This is necessary when using QuaRot. The default is "". - - **ActivationSymmetric**: (Boolean) If True, symmetrizes calibration data for activations. The default is False. - - **CalibMovingAverage**: (Boolean) If True, the moving average of the minimum and maximum values is computed when the calibration method selected is MinMax. The default is False. In PowerOfTwoMethod calibration method, this should be set to False. - -Example -------- - -.. 
note:: - - For information on accessing AMD Quark ONNX examples, refer to :doc:`Accessing ONNX Examples <../onnx_examples>`. - This example and the relevant files are available at ``/onnx/accuracy_improvement/quarot`` - -This :doc:`example <../example_quark_onnx_quarot>` demonstrates quantizing a Llama-2-7b-hf model using the AMD Quark ONNX quantizer. It also shows how to use the QuaRot algorithm. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_sq.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_sq.rst deleted file mode 100644 index 0b150215..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_algorithms_sq.rst +++ /dev/null @@ -1,45 +0,0 @@ -SmoothQuant (SQ) -================ - -SmoothQuant (SQ) is another technique used to improve PTQ accuracy. It smooths the outliers of the activation so that it loses as little precision as possible during quantization. Experiments show that using the SQ technique can improve the PTQ accuracy of some models, especially for models with a large number of outliers in the activation. Here is a sample showing how to enable SQ using `quark.onnx`. - -.. 
code-block:: python - - from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_sq=True, - extra_options={ - 'ActivationSymmetric': True, - 'SmoothAlpha': 0.5, - }, - ) - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -Arguments ---------- - -- **include_sq**: (Boolean) This parameter is a flag that determines whether to optimize the models using SmoothQuant; it can improve the accuracy of some models. The default is False. - -- **extra_options**: (Dictionary or None) Contains key-value pairs for various options in different cases. Options related to SQ are: - - - **SmoothAlpha**: (Float) This parameter controls how much difficulty we want to migrate from activation to weights. The default value is 0.5. - -Example -------- - -.. note:: - - For information on accessing AMD Quark ONNX examples, refer to :doc:`Accessing ONNX Examples <../onnx_examples>`. - This example and the relevant files are available at ``/onnx/accuracy_improvement/smooth_quant`` - -This :doc:`example <../example_quark_onnx_smoothquant>` demonstrates quantizing an opt-125m model using the AMD Quark ONNX quantizer. It also shows how to use the Smooth Quant algorithm. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_improvement_algorithms.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_improvement_algorithms.rst deleted file mode 100644 index 07e4b7cd..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_accuracy_improvement_algorithms.rst +++ /dev/null @@ -1,16 +0,0 @@ -Accuracy Improvement Algorithms -=============================== - - -AMD Quark for ONNX provides several techniques to improve the accuracy for quantized models after PTQ. - - -.. toctree:: - :hidden: - :maxdepth: 1 - - accuracy_algorithms/cle.rst - accuracy_algorithms/ada.rst - accuracy_algorithms/sq.rst - accuracy_algorithms/quarot.rst - example_quark_onnx_gptq.rst diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_appendix_full_quant_config_features.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_appendix_full_quant_config_features.rst deleted file mode 100644 index 306c82ca..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_appendix_full_quant_config_features.rst +++ /dev/null @@ -1,706 +0,0 @@ -Full List of Quantization Configuration Features -================================================ - -Overview --------- - -It's very simple to quantize a model using the ONNX quantizer of Quark, only a few straightforward Python statements: - -.. 
code:: python - - from quark.onnx import ModelQuantizer - from quark.onnx.quantization.config import Config, QuantizationConfig - - quant_config = QuantizationConfig() - - config = Config(global_quant_config=quant_config) - quantizer = ModelQuantizer(config) - quantizer.quantize_model(model_input, model_output, calibration_data_reader) - - -As shown in the code, just create a quantization configuration and use it to initialize a quantizer, and then call the quantizer's *quantize_model()* API, which has 3 main parameters: -* **model_input**: (String or ModelProto) This parameter specifies the file path of the model that is to be quantized. When a file path cannot be specified, the loaded ModelProto can also be passed in directly. -* **model_output**: (Optional String) This parameter specifies the file path where the quantized model will be saved. You can leave it unspecified (it will default to None), and the ModelProto format quantized model will be returned by the API. -* **calibration_data_reader**: (Optional Object) This parameter is a calibration data reader that enumerates the calibration data and generates inputs for the original model. You can leave it unspecified (it will default to None), and simply enable *UseRandomData* in extra options of quantization configuration to use random data for calibration. - -The next section will provide a detailed list of all parameters in the quantization configuration. - -Quantization Configuration --------------------------- - -.. 
code:: python - - quant_config = QuantizationConfig( - calibrate_method = quark.onnx.CalibrationMethod.MinMax, - quant_format = quark.onnx.QuantFormat.QDQ, - activation_type = quark.onnx.QuantType.QInt8, - weight_type = quark.onnx.QuantType.QInt8, - input_nodes: List[str] = [], - output_nodes: List[str] = [], - op_types_to_quantize: List[str] = [], - nodes_to_quantize: List[str] = [], - extra_op_types_to_quantize: List[str] = [], - nodes_to_exclude: List[str] = [], - subgraphs_to_exclude: List[Tuple[List[str]]] = [], - specific_tensor_precision: bool = False, - execution_providers: List[str] = ['CPUExecutionProvider'], - per_channel: bool = False, - reduce_range: bool = False, - optimize_model: bool = True, - use_dynamic_quant: bool = False, - use_external_data_format: bool = False, - convert_fp16_to_fp32: bool = False, - convert_nchw_to_nhwc: bool = False, - include_sq: bool = False, - include_rotation: bool = False, - include_cle: bool = True, - include_auto_mp: bool = False, - include_fast_ft: bool = False, - enable_npu_cnn: bool = False, - enable_npu_transformer: bool = False, - debug_mode: bool = False, - crypto_mode: bool = False, - print_summary: bool = True, - ignore_warnings: bool = True, - log_severity_level: int = 1, - extra_options: Dict[str, Any] = {}, - ) - -* **calibrate_method**: (String) The method used in calibration, default to quark.onnx.CalibrationMethod.MinMax. - - For NPU_CNN platforms, power-of-two methods should be used, options are: - - - quark.onnx.PowerOfTwoMethod.NonOverflow: This method get the power-of-two quantize parameters for each tensor to make sure min/max values not overflow. - - quark.onnx.PowerOfTwoMethod.MinMSE: This method get the power-of-two quantize parameters for each tensor to minimize the mean-square-loss of quantized values and float values. This takes longer time but usually gets better accuracy. 
- - For NPU_Transformer or CPU platforms, float scale methods should be used, options are: - - - quark.onnx.CalibrationMethod.MinMax: This method obtains the quantization parameters based on the minimum and maximum values of each tensor. - - quark.onnx.CalibrationMethod.Entropy: This method determines the quantization parameters by considering the entropy algorithm of each tensor's distribution. - - quark.onnx.CalibrationMethod.Percentile: This method calculates quantization parameters using percentiles of the tensor values. - - quark.onnx.LayerWiseMethod.LayerWisePercentile: This method calculates quantization parameters using different percentiles for different layers according to minimize mean average error or mean square error loss value. - -* **quant_format**: (String) This parameter is used to specify the quantization format of the model. It has the following options: - - - quark.onnx.QuantFormat.QOperator: This option quantizes the model directly using quantized operators. - - quark.onnx.QuantFormat.QDQ: This option quantizes the model by inserting QuantizeLinear/DeQuantizeLinear into the tensor. It supports 16-bit/8-bit/4-bit quantization. - - quark.onnx.ExtendedQuantFormat.QDQ: This option quantizes the model by inserting our customized QuantizeLinear/DequantizeLinear or BFPQuantizeDequantize/MXQuantizeDequantize into the tensor, which support a wider range of bit-widths and precisions. - -* **activation_type**: (QuantType) Specifies the quantization data type for activations, options can be found in the table below. The default is quark.onnx.QuantType.QInt8. -* **weight_type**: (QuantType) Specifies the quantization data type for weights, options can be found in the table below. The default is quark.onnx.QuantType.QInt8. For NPU devices, this must be set to QuantType.QInt8. - -* **input_nodes**: (List of Strings) This parameter is a list of the - names of the starting nodes to be quantized. Nodes in the model - before these nodes will not be quantized. 
For example, this argument - can be used to skip some pre-processing nodes or stop the first node - from being quantized. The default value is an empty list ([]). -* **output_nodes**: (List of Strings) This parameter is a list of the - names of the end nodes to be quantized. Nodes in the model after - these nodes will not be quantized. For example, this argument can be - used to skip some post-processing nodes or stop the last node from - being quantized. The default value is an empty list ([]). -* **op_types_to_quantize**: (List of Strings or None) If specified, - only operators of the given types will be quantized (e.g., ['Conv'] - to only quantize Convolutional layers). By default, all supported - operators will be quantized. -* **nodes_to_quantize**:(List of Strings or None) If specified, only - the nodes in this list are quantized. The list should contain the - names of the nodes, for example, ['Conv\__224', 'Conv\__252']. The - default value is an empty list ([]). -* **extra_op_types_to_quantize**: (List of Strings or None) If specified, - the given operator types will be included as additional targets for - quantization, expanding the set of operators to be quantized without - replacing the existing configuration (e.g., ['Gemm'] to include Gemm - layers in addition to the currently specified types). By default, no - extra operator types will be added for quantization. -* **nodes_to_exclude**:(List of Strings or None) If specified, the nodes - in this list will be excluded from quantization. The elements in this list - can be either regular expression patterns with .\* or exact node names. - For instance, to exclude all nodes whose names start with /layer0/, you can - include a pattern like ^/layer0/.* in the list. The default value is an empty - list ([]). -* **subgraphs_to_exclude**:(List or None) If specified, the - nodes in these subgraphs will be excluded from quantization. 
For example, - you can use [(["Conv1"], ["Conv2"]), (["Relu9", "MatMul10"])] if you do - not want to quantize nodes between "Conv1" and "Conv2" and nodes between - "Relu9" and "MatMul10", as well as these start and end nodes themselves. - If the subgraph is complex with multiple start nodes and multiple end nodes, - you can use [([start_node1, start_node2], [end_node1, end_node2, end_node3])]. - The default value is an empty list ([]). -* **specific_tensor_precision**: (Boolean) This parameter is a flag - that determines whether to use tensor-level mixed precision, this is - an experimental feature. The default is False. -* **execution_providers**: (List of Strings) This parameter defines the - execution providers that will be used by ONNX Runtime to do - calibration for the specified model. The default value - 'CPUExecutionProvider' implies that the model will be computed using - the CPU as the execution provider. You can also set this to other - execution providers supported by ONNX Runtime such as - 'ROCMExecutionProvider' and 'CUDAExecutionProvider' for GPU-based computation, - if they are available in your environment. The default is - ['CPUExecutionProvider']. -* **per_channel**: (Boolean) Determines whether weights should be - quantized per channel. The default value is False. For DPU/NPU - devices, this must be set to False as they currently do not support - per-channel quantization. -* **reduce_range**: (Boolean) If True, quantizes weights with 7-bits. - The default value is False. For DPU/NPU devices, this must be set to - False as they currently do not support reduced range quantization. -* **optimize_model**:(Boolean) If True, optimizes the model before - quantization. Model optimization performs certain operator fusion - that makes quantization tool's job easier. For instance, a - Conv/ConvTranspose/Gemm operator followed by BatchNormalization can - be fused into one during the optimization, which can be quantized - very efficiently. 
The default value is True. -* **use_dynamic_quant**: (Boolean) This flag determines whether to apply - dynamic quantization to the model. If True, dynamic quantization is used; - if False, static quantization is applied. The default is False. -* **use_external_data_format**: (Boolean) This option is used for large - size (>2GB) model. The model proto and data will be stored in - separate files. The default is False. -* **convert_fp16_to_fp32**: (Boolean) This parameter controls whether - to convert the input model from float16 to float32 before - quantization. For float16 models, it is recommended to set this - parameter to True. The default value is False. When using - convert_fp16_to_fp32 in AMD Quark for ONNX, it requires onnxsim to - simplify the ONNX model. Please make sure that onnxsim is installed - by using 'python -m pip install onnxsim'. -* **convert_nchw_to_nhwc**: (Boolean) This parameter controls whether - to convert the input NCHW model to input NHWC model before - quantization. For input NCHW models, it is recommended to set this - parameter to True. The default value is False. -* **include_sq**: (Boolean) This parameter is a flag that determines - whether to optimize the models using SmoothQuant; it can improve the - accuracy of transformer-based models like Llama. The default is False. -* **include_rotation**: (Boolean) This parameter is a flag that determines whether - to optimize the models using QuaRot. It can improve the accuracy of LLMs like - Llama. RConfigPath must be given if include_rotation is True. The default is False. -* **include_cle**: (Boolean) This parameter is a flag that determines - whether to optimize the models using CrossLayerEqualization; it can - improve the accuracy of some models. The default is True. -* **include_auto_mp**: (Boolean) If True, the auto mixed precision will be turned on. - The default is False. 
-* **include_fast_ft**: (Boolean) This parameter is a flag that - determines whether to use adaround or adaquant algorithm for - finetuning, this is an experimental feature. The default is False. -* **enable_npu_cnn**: (Boolean) This parameter is a flag that - determines whether to generate a quantized model that is suitable for - the DPU/NPU. If set to True, the quantization process will consider - the specific limitations and requirements of the DPU/NPU, thus - creating a model that is optimized for DPU/NPU computations. This - parameter primarily addresses the optimization of CNN based models - for deployment on DPU/NPU. The default is False. **Note**: In the - previous versions, "enable_npu_cnn" was named "enable_dpu". - "enable_dpu" will be deprecated in future releases, please use - "enable_npu_cnn" instead. -* **enable_npu_transformer**: (Boolean) This parameter is a flag that - determines whether to generate a quantized model that is suitable for - the NPU. If set to True, the quantization process will consider the - specific limitations and requirements of the NPU, thus creating a - model that is optimized for NPU computations. This parameter - primarily addresses the optimization of transformer models for - deployment on NPU. The default is False. -* **debug_mode**: (Boolean) Flag to enable debug mode. In this mode, - all debugging message will be printed. Default is False. -* **crypto_mode**: (Boolean) Flag to enable crypto mode. In this mode, - all message will be blocked, and all intermediate data related to the - model will not be saved to disk. In addition, the input model to the - *quantize_model* API should be a ModelProto object. Please that it - only supports <2GB ModelProto object. Default is False. -* **print_summary**: (Boolean) Flag to print summary of quantization. Default is True. -* **ignore_warnings**: (Boolean) Flag to suppress the warnings globally. Default is True. 
-* **log_severity_level**: (Int) This parameter is used to select the - severity level of screen printing logs. Its value ranges from 0 to 4: 0 for DEBUG, - 1 for INFO, 2 for WARNING, 3 for ERROR and 4 for CRITICAL or FATAL. Default value is 1, - which means printing all messages including INFO, WARNING, ERROR and etc by default. -* **extra_options**: (Dictionary or None) Contains key-value pairs for - various options in different cases. Current used: - - - **ActivationSymmetric**: (Boolean) If True, symmetrize calibration - data for activations. The default is False. - - **WeightSymmetric**: (Boolean) If True, symmetrize calibration - data for weights. The default is True. - - **ActivationScaled**: (Boolean) If True, all activations will be scaled to the exact numeric range. - The default is True for integer data type quantization and False for BFloat16 and Float16, which means - by default the BFloat16/Float16 quantization will cast float32 tensors to BFloat16/Float16 directly. - - **WeightScaled**: (Boolean) If True, all weights will be scaled to the exact numeric range. - The default is True for integer data type quantization and False for BFloat16 and Float16, which means - by default the BFloat16/Float16 quantization will cast float32 tensors to BFloat16/Float16 directly. - - **QuantizeFP16**: (Boolean) If True, the data type of the input model should be float16. It only takes effect when onnxruntime version is 1.18 or above. The default is False. - - **UseFP32Scale**: (Boolean) If True, the scale of the quantized model is converted from float16 to float32 when the quantization is done. It only takes effect only if QuantizeFP16 is True. It must be False when UseMatMulNBits is True. The default is True. - - **UseUnsignedReLU**: (Boolean) If True, the output tensor of ReLU - and Clip, whose min is 0, will be forced to be asymmetric. The - default is False. - - **QuantizeBias**: (Boolean) If True, quantize the Bias as a normal - weights. The default is True. 
For DPU/NPU devices, this must be - set to True. - - **Int32Bias**: (Boolean) If True, bias will be quantized in int32 - data type; if false, it will have the same data type as weight. The - default is False when enable_npu_cnn is True. Otherwise the - default is True. - - **Int16Bias**: (Boolean) If True, bias will be quantized in int16 - data type; The default is False. **Note**: 1. ONNXRuntime only supports - Int16 Bias inference when the opset version is 21 or higher, so please - ensure that the input model's opset version is 21 or higher. 2. It is - recommended to use this together with ADAROUND or ADAQUANT; otherwise, - the quantized model with Int16 bias may suffer from poor accuracy. - - **RemoveInputInit**: (Boolean) If True, initializer in graph - inputs will be removed because it will not be treated as constant - value/weight. This may prevent some of the graph optimizations, - like const folding. The default is True. - - **SimplifyModel**: (Boolean) If True, The input model will be - simplified using the onnxsim tool. The default is True. - - **EnableSubgraph**: (Boolean) If True, the subgraph will be - quantized. The default is False. More support for this feature is - planned in the future. - - **ForceQuantizeNoInputCheck**: (Boolean) If True, latent operators - such as maxpool and transpose will always quantize their inputs, - generating quantized outputs even if their inputs have not been - quantized. The default behavior can be overridden for specific - nodes using nodes_to_exclude. - - **MatMulConstBOnly**: (Boolean) If True, only MatMul operations - with a constant 'B' will be quantized. The default is False for - static mode and True for dynmaic mode. - - **AddQDQPairToWeight**: (Boolean) If True, both QuantizeLinear and - DeQuantizeLinear nodes are inserted for weight, maintaining its - floating-point format. The default is False, which quantizes - floating-point weight and feeds it solely to an inserted - DeQuantizeLinear node. 
In the PowerOfTwoMethod calibration method, - this setting will also be effective for the bias. - - **OpTypesToExcludeOutputQuantization**: (List of Strings or None) - If specified, the output of operators with these types will not be - quantized. The default is an empty list. - - **DedicatedQDQPair**: (Boolean) If True, an identical and - dedicated QDQ pair is created for each node. The default is False, - allowing multiple nodes to share a single QDQ pair as their - inputs. - - **QDQOpTypePerChannelSupportToAxis**: (Dictionary) Sets the - channel axis for specific operator types (e.g., {'MatMul': 1}). - This is only effective when per-channel quantization is supported - and per_channel is True. If a specific operator type supports - per-channel quantization but no channel axis is explicitly - specified, the default channel axis will be used. For DPU/NPU - devices, this must be set to {} as per-channel quantization is - currently unsupported. The default is an empty dict ({}). - - **CalibTensorRangeSymmetric**: (Boolean) If True, the final range - of the tensor during calibration will be symmetrically set around - the central point "0". The default is False. In PowerOfTwoMethod - calibration method, the default is True. - - **CalibMovingAverage**: (Boolean) If True, the moving average of - the minimum and maximum values will be computed when the - calibration method selected is MinMax. The default is False. In - PowerOfTwoMethod calibration method, this should be set to False. - - **CalibMovingAverageConstant**: (Float) Specifies the constant - smoothing factor to use when computing the moving average of the - minimum and maximum values. The default is 0.01. This is only - effective when the calibration method selected is MinMax and - CalibMovingAverage is set to True. In PowerOfTwoMethod calibration - method, this option is unsupported. 
- - **Percentile**: (Float) If the calibration method is set to - 'quark.onnx.CalibrationMethod.Percentile,' then this parameter can - be set to the percentage for percentile. The default is 99.999. - - **LWPMetric**: (String) If the calibration method is set to - 'quark.onnx.LayerWiseMethod.LayerWisePercentile,' then this parameter can - be set to select the metric to judge the percentile value. The default is mae. - - **ActivationBitWidth**: (Int) If the calibration method is set to - 'quark.onnx.LayerWiseMethod.LayerWisePercentile', then this parameter can - be set to calculate the quantize/dequantize error. The default is 8. - - **PercentileCandidates**: (List) If the calibration method is set to - 'quark.onnx.LayerWiseMethod.LayerWisePercentile' then this parameter can - be set to the percentage for percentiles. The default is [99.99, 99.999, 99.9999]. - - **UseRandomData**: (Boolean) Required to be true when the - RandomDataReader is needed. The default value is false. - - **RandomDataReaderInputShape**: (Dict) It is required to use - dict {name : shape} to specify a certain input. For example, - RandomDataReaderInputShape={"image" : [1, 3, 224, 224]} for the - input named "image". The default value is an empty dict {}. - - **RandomDataReaderInputDataRange**: (Dict or None) Specifies the - data range for each inputs if used random data reader - (calibration_data_reader is None). Currently, if set to None then - the random value will be 0 or 1 for all inputs, otherwise range - [-128,127] for unsigned int, range [0,255] for signed int and - range [0,1] for other float inputs. The default is None. - - **Int16Scale**: (Boolean) If True, the float scale will be - replaced by the closest value corresponding to M and 2\ **N, where - the range of M and 2**\ N is within the representation range of - int16 and uint16. The default is False. - - **MinMSEMode**: (String) When using - quark.onnx.PowerOfTwoMethod.MinMSE, you can specify the method for - calculating minmse. 
By default, minmse is calculated using all - calibration data. Alternatively, you can set the mode to - "MostCommon", where minmse is calculated for each batch separately - and take the most common value. The default setting is 'All'. - - **ConvertOpsetVersion**: (Int or None) Specifies the target opset version for the ONNX model. - If set, the model's opset version will be updated accordingly. The default is None. - - **ConvertBNToConv**: (Boolean) If True, the BatchNormalization - operation will be converted to Conv operation. The default is True - when enable_npu_cnn is True. - - **ConvertReduceMeanToGlobalAvgPool**: (Boolean) If True, the - Reduce Mean operation will be converted to Global Average Pooling - operation. The default is True when enable_npu_cnn is True. - - **SplitLargeKernelPool**: (Boolean) If True, the large kernel - Global Average Pooling operation will be split into multiple - Average Pooling operation. The default is True when enable_npu_cnn - is True. - - **ConvertSplitToSlice**: (Boolean) If True, the Split operation - will be converted to Slice operation. The default is True when - enable_npu_cnn is True. - - **FuseInstanceNorm**: (Boolean) If True, the split instance norm - operation will be fused to InstanceNorm operation. The default is - True. - - **FuseL2Norm**: (Boolean) If True, a set of L2norm operations will - be fused to L2Norm operation. The default is True. - - **FuseGelu**: (Boolean) If True, a set of Gelu operations will - be fused to Gelu operation. The default is True. - - **FuseLayerNorm**: (Boolean) If True, a set of LayerNorm - operations will be fused to LayerNorm operation. The default is - True. - - **ConvertClipToRelu**: (Boolean) If True, the Clip operations that - has a min value of 0 will be converted to ReLU operations. The - default is True when enable_npu_cnn is True. 
- - **SimulateDPU**: (Boolean) If True, a simulation transformation - that replaces some operations with an approximate implementation - will be applied for DPU when enable_npu_cnn is True. The default - is True. - - **ConvertLeakyReluToDPUVersion**: (Boolean) If True, the Leaky - Relu operation will be converted to DPU version when SimulateDPU - is True. The default is True. - - **ConvertSigmoidToHardSigmoid**: (Boolean) If True, the Sigmoid - operation will be converted to Hard Sigmoid operation when - SimulateDPU is True. The default is True. - - **ConvertHardSigmoidToDPUVersion**: (Boolean) If True, the Hard - Sigmoid operation will be converted to DPU version when - SimulateDPU is True. The default is True. - - **ConvertAvgPoolToDPUVersion**: (Boolean) If True, the global or - kernel-based Average Pooling operation will be converted to DPU - version when SimulateDPU is True. The default is True. - - **ConvertClipToDPUVersion**: (Boolean) If True, the Clip operation - will be converted to DPU version when SimulateDPU is True. The - default is False. - - **ConvertReduceMeanToDPUVersion**: (Boolean) If True, the - ReduceMean operation will be converted to DPU version when - SimulateDPU is True. The default is True. - - **ConvertSoftmaxToDPUVersion**: (Boolean) If True, the Softmax - operation will be converted to DPU version when SimulateDPU is - True. The default is False. - - **NPULimitationCheck**: (Boolean) If True, the quantization position - will be adjust due to the limitation of DPU/NPU. The default is - True. - - **MaxLoopNum**: (Int) The quantizer adjusts or aligns the quantization - position through loops, this option is used to set the maximum number of loops. - The default value is 5. - - **AdjustShiftCut**: (Boolean) If True, adjust the shift cut of - nodes when NPULimitationCheck is True. The default is True. - - **AdjustShiftBias**: (Boolean) If True, adjust the shift bias of - nodes when NPULimitationCheck is True. The default is True. 
- - **AdjustShiftRead**: (Boolean) If True, adjust the shift read of - nodes when NPULimitationCheck is True. The default is True. - - **AdjustShiftWrite**: (Boolean) If True, adjust the shift write of - nodes when NPULimitationCheck is True. The default is True. - - **AdjustHardSigmoid**: (Boolean) If True, adjust the position of hard - sigmoid nodes when NPULimitationCheck is True. The default is - True. - - **AdjustShiftSwish**: (Boolean) If True, adjust the shift swish - when NPULimitationCheck is True. The default is True. - - **AlignConcat**: (Boolean) If True, adjust the quantization position of - concat when NPULimitationCheck is True. The default is True, - when the power-of-two scale is used, otherwise it's False. - - **AlignPool**: (Boolean) If True, adjust the quantization position of - pooling when NPULimitationCheck is True. The default is True, - when the power-of-two scale is used, otherwise it's False. - - **AlignPad**: (Boolean) If True, adjust the quantization position of - pad when NPULimitationCheck is True. The default is True, - when the power-of-two scale is used, otherwise it's False. - - **AlignSlice**: (Boolean) If True, adjust the quantization position of - slice when NPULimitationCheck is True. The default is True, - when the power-of-two scale is used, otherwise it's False. - - **AlignTranspose**: (Boolean) If True, adjust the quantization position of - transpose when NPULimitationCheck is True. The default is False. - - **AlignReshape**: (Boolean) If True, adjust the quantization position of - reshape when NPULimitationCheck is True. The default is False. - - **AdjustBiasScale**: (Boolean) If True, adjust the bias scale equal to activation scale - multiply by weights scale. The default is True. - - **BFPAttributes**: (Dictionary) A parameter used to specify the - attributes for BFP quantization nodes. - - - **bfp_method**: (String) BFP method. 
The options are "to_bfp" and "to_bfp_prime", - corresponding to classic BFP and BFP with micro exponents, respectively. - The default is 'to_bfp'. - - **axis**: (Int) The axis for splitting the input tensor into blocks. The default is 1 - but can be modified by the quantizer according to the tensor's shape. - - **bit_width**: (Int) Bits for the block floating point. For BFP16, - this parameter should be 16, which consists of three parts: 8 bits shared exponent, - 1 bit sign and 7 bits mantissa. The default is 16. - - **block_size**: (Int) Size of block. The default is 8. - - **sub_block_size**: (Int) Size of sub-block, only effective when bfp_method is "to_bfp_prime". - The default is 2. - - **sub_block_shift_bits**: (Int) Bits for the micro exponents of a sub block, only effective - when bfp_method is "to_bfp_prime". The default is 1. - - **rounding_mode**: (Int) Rounding mode, 0 for rounding half away from zero, 1 for rounding half - upward and 2 for rounding half to even. The default is 0. - - **convert_to_bfloat_before_bfp**: (Int) If set to 1, convert the input tensor to BFloat16 - before converting to BFP. The default is 0. - - **use_compiler_version_cpu_kernel**: (Int) If set to 1, use a customized cpu kernel. - The default is 0. - - * **MXAttributes**: (Dictionary) A parameter used to specify the - attributes for MX quantization nodes. - - - **element_dtype**: (String) Element data type. The options are "fp8_e5m2", "fp8_e4m3", - "fp6_e3m2", "fp6_e2m3", "fp4_e2m1" and "int8". The default is "int8". - - **axis**: (Int) The axis for splitting the input tensor into blocks. The default is 1 - but can be modified by the quantizer according to the tensor's shape. - - **block_size**: (Int) Size of block. The default is 32. - - **rounding_mode**: (Int) Rounding mode, 0 for rounding half away from zero, 1 for rounding half - upward and 2 for rounding half to even. The default is 0. 
- - * **ReplaceClip6Relu**: (Boolean) If True, Replace Clip(0,6) with - Relu in the model. The default is False. - * **CLESteps**: (Int) Specifies the steps for CrossLayerEqualization - execution when include_cle is set to true, The default is 1, When - set to -1, an adaptive CrossLayerEqualization will be conducted. - The default is 1. - * **CLETotalLayerDiffThreshold**: (Float) Specifies The threshold - represents the sum of mean transformations of - CrossLayerEqualization transformations across all layers when - utilizing CrossLayerEqualization. The default is 2e-7. - * **CLEScaleAppendBias**: (Boolean) Whether the bias be included - when calculating the scale of the weights, The default is True. - * **CopySharedInit**: (List or None) Specifies the node op_types to run - duplicating initializer in the model for separate quantization use across - different nodes, e.g. ['Conv', 'Gemm', 'Mul'] input, only shared initializer - in these nodes will be duplicated. None means that skip this conversion - while empty list means that run this for all op_types included in the - given model, default is None. - * **CopyBiasInit**: (List or None) Specifies the node operation types to run - duplicating bias initializer in the model for separate quantization use across - different nodes, e.g. ['Conv', 'Gemm', 'Mul'] input, only shared bias initializer - in these nodes will be duplicated. None means that skip this conversion - while empty list means that run this for all operation types included in the - given model. The default is an empty list when using quantization with float scale - like A8W8 and A16W8. The default is None otherwise. - * **FastFinetune**: (Dictionary) A parameter used to specify the - settings for fast finetune. - - - **OptimAlgorithm**: (String) The specified algorithm for fast finetune. Optional values are "adaround" and "adaquant". The - "adaround" adjusts the weights rounding function, which is - relatively stable and might converge faster. 
The "adaquant" trains - the weight (and bias optional) directly, so might have a greater - improvement if the parameters, especially the learning rate and - batch size, are optimal. The default value is "adaround". - - **OptimDevice**: (String) Specifies the compute device used for - PyTorch model training during fast finetuning. Optional values - are "cpu" and "cuda:0". The default value is "cpu". - - **InferDevice**: (String) Specifies the compute device used for - ONNX model inference during fast finetuning. Optional values are - "cpu" and "cuda:0". The default value is "cpu". - - **FixedSeed**: (Int) Seed for random data generator, which makes - the fast finetuned results reproducible. - - **DataSize**: (Int) Specifies the size of the data used for - finetuning. It's recommended to set the batch size of the data to - 1 in the data reader to ensure counting the size accurately. It - uses all the data from the data reader by default. - - **BatchSize**: (Int) Batch size for finetuning. The larger batch - size, usually the better accuracy but the longer training time. - The default value is 1. - - **NumBatches**: (Int) The number of mini-batches in an iteration. It should - always be 1. The default value is 1. - - **NumIterations**: (Int) The Iterations for finetuning. The more - iterations, the better accuracy but the longer training time. The - default value is 1000. - - **LearningRate**: (Float) Learning rate of finetuning for all - layers. It has a significant impact on the accuracy improvement; - you need to try some learning rates to get a better result for - your model. The default value is 0.1 for AdaRound and 0.00001 for - AdaQuant. - - **EarlyStop**: (Bool) If average loss of a certain number of - iterations decreases compared with the previous one, the training - of the layer will stop early. It will accelerate the finetuning - process and avoid overfitting. The default value is False. 
- - **LRAdjust**: (Tuple) Besides the overall learning rate, users - could set up a scheme to adjust learning rate further according to - the mean square error (MSE) between the quantized module and - original float module. Its a tuple contains two members, the - first one is a threshold of the MSE and the second one is the new - learning rate. For example, setting as (1.0, 0.2) means using a - new learning rate 0.2 for the layer whose MSE is bigger than 1.0. - - **TargetOpType**: (List) The target operation types to finetune. - The default value is [Conv, ConvTranspose, Gemm, MatMul, - InstanceNormalization]. The MatMul node must have one and only one - set of weights. - - **SelectiveUpdate**: (Bool) If the end-to-end accuracy does not - improve after finetuned a certain layer, discard the optimized - weight (and bias) of the layer. The default value is False. - - **UpdateBias**: (Bool) Specifies whether to update bias - parameters during fine-tuning. Its only available for AdaQuant. - The default value is False. - - **OutputQDQ**: (Bool) Specifies whether include the output - tensors QDQ pair of the compute nodes for finetuning. The default - value is False. - - **DropRatio**: (Float) Specifies the ratio to drop the input - data from the float module. It ranges from 0 to 1, 0 represents - the input data is from the float module fully, 1 represents all - from quantized module. The default value is 0.5. - - **LogPeriod**: (Int) Indicate how many iterations to print the - log once. The default value is NumIterations/10. - - * **SmoothAlpha**: (Float) This parameter control how much - difficulty we want to migrate from activation to weights, The - default value is 0.5. - * **RMatrixDim**: (Int) Specifies the dimension for constructing - rotation matrix. The default value is 4096. - * **UseRandomHad**: (Boolean) If True, the rotation matrix will be - generated by the random Hadamard scheme. The default is False. 
- * **RConfigPath**: (String) Set the path for rotation config file. - This is necessary when using QuaRot. The default is "". - * **RemoveQDQConvClip**: (Boolean) If True, the QDQ between - Conv/Add/Gemm and Clip will be removed for DPU. The default is - True. - * **RemoveQDQConvRelu**: (Boolean) If True, the QDQ between - Conv/Add/Gemm and Relu will be removed for DPU. The default is - True. - * **RemoveQDQConvLeakyRelu**: (Boolean) If True, the QDQ between - Conv/Add/Gemm and LeakyRelu will be removed for DPU. The default - is True. - * **RemoveQDQConvPRelu**: (Boolean) If True, the QDQ between - Conv/Add/Gemm and PRelu will be removed for DPU. The default is - True. - * **RemoveQDQConvGelu**: (Boolean) If True, the QDQ between - Conv/Add/Gemm and Gelu will be removed. The default is False. - * **RemoveQDQMulAdd**: (Boolean) If True, the QDQ between - Mul and Add will be removed for NPU. The default is False. - * **RemoveQDQBetweenOps**: (List of tuples (Strings, Strings) or None) - This parameter accepts a list of tuples representing operation type - pairs (e.g., Conv and Relu). If set, the QDQ between the specified - pairs of operations will be removed for NPU. The default is None. - * **RemoveQDQInstanceNorm**: (Boolean) If True, the QDQ between - InstanceNorm and Relu/LeakyRelu/PRelu will be removed for DPU. The - default is False. - * **FoldBatchNorm**: (Boolean) If True, the BatchNormalization - operation will be fused with Conv, ConvTranspose or Gemm - operation. The BatchNormalization operation after Concat operation - will also be fused, if all input operations of the Concat - operation are Conv, ConvTranspose or Gemm operations. The default is - True. - * **BF16WithClip**: (Boolean) If True, during BFloat16 - quantization, insert "Clip" node before customized "QuantizeLinear" node to - add boundary protection for activation. The default is False. 
- * **BF16QDQToCast**: (Boolean) If True, during BFloat16 - quantization, replace QuantizeLinear/DeQuantizeLinear ops with Cast - ops to accelerate BFloat16 quantized inference. The default is False. - * **FixShapes**: (String) Set the input and output shapes of the quantized - model to a fixed shape by default if not explicitly specified. The - example: 'FixShapes':'input_1:[1,224,224,3];input_2:[1,96,96,3];output_1:[1,100];output_2:[1,1000]' - * **MixedPrecisionTensor**: (Dictionary) A parameter used to specify - the settings for mixed precision tensors. It is a dictionary where - the keys are of the ExtendedQuantType/QuantType enumeration type, and - the values are lists containing tensors that need to be processed - using mixed precision. - Example:"MixedPrecisionTensor":{quark.onnx.ExtendedQuantType.QBFloat16:['/stem/stem.2/Relu_output_0', - 'onnx::Conv_664', 'onnx::Conv_665']} **Note**:If there is a tensor - with bias, 'Int32Bias' needs set to False. - - * **AutoMixprecision**: (Dictionary) A parameter used to specify the - settings for auto mixed precision. - - - **DataSize**: (Int) Specifies the size of the data used for mix-precision. The entire data reader will be used by default. - - **TargetOpType**: (Set) The user defined op type set for mix-precision. The default value is ('Conv', 'ConvTranspose', 'Gemm', 'MatMul'). - - **TargetQuantType**: (QuantType) Activation data type to be mixed in the model if 'ActTargetQuantType' is not given. Error will be raised if TargetQuantType is not specified. - - **ActTargetQuantType**: (QuantType) Activation data type to be mixed in the model. - If both ActTargetQuantType and WeightTargetQuantType are not specified, the ActTargetQuantType will be same as TargetQuantType. - If only ActTargetQuantType is not specified, the ActTargetQuantType will be the original activation_type. - - **WeightTargetQuantType**: (QuantType) Weight data type to be mixed in the model. 
- If both ActTargetQuantType and WeightTargetQuantType are not specified, the ActTargetQuantType will be same as TargetQuantType. - If only WeightTargetQuantType is not specified, the WeightTargetQuantType will be the original weight_type. - - **BiasTargetQuantType**: (QuantType) Bias data type to be mixed in the model. - If BiasTargetQuantType is not specified and Int32Bias is True, the BiasTargetQuantType will be int32. - If BiasTargetQuantType is not specified and Int32Bias is False, the BiasTargetQuantType will be same as WeightTargetQuantType. - - **DualQuantNodes**: (Bool) Some backend compilers require that two types of quantization nodes exist simultaneously on the tensors which connect two different precision nodes, - for example, they require the tensor that connects BFP16 Conv and BF16 Reshape has a BFP node and a QDQ pair both. The default value is False. - - **OutputIndex**: (Int) The index of model output to be calculated for loss. - - **L2Target**: (Float) The L2 loss will be no larger than the L2Target. - If L2Target is not specified, the model will be quantized to the target quant type. - - **Top1AccTarget**: (Float) The Top1 accuracy loss will be no larger than the Top1AccTarget. - If Top1AccTarget is not specified, the model will be quantized to the target quant type. - - **EvaluateFunction**: (Function) The function to measure top1 accuracy loss. Input of the function is model output(numpy tensor), - output of the function is top1 accuracy(between 0~1). If EvaluateFunction is not specified while Top1AccTarget is given, error will be raised. - - **NumTarget**: (Int) Specified the number of nodes for mix-precision to minimize the loss. The default value of NumTarget is 0. - - **TargetTensors**: (List) Specified the names of nodes to mix into the target quant type. It's a experimental option and will be deprecated in the future. The default value is []. 
- - **TargetIndices**: (List) Specified the indices (based on sensitivity analysis results) of the nodes to mix into the target quant type. The default value is []. - - **ExcludeIndices**: (List) Specified the indices (based on sensitivity analysis results) of the nodes not to mix into the target quant type. The default value is []. - - **NoInputQDQShared**: (Bool) If True, will skip the nodes who shared the input Q/DQ pair with other nodes. The default value is True. - - **AutoMixUseFastFT**: (Bool) If True, will perform fast finetune to improve accuracy after mixed a layer. The default value is False. - - * **FoldRelu**: (Boolean) If True, the Relu will be fold to Conv - when use ExtendedQuantFormat. The default is False. - * **CalibDataSize**: (Int) This parameter controls how many data are - used for calibration. The default to using all the data in the - calibration dataloader. - * **SaveTensorHistFig**: (Boolean) If True, save the tensor - histogram to the file 'tensor_hist' in the working directory. The - default is False. - * **QuantizeAllOpTypes**: (Boolean) If True, all operation types will be quantized. - In the BF16 config, the default is True, while for others, the default is False. - * **WeightsOnly**: (Boolean) If True, only quantize weights of the - model. The default is False. - * **AlignEltwiseQuantType**: (Boolean) If True, quantize weights of the node with the activation quant type if node type in [Mul, Add, Sub, Div, Min, Max] when quant_format is ExtendedQuantFormat.QDQ and enable_npu_cnn is False and enable_npu_transformer is False. The default is False. - * **EnableVaimlBF16**: (Boolean) If True, the bfloat16 quantized model with vitis qdq will be converted to a bfloat16 quantized model with bfloat16 weights stored as float32. Vaiml is the name of a compiler, the bfloat16 quantized model can be directly deployed on the compiler if the parameter is True. The default is False. 
- * **UseGPTQ**: (Boolean) If True, GPTQ algorithm will be applied to the - model. The default is False. - * **GPTQParams**: (Dictionary) A parameter used to specify the - settings for GPTQ. - - - **Bits**: (int) The quantization bits used in GPTQ. The default is 8. - - **BlockSize**: (int) The block size in GPTQ determines - how many columns of weights will be quantized for one update. The default is 128. - - **GroupSize**: (int) The group size in GPTQ determines how many columns of weights share one set of scale and zero-point. The default is -1. - - **PercDamp**: (int) Percent of the average Hessian diagonal to use for dampening. The default is 0.01. - - **ActOrder**: (Boolean) Determine whether to re-order Hessian matrix according the values of diag. The default is False. - - **PerChannel**: (Boolean) Determine whether perform per-channel quantization in GPTQ. The default is False. - - **MSE**: (Boolean) Determine whether to use MSE method to do data calibration in GPTQ. The default is False. - - * **UseMatMulNBits**: (Boolean) If True, only quantize weights with nbits for MatMul of the - model. The default is False. - * **MatMulNBitsParams**: (Dictionary) A parameter used to specify the - settings for MatMulNBits Quantizer. - - - **Algorithm**: (str) The algorithm in MatMulNBits Quantization determines which algorithm ("DEFAULT", "GPTQ", "HQQ") to be used to quantize weights. The default is "DEFAULT". - - **GroupSize**: (int) The block size in MatMulNBits Quantization determines how many weights share a scale. The default is 128. - - **Symmetric**: (Boolean) If True, symmetrize quantization for weights. The default is True. - - **Bits**: (int) The target bits to quantize. Only 4b quantization is supported for inference, additional bits support is planned. - - **AccuracyLevel**: (int) The quantization level of input, can be: 0(unset), 1(fp32), 2(fp16), 3(bf16), or 4(int8). The default is 0. - - -Table 7. 
Quantize Types can be selected for different Quantize Formats - -+---------------------------+----------------------------------+---------------------------+ -| quant_format | quant_type | comments | -+===========================+==================================+===========================+ -| QuantFormat.QDQ | QuantType.QUInt16 | | -| | QuantType.QInt16 | | -| | QuantType.QUInt8 | | -| | QuantType.QInt8 | | -| | QuantType.QUInt4 | | -| | QuantType.QInt4 | | -+---------------------------+----------------------------------+---------------------------+ -| ExtendedQuantFormat.QDQ | QuantType.QUInt8 | | -| | QuantType.QInt8 | | -| | ExtendedQuantType.QUInt16 | | -| | ExtendedQuantType.QInt16 | | -| | ExtendedQuantType.QFloat16 | | -| | ExtendedQuantType.QBFloat16 | | -| | ExtendedQuantType.QBFP | | -| | ExtendedQuantType.QMX | | -| | ExtendedQuantType.QUInt32 | | -| | ExtendedQuantType.QInt32 | | -+---------------------------+----------------------------------+---------------------------+ - -**Note**: For UINT4 and INT4 quantization types, ONNX Runtime version 1.19.0 or later is required. -Users must ensure that the ``calibration_method`` is a native ORT quantization method (MinMax, Percentile, etc.). - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_basic_usage_onnx.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_basic_usage_onnx.rst deleted file mode 100644 index 2341e8ef..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_basic_usage_onnx.rst +++ /dev/null @@ -1,144 +0,0 @@ -AMD Quark for ONNX -================== - -The :doc:`Getting started with AMD Quark <../basic_usage>` guide provides a general overview of the quantization process, irrespective of specific hardware or deep learning frameworks. This page details the features supported by the Quark ONNX Quantizer and explains how to use it to quantize ONNX models. 
- -Basic Example -------------- - -Here is an introductory example of ResNet50 to run a quantization. We are following the :ref:`basic quantization steps from the Getting Started page `. - -1. Prepare the original float model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Download the ONNX float model from the `onnx/models `__ repo directly: - -.. code-block:: bash - - wget -P models https://github.com/onnx/models/raw/new-models/vision/classification/resnet/model/resnet50-v1-12.onnx - -2. Prepare calibration data -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can provide a folder containing PNG or JPG files as calibration data folder. For example, you can download images from https://github.com/microsoft/onnxruntime-inference-examples/tree/main/quantization/image_classification/cpu/test_images as a quick start. - -.. code-block:: bash - - mkdir calib_data - wget -O calib_data/daisy.jpg https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/test_images/daisy.jpg?raw=true - -We will use the `OpenCV `_ library to read images. - -.. code-block:: bash - - pip install opencv-python - -Next, you can refer to the following code to construct a calibration data reader class. - -.. 
code-block:: python - - import os - import cv2 - import onnx - import copy - import numpy as np - from onnxruntime.quantization import CalibrationDataReader - - def get_model_input_name(input_model_path: str) -> str: - model = onnx.load(input_model_path) - model_input_name = model.graph.input[0].name - return model_input_name - - class ImageDataReader(CalibrationDataReader): - - def __init__(self, calibration_image_folder: str, input_name: str): - self.enum_data = None - - self.input_name = input_name - - self.data_list = self._preprocess_images( - calibration_image_folder) - - def _preprocess_images(self, image_folder: str): - data_list = [] - img_names = [f for f in os.listdir(image_folder) if f.endswith('.png') or f.endswith('.jpg')] - for name in img_names: - input_image = cv2.imread(os.path.join(image_folder, name)) - # Resize the input image. Because the size of Resnet50 is 224. - input_image = cv2.resize(input_image, (224, 224)) - input_data = np.array(input_image).astype(np.float32) - # Custom Pre-Process - input_data = input_data.transpose(2, 0, 1) - input_size = input_data.shape - if input_size[1] > input_size[2]: - input_data = input_data.transpose(0, 2, 1) - input_data = np.expand_dims(input_data, axis=0) - input_data = input_data / 255.0 - data_list.append(input_data) - - return data_list - - def get_next(self): - if self.enum_data is None: - self.enum_data = iter([{self.input_name: data} for data in self.data_list]) - return next(self.enum_data, None) - - def rewind(self): - self.enum_data = None - -3. Set the quantization configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -While Quark ONNX provides a granular API to handle diverse quantization scenarios, it also offers streamlined APIs for common use cases. The example below demonstrates this simplified approach. - -.. 
code-block:: python - - from quark.onnx.quantization.config.config import Config - from quark.onnx.quantization.config.custom_config import get_default_config - - # Set up quantization with a specified configuration - # For example, use "A8W8" for Ryzen AI INT8 quantization - a8w8_config = get_default_config("A8W8") - quantization_config = Config(global_quant_config=a8w8_config ) - -.. note:: - - The A8W8 configuration is our default setup. To minimize quantization time, accuracy-improvement strategies such as AdaRound or AdaQuant are not applied by default, which may lead to suboptimal accuracy in some cases. For better quantization accuracy, please refer to Section **How to Improve Quantization Accuracy** of :doc:`Float Scales (A8W8 and A16W8) Quantization <../supported_accelerators/ryzenai/tutorial_a8w8_and_a16w8_quantize>` page for details. - - -4. Quantize the model -~~~~~~~~~~~~~~~~~~~~~ - -Once the model, input data, and quantization configuration are ready, quantizing the model is straightforward, as shown below: - -.. code-block:: python - - from quark.onnx import ModelQuantizer - - input_model_path = "models/resnet50-v1-12.onnx" - quantized_model_path = "models/resnet50-v1-12_quantized.onnx" - calib_data_path = "calib_data" - model_input_name = get_model_input_name(input_model_path) - calib_data_reader = ImageDataReader(calib_data_path, model_input_name) - - quantizer = ModelQuantizer(quantization_config) - quantizer.quantize_model(input_model_path, quantized_model_path, calib_data_reader) - -The screenshots of the ResNet50 before and after quantization, viewed with Netron (https://netron.app), are shown in Figure 1. The original float model is shown on the left-hand side, and the right-hand side shows the "a8w8" quantized model. - -.. figure:: ../_static/float_and_a8w8_quantized_resnet50.png - :width: 80% - :align: center - - **Figure 1. Float and A8W8 Quantized ResNet50** - -.. 
note:: - - During quantization, graph optimization will be automatically performed. The image above shows Batch Normalizations are automatically folded into the Convs. - -Further reading ---------------- - -* **Ryzen AI support**: Refer to :doc:`Ryzen AI <../supported_accelerators/ryzenai/index>` page to learn how to seamlessly integrate Quark and Ryzen AI for the best performance and compatibility! -* Quantized models can be evaluated to compare its performance with the original model. Learn more on :doc:`Model Evaluation <../pytorch/example_quark_torch_llm_eval>`. -* Quark's :ref:`Advanced Features ` can help you quantize more complex ONNX models. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_datasets.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_datasets.rst deleted file mode 100644 index c96dd753..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_datasets.rst +++ /dev/null @@ -1,140 +0,0 @@ -Adding Calibration Datasets -=========================== - -Class DataReader for AMD Quark Quantizer ----------------------------------------- - -AMD Quark for ONNX utilizes ONNX Runtime's `CalibrationDataReader` for normalization during quantization calibration. The following code is an example of how to define the class for the calibration data loader. - -.. code-block:: python - - import onnxruntime - from onnxruntime.quantization.calibrate import CalibrationDataReader - - class ImageDataReader(CalibrationDataReader): - - def __init__(self, calibration_image_folder: str, input_name: str, - input_height: int, input_width: int): - self.enum_data = None - - self.input_name = input_name - - self.data_list = self._preprocess_images( - calibration_image_folder, input_height, input_width) - - # The pre-processing of calibration images should be defined by users. - # Recommended batch_size is 1. 
- def _preprocess_images(self, image_folder: str, input_height: int, input_width: int, batch_size: int = 1): - data_list = [] - ''' - The pre-processing for each image - ''' - return data_list - - def get_next(self): - if self.enum_data is None: - self.enum_data = iter([{self.input_name: data} for data in self.data_list]) - return next(self.enum_data, None) - - def rewind(self): - self.enum_data = None - - input_model_path = "path/to/your/resnet50.onnx" - output_model_path = "path/to/your/resnet50_quantized.onnx" - calibration_image_folder = "path/to/your/images" - - input_name = 'input_tensor_name' - input_shape = (1, 3, 224, 224) - calib_datareader = ImageDataReader(calibration_image_folder, input_name, - input_shape[2], input_shape[3]) - - -Calibration Data Path for AMD Quark Quantizer ---------------------------------------------- - -AMD Quark for ONNX supports specifying the path to calibration datasets, making it easy to load them for quantization. Currently, this feature only supports data in `.npy` format. -For detailed guidance on creating calibration datasets in NPY format, see :doc:`Generating NPY Calibration Data<./user_guide_onnx_model_inference_save_input_npy>`. - -.. note:: - No preprocessing is applied to the calibration datasets after loading. Ensure that the calibration data is stored in the following format: - -For Single-Input Models: -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Place the calibration data files in a directory as follows: - -.. code-block:: - - calibration_data/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... - -For Multi-Input Models: -~~~~~~~~~~~~~~~~~~~~~~~ - -Organize the calibration data in sub-directories named after the input models: - -.. code-block:: - - calibration_data/ - input1_name/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... 
- input2_name/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... - ... - -Example Code: -~~~~~~~~~~~~~~~ - -.. code-block:: python - - import onnxruntime - from quark.onnx import ModelQuantizer - from quark.onnx.quantization.config import Config, get_default_config - - input_model_path = "path/to/your/resnet50.onnx" - output_model_path = "path/to/your/resnet50_quantized.onnx" - calib_data_path= "path/to/your/calib/data/folder" - - quant_config = get_default_config("XINT8") - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None, calibration_data_path=calib_data_path) - -Using Random Data for AMD Quark Quantizer ------------------------------------------ - -Random Data Calibration uses random numbers when no calibration data is available. To enable this feature, set the `UseRandomData` parameter to `True`. This option is useful for testing but might yield worse quantization results than using a real calibration dataset. It is recommended to use a real calibration dataset when performing static quantization. - -Example Code: -~~~~~~~~~~~~~ - -.. 
code-block:: python - - import onnxruntime - from quark.onnx import ModelQuantizer - from quark.onnx.quantization.config import Config, get_default_config - - input_model_path = "path/to/your/resnet50.onnx" - output_model_path = "path/to/your/resnet50_quantized.onnx" - - quant_config = get_default_config("XINT8") - quant_config.extra_options['UseRandomData'] = True - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_methods.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_methods.rst deleted file mode 100644 index a7f793c4..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_calibration_methods.rst +++ /dev/null @@ -1,24 +0,0 @@ -Calibration Methods -=================== - -AMD Quark for ONNX supports these types of calibration methods: - -MinMax Calibration Method -~~~~~~~~~~~~~~~~~~~~~~~~~ -The MinMax calibration method computes the quantization parameters based on the running minimum and maximum values. This method uses the tensor min/max statistics to compute the quantization parameters. The module records the running minimum and maximum of incoming tensors and uses these statistics to compute the quantization parameters. - -Percentile Calibration Method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Percentile calibration method, often used in robust scaling, involves scaling features based on percentile information from a static histogram, rather than using the absolute minimum and maximum values. This method is particularly useful for managing outliers in data. - -MSE Calibration Method -~~~~~~~~~~~~~~~~~~~~~~ -The MSE (Mean Squared Error) calibration method involves performing calibration by minimizing the mean squared error between the predicted outputs and the actual outputs. 
This method is typically used in regression contexts where the goal is to adjust model parameters or data transformations to reduce the average squared difference between estimated values and the true values. MSE calibration helps in refining model accuracy by fine-tuning predictions to be as close as possible to the real data points. - -Entropy Calibration Method -~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Entropy calibration method determines the quantization parameters by considering the entropy algorithm of each tensor’s distribution. - -NonOverflow Calibration Method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The NonOverflow calibration method obtains the power-of-two quantization parameters for each tensor to ensure that min/max values do not overflow. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_schemes.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_schemes.rst deleted file mode 100644 index 7c83c726..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_schemes.rst +++ /dev/null @@ -1,11 +0,0 @@ -Quantization Schemes -==================== - -AMD Quark for ONNX is capable of handling ``per tensor`` and ``per channel`` -quantization, supporting both symmetric and asymmetric methods. - -- **Per Tensor Quantization** means quantizing the tensor with one - scalar. The scaling factor is a scalar. - -- **Per Channel Quantization** means that for each dimension, typically the channel dimension of a tensor, you quantize the values in the tensor with different quantization parameters. The scaling factor is a 1-D tensor with the length of the quantization axis. For the input tensor with shape ``(D0, ..., Di, ..., Dn)`` and ``ch_axis=i``, the scaling factor is a 1-D tensor of length ``Di``. 
- diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_strategies.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_strategies.rst deleted file mode 100644 index eadaf753..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_strategies.rst +++ /dev/null @@ -1,12 +0,0 @@ -Quantization Strategies -======================= - -AMD Quark for ONNX offers three distinct quantization strategies tailored to meet the requirements of various hardware backends: - -- **Post Training Weight-Only Quantization**: Quantizes the weights ahead of time, but the activations are not quantized (using the original float data type) during inference. - -- **Post Training Static Quantization**: Quantizes both the weights and activations in the model. To achieve the best results, this process necessitates calibration with a dataset that accurately represents the actual data, which allows for precise determination of the optimal quantization parameters for activations. - -- **Post Training Dynamic Quantization**: Quantizes the weights ahead of time, while the activations are quantized dynamically at runtime. This method allows for a more flexible approach, especially when the activation distribution is not well-known or varies significantly during inference. - -The strategies share the same API. You simply need to set the strategy through the quantization configuration, as demonstrated in the previous example. For more details about setting quantization configuration, refer to the "Configuring AMD Quark for ONNX" chapter. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_symmetry.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_symmetry.rst deleted file mode 100644 index 92cc7213..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_quantization_symmetry.rst +++ /dev/null @@ -1,9 +0,0 @@ -Quantization Symmetry -===================== - -``Symmetric/Asymmetric quantization`` is primarily used to describe the -quantization of integers. ``Symmetric quantization`` involves scaling -the data by a fixed scaling factor, and zero-point is generally set at -zero. ``Asymmetric quantization`` uses a scaling factor and a zero-point -that can shift, allowing the zero of the quantized data to represent a -value other than zero. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_user_guide_onnx_model_inference_save_input_npy.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_user_guide_onnx_model_inference_save_input_npy.rst deleted file mode 100644 index 05d61f30..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_config_user_guide_onnx_model_inference_save_input_npy.rst +++ /dev/null @@ -1,111 +0,0 @@ -Using ONNX Model Inference and Saving Input Data in NPY Format -============================================================== - -This topic explains how to perform inference with an ONNX model using floating-point inputs and save the input data in `.npy` format. This approach facilitates data storage and reuse and can serve as a **calibration dataset** during model quantization, provided that the data adequately reflects the typical distribution of the model inputs. - -Through an example, we demonstrate how to define a simple dataset class (`InputDataset`), perform inference using an ONNX model, and save input data in `.npy` format to support subsequent model quantization. - -Detailed Code -------------- - -.. 
code-block:: python - - import onnxruntime as ort - import numpy as np - import os - from torch.utils.data import Dataset, DataLoader - - - # A simple dataset with two inputs (`input1`, `input2`) and random tensors. - # Users can customize data generation to match their model's needs. - class InputDataset(Dataset): - def __init__(self, num_samples): - super(InputDataset, self).__init__() - self.num_samples = num_samples - self.input1 = [np.random.rand(3, 224, 224).astype(np.float32) for _ in range(num_samples)] - self.input2 = [np.random.rand(10).astype(np.float32) for _ in range(num_samples)] - self.labels = [np.random.randint(0, 2) for _ in range(num_samples)] - - def __len__(self): - return self.num_samples - - def __getitem__(self, idx): - return { - "input1": self.input1[idx], - "input2": self.input2[idx], - "label": self.labels[idx] - } - - - dataset = InputDataset(num_samples=10) - data_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4, pin_memory=True) - - onnx_model_path = "path/to/your/float_model.onnx" - session = ort.InferenceSession(onnx_model_path) - input_names = [inp.name for inp in session.get_inputs()] - output_names = [out.name for out in session.get_outputs()] - - enable_data_caching = True - calibration_cache_dir = "calibration_data/" - - if enable_data_caching: - for name in input_names: - input_folder_path = os.path.join(calibration_cache_dir, name) - os.makedirs(input_folder_path, exist_ok=True) - - for batch_idx, batch in enumerate(data_loader): - input_feed = {} - - for name in input_names: - input_data = batch[name].numpy() - input_feed[name] = input_data - - # If `enable_data_caching` is True, save input data as `.npy` files by input name for each batch. 
- if enable_data_caching: - file_path = os.path.join(calibration_cache_dir, name, f"calib_{batch_idx+1:06d}.npy") - np.save(file_path, input_data) - print(f"Saved input data for {name} to {file_path}") - - outputs = session.run(output_names, input_feed) - - predictions = np.argmax(outputs[0], axis=1) - - print(f"Predictions for batch {batch_idx}: {predictions}") - - -The input data saved during ONNX inference can serve as a calibration dataset for model quantization. For instructions on how to use the saved NPY data as a calibration dataset, refer to :doc:`Calibration Data Path for AMD Quark Quantizer <./calibration_datasets>`. The output data format saved during inference is as follows: - -For Single-Input Models -~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: - - calibration_data/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... - -For Multi-Input Models -~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: - - calibration_data/ - input1_name/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... - input2_name/ - calib_000001.npy - calib_000002.npy - calib_000003.npy - calib_000004.npy - calib_000005.npy - ... 
- diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_BFPQuantizeDequantize.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_BFPQuantizeDequantize.rst deleted file mode 100644 index 5be43a82..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_BFPQuantizeDequantize.rst +++ /dev/null @@ -1,104 +0,0 @@ -BFPQuantizeDequantize -===================== - -BFPQuantizeDequantize - 1 -------------------------- - -Version -``````` -- **name**: BFPQuantizeDequantize - -- **domain**: com.amd.quark - -Summary -``````` - -Block Floating Point (BFP) groups numbers (e.g., tensors, arrays) into blocks, where each block shares a common exponent, and the values in the block are represented with individual mantissas (and the sign bit). This approach offers the performance and speed of 8-bit operations while bringing the precision closer to 16-bit operations. - -MicroeXponents (MX) extends the concept of BFP by introducing two levels of exponents: shared exponents for entire blocks and micro exponents for finer-grained sub-blocks. This two-level approach enables more precise scaling of individual elements within a block, reducing quantization error and improving the representational range. The paper https://arxiv.org/abs/2302.08007 introduces three specific formats: MX4, MX6 and MX9, which have different bits of mantissa. - -This operator converts floating-point values (typically 32-bit floating-point numbers) into BFP or MX values, then convert them back. It approximates the Quantize-Dequantize process and introduces quantization errors. - -.. note:: - - In addition to MicroeXponent, there is another technique Microscaling also abbreviated as MX, which has two levels of exponent as well. 
Unlike MicroeXponent's micro exponents shared over sub-blocks, Microscaling assigns a small-scale adjustment to individual exponents within the block by letting them have an independent data type, such as FP8, FP6 and etc., meaning that each element has its own micro exponent! This finer scaling granularity improves precision, as each value can adjust more dynamically to its specific range. We have implemented Microscaling data types in another custom operator MXQuantizeDequantize, please check out its specification for more details. - -Attributes -`````````` - -**bfp_method - STRING** (default is 'to_bfp'): - -(Optional) Specify the type of block floating-point, 'to_bfp' for the vanilla BFP and 'to_bfp_prime' for BFP's variant MicroeXponents. - -**axis - INT** (default is '1'): - -(Optional) The axis for spliting the input tensor to blocks. - -**bit_width - INT** (default is '16'): - -(Optional) Bits for the block float-point structure. Default is 16, which corresponds to the commonly used BFP16 that has 8 bits for the shared exponent, 1 bit for sign and 7 bits for mantissa. - -**block_size - INT** (default is '8'): - -(Optional) Number of elements in the block. - -**rounding_mode - INT** (default is '0'): - -(Optional) Rounding mode, 0 for rounding half away from zero, 1 for rounding half upward and 2 for rounding half to even. - -**sub_block_size - INT** (default is '2'): - -(Optional) Size of a sub block, only effective if 'bfp_method' is 'to_bfp_prime'. - -**sub_block_shift_bits - INT** (default is '1'): - -(Optional) Shift bits of a sub block, only effective if 'bfp_method' is 'to_bfp_prime'. - - -Table 1. 
Configurations of commonly used block float-point series data types - -+----------------------+----------------+------------------+------------------+------------------+ -| | BFP16 | MX4 | MX6 | MX9 | -+======================+================+==================+==================+==================+ -| bfp_method | to_bfp | to_bfp_prime | to_bfp_prime | to_bfp_prime | -+----------------------+----------------+------------------+------------------+------------------+ -| axis | 1 | 1 | 1 | 1 | -+----------------------+----------------+------------------+------------------+------------------+ -| bit_width | 16 | 11 | 13 | 16 | -+----------------------+----------------+------------------+------------------+------------------+ -| block_size | 8 | 16 | 16 | 16 | -+----------------------+----------------+------------------+------------------+------------------+ -| rounding_mode | 2 | 2 | 2 | 2 | -+----------------------+----------------+------------------+------------------+------------------+ -| sub_block_size | N/A | 2 | 2 | 2 | -+----------------------+----------------+------------------+------------------+------------------+ -| sub_block_shift_bits | N/A | 1 | 1 | 1 | -+----------------------+----------------+------------------+------------------+------------------+ - - -Inputs -`````` -- **x** (heterogeneous) - **T**: - -N-D input tensor. - -Outputs -``````` - -- **y** (heterogeneous) - **T**: - -N-D output tensor. It would have accuracy loss compared to the input tensor *x*. - -Type Constraints -```````````````` - -- **T** in ( tensor(float) ): - -Constrain input and output types to float tensors. - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedDequantizeLinear.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedDequantizeLinear.rst deleted file mode 100644 index 23744655..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedDequantizeLinear.rst +++ /dev/null @@ -1,75 +0,0 @@ -ExtendedDequantizeLinear -======================== - -ExtendedDequantizeLinear - 1 ----------------------------- - -Version -``````` -- **name**: ExtendedDequantizeLinear - -- **domain**: com.amd.quark - -Summary -``````` - -This operator extends the official ONNX DequantizeLinear operator by adding early support for uint16 and int16 quantization, along with additional support for bfloat16, float16, and uint32. It enables floating-point quantization to deploy models on edge devices and wide-bit quantization to facilitate detailed analysis of accuracy bottlenecks. - -The proposed ExtendedDequantizeLinear operator enhances the official DequantizeLinear by supporting additional data types while maintaining backward compatibility with the official one. If you want to try these new data types, you can consider using this operator, but note that you should register our custom operator library to onnxruntime before running the quantized model, and since onnxruntime will not convert the quantized node to a QOperator anymore, there is no additional acceleration effect at runtime. - -The linear dequantization operator. It consumes a quantized tensor, a scale, and a zero point to compute the full-precision tensor. The dequantization formula is *y = (x - x_zero_point) * x_scale*. *x_scale* and *x_zero_point* must have the same shape, determining the quantization’s granularity: a scalar for per-tensor/per-layer quantization, a 1-D tensor for per-axis/per-channel quantization. See ExtendedQuantizeLinear for details on quantization granularity. 
- -*x_zero_point* and *x* must have the same type. *x* and *y* must have the same shape. In the case of dequantizing int32, there’s no zero point (zero point is supposed to be 0). zero-point is usually not used in the case of float16 and bfloat16 types quantization, but the dequantization formula remains the same for consistency. The output type is the same as *x_scale*, it also determines the precision of the multiplication operation. - -Attributes -`````````` - -**axis - INT** (default is '1'): - -(Optional) The axis of the dequantizing dimension of the input tensor. Used for per-axis/per-channel quantization. Negative value means counting dimensions from the back. Accepted range is *[-r, r-1]* where *r = rank(input)*. - -Inputs -`````` - -Between 2 and 3 inputs. - -- **x** (heterogeneous) - **T1**: - -N-D quantized input tensor to be de-quantized. - -- **x_scale** (heterogeneous) - **T2**: - -Scale for input *x*. For per-tensor/per-layer dequantization the scale is a scalar, for per-axis/per-channel dequantization it is a 1-D Tensor. - -- **x_zero_point** (optional, heterogeneous) - **T3**: - -Zero point for input *x*. Shape must match *x_scale*. It’s optional. Zero point is 0 when it’s not specified. - -Outputs -``````` - -- **y** (heterogeneous) - **T3**: - -N-D full precision output tensor. It has the same shape as input *x*. - -Type Constraints -```````````````` - -- **T1** in ( tensor(int32), tensor(int16), tensor(int8), tensor(uint32), tensor(uint16), tensor(uint8), tensor(float16), tensor(bfloat16) ): - -The type of the input ‘x’. - -- **T2** in ( tensor(float) ): - -The type of the input ‘y_scale’. - -- **T3** in ( tensor(float) ): - -The type of the output ‘y’. - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedInstanceNormalization.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedInstanceNormalization.rst deleted file mode 100644 index be70c599..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedInstanceNormalization.rst +++ /dev/null @@ -1,61 +0,0 @@ -ExtendedInstanceNormalization -============================= - -ExtendedInstanceNormalization - 1 ---------------------------------- - -Version -``````` -- **name**: ExtendedInstanceNormalization - -- **domain**: com.amd.quark - -Summary -``````` - -This is a customized version of the official operator InstanceNormalization, it carries out instance normalization with bfloat16. - -y = scale * (x - mean) / sqrt(variance + epsilon) + B, where mean and variance are computed per instance per channel. - -Attributes -`````````` - -**epsilon - FLOAT** (default is '1e-05'): - -The epsilon value to use to avoid division by zero. - -Inputs -`````` - -- **input** (heterogeneous) - **T**: - -Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. For non image case, the dimensions are in the form of (N x C x D1 x D2 … Dn), where N is the batch size. - -- **scale** (heterogeneous) - **T**: - -The input 1-dimensional scale tensor of size C. - -- **B** (heterogeneous) - **T**: - -The input 1-dimensional bias tensor of size C. - -Outputs -``````` - -- **output** (heterogeneous) - **T**: - -The output tensor of the same shape as input. - -Type Constraints -```````````````` - -- **T** in ( tensor(float) ): - -Constrain input and output types to float tensors. - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedLSTM.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedLSTM.rst deleted file mode 100644 index 5a59acd2..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedLSTM.rst +++ /dev/null @@ -1,106 +0,0 @@ -ExtendedLSTM -============ - -ExtendedLSTM - 1 ----------------- - -Version -``````` -- **name**: ExtendedLSTM - -- **domain**: com.amd.quark - -Summary -``````` - -This is a customized version of the official operator LSTM, it computes an one-layer quantized LSTM with bfloat16. - -Attributes -`````````` - -**direction - STRING** (default is 'forward'): - -Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward (default), reverse, or bidirectional, but currently it supports "bidirectional" only. - -**hidden_size - INT**: - -Number of neurons in the hidden layer. - -**input_forget - INT** (default is '0'): - -Couple the input and forget gates if 1, Currently it can only be 0. - -**layout - INT** (default is '0'): - -The shape format of inputs and outputs, Currently it can only be 0. - -**x_scale - FLOAT** : - -Scale for input *X*. It only supports per-tensor/per-layer quantization, so the scale should be a scalar. - -**x_zero_point - INT**: - -Zero point for input *X*. Shape must match *x_scale*. It only supports uint16 quantization, so the zero point value should be in the range of [0, 65535]. - -**w_scale - FLOAT** : - -Scale for input *W*. It only supports per-tensor/per-layer quantization, so the scale should be a scalar. - -**w_zero_point - INT**: - -Zero point for input *W*. Shape must match *w_scale*. It only supports uint16 quantization, so the zero point value should be in the range of [0, 65535]. - -**r_scale - FLOAT** : - -Scale for input *R*. It only supports per-tensor/per-layer quantization, so the scale should be a scalar. 
- -**r_zero_point - INT**: - -Zero point for input *R*. Shape must match *r_scale*. It only supports uint16 quantization, so the zero point value should be in the range of [0, 65535]. - -**b_scale - FLOAT** : - -Scale for input *B*. It only supports per-tensor/per-layer quantization, so the scale should be a scalar. - -**b_zero_point - INT**: - -Zero point for input *B*. Shape must match *b_scale*. It only supports uint16 quantization, so the zero point value should be in the range of [0, 65535]. - -Inputs -`````` - -- **X** (heterogeneous) - **T**: - -The input sequences packed (and potentially padded) into one 3-D tensor with the shape of *[seq_length, batch_size, input_size]*. - -- **W** (heterogeneous) - **T**: - -The weight tensor for the gates. Concatenation of *W[iofc]* and *WB[iofc]* (if bidirectional) along dimension 0. The tensor has shape *[num_directions, 4*hidden_size, input_size]*. - -- **R** (heterogeneous) - **T**: - -The recurrence weight tensor. Concatenation of *R[iofc]* and *RB[iofc]* (if bidirectional) along dimension 0. This tensor has shape *[num_directions, 4*hidden_size, hidden_size]*. - -- **B** (optional, heterogeneous) - **T**: - -The bias tensor for input gate. Concatenation of *[Wb[iofc]*, *Rb[iofc]]*, and *[WBb[iofc], RBb[iofc]]* (if bidirectional) along dimension 0. This tensor has shape *[num_directions, 8*hidden_size]*. Optional: If not specified - assumed to be 0. - -Outputs -``````` - -- **Y** (optional, heterogeneous) - **T**: - -A tensor that concatenates all the intermediate output values of the hidden. It has shape *[seq_length, num_directions, batch_size, hidden_size]*. - -Type Constraints -```````````````` -- **T** in ( tensor(float) ): - -Constrain input and output types to float tensors. - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedQuantizeLinear.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedQuantizeLinear.rst deleted file mode 100644 index ee4fcebc..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_ExtendedQuantizeLinear.rst +++ /dev/null @@ -1,101 +0,0 @@ -ExtendedQuantizeLinear -====================== - -ExtendedQuantizeLinear - 1 --------------------------- - -Version -``````` -- **name**: ExtendedQuantizeLinear - -- **domain**: com.amd.quark - -Summary -``````` - -This operator extends the official ONNX QuantizeLinear operator by adding early support for uint16 and int16 quantization, along with additional support for bfloat16, float16, int32 and uint32. It enables floating-point quantization to deploy models on edge devices and wide-bit quantization to facilitate detailed analysis of accuracy bottlenecks. - -The proposed ExtendedQuantizeLinear operator enhances the official QuantizeLinear by supporting additional data types while maintaining backward compatibility with the official one. If you want to try these new data types, you can consider using this operator, but note that you should register our custom operator library to onnxruntime before running the quantized model, and since onnxruntime will not convert the quantized node to a QOperator anymore, there is no additional acceleration effect at runtime. - -The linear quantization operator consumes a high-precision tensor, a scale, and a zero point to compute the low-precision/quantized tensor. The scale factor and zero point must have the same shape, determining the quantization granularity. The quantization formula is *y = saturate((x / y_scale) + y_zero_point)*. 
- -Saturation is done according to: - -uint32: [0, 4294967295] - -int32: [−2147483648, 2147483647] - -uint16: [0, 65535] - -int16: [-32768, 32767] - -uint8: [0, 255] - -int8: [-128, 127] - -float16: [-65504, 65504] - -bfloat16: [-3.4e38, 3.4e38] - -For *(x / y_scale)*, it rounds to the nearest even for types. Refer to https://en.wikipedia.org/wiki/Rounding for details. - -*y_zero_point* and *y* must have the same type. *y_zero_point* is usually not used for quantization to float16 and bfloat16 types, but the quantization formula remains the same for consistency, and the type of the attribute *y_zero_point* still determines the quantization type. *x* and *y_scale* are allowed to have different types. The type of *y_scale* determines the precision of the division operation between *x* and *y_scale*, unless the precision attribute is specified. - -There are two supported quantization granularities, determined by the shape of *y_scale*. In all cases, *y_zero_point* must have the same shape as *y_scale*. - -Per-tensor/per-layer quantization: *y_scale* is a scalar. - -Per-axis/per-channel quantization: The scale must be a 1-D tensor, with the length of the quantization axis. For an input shape *(D0, ..., Di, ..., Dn)* and *axis=i*, *y_scale* is a 1-D tensor of length *Di*. - -Attributes -`````````` - -**axis - INT** (default is '1'): - -(Optional) The axis of the dequantizing dimension of the input tensor. Used only for per-axis/per-channel quantization. Negative value means counting dimensions from the back. Accepted range is *[-r, r-1]* where *r = rank(input)*. When the rank of the input is 1, per-tensor/per-layer quantization is applied, rendering the axis unnecessary in this scenario. - -Inputs -`````` - -Between 2 and 3 inputs. - -- **x** (heterogeneous) - **T1**: - -N-D full precision Input tensor to be quantized. - -- **y_scale** (heterogeneous) - **T2**: - -Scale for doing quantization to get *y*. 
For per-tensor/per-layer quantization the scale is a scalar, for per-axis/per-channel quantization it is a 1-D Tensor. - -- **y_zero_point** (optional, heterogeneous) - **T3**: - -Zero point for doing quantization to get *y*. Shape must match *y_scale*. Default is uint8 with zero point of 0 if it’s not specified. - -Outputs -``````` - -- **y** (heterogeneous) - **T3**: - -N-D quantized output tensor. It has same shape as input *x*. - -Type Constraints -```````````````` - -- **T1** in ( tensor(float) ): - -The type of the input ‘x’. - -- **T2** in ( tensor(float) ): - -The type of the input ‘y_scale’. - -- **T3** in ( tensor(int32), tensor(int16), tensor(int8), tensor(uint32), tensor(uint16), tensor(uint8), tensor(float16), tensor(bfloat16) ): - -The type of the input ‘y_zero_point‘ and the output ‘y‘. - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_MXQuantizeDequantize.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_MXQuantizeDequantize.rst deleted file mode 100644 index a7d7fa93..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_custom_operators_MXQuantizeDequantize.rst +++ /dev/null @@ -1,84 +0,0 @@ -MXQuantizeDequantize -==================== - -MXQuantizeDequantize - 1 ------------------------- - -Version -``````` -- **name**: MXQuantizeDequantize - -- **domain**: com.amd.quark - -Summary -``````` - -Microscaling, also known as OCP (Open Compute Project) MX, assigns a small-scale adjustment to individual exponent within the block: in addition to the shared exponent, each element also has its own micro exponent, meaning that the element has an independent data type. This finer granularity improves precision, as each value can adjust more dynamically to its specific range. The `OCP MX specification `_ introduces several concrete formats, including MXFP8(E5M2), MXFP8(E4M3), MXFP6(E3M2), MXFP6(E2M3), MXFP4(E2M1) and MXINT8. 
- -This operator converts floating-point values (typically 32-bit floating-point numbers) into Microscaling values, and then convert them back. It approximates the Quantize-Dequantize process and introduces quantization errors. - -.. note:: - - Compared with MicroeXponents, Microscaling applies linear scaling per element and is simpler, hardware-friendly, and more stable, making it ideal for production and general-purpose deployment. In contrast, MicroeXponents shares a dynamic exponent within groups, offering better compression and dynamic range at the cost of higher complexity and potential numerical instability. For most applications, especially those targeting standard inference engines or requiring robustness, perhaps Microscaling is the preferred choice. - -Attributes -`````````` - -**element_dtype - STRING** (default is 'int8'): - -(Optional) Specify the type of elements, options are 'fp8_e5m2', 'fp8_e4m3', 'fp6_e3m2', 'fp6_e2m3', 'fp4_e2m1', 'int8'. - -**axis - INT** (default is '1'): - -(Optional) The axis for spliting the input tensor to blocks. - -**block_size - INT** (default is '32'): - -(Optional) Number of elements in the block. - -**rounding_mode - INT** (default is '0'): - -(Optional) Rounding mode, 0 for rounding half away from zero, 1 for rounding half upward and 2 for rounding half to even. - - -Table 1. 
Configurations of OCP MX data types - -+----------------------+------------------+------------------+------------------+------------------+------------------+------------------+ -| | MXFP8(E5M2) | MXFP8(E4M3) | MXFP6(E3M2) | MXFP6(E2M3) | MXFP4(E2M1) | MXINT8 | -+======================+==================+==================+==================+==================+==================+==================+ -| element_dtype | fp8_e5m2 | fp8_e4m3 | fp6_e3m2 | fp6_e2m3 | fp4_e2m1 | int8 | -+----------------------+------------------+------------------+------------------+------------------+------------------+------------------+ -| axis | 1 | 1 | 1 | 1 | 1 | 1 | -+----------------------+------------------+------------------+------------------+------------------+------------------+------------------+ -| block_size | 32 | 32 | 32 | 32 | 32 | 32 | -+----------------------+------------------+------------------+------------------+------------------+------------------+------------------+ -| rounding_mode | 2 | 2 | 2 | 2 | 2 | 2 | -+----------------------+------------------+------------------+------------------+------------------+------------------+------------------+ - - -Inputs -`````` -- **x** (heterogeneous) - **T**: - -N-D input tensor. - -Outputs -``````` - -- **y** (heterogeneous) - **T**: - -N-D output tensor. It would have accuracy loss compared to the input tensor *x*. - -Type Constraints -```````````````` - -- **T** in ( tensor(float) ): - -Constrain input and output types to float tensors. - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_gpu_usage_guide.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_gpu_usage_guide.rst deleted file mode 100644 index e0d06241..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_gpu_usage_guide.rst +++ /dev/null @@ -1,94 +0,0 @@ -Accelerate with GPUs -==================== - -This guide provides detailed instructions on how to use ROCm and CUDA to accelerate models on GPUs. 
It covers the configuration steps for calibration, fast finetuning, and BFP16 models inference. - -Environment Setup ------------------ - -- **ONNX Runtime with ROCm**: - For AMD GPUs, refer to the `AMD - ROCm | ONNX Runtime documentation `_ for installation and setup instructions. - -- **ONNX Runtime with CUDA**: - For NVIDIA GPUs, refer to the `NVIDIA - CUDA | ONNX Runtime documentation `_ for installation and setup instructions. - -Calibration ------------ - -In the quantization workflow, calibration adjusts the model's weights and activation values based on a small amount of input data to improve quantization accuracy. When using AMD GPUs, you might accelerate the calibration process with `ROCMExecutionProvider`, and also you can use `CUDAExecutionProvider` for NVIDIA GPUs. The following is an example configuration: - -.. code-block:: python - - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - quant_format=quark.onnx.QuantFormat.QDQ, - execution_providers=['ROCMExecutionProvider'] - ) - - config = Config(global_quant_config=quant_config) - -.. note:: - By setting `execution_providers=['ROCMExecutionProvider']`, the calibration process is configured to run on the GPU for faster execution. Please check if GPUs are available beforehand. - -Fast Finetune -------------- - -AMD Quark for ONNX offers a fast finetuning feature that improves model accuracy after post-training quantization (PTQ). By adjusting the relevant parameters, you can ensure that both the PyTorch training phase and ONNX inference phase utilize GPU acceleration. - -Here is an example configuration for the `adaround` optimization algorithm: - -.. 
code-block:: python - - from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - include_fast_ft=True, - extra_options={ - 'ActivationSymmetric': True, - 'FastFinetune': { - 'OptimAlgorithm': 'adaround', - 'OptimDevice': "cuda:0", # Use GPU 0 in PyTorch training - 'InferDevice': 'cuda:0', # Use GPU 0 for ONNX inference - 'BatchSize': 1, - 'NumIterations': 1000, - 'LearningRate': 0.1, - } - } - ) - - config = Config(global_quant_config=quant_config) - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -.. note:: - - `OptimDevice: "cuda:0"` indicates that GPU (supports AMD and NVIDIA GPUs) acceleration is used during PyTorch training. - - `InferDevice: 'cuda:0'` indicates that GPU acceleration is used during ONNX inference via the `ROCMExecutionProvider` or `CUDAExecutionProvider`. - -Inference ---------- - -For quantized model's inference, you can also use `ROCMExecutionProvider` or `CUDAExecutionProvider` to enable GPU acceleration. Below is an example that demonstrates how to use AMD GPUs to accelerate ONNX inference: - - -.. code-block:: python - - import onnxruntime as ort - from quark.onnx import get_library_path - - so = ort.SessionOptions() - so.register_custom_ops_library(get_library_path('ROCM')) - session = ort.InferenceSession("quantized_model.onnx", so, providers=['ROCMExecutionProvider']) - print("Execution provider:", session.get_providers()) # Ensure 'ROCMExecutionProvider' is present - - output = session.run(None, {"input": input_data}) - -.. 
note:: - If the `session.get_providers()` output includes `ROCMExecutionProvider`, the inference process is running on the GPU for acceleration. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_examples.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_examples.rst deleted file mode 100644 index c501d0a6..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_examples.rst +++ /dev/null @@ -1,96 +0,0 @@ -Accessing ONNX Examples -======================= - -Users can get the example code after downloading and unzipping ``amd_quark.zip`` (referring to :doc:`Installation Guide <../install>`). -The example folder is in amd_quark.zip. - - Directory Structure of the ZIP File: - - :: - - + amd_quark.zip - + amd_quark.whl - + examples # HERE IS THE EXAMPLES - + torch - + language_modeling - + diffusers - + ... - + onnx # HERE ARE THE ONNX EXAMPLES - + image_classification - + language_models - + ... - + ... - -ONNX Examples in AMD Quark for This Release -------------------------------------------- - -.. toctree:: - :caption: Improving Model Accuracy - :maxdepth: 1 - - Block Floating Point (BFP) - MX Formats - Fast Finetune AdaRound - Fast Finetune AdaQuant - Cross-Layer Equalization (CLE) - Layer-wise Percentile - GPTQ - Mixed Precision - Smooth Quant - QuaRot - Auto-Search for General Yolov3 ONNX Model Quantization - Auto-Search for Ryzen AI Yolo-nas ONNX Model Quantization - Auto-Search for Ryzen AI Resnet50 ONNX Model Quantization - Auto-Search for Ryzen AI Yolov3 ONNX Quantization with Custom Evalutor - -.. toctree:: - :caption: Dynamic Quantization - :maxdepth: 1 - - Quantizing an Llama-2-7b Model - Quantizing an OPT-125M Model - -.. toctree:: - :caption: Image Classification - :maxdepth: 1 - - Quantizing a ResNet50-v1-12 Model - -.. toctree:: - :caption: Language Models - :maxdepth: 1 - - Quantizing an OPT-125M Model - -.. 
toctree:: - :caption: Weights-Only Quantization - :maxdepth: 1 - - Quantizing an Llama-2-7b Model Using the ONNX MatMulNBits - Quantizating Llama-2-7b model using MatMulNBits - -.. toctree:: - :caption: Crypto Mode - :maxdepth: 1 - - Quantizing a ResNet50 model in crypto mode - -.. _ryzenai_onnx_examples: -.. toctree:: - :caption: Ryzen AI Quantization - :maxdepth: 1 - - Best Practice for Quantizing an Image Classification Model - Best Practice for Quantizing an Object Detection Model - -.. toctree:: - :caption: Hugging Face TIMM Models - :maxdepth: 1 - - Hugging Face TIMM Quantization - -.. toctree:: - :caption: Yolo_nas and Yolox Models - :maxdepth: 1 - - Yolo_nas and Yolox Quantization diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_faq.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_faq.rst deleted file mode 100644 index 466c7ded..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_onnx_faq.rst +++ /dev/null @@ -1,55 +0,0 @@ -Frequently Asked Questions (FAQ) -================================ - - -AMD Quark for ONNX ------------------- - -Model Issues -~~~~~~~~~~~~ - -**Issue 1**: - -Error of "ValueError:Message onnx.ModelProto exceeds maximum protobuf size of 2GB" - -**Solution**: - -This error is caused by the input model size exceeding 2GB. Set ``optimize_model=False`` and ``use_external_data_format=True``. - -**Issue 2**: - -Error of NCHW or NHWC of "index: 1 Got: 244 Expected: 3 index: 2 Got: 3 Expected: 224" - -**Solution**: - -This error is caused by the calibration data is NCHW and the shape of model input is NHWC. Set ``convert_nchw_to_nhwc=True``. For more detailed information, see :doc:`Tools `. - -Quantization Issues -~~~~~~~~~~~~~~~~~~~ - -**Issue 1**: - -Error of "onnxruntime.capi.onnxruntime_pybind11_state.RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running Reshape node." 
- -**Solution**: - -For networks with an ROI head, such as Mask R-CNN or Faster R-CNN, quantization errors might arise if ROIs are not generated in the network. -Use quark.onnx.PowerOfTwoMethod.MinMSE or quark.onnx.CalibrationMethod.Percentile quantization and perform inference with real data. - -Quantization Config Issues -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Issue 1**: - -Does XINT8 refer to INT8? What’s the difference between XINT8 and A8W8? - - -**Solution**: - -XINT8 and A8W8 are both INT8 Quantization. XINT8 and A8W8 are both very common quantization configurations in our Quark ONNX quantizer. A8W8 uses symmetric INT8 activation and weights quantization with float scales. XINT8 uses symmetric INT8 activation and weights quantization with power-of-two scales. A8W8 uses the MinMax method, and XINT8 uses MinMSE to improve quantization precision. XINT8 usually has greater advantages in hardware acceleration. For more detailed information about XINT8, see :doc:`Power-of-Two Scales (XINT8) Quantization <../supported_accelerators/ryzenai/tutorial_xint8_quantize>`. For more details information about A8W8, see :doc:`Float Scales (A8W8 and A16W8) Quantization <../supported_accelerators/ryzenai/tutorial_a8w8_and_a16w8_quantize>`. -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_optional_utilities.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_optional_utilities.rst deleted file mode 100644 index 8591149d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_optional_utilities.rst +++ /dev/null @@ -1,157 +0,0 @@ -Optional Utilities -================== - -Exporting PyTorch Models to ONNX --------------------------------- - -.. note:: - Skip this step if you already have the ONNX format model. - -For PyTorch models, it is recommended to use the TorchScript-based ONNX exporter for exporting ONNX models. Refer to the `PyTorch documentation for guidance `__. - -Tips: ------ - -1. Before exporting, perform `model.eval()`. -2. 
Models with opset 17 are recommended. -3. NPU_CNN platforms do not support dynamic input shapes and allow only a batch size of 1. Ensure that the input shape is fixed and the batch dimension is set to 1. - -Example code: - -.. code-block:: python - - torch.onnx.export( - model, - input, - model_output_path, - opset_version=17, - input_names=['input'], - output_names=['output'], - ) - -- **Opset Versions**: Models with opset 17 are recommended. Models must use opset 10 or higher to be quantized. If models use an opset lower than 10, you should reconvert them to ONNX from their original framework using a later opset. Alternatively, refer to the usage of the version converter for the `ONNX Version Converter `__. Opset 10 does not support some node fusions and might not achieve the best performance. We recommend updating the model to opset 17 for better performance. Moreover, per-channel quantization is supported for models using opset 13 or higher. - -- **Large Models > 2GB**: Because of the 2 GB file size limit of Protobuf, additional data for ONNX models exceeding 2 GB is stored separately. Ensure that the ``.onnx`` file and the data files are placed in the same directory. Also, set the ``use_external_data_format`` parameter to ``True`` for large models when quantizing. - - -Pre-processing on the Float Model ---------------------------------- - -Pre-processing is the transformation of a float model to prepare it for quantization. It consists of the following three optional steps: - -- **Symbolic shape inference**: This step is best suited for transformer models. -- **Model optimization**: This step uses the ONNX Runtime native library to rewrite the computation graph, including merging computation nodes and eliminating redundancies to improve runtime efficiency. -- **ONNX shape inference**. - -The goal of these steps is to improve quantization quality. The ONNX Runtime quantization tool works best when the tensor's shape is known. 
Both symbolic shape inference and ONNX shape inference help determine tensor shapes. Symbolic shape inference works best with transformer-based models, and ONNX shape inference works with other models. - -Model optimization performs certain operator fusions that make the quantization tool's job easier. For instance, a convolution operator followed by batch normalization can be fused into one during optimization, which can be quantized very efficiently. - -Unfortunately, a known issue in ONNX Runtime is that model optimization cannot output a model size greater than 2 GB. Therefore, for large models, optimization must be skipped. - -The pre-processing API is in the Python module ``onnxruntime.quantization.shape_inference``, function ``quant_pre_process()``. - -.. code-block:: python - - from onnxruntime.quantization import shape_inference - - shape_inference.quant_pre_process( - input_model_path: str, - output_model_path: str, - skip_optimization: bool = False, - skip_onnx_shape: bool = False, - skip_symbolic_shape: bool = False, - auto_merge: bool = False, - int_max: int = 2**31 - 1, - guess_output_rank: bool = False, - verbose: int = 0, - save_as_external_data: bool = False, - all_tensors_to_one_file: bool = False, - external_data_location: str = "./", - external_data_size_threshold: int = 1024,) - -**Arguments** - -- **input_model_path**: (String) This parameter specifies the file path of the input model that is to be pre-processed for quantization. -- **output_model_path**: (String) This parameter specifies the file path where the pre-processed model is saved. -- **skip_optimization**: (Boolean) This flag indicates whether to skip the model optimization step. If set to True, model optimization is skipped, which may cause ONNX shape inference failure for some models. The default value is False. -- **skip_onnx_shape**: (Boolean) This flag indicates whether to skip the ONNX shape inference step. 
The symbolic shape inference is most effective with transformer-based models. Skipping all shape inferences may reduce the effectiveness of quantization, as a tensor with an unknown shape cannot be quantized. The default value is False. -- **skip_symbolic_shape**: (Boolean) This flag indicates whether to skip the symbolic shape inference step. Symbolic shape inference is most effective with transformer-based models. Skipping all shape inferences may reduce the effectiveness of quantization, as a tensor with an unknown shape cannot be quantized. The default value is False. -- **auto_merge**: (Boolean) This flag determines whether to automatically merge symbolic dimensions when a conflict occurs during symbolic shape inference. The default value is False. -- **int_max**: (Integer) This parameter specifies the maximum integer value that is to be considered as boundless for operations like slice during symbolic shape inference. The default value is 2**31 - 1. -- **guess_output_rank**: (Boolean) This flag indicates whether to guess the output rank to be the same as input 0 for unknown operations. The default value is False. -- **verbose**: (Integer) This parameter controls the level of detailed information logged during inference. A value of 0 turns off logging, 1 logs warnings, and 3 logs detailed information. The default value is 0. -- **save_as_external_data**: (Boolean) This flag determines whether to save the ONNX model to external data. The default value is False. -- **all_tensors_to_one_file**: (Boolean) This flag indicates whether to save all the external data to one file. The default value is False. -- **external_data_location**: (String) This parameter specifies the file location where the external file is saved. The default value is "./". -- **external_data_size_threshold**: (Integer) This parameter specifies the size threshold for external data. The default value is 1024. 
- -Evaluating the Quantized Model ------------------------------- - -If you have scripts to evaluate float models, you can replace the float model file with the quantized model for evaluation. - -If BFP/BF16/FP16/int32 data types are used in the quantized model, it is necessary to register the custom operations library to the ONNX Runtime inference session before evaluation. For example: - -.. code-block:: python - - import onnxruntime as ort - - so = ort.SessionOptions() - so.register_custom_ops_library(quark.onnx.get_library_path()) - session = ort.InferenceSession(quantized_model, so) - -Dumping the Simulation Results ------------------------------- - -Sometimes after deploying the quantized model, it is necessary to compare the simulation results on the CPU/GPU and the output values on the DPU. You can use the ``dump_model`` of the AMD Quark ONNX API to dump the simulation results with the quantized_model. Currently, only the models containing FixNeuron nodes support this feature. For models using ``QuantFormat.QDQ``, you can set ``dump_float`` to True to save float data for all nodes' results. - -.. code-block:: python - - # This function dumps the simulation results of the quantized model, - # including weights and activation results. - quark.onnx.dump_model( - model, - dump_data_reader=None, - random_data_reader_input_shape={}, - dump_float=False, - output_dir='./dump_results',) - -**Arguments** - -- **model**: (String or ModelProto) This parameter specifies the file path of or the ModelProto object of the quantized model whose simulation results are to be dumped. -- **dump_data_reader**: (CalibrationDataReader or None) This parameter is a data reader that is used for the dumping process. The first batch is taken as input. If you wish to use random data for a quick test, you can set `dump_data_reader` to None. The default value is None. -- **random_data_reader_input_shape**: (Dict) It is required to use a dict {name: shape} to specify a certain input. 
For example, `random_data_reader_input_shape={"image": [1, 3, 224, 224]}` for the input named "image".
list-table:: - :header-rows: 1 - - * - Quantized - - Node Name - - Saved Weights or Activations - - - * - Yes - - /conv1/Conv_out - - {output_dir}/dump_results/\_conv1_Conv_output_0_DequantizeLinear_Output.bin - - {output_dir}/dump_results/\_conv1_Conv_output_0_DequantizeLinear_Output.txt - * - Yes - - onnx::Conv_501_DequantizeLinear - - {output_dir}/dump_results/onnx::Conv_501_DequantizeLinear_Output.bin - - {output_dir}/dump_results/onnx::Conv_501_DequantizeLinear_Output.txt - * - No - - /avgpool/GlobalAveragePool - - {output_dir}/dump_results/\_avgpool_GlobalAveragePool_output_0_float.bin - - {output_dir}/dump_results/\_avgpool_GlobalAveragePool_output_0_float.txt diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tools.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tools.rst deleted file mode 100644 index cce51de6..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tools.rst +++ /dev/null @@ -1,254 +0,0 @@ -Tools -===== - -Convert a float32 model to a float16 model ------------------------------------------- - -Since the quark.onnx tool supports both float32 and float16 models quantization currently, converting a model from float32 to float16 is required when quantizing a float32 model. - -Use the ``convert_fp32_to_fp16`` tool to convert a float32 model to a float16 model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_fp16 --input $FLOAT_16_ONNX_MODEL_PATH --output $FLOAT_16_ONNX_MODEL_PATH - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_fp16 --input $FLOAT_16_ONNX_MODEL_PATH --output $FLOAT_16_ONNX_MODEL_PATH --save_as_external_data - -Use the convert_fp32_to_fp16 tool to convert a float32 model to a float16 model with inputs and outputs types maintain the float32 data type: - -.. 
code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_fp16 --input $FLOAT_16_ONNX_MODEL_PATH --output $FLOAT_16_ONNX_MODEL_PATH --keep_io_types - - -Convert a Float32 Model to a BFloat16 Model -------------------------------------------- - -Because of the increasing demands for BFloat16 deployment, a conversion tool is necessary to convert a Float32 model to a BFloat16 model. Four BFloat16 implementation formats are provided: **vitisqdq**, **with_cast**, **simulate_bf16**, and **bf16**. - -- **vitisqdq**: Implements BFloat16 conversion by inserting VitisQDQ of BFloat16. -- **with_cast**: Implements BFloat16 conversion by inserting Cast operations to convert from Float32 to BFloat16. -- **simulate_bf16**: Implements BFloat16 conversion by storing all BFloat16 weights in float format. -- **bf16**: Implements BFloat16 conversion by directly converting the Float32 model to BFloat16, with only the input and output remaining as float. - -The default value is **with_cast**. - -Use the ``convert_fp32_to_bf16`` tool to convert a Float32 model to a BFloat16 model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_bf16 --input $FLOAT_32_ONNX_MODEL_PATH --output $BFLOAT_16_ONNX_MODEL_PATH --format $BFLOAT_FORMAT - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_bf16 --input $FLOAT_32_ONNX_MODEL_PATH --output $BFLOAT_16_ONNX_MODEL_PATH --format $BFLOAT_FORMAT --save_as_external_data - -Convert a Float16 Model to a BFloat16 Model -------------------------------------------- - -Because of increasing demands for BFloat16 deployment, a conversion tool is necessary to convert a Float16 model to a BFloat16 model. Four BFloat16 implementation formats are provided: **vitisqdq**, **with_cast**, **simulate_bf16**, and **bf16**. - -- **vitisqdq**: Implements BFloat16 conversion by inserting VitisQDQ of BFloat16. 
-- **with_cast**: Implements BFloat16 conversion by inserting Cast operations to convert from Float16 to BFloat16. -- **simulate_bf16**: Implements BFloat16 conversion by storing all BFloat16 weights in float format. -- **bf16**: Implements BFloat16 conversion by directly converting the Float16 model to BFloat16, with only the input and output remaining as Float16. - -The default value is **with_cast**. - -Use the ``convert_fp16_to_bf16`` tool to convert a Float16 model to a BFloat16 model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_bf16 --input $FLOAT_16_ONNX_MODEL_PATH --output $BFLOAT_16_ONNX_MODEL_PATH --format $BFLOAT_FORMAT - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_bf16 --input $FLOAT_16_ONNX_MODEL_PATH --output $BFLOAT_16_ONNX_MODEL_PATH --format $BFLOAT_FORMAT --save_as_external_data - -Convert a Float16 Model to a Float32 Model ------------------------------------------- - -Because the AMD Quark ONNX tool only supports Float32 models quantization currently, converting a model from Float16 to Float32 is required when quantizing a Float16 model. - -Use the ``convert_fp16_to_fp32`` tool to convert a Float16 model to a -Float32 model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_fp32 --input $FLOAT_16_ONNX_MODEL_PATH --output $FLOAT_32_ONNX_MODEL_PATH - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_fp32 --input $FLOAT_16_ONNX_MODEL_PATH --output $FLOAT_32_ONNX_MODEL_PATH --save_as_external_data - -Convert a Float32 Model to a BFP16 Model ----------------------------------------- - -Since there are more and more BFP16 deployment demands, we need a conversion tool to directly convert a Float32 model to a BFP16 model. - -Use the ``convert_fp32_to_bfp16`` tool to convert a Float32 model to a BFP16 model: - -.. 
code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_bfp16 --input $FLOAT_32_ONNX_MODEL_PATH --output $BFP_16_ONNX_MODEL_PATH - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_bfp16 --input $FLOAT_32_ONNX_MODEL_PATH --output $BFP_16_ONNX_MODEL_PATH --save_as_external_data - -Convert a Float16 Model to a BFP16 Model ----------------------------------------- - -Because there are more and more BFP16 deployment demands, we need a conversion tool to directly convert a Float16 model to a BFP16 model. - -Use the ``convert_fp16_to_bfp16`` tool to convert a Float16 model to a BFP16 model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_bfp16 --input $FLOAT_16_ONNX_MODEL_PATH --output $BFP_16_ONNX_MODEL_PATH - -If the input model is larger than 2GB, please use this command instead. - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_bfp16 --input $FLOAT_16_ONNX_MODEL_PATH --output $BFP_16_ONNX_MODEL_PATH --save_as_external_data - -Convert Quark extended custom ops to deprecated Vitis custom ops ----------------------------------------------------------------- - -For compatibility needs, this tool is used to convert all the Quark extended custom ops to deprecated Vitis custom ops, or vice versa. - -Use the ``convert_custom_ops`` tool to do the conversion: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_custom_ops --input_model INPUT_MODEL_PATH --output_model OUTPUT_MODEL_PATH - -.. note:: - - If you want to convert all the deprecated Vitis custom ops to Quark extended custom ops, pass "--reverse_conversion True" to the command. If the model is larger than 2GB, please add "--external_data True" to the command. 
- -Convert a NCHW input Model to a NHWC Model ------------------------------------------- - -Given that some models are designed with an input shape of **NCHW** instead of **NHWC**, it is recommended to convert an NCHW input model to NHWC before quantizing a Float32 model. The conversion steps execute even if the model is already NHWC. Therefore, ensure the input model is in NCHW format. - -.. note:: - - The data layout, whether NCHW or NHWC, does not influence the quantization process itself. However, deployment efficiency is affected by the kernel design, which is often optimized for NHWC. Consequently, when input data is in NCHW format, a conversion to NHWC is recommended. This conversion introduces a small computational overhead, though the overall performance benefits from the optimized layout. While a transpose operation is required for the format change, the total number of other operations remains constant. - -Use the ``convert_nchw_to_nhwc`` tool to convert an NCHW model to an NHWC model: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_nchw_to_nhwc --input $NCHW_ONNX_MODEL_PATH --output $NHWC_ONNX_MODEL_PATH - -Quantize a Float Model with Random Data ----------------------------------------- - -Customers often need to verify the performance of the quantized model regardless of quantization accuracy. So we support the quantization without calibration dataset using random data generated automatically. - -Use the ``random_quantize`` tool to quantize an ONNX model: - -.. code-block:: bash - - python -m quark.onnx.tools.random_quantize --input_model_path [INPUT_MODEL_PATH] --quantized_model_path [QUANTIZED_MODEL_PATH] - -Convert a A8W8 NPU Model to a A8W8 CPU Model --------------------------------------------- - -Given that some models are quantized by A8W8 NPU, it is convenient and efficient to convert them to A8W8 CPU models. - -Use the ``convert_a8w8_npu_to_a8w8_cpu`` tool to convert a A8W8 NPU model to a A8W8 CPU model: - -.. 
code-block:: bash - - python -m quark.onnx.tools.convert_a8w8_npu_to_a8w8_cpu --input [INPUT_PATH] --output [OUTPUT_PATH] - -Print Names and Quantity of A16W8 and A8W8 Conv for Mixed-Precision Models --------------------------------------------------------------------------- - -For mixed-precision models, such as those that combine A16W8 and A8W8, use the ``print_a16w8_a8w8_nodes`` tool to print the names and quantity of A16W8 and A8W8 Conv, ConvTranspose, Gemm, and MatMul nodes. The MatMul node must have one and only one set of weights. - -.. code-block:: bash - - python -m quark.onnx.tools.print_a16w8_a8w8_nodes --input [INPUT_PATH] - -Convert a U16U8 Quantized Model to a U8U8 Model ------------------------------------------------ - -Convert a U16U8 model (activations are quantized to UINT16 and weights to UINT8) to a U8U8 model without calibration. - -Use the ``convert_u16u8_to_u8u8`` tool to do the conversion: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_u16u8_to_u8u8 --input [INPUT_PATH] --output [OUTPUT_PATH] - -Evaluate accuracy between baseline and quantized results folders ----------------------------------------------------------------- - -We often need to compare the differences in output images before and after quantization. Currently, we support four metrics: cosine similarity, L2 loss, PSNR, and VMAF, as well as three formats: JPG, PNG, and NPY. - -Use the ``evaluate`` tool: - -.. code-block:: bash - - python -m quark.onnx.tools.evaluate --baseline_results_folder [BASELINE_RESULTS_FOLDER_PATH] --quantized_results_folder [QUANTIZED_RESULTS_FOLDER_PATH] - -Replace `inf` and `-inf` Values in ONNX Model Weights ------------------------------------------------------ - -Replace `inf` or `-inf` values in ONNX model weights using the ``replace_inf_weights`` tool with a specified value. - -Use the ``replace_inf_weights`` tool to do the conversion: - -.. 
code-block:: bash - - python -m quark.onnx.tools.replace_inf_weights --input_model [INPUT_MODEL_PATH] --output_model [OUTPUT_MODEL_PATH] --replace_inf_value [REPLACE_INF_VALUE] - -.. note:: - - The default replacement value is `10000.0`. This might lead to precision degradation. Adjust the replacement value based on your model and application needs. - -Assign Shapes for All Tensors in A Given Model ----------------------------------------------- - -An onnx model may be missing the shape of some tensors. So we provide a tool that automatically assigns the correct shape to all tensors, regardless of whether the input model is a float model or a QDQ model. - -Use the ``fix_shapes`` tool: - -.. code-block:: bash - - python -m quark.onnx.tools.fix_shapes --input_model_path [INPUT_MODEL_PATH] --output_model_path [OUTPUT_MODEL_PATH] - -Convert the Int32 Bias of the Quantized Model to Int16 ------------------------------------------------------- - -The bias in a quantized model may need to be int16 instead of int32 in some cases. So we provide a tool that converts the int32 bias of a quantized model to int16. - -.. note:: - - 1. ONNXRuntime only supports Int16 Bias inference when the opset version is 21 or higher, so please ensure that the input model's opset version is 21 or higher. - -.. note:: - - 2. It is recommended to use the parameter **Int16Bias** together with **ADAROUND** or **ADAQUANT**; otherwise, the quantized model with Int16 bias may suffer from poor accuracy. - -Use the ``convert_bias_int32_to_int16`` tool: - -.. 
code-block:: bash - - python -m quark.onnx.tools.convert_bias_int32_to_int16 --input_model_path [INPUT_MODEL_PATH] --output_model_path [OUTPUT_MODEL_PATH] diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bf16_quantization.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bf16_quantization.rst deleted file mode 100644 index f621ac11..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bf16_quantization.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. raw:: html - - - -Introduction -============ - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -BFloat16 (Brain Floating Point 16) is a floating-point data format used in deep learning to reduce memory usage and computation while maintaining sufficient numerical precision. Unlike other quantization formats like INT8 or FP16, BF16 maintains the same range as FP32 but reduces precision, making it particularly useful for training and inference in neural networks. - -AMD accelerators like latest CPU, NPU and GPU devices support BF16 natively, enabling faster matrix operations and reducing latency. In this tutorial, we will explain how to quantize a model into BF16 using AMD Quark. - -BF16 quantization in AMD Quark for ONNX ---------------------------------------- - -Here is a simple example of how to enable BF16 quantization. - -.. 
code:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantType, ExtendedQuantFormat - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - extra_options={'BF16QDQToCast': True} - ) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -The BF16 quantization in the previous example inserts a custom Q/DQ pair for each tensor, which -converts the model weights and activations from FP32 to BF16 directly, just as most frameworks do. - -In fact, BF16 has the same range as FP32, but with only 7 bits for the mantissa, it sacrifices -precision. This means small differences between numbers can disappear, which can amplify -numerical instability and cause overflow problems. - -To address the overflow issue in BF16 quantization, you can apply calibration and re-scale -weights and activations to better align with dynamic range and utilize the dense numeric -area near zero of BF16. To enable this, set ``WeightScaled`` or ``ActivationScaled`` -in extra options if you are seeing overflow issues. - -.. code:: python - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - extra_options={ - 'WeightScaled': True, - 'ActivationScaled': True, - } - ) - -.. note:: - When running inference with ONNX Runtime, you need to register the custom ops' shared object file (.so, Linux) or DLL file (Windows) in the ORT session options. - -.. 
code:: python - - import onnxruntime - from quark.onnx import get_library_path - - if 'ROCMExecutionProvider' in onnxruntime.get_available_providers(): - device = 'ROCM' - providers = ['ROCMExecutionProvider'] - elif 'CUDAExecutionProvider' in onnxruntime.get_available_providers(): - device = 'CUDA' - providers = ['CUDAExecutionProvider'] - else: - device = 'CPU' - providers = ['CPUExecutionProvider'] - - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device)) - session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - -How to Further Improve the Accuracy for BF16 Quantization? ----------------------------------------------------------- - -You can finetune the quantized model to further improve the accuracy of BF16 quantization. -The Fast Finetuning function in AMD Quark for ONNX includes two algorithms: AdaRound and AdaQuant. -There is no explicit rounding in BF16 quantization, so only AdaQuant can be used. - -.. code:: python - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - extra_options={ - 'FastFinetune': { - 'NumIterations': 1000, - 'LearningRate': 1e-6, - 'OptimAlgorithm': 'adaquant', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - } - } - ) - -.. raw:: html - - - -License -------- - -Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
-SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bfp16_quantization.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bfp16_quantization.rst deleted file mode 100644 index cf247c16..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_bfp16_quantization.rst +++ /dev/null @@ -1,122 +0,0 @@ -BFP16 (Block floating point) Quantization -========================================= - -BFP16 (Block Floating Point 16) quantization is a technique that represents tensors using a block floating-point format, where multiple numbers share a common exponent. This format can provide a balance between dynamic range and precision while using fewer bits than standard floating-point representations. BFP16 quantization aims to reduce the computational complexity and memory footprint of neural networks, making them more efficient for inference on various hardware platforms, particularly those with limited resources. - -Key Concepts ------------- - -1. **Block Floating Point Format**: In BFP16 quantization, data is grouped into blocks, and each block shares a common exponent. This reduces the storage requirements while preserving a sufficient dynamic range for most neural network operations. It differs from standard floating-point formats, which assign an individual exponent to each number. - -2. **Dynamic Range and Precision**: By using a shared exponent for each block, BFP16 can achieve a balance between range and precision. It allows for a more flexible representation of values compared to fixed-point formats and can adapt to the magnitude of the data within each block. - -3. **Reduced Computation Costs**: BFP16 quantization reduces the number of bits required to represent each tensor element, leading to lower memory usage and faster computations. This is particularly useful for deploying models on devices with limited hardware resources. - -4. 
**Compatibility with Mixed Precision**: BFP16 can be combined with other quantization methods, such as mixed precision quantization, to optimize neural network performance further. This compatibility allows for flexible deployment strategies tailored to specific accuracy and performance requirements. - -Benefits of BFP16 Quantization ------------------------------- - -1. **Improved Efficiency**: BFP16 quantization significantly reduces the - number of bits needed to represent tensor values, leading to reduced - memory bandwidth and faster computation times. This makes it ideal - for resource-constrained environments. - -2. **Maintained Accuracy**: By balancing dynamic range and precision, - BFP16 quantization minimizes the accuracy loss that can occur with - more aggressive quantization methods. - -3. **Hardware Compatibility**: BFP16 is well-supported by modern hardware - accelerators, making it a flexible and efficient choice for - large-scale neural network training and deployment. - -How to enable BFP16 quantization in AMD Quark for ONNX? -------------------------------------------------------- - -Here is a simple example of how to enable BFP16 quantization in AMD Quark -for ONNX. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantType, ExtendedQuantFormat - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - ) - config = Config(global_quant_config=quant_config) - -.. note:: When inferring with ONNX Runtime, we need to register the custom op's so (Linux) or dll (Windows) file in the ORT session options. - -.. 
code-block:: python - - import onnxruntime - from quark.onnx import get_library_path as vai_lib_path - - if 'ROCMExecutionProvider' in onnxruntime.get_available_providers(): - device = 'ROCM' - providers = ['ROCMExecutionProvider'] - elif 'CUDAExecutionProvider' in onnxruntime.get_available_providers(): - device = 'CUDA' - providers = ['CUDAExecutionProvider'] - else: - device = 'CPU' - providers = ['CPUExecutionProvider'] - - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(vai_lib_path(device)) - session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - -How to further improve the accuracy of a BFP16 quantized model in AMD Quark for ONNX? -------------------------------------------------------------------------------------- - -If you want to further improve the effectiveness of BFP16 quantization after applying it, you can use fast_finetune to enhance the quantization accuracy. Please refer to this :doc:`link ` for more details on how to enable BFP16 Quantization in the configuration of AMD Quark for ONNX. This is a simple example code. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantFormat, ExtendedQuantType - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - include_fast_ft=True, - extra_options={ - 'FastFinetune': { - 'DataSize': 100, - 'FixedSeed': 1705472343, - 'BatchSize': 5, - 'NumIterations': 100, - 'LearningRate': 0.000001, - 'OptimAlgorithm': 'adaquant', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - 'EarlyStop': True, - } - } - ) - config = Config(global_quant_config=quant_config) - -.. 
note:: You can install onnxruntime-gpu instead of onnxruntime to accelerate inference speed. The BFP QuantType only supports fast_finetune with AdaQuant, not AdaRound. Set 'InferDevice' to 'cuda:0' to use the GPU for inference. Additionally, set 'OptimDevice' to 'cuda:0' to accelerate fast_finetune training with the GPU. - -Example -------- - -An example of quantizing a model using the BFP16 quantization is :doc:`available here `. - -.. raw:: html - - - -License -------- - -Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microexponents_quantization.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microexponents_quantization.rst deleted file mode 100644 index 9367291a..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microexponents_quantization.rst +++ /dev/null @@ -1,153 +0,0 @@ -.. raw:: html - - - -Introduction -============ - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -This tutorial explains how to use Microexponents (MX) data types for quantization. - -Microexponents represent an advancement over Block Floating Point (BFP), aiming to improve the numerical efficiency and flexibility of low-precision computations for artificial intelligence. - -Block Floating Point groups numbers (for example, tensors and arrays) into blocks, where each block shares a common exponent, and the values in the block are represented with individual mantissas (and the sign bit). 
This approach effectively reduces memory usage; however, it is coarse-grained, meaning all numbers within a block are forced to have the same exponent, regardless of their individual value ranges. - -To address this issue, Microexponents extend the concept of BFP by introducing two levels of exponents: shared exponents for entire blocks and micro exponents for finer-grained sub-blocks. This dual-level approach enables more precise scaling of individual elements within a block, reducing quantization error and improving the representational range. By allowing sub-blocks to adjust their scaling more accurately, Microexponents strike a balance between the coarse-grained nature of BFP and the fine-grained precision of floating-point formats. - -This technique is particularly useful for low-precision computations in modern deep learning models, where maintaining accuracy while optimizing memory and power usage is critical. Furthermore, hardware accelerators that support Microexponents can process data more efficiently while preserving the numerical stability of operations such as matrix multiplications and convolutions. - -What is Microexponents Quantization? ------------------------------------- - -`This paper `__ introduces several specific formats, including MX4, MX6, and MX9. We have implemented these formats in AMD Quark ONNX quantizer through a custom op named "BFPQuantizeDequantize". This op supports classical BFP and Microexponents both by setting attribute ``bfp_method`` to ``to_bfp`` for BFP or ``to_bfp_prime`` for Microexponents. To select MX4, MX6, and MX9, set the value for the ``bit_width`` attribute according to the following table. 
- -+-------------------+------------------------+ -| Formats | "bit_width" values | -+===================+========================+ -| MX4 | 11 | -+-------------------+------------------------+ -| MX6 | 13 | -+-------------------+------------------------+ -| MX9 | 16 | -+-------------------+------------------------+ - -Other parameters should be set as defined in the paper. - -How to enable MX9 quantization in AMD Quark for ONNX? ------------------------------------------------------ - -Here is a simple example of how to enable Microexponents quantization with -MX9 in AMD Quark for ONNX. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantType, ExtendedQuantFormat - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - extra_options={ - 'BFPAttributes': { - 'bfp_method': "to_bfp_prime", - 'axis': 1, - 'bit_width': 16, - 'block_size': 16, - 'sub_block_size': 2, - 'sub_block_shift_bits': 1, - 'rounding_mode': 2, - }, - }) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -*Note* : When inferencing with ONNXRuntime, you need to register the custom operator's shared object file (Linux) or DLL file (Windows) in the ORT session options. - -.. 
code-block:: python - - import onnxruntime - from quark.onnx import get_library_path - - if 'ROCMExecutionProvider' in onnxruntime.get_available_providers(): - device = 'ROCM' - providers = ['ROCMExecutionProvider'] - elif 'CUDAExecutionProvider' in onnxruntime.get_available_providers(): - device = 'CUDA' - providers = ['CUDAExecutionProvider'] - else: - device = 'CPU' - providers = ['CPUExecutionProvider'] - - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device)) - session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - -How to Further Improve the Accuracy of a MX9 Quantized Model? -------------------------------------------------------------- - -If you want to further improve the effectiveness of MX9 quantization after applying it, you can use ``fast_finetune`` to enhance the quantization accuracy. Refer to this :doc:`link `. This is a simple example code: - -.. code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantFormat, ExtendedQuantType - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - include_fast_ft=True, - extra_options={ - 'BFPAttributes': { - 'bfp_method': "to_bfp_prime", - 'axis': 1, - 'bit_width': 16, - 'block_size': 16, - 'sub_block_size': 2, - 'sub_block_shift_bits': 1, - 'rounding_mode': 2, - }, - 'FastFinetune': { - 'DataSize': 100, - 'FixedSeed': 1705472343, - 'BatchSize': 2, - 'NumIterations': 1000, - 'LearningRate': 0.00001, - 'OptimAlgorithm': 'adaquant', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - 'EarlyStop': True, - }, - } - ) - config = Config(global_quant_config=quant_config) - -.. 
note:: - - You can install onnxruntime-rocm or onnxruntime-gpu instead of onnxruntime to accelerate inference speed. Set ``InferDevice`` to ``hip:0`` or ``cuda:0`` to use the GPU for inference. Additionally, set ``OptimDevice`` to ``hip:0`` or ``cuda:0`` to accelerate the training process of fast finetuning with the GPU. - -Examples --------- - -An example of quantizing a model using the Microscaling quantization is :doc:`available here `. - -.. raw:: html - - - -License -------- - -Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microscaling_quantization.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microscaling_quantization.rst deleted file mode 100644 index 02e72a91..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_microscaling_quantization.rst +++ /dev/null @@ -1,150 +0,0 @@ - -Microscaling (MX) -================= - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -In this tutorial, you learn how to use Microscaling (MX) quantization. - -MX is an advancement over Block Floating Point (BFP), aiming to improve the numerical efficiency and flexibility of low-precision computations for AI. - -BFP groups numbers (for example, tensors, arrays) into blocks, where each block shares a common exponent, and the values in the block are represented with individual mantissas (and the sign bit). This approach is effective for reducing memory usage, but it is coarse-grained, meaning all numbers within a block are forced to have the same exponent, regardless of their individual value ranges. 
- -MX, on the other hand, allows for finer-grained scaling within a block. Instead of forcing all elements in the block to share a single exponent, MX assigns a small-scale adjustment to individual or smaller groups of values within the block. This finer granularity improves precision, as each value or subgroup of values can adjust more dynamically to their specific range, reducing the overall quantization error compared to BFP. - -What is MX Quantization? ------------------------- - -The `OCP MX specification `__ -introduces several specific MX formats, including MXFP8, MXFP6, MXFP4, and MXINT8. These formats are implemented in the AMD Quark ONNX quantizer through a custom operation named "MXQuantizeDequantize", which has an ``element_dtype`` attribute to set the data type for the elements (while the data type for the shared scale is always E8M0). - -+-------------------+------------------------+ -| MX Formats | "element_dtype" values | -+===================+========================+ -| MXFP8(E5M2) | 'fp8_e5m2' | -+-------------------+------------------------+ -| MXFP8(E4M3) | 'fp8_e4m3' | -+-------------------+------------------------+ -| MXFP6(E3M2) | 'fp6_e3m2' | -+-------------------+------------------------+ -| MXFP6(E2M3) | 'fp6_e2m3' | -+-------------------+------------------------+ -| MXFP4(E2M1) | 'fp4_e2m1' | -+-------------------+------------------------+ -| MXINT8 | 'int8' | -+-------------------+------------------------+ - -If you initialize the quantizer with the MX configuration, it quantizes all the activations and weights using the MXQuantizeDequantize nodes. - -How to Enable MX Quantization in AMD Quark for ONNX? ----------------------------------------------------- - -Here is a simple example of how to enable MX quantization with MXINT8 in AMD Quark for ONNX: - -.. 
code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantType, ExtendedQuantFormat - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QMX, - weight_type=ExtendedQuantType.QMX, - extra_options={ - 'MXAttributes': { - 'element_dtype': 'int8', - 'axis': 1, - 'block_size': 32, - 'rounding_mode': 2, - }, - }) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -.. note:: - - When inferencing with ONNXRuntime, you need to register the custom operator's shared object file (Linux) or DLL file (Windows) in the ORT session options. - -.. code-block:: python - - import onnxruntime - from quark.onnx import get_library_path - - if 'ROCMExecutionProvider' in onnxruntime.get_available_providers(): - device = 'ROCM' - providers = ['ROCMExecutionProvider'] - elif 'CUDAExecutionProvider' in onnxruntime.get_available_providers(): - device = 'CUDA' - providers = ['CUDAExecutionProvider'] - else: - device = 'CPU' - providers = ['CPUExecutionProvider'] - - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device)) - session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - -How to Further Improve the Accuracy of a MX Quantized Model? ------------------------------------------------------------- - -If you want to further improve the effectiveness of MX quantization after applying it, you can use ``fast_finetune`` to enhance the quantization accuracy. Refer to this :doc:`link `. - -Here is a simple example code which is fast finetuning a MXINT8 model: - -.. 
code-block:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantFormat, ExtendedQuantType - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QMX, - weight_type=ExtendedQuantType.QMX, - include_fast_ft=True, - extra_options={ - 'MXAttributes': { - 'element_dtype': 'int8', - 'axis': 1, - 'block_size': 32, - 'rounding_mode': 2, - }, - 'FastFinetune': { - 'DataSize': 100, - 'FixedSeed': 1705472343, - 'BatchSize': 2, - 'NumIterations': 1000, - 'LearningRate': 0.00001, - 'OptimAlgorithm': 'adaquant', - 'OptimDevice': 'cpu', - 'InferDevice': 'cpu', - 'EarlyStop': True, - }, - }) - config = Config(global_quant_config=quant_config) - -.. note:: - - You can install onnxruntime-rocm or onnxruntime-gpu instead of onnxruntime to accelerate inference speed. Set 'InferDevice' to 'hip:0' or 'cuda:0' to use the GPU for inference. Additionally, set 'OptimDevice' to 'hip:0' or 'cuda:0' to accelerate the training process of fast finetuning with the GPU. - -Example -------- - -An example of quantizing a model using the Microscaling quantization is :doc:`available here `. - -.. raw:: html - - - -License -------- - -Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_mix_precision.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_mix_precision.rst deleted file mode 100644 index 5121c83e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_tutorial_mix_precision.rst +++ /dev/null @@ -1,280 +0,0 @@ -.. raw:: html - - - -Mixed Precision -=============== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. 
When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -As the scale and complexity of AI models continue to grow, optimizing their performance and efficiency becomes a top priority. Quantizing models to mixed precision emerges as a powerful technique, allowing AI practitioners to balance computational speed, memory usage, and model accuracy. This tutorial introduces the characteristics and usage of AMD Quark for ONNX's mixed precision. - -What is Mixed Precision Quantization? -------------------------------------- - -Mixed precision quantization involves using different precision levels for different parts of a neural network, such as using 8-bit integers for some layers while retaining higher precision, for example, 16-bit or 32-bit floating point, for others. This approach leverages the fact that not all parts of a model are equally sensitive to quantization. By carefully selecting which parts of the model can tolerate lower precision, you achieve significant computational savings while minimizing the impact on model accuracy. - -Benefits of Mixed Precision Quantization ----------------------------------------- - -1. **Enhanced Efficiency**: By using lower precision where possible, mixed precision quantization significantly reduces computational load and memory usage, leading to faster inference times and lower power consumption. - -2. **Maintained Accuracy**: By selectively applying higher precision to sensitive parts of the model, mixed precision quantization minimizes the accuracy loss that typically accompanies uniform quantization. - -3. **Flexibility**: Mixed precision quantization is adaptable to various types of neural networks and can be tailored to specific hardware capabilities, making it suitable for a wide range of applications. 
- -Mixed Precision Quantization in AMD Quark for ONNX --------------------------------------------------- - -AMD Quark for ONNX is designed to push the boundaries of what is possible with mixed precision. Here is what sets it apart: - -1. **Support for All Types of Granularity** - -Granularity refers to the level at which precision can be controlled within a model. AMD Quark for ONNX mixed precision supports: - -- **Element-wise Granularity** - -Element-wise mixed precision allows assigning different numeric precisions to activations and weights at the individual computation level. For example: INT8 Weights for efficient storage and computation and INT16 Activation to preserve dynamic range. - -- **Layer-wise Granularity** - -Different layers of a neural network can have varying levels of sensitivity to quantization. Layer-wise mixed precision assigns precision levels to layers based on their sensitivity, optimizing both performance and accuracy. For example, INT16 to sensitive layers for high accuracy while INT8 to others for efficient inference. - -- **Tensor-wise Granularity** - -Tensor-wise mixed precision enables assigning different precisions to individual tensors within a layer. For example, in an INT8 quantized model, specifying any sensitive tensor as INT16. - -2. **Support for Various Data Types** - -AMD Quark for ONNX mixed precision is not limited to a few integer data types, it supports a wide range of precisions, including but not limited to: - -- **More Integer Data Types** - -Traditional INT8/UINT8 for significant memory and computation savings, INT16/UINT16 for higher precision and INT32/UINT32 for experimental usage. - -- **Half Floating-Point Data Types** - -Float16 and BFloat16, the former can be used for iGPU/GPU applications, while the latter can be used for NPU deployment. - -- **Block Floating-Point Data Types** - -The bit-width for shared exponents and elements can be set arbitrarily. The typical data type is BFP16. 
- -- **Microexponents Data Types** - -Supports all the Microexponents data types, including MX4, MX6 and MX9. - -- **Microscaling Data Types** - -Supports all the Microscaling data types, including MXINT8, MXFP8_E4M3, MXFP8_E5M2, MXFP6_E3M2, MXFP6_E2M3 and MXFP4. - -How to Enable Mixed Precision in AMD Quark for ONNX? ----------------------------------------------------- - -Here, BF16 mixed with BFP16 is used as an example to illustrate how to build configurations for mixed precision quantization. -In fact, you can mix any two other data types equally. - -- **Element-wise** - -In this configuration, BFP16 is assigned to activations and BFloat16 to weights. Here the BFP16 quantization is -executed by custom operator named "BFPQuantizeDequantize", whose default attributes make it work on BFP16 mode. - -.. code-block:: python - - from quark.onnx import ModelQuantizer, CalibrationMethod, ExtendedQuantFormat, ExtendedQuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - # Build the configuration - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFloat16, - ) - config = Config(global_quant_config=quant_config) - - # Create an ONNX quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model. Users need to provide the input model path, output model path, - # and a data reader for calibration. - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - - -You can also assign BFloat16 to activations while BFP16 to weights as follows: - -.. 
code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFP, - ) - -- **Layer-wise** - -This is one of the common configurations for deploying models on hardware devices, where the computationally intensive layers are quantized into BFP16 to maintain accuracy while improving computational efficiency, and the remaining layers are quantized into BFloat16. - - -.. code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - include_auto_mp=True, - extra_options={ - "AutoMixprecision": { - "TargetOpType": ["Conv", "ConvTranspose", "Gemm", "MatMul"], - "TargetQuantType": ExtendedQuantType.QBFP, - }, - }, - ) - -At this point, there are many tensors on the precision boundary whose consumers have different precision from the producers. -Some backend compilers require that two types of quantization nodes exist simultaneously on these tensors, such as inserting -a BFP node for BFP16 and custom QDQ pair for BF16 onto the same tensor. In this case, you can enable the ``DualQuantNodes`` option. - -.. code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - include_auto_mp=True, - extra_options={ - "AutoMixprecision": { - "TargetOpType": ["Conv", "ConvTranspose", "Gemm", "MatMul"], - "TargetQuantType": ExtendedQuantType.QBFP, - "DualQuantNodes": True, - }, - }, - ) - -And we can also mix BF16 with MXINT8 as shown below.
Please note that for other Microscaling data formats, you need to set MXAttributes -in the "extra_options" parameter; see the Microscaling tutorial for details. - -.. code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - include_auto_mp=True, - extra_options={ - "AutoMixprecision": { - "TargetOpType": ["Conv", "ConvTranspose", "Gemm", "MatMul"], - "TargetQuantType": ExtendedQuantType.QMX, - }, - }, - ) - -- **Tensor-wise** - -Certain tensors in a neural network are particularly sensitive to quantization, including weight and activation tensors. Applying -appropriate precision for these sensitive tensors can help maintain model accuracy while reaping the benefits of quantization. -Therefore, after identifying these tensors through sensitivity analysis, you can set the precision separately for these tensors. - -.. code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - specific_tensor_precision=True, - extra_options={ - # MixedPrecisionTensor is a dictionary in which the key is data type and the value - # is a list of the names of sensitive tensors. - "MixedPrecisionTensor": { - ExtendedQuantType.QBFloat16: ['weight_tensor_name', 'activation_tensor_name'], - }, - }, - ) - -You can also assign more data types to more tensors as needed, for example: - -.. 
code-block:: python - - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFP, - weight_type=ExtendedQuantType.QBFP, - specific_tensor_precision=True, - extra_options={ - # MixedPrecisionTensor is a dictionary in which the key is data type and the value - # is a list of the names of sensitive tensors. - "MixedPrecisionTensor": { - ExtendedQuantType.QBFloat16: ['weight_tensor_name1', 'activation_tensor_name1'], - ExtendedQuantType.QInt16: ['weight_tensor_name2', 'activation_tensor_name2'], - }, - }, - ) - -Automatic Mixed Precision based on Sensitivity Analysis --------------------------------------------------------- - -The previous examples specify mixed precision manually, but in practical applications automatically identifying sensitive layers and then -applying mixed precision becomes more critical. - -AMD Quark for ONNX supports automatic mixed precision as follows: - -**Step 1** Sensitivity analysis. This step can involve profiling the model with new precision settings and measuring the impact on accuracy. - -**Step 2** Sort layers by sensitivity. Layers that show significant accuracy degradation when quantized are deemed "sensitive" and are kept at higher -precision. Less sensitive parts can be quantized more aggressively to lower precision without significant impact on overall model performance. - -**Step 3** Perform mixed precision operations. Proceed layer by layer until the accuracy target specified by the user is reached. - -We provide two types of accuracy target: general L2 Norm metric and Top1 metric specific to image classification models. Here is a simple example of -how to use the L2 Norm metric to achieve automatic mixed precision: - -.. 
code-block:: python - - from quark.onnx import ModelQuantizer, CalibrationMethod, QuantType, ExtendedQuantFormat, ExtendedQuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - # Build the configuration - quant_config = QuantizationConfig( - calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QInt16, - weight_type=QuantType.QInt8, - include_auto_mp=True, - extra_options={ - 'AutoMixprecision': { - "TargetOpType": ["Conv", "ConvTranspose", "Gemm", "MatMul"], # The operation types to perform mixed precision - "ActTargetQuantType": QuantType.QInt8, # The activation input of insensitive layers will be assign to this precision - "WeightTargetQuantType": QuantType.QInt8, # The weight input of insensitive layers will be assign to this precision - "OutputIndex": 0, # The index of outputs for evaluating accuracy indicator - "L2Target": 0.1, # If L2 is less than this value after assigning a new precision to a certain layer, the process continues - }, - }, - ) - config = Config(global_quant_config=quant_config) - - # Create an ONNX quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model. Users need to provide the input model path, output model path, - # and a data reader for calibration. - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -For a detailed example of using Top1 metric for mixed precision, refer to the :doc:`Mixed Precision Example `. - -.. raw:: html - - - -License -------- - -Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. 
-SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_auto_search.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_auto_search.rst deleted file mode 100644 index 232e7eef..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_auto_search.rst +++ /dev/null @@ -1,142 +0,0 @@ -Automatic Search for Model Quantization -======================================= - -Overview --------- - -The purpose of the Automatic Search design is to find better configurations for model quantization, aiming to achieve better accuracy while reducing the resource usage of the quantized model. The design is centered around an iterative loop that continuously refines the configuration by evaluating and adjusting various quantization parameters. This loop includes several key components: - -1. **Auto Search Config**: Defines the parameters for the search process. -2. **Quantization Config**: Specifies the quantization settings. -3. **Search Space**: Represents all possible configurations that can be explored. -4. **Search Algorithm**: Determines how to sample configurations from the search space. -5. **Model Quantizer**: Applies the sampled configuration to quantize the model. -6. **Evaluator**: Assesses the performance of the quantized model. -7. **Stop Condition**: Decides when to stop the search process based on the evaluation results. - -The core idea is to explore different configurations to find the optimal settings for the quantized model, improving its accuracy while ensuring it meets specified performance constraints. - -.. image:: ../_static/auto_search_diagram.png - :alt: Overview diagram of the automatic search process - :width: 600px - :align: center - -Components ----------- - -**Search Space** - -The search space defines the possible configurations available for model quantization, based on the parameters set in the auto search config. Initially, all potential configurations are listed. 
These configurations are then filtered to remove invalid or repeated ones. Each configuration is assigned a priority that dictates the likelihood of it being sampled during the search process. The priority is determined based on factors such as the expected quantization time and resource consumption. - -**Search Algorithm** - -The search algorithm samples configurations from the search space based on the defined priorities and search history. Currently, two search algorithms are supported: -- **Grid Search**: Exhaustively explores all configurations in a structured manner, ensuring a complete search of the space. -- **Random Search**: Randomly samples configurations, providing more flexibility and potentially quicker results for large search spaces. - -The search algorithm is designed to intelligently explore the configuration space to find high-performing settings for the quantization process. - -**Model Quantizer** - -After a configuration is sampled, the Model Quantizer is responsible for quantizing the model using the selected configuration. It takes three inputs: -- The model to be quantized. -- The quantization configuration, which defines the general approach (for example, precision, layer types). -- The sampled configuration, which specifies specific tuning parameters (for example, quantization range, rounding methods). - -The Model Quantizer utilizes existing APIs to perform the quantization process, producing a quantized model as output. - -**Evaluator** -After the model is quantized, the evaluator assesses its performance based on certain metrics. There are two possible evaluation scenarios: -1. **Custom Evaluator**: If you provide an evaluator, it is used to measure the performance of the quantized model. The evaluator is expected to include a test dataset, execution runtime details (such as ONNX model execution), and a metric for evaluation (for example, accuracy, inference speed). -2. 
**Built-in Evaluator**: If no custom evaluator is provided, the built-in evaluator is used. This evaluator relies on a test dataset (for example, a pre-defined datareader for quantization tasks) and calculates metrics like L1 or L2 norm to evaluate the model's performance. - -The evaluator returns the evaluation results, which are then used to guide the search process. - -**Stop Condition** -The stop condition evaluates the results provided by the evaluator and determines whether the search process should terminate. There are several criteria for stopping: -- If the performance of the quantized model is within a predefined tolerance level (as specified in the configuration), the configuration is added to the list of candidate solutions. -- If the number of candidates meets the desired threshold, the search loop terminates. -- If the maximum number of iterations or time allocated for the search process is exceeded, the loop is also stopped. - -The stop condition ensures that the search process concludes either when a satisfactory set of configurations is found or when the time/resources allocated for the search are exhausted. - -Flow Diagram -------------- - -1. Initialize auto search config and quantization config. -2. Build the search space based on the configuration. -3. Sample configurations using the search algorithm (grid or random search). -4. Apply the model quantizer to the selected configuration. -5. Evaluate the performance of the quantized model. -6. Check the stop condition: - - If the result is within tolerance, add to candidates. - - If the candidate count exceeds the threshold, stop. - - If iterations or time limit is exceeded, stop. -7. Repeat steps 3-6 until the stop condition is met. 
- -Usage ------ - -To use the automatic search process for model quantization, you need to define the following: -- **Auto Search Config**: This includes parameters like the number of iterations, expected time per configuration, tolerance levels, and the stop condition. -- **Quantization Config**: Defines the quantization method, such as bit width, layer-wise quantization, and rounding methods. -- **Evaluator**: If using a custom evaluator, provide the test dataset and evaluation metric. Otherwise, the built-in evaluator will be used. -- **Float Onnx Model**: This model is the target model to be quantized. -- **DataReader**: Defines the calibration dataset for model quantization. - -Example Configuration: - -.. code-block:: python - - from quark.onnx.auto_search import AutoSearch - from quark.onnx.auto_search import AutoSearchConfig - from quark.onnx.quant_utils import PowerOfTwoMethod - from onnxruntime.quantization.calibrate import CalibrationMethod - - auto_search_config = AutoSearchConfig - auto_search_config.search_space = { - "calibrate_method": [ - PowerOfTwoMethod.MinMSE, PowerOfTwoMethod.NonOverflow, CalibrationMethod.MinMax, CalibrationMethod.Entropy, - CalibrationMethod.Percentile ], - "activation_type": [QuantType.QInt8, QuantType.QInt16], - "weight_type": [QuantType.QInt8, QuantType.QInt16], - "include_cle": [True, False], - "include_auto_mp": [False, True], - "include_fast_ft": [False, True], - "include_sq": [False, True], - "extra_options": { - "ActivationSymmetric": [True, False], - "WeightSymmetric": [True, False], - "CalibMovingAverage": [True, False], - "CalibMovingAverageConstant": [0.01, 0.001], - "Percentile": [99.99, 99.999], - "SmoothAlpha": [0.5, 0.6], - 'FastFinetune': { - 'DataSize': [500, 1000], - 'NumIterations': [100, 1000], - 'OptimAlgorithm': ['adaround', 'adaquant'], - 'LearningRate': [0.01, 0.001, 0.0001], - } - } - } - auto_search_config.search_stop_condition = { - "find_n_candidates": -1, - "find_best_candidate": -1, - 
"iteration_limit": 1000, - "time_limit": 3600, # in seconds - } - auto_search_config.search_evaluator = None - - auto_search_instance = AutoSearch(quantization_config, auto_search_config, float_onnx_model_path, calibration_data_reader) - searched_candidates = auto_search_instance.search_model() - -Conclusion ----------- - -The Automatic Search for model quantization provides a systematic approach to explore different quantization configurations in search of the best-performing model. By leveraging intelligent search algorithms and efficient evaluation processes, this approach can significantly improve the accuracy and efficiency of model quantization, making it easier to deploy optimized models in real-world applications. - -License -------- - -Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_config_description.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_config_description.rst deleted file mode 100644 index ef844f48..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_config_description.rst +++ /dev/null @@ -1,121 +0,0 @@ -Configuring ONNX Quantization -============================= - -Configuration of quantization in ``AMD Quark for ONNX`` is set by Python ``dataclass`` because it is rigorous and can help you avoid typos. We provide a class ``Config`` in ``quark.onnx.quantization.config.config`` for configuration, as demonstrated in the previous example. In ``Config``, you should set certain instances (all instances are optional except ``global_quant_config``): - -- ``global_quant_config``: Global quantization configuration applied to the entire model. - -The ``Config`` should be like: - -.. code-block:: python - - from quark.onnx.quantization.config import Config, get_default_config - config = Config(global_quant_config=...) 
- -We define some default global configurations, including ``XINT8`` and ``U8S8_AAWS``, which can be used like this: - -.. code-block:: python - - quant_config = get_default_config("U8S8_AAWS") - config = Config(global_quant_config=quant_config) - -More Quantization Default Configurations ----------------------------------------- - -AMD Quark for ONNX provides you with default configurations to quickly start model quantization. - -- ``INT8_CNN_DEFAULT``: Perform 8-bit, optimized for CNN quantization. -- ``INT16_CNN_DEFAULT``: Perform 16-bit, optimized for CNN quantization. -- ``INT8_TRANSFORMER_DEFAULT``: Perform 8-bit, optimized for transformer quantization. -- ``INT16_TRANSFORMER_DEFAULT``: Perform 16-bit, optimized for transformer quantization. -- ``INT8_CNN_ACCURATE``: Perform 8-bit, optimized for CNN quantization. Some advanced algorithms are applied to achieve higher accuracy but consume more time and memory space. -- ``INT16_CNN_ACCURATE``: Perform 16-bit, optimized for CNN quantization. Some advanced algorithms are applied to achieve higher accuracy but consume more time and memory space. -- ``INT8_TRANSFORMER_ACCURATE``: Perform 8-bit, optimized for transformer quantization. Some advanced algorithms are applied to achieve higher accuracy but consume more time and memory space. -- ``INT16_TRANSFORMER_ACCURATE``: Perform 16-bit, optimized for transformer quantization. Some advanced algorithms are applied to achieve higher accuracy but consume more time and memory space. - -AMD Quark for ONNX also provides more advanced default configurations to help you quantize models with more options. - -- ``UINT8_DYNAMIC_QUANT``: Perform dynamic activation, uint8 weight quantization. -- ``XINT8``: Perform uint8 activation, int8 weight, optimized for NPU quantization. -- ``XINT8_ADAROUND``: Perform uint8 activation, int8 weight, optimized for NPU quantization. The adaround fast finetune applies to preserve quantized accuracy. 
-- ``XINT8_ADAQUANT``: Perform uint8 activation, int8 weight, optimized for NPU quantization. The adaquant fast finetune applies to preserve quantized accuracy. -- ``S8S8_AAWS``: Perform int8 asymmetric activation, int8 symmetric weight quantization. -- ``S8S8_AAWS_ADAROUND``: Perform int8 asymmetric activation, int8 symmetric weight quantization. The adaround fast finetune applies to preserve quantized accuracy. -- ``S8S8_AAWS_ADAQUANT``: Perform int8 asymmetric activation, int8 symmetric weight quantization. The adaquant fast finetune applies to preserve quantized accuracy. -- ``U8S8_AAWS``: Perform uint8 asymmetric activation int8 symmetric weight quantization. -- ``U8S8_AAWS_ADAROUND``: Perform uint8 asymmetric activation, int8 symmetric weight quantization. The adaround fast finetune applies to preserve quantized accuracy. -- ``U8S8_AAWS_ADAQUANT``: Perform uint8 asymmetric activation, int8 symmetric weight quantization. The adaquant fast finetune applies to preserve quantized accuracy. -- ``S16S8_ASWS``: Perform int16 symmetric activation, int8 symmetric weight quantization. -- ``S16S8_ASWS_ADAROUND``: Perform int16 symmetric activation, int8 symmetric weight quantization. The adaround fast finetune applies to preserve quantized accuracy. -- ``S16S8_ASWS_ADAQUANT``: Perform int16 symmetric activation, int8 symmetric weight quantization. The adaquant fast finetune applies to preserve quantized accuracy. -- ``A8W8``: Perform int8 symmetric activation, int8 symmetric weight quantization and optimize for deployment. -- ``A16W8``: Perform int16 symmetric activation, int8 symmetric weight quantization and optimize for deployment. -- ``U16S8_AAWS``: Perform uint16 asymmetric activation, int8 symmetric weight quantization. -- ``U16S8_AAWS_ADAROUND``: Perform uint16 asymmetric activation, int8 symmetric weight quantization. The adaround fast finetune applies to preserve quantized accuracy. 
-- ``U16S8_AAWS_ADAQUANT``: Perform uint16 asymmetric activation, int8 symmetric weight quantization. The adaquant fast finetune applies to preserve quantized accuracy. -- ``BF16``: Perform BFloat16 activation, BFloat16 weight quantization. -- ``BFP16``: Perform BFP16 activation, BFP16 weight quantization. -- ``S16S16_MIXED_S8S8``: Perform int16 activation, int16 weight mix-precision quantization. - -Customized Configurations -------------------------- - -Besides the default configurations in AMD Quark for ONNX, you can also customize the quantization configuration like the following example: - -.. toctree:: - :hidden: - :caption: Advanced AMD Quark Features for PyTorch - :maxdepth: 1 - - Full List of Quantization Config Features - -.. code-block:: python - - from quark.onnx import ModelQuantizer, PowerOfTwoMethod, QuantType - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig( - quant_format=quark.onnx.QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - input_nodes=[], - output_nodes=[], - op_types_to_quantize=[], - per_channel=False, - reduce_range=False, - activation_type=quark.onnx.QuantType.QInt8, - weight_type=quark.onnx.QuantType.QInt8, - nodes_to_quantize=[], - nodes_to_exclude=[], - subgraphs_to_exclude=[], - optimize_model=True, - use_dynamic_quant=False, - use_external_data_format=False, - execution_providers=['CPUExecutionProvider'], - enable_npu_cnn=False, - enable_npu_transformer=False, - convert_fp16_to_fp32=False, - convert_nchw_to_nhwc=False, - include_cle=True, - include_sq=False, - extra_options={},) - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - quantizer.quantize_model(input_model_path, output_model_path, calibration_data_reader=None) - -.. toctree:: - :hidden: - :maxdepth: 1 - - Calibration methods - Calibration datasets - Quantization Strategies - Quantization Schemes - Quantization Symmetry - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_supported_optype_datatype.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_supported_optype_datatype.rst deleted file mode 100644 index 9a83b37f..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_onnx_user_guide_supported_optype_datatype.rst +++ /dev/null @@ -1,322 +0,0 @@ -Supported Data and Op Types -=========================== - -Supported Data Types --------------------- - -Summary Table -~~~~~~~~~~~~~ - -+------------------------------------------------------------------------------+ -| Supported Data Types | -+==============================================================================+ -| Int4 / UInt4 | -+------------------------------------------------------------------------------+ -| Int8 / UInt8 | -+------------------------------------------------------------------------------+ -| Int16 / UInt16 | -+------------------------------------------------------------------------------+ -| Int32 / UInt32 | -+------------------------------------------------------------------------------+ -| Float16 | -+------------------------------------------------------------------------------+ -| BFloat16 | -+------------------------------------------------------------------------------+ -| BFP16 | -+------------------------------------------------------------------------------+ -| MX4 / MX6 / MX9 | -+------------------------------------------------------------------------------+ -| MXFP8(E5M2) / MXFP8(E4M3) / MXFP6(E3M2) / MXFP6(E2M3) / MXFP4(E2M1) / MXINT8 | -+------------------------------------------------------------------------------+ - -You can see in the table there are many non integer data types that onnxruntime official operators do not support. In order to support these new features, we have developed several custom operators using onnxruntime's custom operation C APIs. 
Here are these ops and their specifications: - -**ExtendedQuantizeLinear** - :doc:`specification ` - -**ExtendedDequantizeLinear** - :doc:`specification ` - -**BFPQuantizeDequantize** - :doc:`specification ` - -**MXQuantizeDequantize** - :doc:`specification ` - -.. note:: - - When installing on Windows, Visual Studio is required. The minimum version of Visual Studio is Visual Studio 2022. During the compilation process, there are two ways to use it: - -1. **Use the Developer Command Prompt for Visual Studio** - When installing Visual Studio, ensure that the Developer Command Prompt for Visual Studio is installed as well. Execute programs in the CMD window of the Developer Command Prompt for Visual Studio. - -2. **Manually Add Paths to Environment Variables** - Visual Studio's ``cl.exe``, ``MSBuild.exe``, and ``link.exe`` will be used. Ensure that the paths are added to the `PATH` environment variable. These programs are located in the Visual Studio installation directory. In the *Edit Environment Variables* window, click **New**, then paste the path to the folder containing ``cl.exe``, ``link.exe``, and ``MSBuild.exe``. Click **OK** on all windows to apply the changes. - -1. Quantizing to Other Precisions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In addition to the INT8/UINT8, the quark.onnx supports quantizing models to other data formats, including INT16/UINT16, INT32/UINT32, Float16 and BFloat16, which can provide better accuracy or be used for experimental purposes. These new data formats are achieved by a customized version of QuantizeLinear and DequantizeLinear named "ExtendedQuantizeLinear" and "ExtendedDequantizeLinear", which expand onnxruntime's UInt8 and Int8 quantization to support UInt16, Int16, UInt32, Int32, Float16, and -BFloat16. This customized Q/DQ was implemented by a custom operations library in quark.onnx using onnxruntime's custom operation C API. - -The custom operations library was developed based on Linux and Windows. 
- -To use this feature, the ``quant_format`` should be set to ExtendedQuantFormat.QDQ. You might have noticed that in both the recommended NPU_CNN and NPU_Transformer configurations, the ``quant_format`` is set to QuantFormat.QDQ. NPU targets that support acceleration for models quantized to INT8/UINT8, do not support other precisions. - -.. note:: - - When the Quant_Type is Int4/UInt4, the onnxruntime version must be 1.19.0 or higher. Only the onnxruntime native "CalibrationMethod" is supported (MinMax, Percentile), and the quant_format is required to be QuantFormat. - -1.1 Quantizing Float32 Models to Int16 or Int32 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The quantizer supports quantizing float32 models to Int16 or Int32 data formats. To enable this, you need to set the ``activation_type`` and ``weight_type`` in the quantize_static API to the new data types. Options are ExtendedQuantType.QInt16/ExtendedQuantType.QUInt16 or ExtendedQuantType.QInt32/ExtendedQuantType.QUInt32. - -.. code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - quant_format=quark.onnx.ExtendedQuantFormat.QDQ, - activation_type=quark.onnx.ExtendedQuantType.QInt16, - weight_type=quark.onnx.ExtendedQuantType.QInt16, - ) - -1.2 Quantizing Float32 Models to Float16 or BFloat16 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Besides integer data formats, the quantizer also supports quantizing Float32 models to Float16 or BFloat16 data formats. Set the ``activation_type`` and ``weight_type`` to ``ExtendedQuantType.QFloat16`` or ``ExtendedQuantType.QBFloat16``. - -.. 
code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - quant_format=quark.onnx.ExtendedQuantFormat.QDQ, - activation_type=quark.onnx.ExtendedQuantType.QFloat16, - weight_type=quark.onnx.ExtendedQuantType.QFloat16, - ) - -1.3 Quantizing Float32 Models to BFP16 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The quantizer also supports quantizing Float32 models to BFP16 data formats. The block size can be modified by changing the ``block_size`` parameter in the ``extra_options``. The following is the configuration for BFP16 with a block size of 8. - -.. code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - calibrate_method=quark.onnx.PowerOfTwoMethod.NonOverflow, - quant_format=quark.onnx.ExtendedQuantFormat.QDQ, - activation_type=quark.onnx.ExtendedQuantType.QBFP, - weight_type=quark.onnx.ExtendedQuantType.QBFP, - extra_options={ - "BFPAttributes": { - "bfp_method": "to_bfp", - "bit_width": 16, - "block_size": 8, - } - }, - ) - -1.4 Quantizing Float32 Models to MXINT8 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The quantizer also supports quantizing Float32 models to MXINT8 data formats. The block size can be modified by changing the ``block_size`` parameter in the ``extra_options``. The following is the configuration for MXINT8 with a block size of 32. - -.. code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - calibrate_method=quark.onnx.PowerOfTwoMethod.NonOverflow, - quant_format=quark.onnx.ExtendedQuantFormat.QDQ, - activation_type=quark.onnx.ExtendedQuantType.QMX, - weight_type=quark.onnx.ExtendedQuantType.QMX, - extra_options={ - "MXAttributes": { - "element_dtype": "int8", - "block_size": 32, - } - }, - ) - -.. note:: - - When inference with ONNX Runtime, we need to register the custom op's so(Linux) or dll(Windows) file in the ORT session options. - -.. 
code:: python - - import onnxruntime - from quark.onnx import get_library_path - - device = 'CPU' - providers = ['CPUExecutionProvider'] - - # Also We can use the GPU configuration: - # device='ROCM' - # providers = ['ROCMExecutionProvider'] - # device='CUDA' - # providers = ['CUDAExecutionProvider'] - - sess_options = onnxruntime.SessionOptions() - sess_options.register_custom_ops_library(get_library_path(device)) - session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - -1.5 Quantizing Float32 Models to Mixed Data Formats -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The quantizer even supports setting the activation and weight to different precisions. For example, activation is Int16 while weight is Int8. This can be used when pure Int8 quantization can not meet accuracy requirements. - -.. code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - quant_format=quark.onnx.ExtendedQuantFormat.QDQ, - activation_type=quark.onnx.ExtendedQuantType.QInt16, - weight_type=QuantType.QInt8, - ) - -2. Quantizing Float16 Models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For models in Float16, we recommend setting ``convert_fp16_to_fp32`` to True. This first converts your Float16 model to a Float32 model before quantization, reducing redundant nodes such as cast in the model. - -.. code:: python - - quark.onnx.quantize_static( - model_input, - model_output, - calibration_data_reader, - quant_format=QuantFormat.QDQ, - calibrate_method=quark.onnx.PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_NPU_cnn=True, - convert_fp16_to_fp32=True, - extra_options={'ActivationSymmetric':True} - ) - -.. note:: - When using ``convert_fp16_to_fp32`` in quark.onnx, it requires onnxsim to simplify the ONNX model. Ensure that onnxsim is installed by using ``python -m pip install onnxsim``. 
- -Supported Op Type ------------------ - -.. _quark-onnx-supported-ops: - -Summary Table -~~~~~~~~~~~~~ - -Table: List of Quark ONNX Supported Quantized Ops - -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Supported Ops | Comments | -+=======================+===========================================================================================================================================================================================================+ -| Add | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ArgMax | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| AveragePool | Will be quantized only when its input is quantized. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| BatchNormalization | By default, the "optimize_model" parameter will fuse BatchNormalization to Conv/ConvTranspose/Gemm. For standalone BatchNormalization, quantization is supported only for NPU_CNN platforms by converting | -| | BatchNormalization to Conv. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Clip | Will be quantized only when its input is quantized. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Concat | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Conv | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ConvTranspose | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| DepthToSpace | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Div | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Erf | Quantization is supported only for NPU_CNN platforms. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Gather | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Gemm | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| GlobalAveragePool | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| HardSigmoid | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| InstanceNormalization | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| LayerNormalization | Supported for opset>=17. Will be quantized only when its input is quantized. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| LeakyRelu | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| LpNormalization | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MatMul | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Min | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Max | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| MaxPool | Will be quantized only when its input is quantized. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Mul | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Pad | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| PRelu | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ReduceMean | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Relu | Will be quantized only when its input is quantized. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Reshape | Will be quantized only when its input is quantized. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Resize | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Slice | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Sigmoid | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Softmax | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| SpaceToDepth | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Split | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Squeeze | Will be quantized only when its input is quantized. 
| -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Sub | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Tanh | Quantization is supported only for NPU_CNN platforms. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Transpose | Will be quantized only when its input is quantized. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Unsqueeze | Will be quantized only when its input is quantized. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Where | | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -.. toctree:: - :hidden: - :maxdepth: 1 - - ExtendedQuantizeLinear - ExtendedDequantizeLinear - ExtendedInstanceNormalization - ExtendedLSTM - BFPQuantizeDequantize - MXQuantizeDequantize - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_mx.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_mx.rst deleted file mode 100644 index 3dbe1f34..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_mx.rst +++ /dev/null @@ -1,155 +0,0 @@ -Using OCP MX (Microscaling) -=========================== - -Introduction ------------- - -This tutorial explains how to use OCP MX data types with AMD Quark. - -OCP MX is a new family of quantization data types defined by this `specification `__ and explored thoroughly in `Microscaling Data Formats for Deep Learning `__. - -The key feature of OCP MX is that it subdivides tensors into arbitrary blocks of elements that share a scale, instead of using a single per tensor scale like many other data types. - -This allows for better accuracy with more fine-grained scaling while still reducing storage and computational requirements. - -How to use OCP MX in AMD Quark ------------------------------- - -1. Install AMD Quark -~~~~~~~~~~~~~~~~~~~~ - -Follow the steps in the :doc:`installation guide <../install>`. - -2. Set the model -~~~~~~~~~~~~~~~~ - -.. code-block:: python - - from transformers import AutoModelForCausalLM, AutoTokenizer - model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b", token=<hf_token>) - model.eval() - tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b", token=<hf_token>) - -Retrieve the model from `Hugging Face `__ using their `Transformers `__ library. - -The model `meta-llama/Llama-2-7b `__ is a gated model, meaning access must be requested and a `Hugging Face token `__ generated. - -Replace all instances of ``<hf_token>`` with the token. - -3. Set the quantization configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -..
code-block:: python - - from quark.torch.quantization.config.config import Config, OCP_MXFP8E4M3Spec, QuantizationConfig - - mxfp8_spec = OCP_MXFP8E4M3Spec(is_dynamic=False, - ch_axis=-1).to_quantization_spec() - - mxfp8_config = QuantizationConfig(weight=mxfp8_spec) - quant_config = Config(global_quant_config=mxfp8_config) - -For OCP MX quantization, which always uses per-group quantization with group size 32, helper classes are available to instantiate the necessary tensor quantization spec: - -- For FP8 E4M3: ``OCP_MXFP8E4M3Spec``, -- For FP8 E5M2: ``OCP_MXFP8E5M2Spec``, -- For FP6 (E3M2): ``OCP_MXFP6E3M2Spec``, -- For FP6 (E2M3): ``OCP_MXFP6E2M3Spec``, -- For FP4 (E2M1): ``OCP_MXFP4Spec``, -- For INT8: ``OCP_MXINT8Spec``. - -In terms of what element type to choose, according to `Microscaling Data Formats for Deep Learning `__, INT8 can be used as a drop-in replacement for FP32 without any further work needed and FP8 is almost as good. However, FP6 and FP4 will generally require fine-tuning and will incur a minor accuracy loss. - -How is the tensor turned into blocks? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Reshaping of the tensor into blocks is controlled by ``ch_axis`` and ``group_size``. - -Let us use a tensor of shape (2,4) as an example: - -.. figure:: ../_static/mx/tensor_base.png - :align: center - -The parameter ``ch_axis`` determines along which axis elements will be grouped into blocks: - -.. figure:: ../_static/mx/tensor_axis_0.png - :align: center - -.. figure:: ../_static/mx/tensor_axis_1.png - :align: center - -The ``group_size`` parameter determines how many elements to bunch together into a single block. - -If it is larger than the number of elements along the axis, the block is padded with zeros until it reaches the correct size: - -.. figure:: ../_static/mx/tensor_axis_0_padded.png - :align: center - - ch_axis = 0 and group_size = 4 - -If the ``group_size`` is less than the number of elements, the axis is broken up into block tiles: - -.. 
figure:: ../_static/mx/tensor_axis_1_tiled.png - :align: center - - ch_axis = 1 and group_size = 2 - -Each block has its own scale value. - -4. Set up the calibration data (this is required for weight only and dynamic quantization as well) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - from torch.utils.data import DataLoader - text = "Hello, how are you?" - tokenized_outputs = tokenizer(text, return_tensors="pt") - calib_dataloader = DataLoader(tokenized_outputs['input_ids']) - -If using static quantization, ensure the tensor shape of the calibration data matches the shape of the data intended for use with the model. - -5. Apply the quantization -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - from quark.torch import ModelQuantizer - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, calib_dataloader) - -This step calculates the block scales, applies them to the element values, and performs quantization to the selected element data type. - -How are the scales calculated? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. Calculate the maximum absolute value for every block: - -.. figure:: ../_static/mx/tensor_abs_max.png - :align: center - -2. Using this value, calculate the shared exponent by: - - a. Getting its log2 value, - b. Rounding it down to the nearest integer power, and - c. Subtracting the maximum exponent value the chosen element data type can represent. - -.. figure:: ../_static/mx/shared_exponent.png - :align: center - -3. Finally, raise 2 to the power of the shared exponent to obtain the scale: - -.. figure:: ../_static/mx/scale_po2.png - :align: center - -How are the scales used? -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. figure:: ../_static/mx/quant_dequant.png - :align: center - -Conclusion ----------- - -Congratulations! By following the steps above, you should now have a model quantized with MX data types ready for inference. 
- -This tutorial also provides a better understanding of what MX means and why it might be beneficial to use. \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level - Copy.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level - Copy.rst deleted file mode 100644 index a1c3c251..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level - Copy.rst +++ /dev/null @@ -1,107 +0,0 @@ -Two Level Quantization Formats (MX4, MX6, MX9: shared Microexponents) -===================================================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -AMD Quark supports the MX6 and MX9 quantization formats through ``quark.torch``, as introduced in `With Shared Microexponents, A Little Shifting Goes a Long Way `__. - -The novelty of these quantization formats lies in the way quantization scales are computed and stored. For a general introduction to quantization and its use in AMD Quark, refer to the :doc:`Quantization with AMD Quark <../intro>` documentation. - -Context: Uniform Integer Quantization -------------------------------------- - -Quantization typically aims to use fewer bits per weight of a high-precision matrix :math:`W` of shape :math:`[m, n]`, originally in float32, float16, or bfloat16 precision. A classic quantization technique is uniform integer quantization, such as INT8 quantization, which uses the following scheme: - -.. 
math:: - - x_Q &= \text{round}\left(\frac{x_R}{s}\right) \in [-128, -127,..., 127] \\ - x_R &= s \times x_Q \hspace{3ex}\text{(float value).} - -Here, :math:`s` is the **scale** factor, :math:`x_Q` represents a quantized value (e.g., an int8 value), and :math:`x_R` represents the high-precision value (typically float16, bfloat16, or float32). - -.. figure:: ../_static/quant/per_tensor.png - :align: center - :width: 75% - - Uniform integer per-tensor quantization. - -Such a quantization scheme necessarily leads to quantization error. To preserve model prediction quality, a strategy is to allow more granular scales. For example, instead of computing a single scale :math:`s` for the whole matrix :math:`W`, increase the granularity by computing one scale per column, or even one scale per group of size :math:`k` within a column, as shown below. - -.. figure:: ../_static/quant/block.png - :align: center - :width: 75% - - Per-block quantization, with the block size :math:`k_1 = 6`. - -Increasing this granularity effectively means considering only a subset of values from :math:`W` to compute the relevant scale :math:`s` for this subset. - -Another strategy to balance quantization error with the number of bits per weight is to use a different data type to store the scales. A common approach is to store scales as float16 or float32 values, but scales can also be constrained to be powers of two, implementing the dequantization operation :math:`s \times x_Q` as a simple bit shift (similarly for the quantization operation). Thus, instead of storing the scale :math:`s` on 16 or 32 bits, it can be stored on a lower bitwidth, e.g., 8 bits. - -Two-level Quantization: MX6 and MX9 Data Types ----------------------------------------------- - -Refer to MX9, MX6, and MX4 specifications in `[1] `__. 
-The MX6 and MX9 data types leverage both the granularity of the scale factors and the precision allocated to them to: - -- Minimize the number of bits per weight -- Minimize degradation in predictive performance due to quantization -- Be hardware-friendly - -To achieve these goals, the classic quantization scheme :math:`x_R = s \times x_Q` is decomposed into - -.. math:: - - x_R = s_b \times s_{sb} \times x_Q - -where :math:`s_b` stands for the block scale (1st level), and :math:`s_{sb}` stands for the subblock scale (2nd level). - -.. figure:: ../_static/quant/mx_layout.png - :align: center - :width: 75% - - A dummy example for a two-level quantization scheme, with the block size :math:`k_1 = 6`. The different colors for :math:`s_{sb}` indicate different values per subblock. - -For example, in the MX9 data type, the block scale :math:`s_b` is an 8-bit (:math:`d_1 = 8`) power of two (within :math:`[2^{-127}, ..., 2^{127}]`) scale, shared over :math:`k_1 = 16` values, while the subblock scale :math:`s_{sb}` is a 1-bit (:math:`d_2 = 1`) power of two scale (effectively, :math:`2^{0}` or :math:`2^{-1}`) shared over :math:`k_2 = 2` values. - -The mantissa bit-width :math:`m` represents the number of bits used to store the quantized value :math:`x_Q`, effectively using :math:`2^m` possible different bins. - -The total number of bits per value is - -.. math:: - - (m + 1) + \frac{d_1}{k_1} + \frac{d_2}{k_2} - -where :math:`m + 1` accounts for the sign bit and the :math:`m` bits for storing :math:`x_Q`, and the two other terms split the storing cost of :math:`s_b` and :math:`s_{sb}` over the values within the block and subblock. 
- -The intuition behind this quantization scheme is that while a few block scales :math:`s_b` are stored in relatively high precision (8 bits per scale per block of 16 values), many more subscales :math:`s_{sb}` are stored (with :math:`k_2 = 2`, half the number of values in the matrix) to allow for lower quantization error for each floating point value in subblocks. As these subscales use a very low bitwidth (1 bit), it is a storage (and compute, as bit shifts are used) cost that can be afforded. - -How are These Two-Level Scales Obtained? ----------------------------------------- - -Several strategies can be chosen, as long as they respect the constraints on the scales and sub-scales. In AMD Quark, this can be found at `quark/torch/kernel/hw_emulation/hw_emulation_interface.py`. The scales and sub-scales are computed as follows (using MX9 as an example): - -1. From the original float32, bfloat16, or float16 :math:`W` matrix, retrieve the maximum power of two exponent of each block of size :math:`k_1 = 16`, denoted :math:`e_{b,max}`. This can be retrieved from the exponent bits from the floating point representation :math:`(-1)^s2^e \times 1.m`. - -2. For each subblock of :math:`k_2 = 2` values within the block, determine whether both floating point values have an exponent strictly smaller than :math:`e_{b,max}`. - - - If that is the case, the values within the block are comparatively small, hence a **smaller scale** is desired, which amounts to a smaller quantization range and finer quantization of small values. Choose :math:`s_{sb} = 2^{-1}`. - - - If that is not the case, choose :math:`s_{sb} = 1` (no bit shift, no subscale really applied). - -3. The block scale is chosen as :math:`s_b = 2^{e_{b,max} - 8 + 2}`, where the :math:`2^{-(8 - 1 - 1)}` term is an implementation detail accounting for the hidden bit of floating point numbers, and base 2 to base 10 conversion of the mantissa :math:`(1.m)_2` [1]_. 
- -Finally, the global scale for a subblock of two values is :math:`s = s_b \times s_{sb} = 2^{e_{b,max} - 8 + 2} \times 2^{(\text{-1 or 0})}`. - -Hardware Mapping ----------------- - -Why is this quantization scheme interesting in terms of mapping it to hardware? - -One element is that scaling can be implemented as bit shifts, both for the block scales and subblock scales, as these are stored as powers of two. - -Notes ------ - -.. [1] In short, for MX9: :math:`(1.m)_2 = 2^1 \times (0.m)_2 = 2^1 \times 2^{-7} \times m_{10}`, where subscripts represent the base 2 and 10. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level.rst deleted file mode 100644 index a1c3c251..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_adv_two_level.rst +++ /dev/null @@ -1,107 +0,0 @@ -Two Level Quantization Formats (MX4, MX6, MX9: shared Microexponents) -===================================================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -AMD Quark supports the MX6 and MX9 quantization formats through ``quark.torch``, as introduced in `With Shared Microexponents, A Little Shifting Goes a Long Way `__. - -The novelty of these quantization formats lies in the way quantization scales are computed and stored. For a general introduction to quantization and its use in AMD Quark, refer to the :doc:`Quantization with AMD Quark <../intro>` documentation. 
- -Context: Uniform Integer Quantization -------------------------------------- - -Quantization typically aims to use fewer bits per weight of a high-precision matrix :math:`W` of shape :math:`[m, n]`, originally in float32, float16, or bfloat16 precision. A classic quantization technique is uniform integer quantization, such as INT8 quantization, which uses the following scheme: - -.. math:: - - x_Q &= \text{round}\left(\frac{x_R}{s}\right) \in [-128, -127,..., 127] \\ - x_R &= s \times x_Q \hspace{3ex}\text{(float value).} - -Here, :math:`s` is the **scale** factor, :math:`x_Q` represents a quantized value (e.g., an int8 value), and :math:`x_R` represents the high-precision value (typically float16, bfloat16, or float32). - -.. figure:: ../_static/quant/per_tensor.png - :align: center - :width: 75% - - Uniform integer per-tensor quantization. - -Such a quantization scheme necessarily leads to quantization error. To preserve model prediction quality, a strategy is to allow more granular scales. For example, instead of computing a single scale :math:`s` for the whole matrix :math:`W`, increase the granularity by computing one scale per column, or even one scale per group of size :math:`k` within a column, as shown below. - -.. figure:: ../_static/quant/block.png - :align: center - :width: 75% - - Per-block quantization, with the block size :math:`k_1 = 6`. - -Increasing this granularity effectively means considering only a subset of values from :math:`W` to compute the relevant scale :math:`s` for this subset. - -Another strategy to balance quantization error with the number of bits per weight is to use a different data type to store the scales. A common approach is to store scales as float16 or float32 values, but scales can also be constrained to be powers of two, implementing the dequantization operation :math:`s \times x_Q` as a simple bit shift (similarly for the quantization operation). 
Thus, instead of storing the scale :math:`s` on 16 or 32 bits, it can be stored on a lower bitwidth, e.g., 8 bits. - -Two-level Quantization: MX6 and MX9 Data Types ----------------------------------------------- - -Refer to MX9, MX6, and MX4 specifications in `[1] `__. -The MX6 and MX9 data types leverage both the granularity of the scale factors and the precision allocated to them to: - -- Minimize the number of bits per weight -- Minimize degradation in predictive performance due to quantization -- Be hardware-friendly - -To achieve these goals, the classic quantization scheme :math:`x_R = s \times x_Q` is decomposed into - -.. math:: - - x_R = s_b \times s_{sb} \times x_Q - -where :math:`s_b` stands for the block scale (1st level), and :math:`s_{sb}` stands for the subblock scale (2nd level). - -.. figure:: ../_static/quant/mx_layout.png - :align: center - :width: 75% - - A dummy example for a two-level quantization scheme, with the block size :math:`k_1 = 6`. The different colors for :math:`s_{sb}` indicate different values per subblock. - -For example, in the MX9 data type, the block scale :math:`s_b` is an 8-bit (:math:`d_1 = 8`) power of two (within :math:`[2^{-127}, ..., 2^{127}]`) scale, shared over :math:`k_1 = 16` values, while the subblock scale :math:`s_{sb}` is a 1-bit (:math:`d_2 = 1`) power of two scale (effectively, :math:`2^{0}` or :math:`2^{-1}`) shared over :math:`k_2 = 2` values. - -The mantissa bit-width :math:`m` represents the number of bits used to store the quantized value :math:`x_Q`, effectively using :math:`2^m` possible different bins. - -The total number of bits per value is - -.. math:: - - (m + 1) + \frac{d_1}{k_1} + \frac{d_2}{k_2} - -where :math:`m + 1` accounts for the sign bit and the :math:`m` bits for storing :math:`x_Q`, and the two other terms split the storing cost of :math:`s_b` and :math:`s_{sb}` over the values within the block and subblock. 
- -The intuition behind this quantization scheme is that while a few block scales :math:`s_b` are stored in relatively high precision (8 bits per scale per block of 16 values), many more subscales :math:`s_{sb}` are stored (with :math:`k_2 = 2`, half the number of values in the matrix) to allow for lower quantization error for each floating point value in subblocks. As these subscales use a very low bitwidth (1 bit), it is a storage (and compute, as bit shifts are used) cost that can be afforded. - -How are These Two-Level Scales Obtained? ----------------------------------------- - -Several strategies can be chosen, as long as they respect the constraints on the scales and sub-scales. In AMD Quark, this can be found at `quark/torch/kernel/hw_emulation/hw_emulation_interface.py`. The scales and sub-scales are computed as follows (using MX9 as an example): - -1. From the original float32, bfloat16, or float16 :math:`W` matrix, retrieve the maximum power of two exponent of each block of size :math:`k_1 = 16`, denoted :math:`e_{b,max}`. This can be retrieved from the exponent bits from the floating point representation :math:`(-1)^s2^e \times 1.m`. - -2. For each subblock of :math:`k_2 = 2` values within the block, determine whether both floating point values have an exponent strictly smaller than :math:`e_{b,max}`. - - - If that is the case, the values within the block are comparatively small, hence a **smaller scale** is desired, which amounts to a smaller quantization range and finer quantization of small values. Choose :math:`s_{sb} = 2^{-1}`. - - - If that is not the case, choose :math:`s_{sb} = 1` (no bit shift, no subscale really applied). - -3. The block scale is chosen as :math:`s_b = 2^{e_{b,max} - 8 + 2}`, where the :math:`2^{-(8 - 1 - 1)}` term is an implementation detail accounting for the hidden bit of floating point numbers, and base 2 to base 10 conversion of the mantissa :math:`(1.m)_2` [1]_. 
- -Finally, the global scale for a subblock of two values is :math:`s = s_b \times s_{sb} = 2^{e_{b,max} - 8 + 2} \times 2^{(\text{-1 or 0})}`. - -Hardware Mapping ----------------- - -Why is this quantization scheme interesting in terms of mapping it to hardware? - -One element is that scaling can be implemented as bit shifts, both for the block scales and subblock scales, as these are stored as powers of two. - -Notes ------ - -.. [1] In short, for MX9: :math:`(1.m)_2 = 2^1 \times (0.m)_2 = 2^1 \times 2^{-7} \times m_{10}`, where subscripts represent the base 2 and 10. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_basic_usage_pytorch.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_basic_usage_pytorch.rst deleted file mode 100644 index e1a48251..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_basic_usage_pytorch.rst +++ /dev/null @@ -1,111 +0,0 @@ -AMD Quark for PyTorch -===================== - -The :doc:`Getting started with AMD Quark <../basic_usage>` guide provides a general overview of the quantization process, irrespective of specific hardware or deep learning frameworks. This page details the features supported by the Quark PyTorch Quantizer and explains how to use it to quantize PyTorch models. - -Basic Example -------------- - -This example shows a basic use case on how to quantize the ``opt-125m`` model with the ``int8`` data type for ``symmetric`` ``per tensor`` ``weight-only`` quantization. We are following the :ref:`basic quantization steps from the Getting Started page `. - -1. Load the original floating-point model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We will use `Transformers `_, from Hugging Face, to fetch the model. - -.. code-block:: bash - - pip install transformers - -We start by specifying the model we want to quantize. For this PyTorch example, we instantiate the model through Hugging Face API: - -.. 
code-block:: python - - from transformers import AutoModelForCausalLM, AutoTokenizer - model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m") - model.eval() - tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m") - -2. (Optional) Define the data loader for calibration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The requirements of data loader are divided into two categories: - -**DataLoader not required** - -* Weight-only quantization (if advanced algorithms like AWQ are not used). -* Weight and activation dynamic quantization (if advanced algorithms like AWQ are not used). -* Advanced algorithms: Rotation. - -**DataLoader required** - -* Weight and activation static quantization. -* Advanced algorithms: SmoothQuant, AWQ and GPTQ. - -.. code-block:: python - - from torch.utils.data import DataLoader - text = "Hello, how are you?" - tokenized_outputs = tokenizer(text, return_tensors="pt") - calib_dataloader = DataLoader(tokenized_outputs['input_ids']) - -Refer to :doc:`Adding Calibration Datasets ` to learn more about how to use calibration datasets efficiently. - -3. Set the quantization configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Quark for PyTorch provides a granular API to handle diverse quantization scenarios, and it also offers streamlined APIs for common use cases. The example below demonstrates the granular API approach. - -.. 
code-block:: python - - from quark.torch.quantization.config.type import Dtype, ScaleType, RoundType, QSchemeType - from quark.torch.quantization.config.config import Config, QuantizationConfig - from quark.torch.quantization.observer.observer import PerTensorMinMaxObserver - from quark.torch.quantization import Int8PerTensorSpec - DEFAULT_INT8_PER_TENSOR_SYM_SPEC = Int8PerTensorSpec(observer_method="min_max", - symmetric=True, - scale_type="float", - round_method="half_even", - is_dynamic=False).to_quantization_spec() - - DEFAULT_W_INT8_PER_TENSOR_CONFIG = QuantizationConfig(weight=DEFAULT_INT8_PER_TENSOR_SYM_SPEC) - quant_config = Config(global_quant_config=DEFAULT_W_INT8_PER_TENSOR_CONFIG) - -4. Quantize the model -~~~~~~~~~~~~~~~~~~~~~ - -Once the model, input data, and quantization configuration are ready, quantizing the model is straightforward, as shown below: - -.. code-block:: python - - from quark.torch import ModelQuantizer - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, calib_dataloader) - -5. (Optional) Export the quantized model to other formats for deployment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Exporting a model is only needed when users want to deploy models in another Deep Learning framework, such as ONNX, Hugging Face safetensors. To export a quantized model, users need to freeze the quantized model. - -.. 
code-block:: python - - freezed_quantized_model = quantizer.freeze(quant_model) - from quark.torch import ModelExporter - - # Generate dummy input - for data in calib_dataloader: - input_args = data - break - - quant_model = quant_model.to('cuda') - input_args = input_args.to('cuda') - exporter = ModelExporter('export_path') - exporter.export_onnx_model(quant_model, input_args) - -If the code runs successfully, the terminal displays `[QUARK-INFO]: Model quantization has been completed.` - -Further reading ---------------- - -* Quantized models can be evaluated to compare its performance with the original model. Learn more on :doc:`Model Evaluation `. -* For more detailed information, see the section on :ref:`Advanced AMD Quark Features for PyTorch `. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_datasets.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_datasets.rst deleted file mode 100644 index 28beeaf2..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_datasets.rst +++ /dev/null @@ -1,52 +0,0 @@ -Adding Calibration Datasets -=========================== - -AMD Quark utilizes the `PyTorch Dataloader `__ for normalization during quantization calibration. The PyTorch Dataloader accepts instances of the PyTorch Dataset class as input. PyTorch datasets can be formatted as torch.Tensors, lists, or other types, provided they conform to specific rules. For the official guide on creating a Dataset, please refer to this `link `__. - -We provide an example of quantizing large language models using typical datasets such as ``pileval`` and ``wikitext``. You can find the example in the path ``quark/examples/torch/language_modeling/data_preparation.py``. We provide a detailed example of how to set up a dataloader and how to convert a Hugging Face dataset to a PyTorch dataloader. 
- -For large language models, the input data for PyTorch models is often represented as either a torch.Tensor or a dictionary. Here we provide three types of input PyTorch Dataset formats for the dataloader as examples. You can define your own PyTorch Dataset for the Dataloader, which must be compatible with PyTorch model input. - -Dataloader with Dataset as torch.Tensor ---------------------------------------- - -If the Dataset format is torch.Tensor, the method of generating a PyTorch Dataloader is simple. For example: - -.. code-block:: python - - input_tensor = torch.rand(128, 128) - calib_dataloader = DataLoader(input_tensor, batch_size=4, shuffle=False) - -Dataloader with List[Dict[str, torch.Tensor]] or List[torch.Tensor] -------------------------------------------------------------------- - -If the Dataset format is a list of dictionaries or a list of tensors: - -.. code-block:: python - - input_list = [{'input_ids': torch.rand(128, 128)}, {'input_ids': torch.rand(128, 128)}] - calib_dataloader = DataLoader(input_list, batch_size=None, shuffle=False) - -Dataloader with Dict[str, torch.Tensor] ---------------------------------------- - -If the Dataset format is a dictionary, you should define the function `collate_fn`, for example: - -.. code-block:: python - - def my_collate_fn(blocks: List[Dict[str, List[List[str]]]]) -> Dict[str, torch.Tensor]: - data_batch = {} - data_batch["input_ids"] = torch.Tensor([block["input_ids"] for block in blocks]) - if device: - data_batch["input_ids"] = data_batch["input_ids"].to(device) - return data_batch - - input_dict = {'input_ids': torch.rand(128, 128)} - calib_dataloader = DataLoader(input_dict, batch_size=4, collate_fn=my_collate_fn) - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_methods.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_methods.rst deleted file mode 100644 index fa1c3413..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_calibration_methods.rst +++ /dev/null @@ -1,27 +0,0 @@ -Calibration Methods -=================== - -AMD Quark for PyTorch supports the following calibration methods: - -- **MinMax Calibration Method**: The ``MinMax`` calibration method for - computing the quantization parameters based on the running min and - max values. This method uses the tensor min/max statistics to compute - the quantization parameters. The module records the running minimum - and maximum of incoming tensors and uses these statistics to compute - the quantization parameters. - -- **Percentile Calibration Method**: The ``Percentile`` calibration - method, often used in robust scaling, involves scaling features based - on percentile information from a static histogram, rather than using - the absolute minimum and maximum values. This method is particularly - useful for managing outliers in data. - -- **MSE Calibration Method**: The ``MSE`` (Mean Squared Error) - calibration method refers to a method where calibration is performed - by minimizing the mean squared error between the predicted outputs - and the actual outputs. This method is typically used in regression - contexts where the goal is to adjust model parameters or data - transformations to reduce the average squared difference between - estimated values and the true values. MSE calibration helps in - refining model accuracy by fine-tuning predictions to be as close as - possible to the real data points. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_debug.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_debug.rst deleted file mode 100644 index 4e571826..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_debug.rst +++ /dev/null @@ -1,110 +0,0 @@ -Debugging quantization degradation in AMD Quark -=============================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -Quantization is a destructive compression method that may degrade the predictive performance of quantized models. As we strive to strike a balance between model compression and preserving predictive capabilities of quantized models, it is useful to gain insight into **which layers are most sensitive to quantization, and thus likely which quantized layers degrade prediction quality the most**. - -AMD Quark provides a tool to analyze the quantization error of each of the quantized layers in a given model. This tool currently only supports quantization in **eager mode**, that is to say PyTorch default mode without using graph-based (``torch.compile``, ``torch.fx.GraphModule``) quantization. - -When using AMD Quark quantizer in eager mode, typically - -.. code-block:: python - - from quark.torch import ModelQuantizer - - # Define quant_config, model, optionally define dataloader for static quantization. - - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, dataloader) - -one can enable debugging features using the following environment variables: - -* ``QUARK_DEBUG``: Path to a folder that will store statistics and distribution plots of the quantized weights/activations. 
-* ``QUARK_DEBUG_ACT_HIST``: Whether to plot histograms for activations distributions. This is disabled by default, ``QUARK_DEBUG_ACT_HIST=1`` should be used to enable the feature. -* ``QUARK_DEBUG_INPUT_PICKLE``: Path to a pickled model input (typically a ``.pt`` file saved using ``torch.save``) that should be used to collect activations statistics (and optionally, distributions histograms). If this argument is not specified, the ``dataloader`` first batch will be used instead. -* ``QUARK_DEBUG_NAN``: Whether to raise an exception if a NaN is detected during the quantization process. This is disabled by default. - -Relevant metrics and plots are saved in the folder specified by the ``QUARK_DEBUG`` environment variable, for example: - -.. code-block:: - - ├── model.layers.0.mlp.down_proj.input_histogram.png - ├── model.layers.0.mlp.down_proj.input_qdq_histogram.png - ├── model.layers.0.mlp.down_proj.input_ref_histogram.png - ├── model.layers.0.mlp.down_proj.input_ref_histogram_absmean_ch0.png - ├── model.layers.0.mlp.down_proj.input_ref_histogram_absmean_ch1.png - ├── model.layers.0.mlp.down_proj.weight.png - ├── model.layers.0.mlp.down_proj.weight_stats.json - ├── model.layers.0.mlp.gate_proj.input_histogram.png - ├── model.layers.0.mlp.gate_proj.input_qdq_histogram.png - ├── model.layers.0.mlp.gate_proj.input_ref_histogram.png - ├── model.layers.0.mlp.gate_proj.input_ref_histogram_absmean_ch0.png - ├── model.layers.0.mlp.gate_proj.input_ref_histogram_absmean_ch1.png - ├── model.layers.0.mlp.gate_proj.weight.png - ├── model.layers.0.mlp.gate_proj.weight_stats.json - ├── ... - ... - ├── summary_io_quantization_error.png - ├── summary_ref_input_error.png - ├── summary_ref_output_error.png - └── summary_weight_error.png - -The file names correspond to the following: - -* ``*input_histogram.png``: Histogram of the activation inputs to a ``FakeQuantize`` layer. 
-* ``*input_qdq_histogram.png``: Histogram of the activation outputs of the ``FakeQuantize`` layer (after QDQ). -* ``*input_ref_histogram.png``: Histogram of the reference inputs at the point the ``FakeQuantize`` layer is inserted (input or output of a module). Note that this histogram is based on the **non-quantized model**. -* ``*input_ref_histogram_absmean_ch0.png``: Histogram of the reference inputs at the point the ``FakeQuantize`` layer is inserted, mean of absolute values reduced on the -2 dimension. Note that this histogram is based on the **non-quantized model**. -* ``*input_ref_histogram_absmean_ch1.png``: Histogram of the reference inputs at the point the ``FakeQuantize`` layer is inserted, mean of absolute values reduced on the -1 dimension. Note that this histogram is based on the **non-quantized model**. -* ``*weight.png``: Histogram of the non-quantized weight values. -* ``*summary_io_quantization_error.png``: **Bar plot over all layers** of the relative error of the output tensor of ``FakeQuantize`` compared to its input tensor, i.e. - -.. math:: - \text{mean}\left(\frac{|\text{FakeQuantize}_{output} - \text{FakeQuantize}_{input}|}{|\text{FakeQuantize}_{input}| + \epsilon}\right) - -* ``*summary_ref_input_error.png``: **Bar plot over all layers** of the relative error of the input tensor of ``FakeQuantize`` compared to the reference input tensor (non-quantized model), i.e. - -.. math:: - \text{mean}\left(\frac{|\text{FakeQuantize}_{input} - \text{ref_input}|}{|\text{ref_input}| + \epsilon}\right) - -* ``*summary_ref_output_error.png``: **Bar plot over all layers** of the relative error of the output tensor of ``FakeQuantize`` compared to the reference input tensor (non-quantized model, QDQ is identity), i.e. - -..
math:: - \text{mean}\left(\frac{|\text{FakeQuantize}_{output} - \text{ref_input}|}{|\text{ref_input}| + \epsilon}\right) - -* ``*summary_weight_error.png``: Summary of weight quantization error over each layer, **bar plot over all layers**. - -Here are some examples of these statistics/plots on a naive A8W8 integer static per-tensor quantization of ``meta-llama/Meta-Llama-3-8B-Instruct``: - -.. figure:: ../_static/debug/model.layers.0.mlp.up_proj.weight.png - :align: center - :scale: 30 % - - Example of a weight tensor distribution. - - -.. figure:: ../_static/debug/summary_ref_input_error.png - :align: center - :target: ../_static/debug/summary_ref_input_error.png - - Summary over all quantized layers of the relative error of the quantized module input compared to the non-quantized module input (from the reference non-quantized model). - -We see that the layer 31 (last layer) is very sensitive to quantization. In fact, the distribution of activations before the ``down_proj`` layer is very wide, making its quantization difficult with a simple min-max scheme: - -.. figure:: ../_static/debug/model.layers.31.mlp.down_proj.input_ref_histogram.png - :align: center - :scale: 30 % - - ``model.layers.31.mlp.down_proj`` reference (non-quantized) input distribution. We see a very large range of values. - -These indications may motivate us to quantize ``down_proj`` from the layer 31 (or perhaps all layers, or some other layers) in a different fashion, or to exclude it from being quantized. - -..
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_gguf_llamacpp.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_gguf_llamacpp.rst deleted file mode 100644 index 655f8561..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_gguf_llamacpp.rst +++ /dev/null @@ -1,212 +0,0 @@ -Bridge from Quark to llama.cpp -============================== - -Introduction ------------- - -`Quark `__ is a deep learning model quantization toolkit for quantizing models from PyTorch, ONNX, and other frameworks. It provides easy-to-use APIs for quantization and more advanced features than native frameworks. Quark supports multiple hardware backends and a variety of data types with state-of-the-art quantization algorithms integrated, such as AWQ, SmoothQuant, GPTQ, and more. - -After quantization, Quark can export the quantized model in different formats. Quark has already implemented :doc:`ONNX exporting ` and :doc:`Quark Format `. Now we introduce GGUF exporting in this tutorial. Thanks to this feature, you can obtain both high accuracy with Quark and high performance with GGML-based frameworks like ``llama.cpp``. - -What Is GGUF ------------- - -`GGUF `__ is a file format that aims to store models weights for inference and also execute them based on GGML runtimes. GGUF is a binary format designed for fast loading, fast saving, and easy reading. Models are traditionally developed using PyTorch or another framework, and then converted to GGUF to be executed by `llama.cpp `__, a new popular inference framework aiming to enable LLM inference with minimal setup and state-of-the-art performance on a wide variety of hardware - locally and in the cloud. Our experiments are all based on ``llama.cpp``. - -The structure of the GGUF file is shown in Figure 1: - -.. 
figure:: https://github.com/ggerganov/ggml/assets/1991296/c3623641-3a1d-408e-bfaf-1b7c4e16aa63 - :align: center - :alt: GGUF file structure - - Figure 1 - -One may think of a GGUF file as model config + PyTorch's model state_dict. The ``metadata`` key-value pairs correspond to model config while the ``tensors info`` key-value pairs + tensors data correspond to model state_dict. The quantization process actually converts tensors in fp32 or fp16 to tensors in other data types with less memory usage and more computing efficiency. GGUF exporting is mainly about writing quantized tensors to the tensor part of the GGUF file in the appropriate format. - -How Does Quark Do Quantization ------------------------------- - -Quark implements quantization by inserting quantization operators before and after normal operators, as shown in Figure 2. Quantizers are quite versatile as to support several data types and quantization schemes. - -.. figure:: ../../_static/quant_workflow.png - :align: center - :alt: Quantization workflow - - Figure 2 - -Quantizers are stateful containing information on data types and quantization schemes, such as scale, zero_point, group size for per-group quantization, etc. Exporting is to store weights and quantizer states in some format. - -How to Use GGUF Export in Quark -------------------------------- - -Step 1: Quantize Your Model -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There's a handy API named ``ModelQuantizer`` in Quark. After initializing quantization-related configs, a simple method call ``quantizer.quantize_model`` can get the work done. - -.. code:: python - - # 1. Set model - from transformers import AutoModelForCausalLM, AutoTokenizer - model = AutoModelForCausalLM.from_pretrained("llama2-7b") - model.eval() - tokenizer = AutoTokenizer.from_pretrained("llama2-7b") - - # 2. 
Set quantization configuration - from quark.torch.quantization.config.type import Dtype, ScaleType, RoundType, QSchemeType - from quark.torch.quantization.config.config import Config, QuantizationSpec, QuantizationConfig - from quark.torch.quantization.observer.observer import PerTensorMinMaxObserver - DEFAULT_UINT4_PER_GROUP_ASYM_SPEC = QuantizationSpec(dtype=Dtype.uint4, - observer_cls=PerChannelMinMaxObserver, - symmetric=False, - scale_type=ScaleType.float, - round_method=RoundType.half_even, - qscheme=QSchemeType.per_group, - ch_axis=0, - is_dynamic=False, - group_size=32) - - DEFAULT_W_UINT4_PER_GROUP_CONFIG = QuantizationConfig(weight=DEFAULT_UINT4_PER_GROUP_ASYM_SPEC) - quant_config = Config(global_quant_config=DEFAULT_W_UINT4_PER_GROUP_CONFIG) - - # 3. Define calibration dataloader (still need this step for weight only and dynamic quantization) - from torch.utils.data import DataLoader - text = "Hello, how are you?" - tokenized_outputs = tokenizer(text, return_tensors="pt") - calib_dataloader = DataLoader(tokenized_outputs['input_ids']) - - # 4. In-place replacement with quantized modules in model - from quark.torch import ModelQuantizer - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, calib_dataloader) - -Step 2: Export to GGUF -~~~~~~~~~~~~~~~~~~~~~~ - -There's another easy-to-use API named ``ModelExporter`` to export quantized models. To export GGUF models, call ``exporter.export_gguf_model`` - -.. 
code:: python - - # If you want to export the quantized model, please freeze the quantized model first - freezed_quantized_model = quantizer.freeze(quant_model) - export_path = "./output_dir" - model_dir = "" - from quark.torch import ModelExporter - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - config = ExporterConfig(json_export_config=JsonExporterConfig()) - exporter = ModelExporter(config=config, export_dir=export_path) - exporter.export_gguf_model(model, model_dir, model_type) - -After running the code above successfully, there will be a ``.gguf`` file under export_path, ``./output_dir/llama.gguf`` for example. You can refer to `user guide ` for more information. - -Step 3: Run with llama.cpp -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -First, follow the official `docs `__ to build ``llama.cpp``. After building successfully, there will be a few executables, such as *main* for inference, *perplexity* for evaluation, *quantize* for quantization, etc. Most of the executables take GGUF model as input. You can evaluate the exported GGUF model to get the perplexity value by running: - -.. code:: bash - - perplexity -m -f - -How Does It Work ----------------- - -As mentioned above, the export API stores weights and quantizer states into GGUF files. To export quantized models to valid GGUF models, weights and quantizer states have to be encoded into valid GGUF data types. There are some defined GGUF data types corresponding to different quantization schemes, such as ``Q4_0``, ``Q4_1``, ``Q8_0``, ``Q8_1``, etc. You can refer to `ggml-common.h `__ for more data types and their definition. Some of the GGUF dtypes and their corresponding quant schemes are shown in Table 1. - -.. 
table:: Some of GGUF dtypes and their corresponding quant schemes - :align: center - - ========== ========================================================== - GGUF dtype quant scheme - ========== ========================================================== - Q4_0 symmetric uint4 per-group quantization with group size 32 - Q4_1 asymmetric uint4 per-group quantization with group size 32 - Q8_0 symmetric uint8 per-group quantization with group size 32 - Q8_1 asymmetric uint8 per-group quantization with group size 32 - ========== ========================================================== - -As long as you find the GGUF data type that matches the quantization scheme of the quantized model in Quark, exporting to GGUF model is feasible. Thankfully, Quark supports a whole bunch of quantization schemes which match the majority of defined GGUF data types. - -Let's take *asymmetric int4 per-group* quantization with *group size 32* as an example, which is ``Q4_1`` in GGUF spec. Quantizer state for this quantization scheme are tensors for *weight*, *scale* and *zero_point* for each group. For example, for weight of shape *(N, 32)*, the shape of *scale* tensor and *zero_point* tensor are both *(N, 1)*. The definition of ``Q4_1`` in GGUF is as follows: - -.. code:: cpp - - #define QK4_1 32 - typedef struct { - union { - struct { - ggml_half d; // delta - ggml_half m; // min - } GGML_COMMON_AGGR; - ggml_half2 dm; - }; - uint8_t qs[QK4_1 / 2]; // nibbles / quants - } block_q4_1; - -Note that ``d`` is scale. ``m`` is the minimum value of this block. According to this definition, you need to convert *weight* + *scale tensor* + *zero_point tensor* to ``Q4_1`` blocks. There's one last question and we are done. In Quark, the storage is *weight* + *scale* + *zero_point*, however, in GGUF the storage is *weight* + *scale* + *min_val*. Are they equivalent to each other? The *quant* + *dequant* processes of each storage are shown in equation (1) and (2) respectively. 
:math:`x` denotes float value. :math:`\hat{x}` denotes the value after quant and dequant. - -.. math:: - - \begin{align} - \hat{x} &= [clamp(\lfloor \frac{x}{s} \rceil + z, 0, max\_quant) - z] \times s \tag{1} \\ - \hat{x} &= clamp(\lfloor \frac{x - min\_val}{s} \rceil, 0, max\_quant) \times s + min\_val \tag{2} \\ - \end{align} - -If you set :math:`min\_val` to the minimum value of the block, then Equation (1) and (2) are not equivalent, because Equation (1) could guarantee that 0 is still 0 after the transformation, but Equation (2) couldn't. Equation (2) could guarantee that the minimum value of the block will keep the same after the transformation but Equation (1) couldn't. - -However, if you set :math:`min\_val` to :math:`-s \times z`, they are equivalent. For :math:`min\_val = -s \times z`, we get: - -.. math:: - - \begin{align} - \hat{x} &= clamp(\lfloor \frac{x + s \times z}{s} \rceil, 0, max\_quant) \times s - s \times z \tag{3} \\ - \hat{x} &= clamp(\lfloor \frac{x}{s} + z \rceil, 0, max\_quant) \times s - s \times z \tag{4} \\ - \hat{x} &= clamp(\lfloor \frac{x}{s}\rceil + z, 0, max\_quant) \times s - s \times z \tag{5} \\ - \hat{x} &= [clamp(\lfloor \frac{x}{s} \rceil + z, 0, max\_quant) - z] \times s \tag{6} \\ - \end{align} - -It's exactly the same as Equation (1). - -Note that the process mentioned above doesn't involve any quantization algorithms. Quantization algorithms are agnostic to GGUF exporting, which means quantized models with ANY quantization algorithms can be exported to GGUF models. As long as the exported GGUF model matches the quant scheme involved. - -Experiments ------------ - -The dataset used for evaluation is ``wikitext2``. Download and extract the `wikitext-2-raw-v1.zip file `__. All the experiments are based on ``llama.cpp``'s commit ``bdcb8f42221bc40c411150a009a3d3a30fa74722``. - -First, use the script `convert_hf_to_gguf.py `__ to convert Hugging Face model ``Llama-2-7b`` to GGUF model named ``llama-2-7b-float.gguf``. 
Then, use the quantization feature of ``llama.cpp`` to get a quantized model named ``llama-2-7b-Q4_1.gguf`` with the command - -.. code:: bash - - quantize Llama-2-7b-float.gguf Llama-2-7b-Q4_1.gguf Q4_1 - -Next, use Quark to quantize ``Llama-2-7b`` with a scheme of weight-only int4 asymmetric along with AWQ and export the quantized model to a GGUF model named ``quark_exported_model.gguf``. Please refer to :doc:`../example_quark_torch_llm_ptq` to get the command. Then, evaluate all the three models and get perplexities with the command below: - -.. code:: bash - - perplexity -m quark_exported_model.gguf -f - -The results are shown in Table 2: - -.. table:: Experiment results - :align: center - - ========================= ================== - model perplexity - ========================= ================== - llama-2-7b-float.gguf 5.7964 +/- 0.03236 - llama-2-7b-Q4_1.gguf 5.9994 +/- 0.03372 - quark_exported_model.gguf 5.8952 +/- 0.03302 - ========================= ================== - -.. note:: - - There might be a discrepancy between the perplexity obtained from the GGUF model and that from Quark evaluation. There are two main reasons: - - 1. The implementation of perplexity calculation is a little different between ``llama.cpp`` and Quark. - - 2. For the experiment settings above, the quantization process in Quark is a little different from that in ``llama.cpp``. In Quark, only weights are quantized and activations are kept in float32 without being quantized. However, in ``llama.cpp``, activations are quantized to ``Q8_1`` implicitly when weights are in ``Q4_1``. - -.. note:: - - You should choose quant schemes that match ``llama.cpp`` as much as possible. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export.rst deleted file mode 100644 index d732a192..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export.rst +++ /dev/null @@ -1,14 +0,0 @@ -Exporting Quantized Models -========================== - -Quark torch not only supports our own torch export format `Quark format` (Json-Pth), -but also supports exporting in popular formats requested by downstream tools, including `ONNX`, `format for Hugging Face & vLLM (HF format)`, and `GGUF`. - -.. toctree:: - :maxdepth: 1 - - ONNX format - Hugging Face format (safetensors) - GGUF format - Quark format - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_gguf.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_gguf.rst deleted file mode 100644 index cd811b7d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_gguf.rst +++ /dev/null @@ -1,27 +0,0 @@ -GGUF Exporting -============== - -Currently, only asymmetric int4 per_group weight-only -quantization is supported, and the group_size must be 32. The models supported include -Llama2-7b, Llama2-13b, Llama2-70b, and Llama3-8b. - -Example of GGUF Exporting -------------------------- - -.. code:: python - - export_path = "./output_dir" - from quark.torch import ModelExporter - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - export_config = ExporterConfig(json_export_config=JsonExporterConfig()) - exporter = ModelExporter(config=export_config, export_dir=export_path) - exporter.export_gguf_model(model, tokenizer_path, model_type) - -After running the code above successfully, there will be a ``.gguf`` -file under export_path, ``./output_dir/llama.gguf`` for example. - -..
toctree:: - :hidden: - :maxdepth: 1 - - gguf_llamacpp.rst diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_hf.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_hf.rst deleted file mode 100644 index c35d2c41..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_hf.rst +++ /dev/null @@ -1,113 +0,0 @@ -Hugging Face format (safetensors format) -======================================== - -Hugging Face format (safetensors format) is an optional exporting format for Quark, and the file list of this exporting format is the same as the file list of the original Hugging Face model, with quantization information added to these files. Taking the llama2-7b model as an example, the exported file list and added information are as below: - -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| File name | Additional Quantization Information | -+==============================+=====================================================================================================================+ -| config.json | Original configuration, with quantization configuration added in a ``"quantization_config"`` key | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| generation_config.json | \- | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| model*.safetensors | Quantized checkpoint (weights, scaling factors, zero points) | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| model.safetensors.index.json | Mapping of weights names to safetensors files, in case the model weights are 
sharded into multiple files (optional) | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| special_tokens_map.json | \- | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| tokenizer_config.json | \- | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ -| tokenizer.json | \- | -+------------------------------+---------------------------------------------------------------------------------------------------------------------+ - -Exporting to Hugging Face format (safetensors format) ------------------------------------------------------ - -Here is an example of how to export to Hugging Face format (safetensors format) a Quark model using :py:meth:`.ModelExporter.export_safetensors_model`: - -.. 
code-block:: python - - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - from quark.torch.export.api import ModelExporter - from quark.torch.quantization.api import ModelQuantizer - from quark.torch.quantization.config.config import Int8PerTensorSpec, QuantizationConfig, Config - - from transformers import AutoModelForCausalLM - - quant_spec = Int8PerTensorSpec( - observer_method="min_max", - symmetric=True, - scale_type="float", - round_method="half_even", - is_dynamic=False - ).to_quantization_spec() - - global_quant_config = QuantizationConfig(weight=quant_spec) - quant_config = Config(global_quant_config=global_quant_config) - - export_config = ExporterConfig( - json_export_config=JsonExporterConfig(weight_format="real_quantized") - ) - - model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m") - - quantizer = ModelQuantizer(quant_config) - quantized_model = quantizer.quantize_model(model, dataloader=None) - quantized_model = quantizer.freeze(quantized_model) - - model_exporter = ModelExporter( - config=export_config, - export_dir="./opt-125m-quantized" - ) - model_exporter.export_safetensors_model( - model=quantized_model, - quant_config=quant_config - ) - -By default, :py:meth:`.ModelExporter.export_safetensors_model` exports models with |save_pretrained|_ using a Quark-specific format for the checkpoint and ``"quantization_config"`` key in the ``config.json`` file. This format may not directly be usable by some downstream libraries (AutoAWQ, vLLM). - -.. |save_pretrained| replace:: ``model.save_pretrained()`` -.. _save_pretrained: https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.save_pretrained - -Until downstream libraries support Quark quantized models, one may export models so that the weight checkpoint and ``config.json`` file targets a specific downstream libraries, using ``custom_mode="awq"`` or ``custom_mode="fp8"``. Example: - -.. 
code-block:: python - - # `custom_mode="awq"` would e.g. use `qzeros` instead of `weight_zero_point`, `qweight` instead of `weight` in the checkpoint. - # Moreover, the `quantization_config` in the `config.json` file is custom, and the full quark `Config` is not serialized. - model_exporter.export_safetensors_model( - model, - quant_config=quant_config, - custom_mode="awq" - ) - -In the ``config.json``, such an export results in using ``"quant_method": "awq"``, which can, for example, be loaded through `AutoAWQ `__ in `Transformers library `__. - -Loading quantized models saved in Hugging Face format (safetensors format) --------------------------------------------------------------------------- - -Quark provides the importing function for HF format export files. In other words, these files can be reloaded into Quark. After reloading, the weights of the quantized operators in the model are stored in the real_quantized format. - -Currently, this importing function supports weight-only, static, and dynamic quantization for FP8, INT8/UINT8, FP4, INT4/UINT4, AWQ and GPTQ. - -Here is an example of how to load a serialized quantized model from a folder containing the model (as ``*.safetensors``) and its artifacts (``config.json``, etc.), using :py:meth:`.ModelImporter.import_model_info`: - -.. code-block:: python - - from quark.torch.export.api import ModelImporter - from transformers import AutoConfig, AutoModelForCausalLM - import torch - - model_importer = ModelImporter( - model_info_dir="./opt-125m-quantized", - saved_format="safetensors" - ) - - # We only need the backbone/architecture of the original model, - # not its weights, as weights are loaded from the quantized checkpoint. 
- config = AutoConfig.from_pretrained("facebook/opt-125m") - with torch.device("meta"): - original_model = AutoModelForCausalLM.from_config(config) - - quantized_model = model_importer.import_model_info(original_model) diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_onnx.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_onnx.rst deleted file mode 100644 index daf3511a..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_onnx.rst +++ /dev/null @@ -1,35 +0,0 @@ -ONNX Exporting -============== - -PyTorch provides a function to export the ONNX graph at this -`link `__. -Quark supports the export of onnx graph for int4, int8, fp8, float16 and -bfloat16 quantized models. For int4, int8, and fp8 quantization, the -quantization operators used in onnx graph are -`QuantizeLinear `__\ \_\ `DequantizeLinear `__ -pair. For float16 and bfloat16 quantization, the quantization operators -are the cast_cast pair. Mixed quantization of int4/uint4 and int8/uint8 is -not supported currently. In other words, if the model contains both -quantized nodes of uint4/int4 and uint8/int8, this function cannot be -used to export the ONNX graph. -Only weight-only and static quantization are supported for now. - -Example of ONNX Exporting -------------------------- - -.. 
code:: python - - - export_path = "./output_dir" - batch_iter = iter(calib_dataloader) - input_args = next(batch_iter) - if args.quant_scheme in ["w_int4_per_channel_sym", "w_uint4_per_group_asym", "w_int4_per_group_sym", "w_uint4_a_bfloat16_per_group_asym"]: - uint4_int4_flag = True - else: - uint4_int4_flag = False - - from quark.torch import ModelExporter - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - export_config = ExporterConfig(json_export_config=JsonExporterConfig()) - exporter = ModelExporter(config=export_config, export_dir=export_path) - exporter.export_onnx_model(model, input_args, uint4_int4_flag=uint4_int4_flag) diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_quark.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_quark.rst deleted file mode 100644 index 20afaa34..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_export_quark_export_quark.rst +++ /dev/null @@ -1,65 +0,0 @@ -Quark Format -============ - -Quark Format Exporting ----------------------- - -.. note:: - For most use cases with external open-source libraries (Transformers, vLLM, etc.), the serialization format described on this page should not be used, and you should refer to the :doc:`safetensors format <./quark_export_hf>` instead. - -Quark format is a proprietary export format for Quark, and the file list of -this exporting format contains the quantized parameters (in a ``model_state_dict.pth`` file) such as weight, scale, and zero point and config.json with quantization configuration. - -Note that this model currently only supports exporting linear parts (which is sufficient for general large language modeling) -For other needs using quark export (e.g., exporting embedding layers, convolutional layers), use `Saving & Loading` below. -In fact, we are gradually migrating the `save and load` functionality to ``ModelExporter`` in `quark format`. 
- -Example of Quark Format Exporting -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - from quark.torch.export.api import ModelExporter - - json_export_config = JsonExporterConfig( - weight_format="real_quantized", - pack_method="reorder" - ) - export_config = ExporterConfig(json_export_config=json_export_config) - - exporter = ModelExporter(config=export_config, export_dir="./exported_model_dir") - exporter.export_quark_model(model, quant_config=quant_config) - -By default, ``ModelExporter.export_quark_model`` exports models using a Quark-specific format for the checkpoint and ``quantization_config`` format in the ``config.json`` file. - -This format may not directly be usable by some downstream libraries (vLLM) until downstream libraries support Quark quantized models. But it can be loaded and used by quark itself. - -This format supports two forms of weight saving: ``fake quantized`` will save the high precision weight after quantization, while ``real quantized`` will save the weights after the real quantization. You can configure this with ``weight_format``. - -.. code:: python - - from quark.torch.export.config.config import ExporterConfig, JsonExporterConfig - from quark.torch.export.api import ModelExporter - - json_export_config = JsonExporterConfig(weight_format="real_quantized", pack_method="reorder") - export_config = ExporterConfig(json_export_config=json_export_config) - - exporter = ModelExporter(config=export_config, export_dir=args.output_dir) - exporter.export_quark_model(model, quant_config=quant_config, custom_mode=args.custom_mode) - - -Quark Format Importing ----------------------- - -Models exported using quark format can be imported directly using quark. Quark chooses how to load the weights based on the information in the config. 
- -Example of Quark Format Importing -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch import ModelImporter - - importer = ModelImporter(model_info_dir=args.import_model_dir) - model = importer.import_model_info(model) diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_extensions.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_extensions.rst deleted file mode 100644 index a1aa57b1..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_extensions.rst +++ /dev/null @@ -1,9 +0,0 @@ - -Extensions for PyTorch -====================== - -.. toctree:: - :maxdepth: 1 - - example_quark_torch_pytorch_light - example_quark_torch_brevitas diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_llm_quark.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_llm_quark.rst deleted file mode 100644 index ae03a4b6..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_llm_quark.rst +++ /dev/null @@ -1,13 +0,0 @@ -Language Model Optimization -============================ - - -.. toctree:: - :maxdepth: 1 - - example_quark_torch_llm_pruning - example_quark_torch_llm_ptq - example_quark_torch_llm_qat - example_quark_torch_llm_eval - tutorial_rotation - tutorial_quarot diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_examples.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_examples.rst deleted file mode 100644 index 372c842b..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_examples.rst +++ /dev/null @@ -1,35 +0,0 @@ -Accessing PyTorch Examples -========================== - -You can get the example code after downloading and unzipping ``amd_quark.zip`` (refer to :doc:`Installation Guide <../install>`). -The example folder is in amd_quark.zip. - - Directory Structure of the ZIP File: - - :: - - + quark.zip - + amd_quark.whl - + examples - + torch # HERE ARE THE PYTORCH EXAMPLES - + language_modeling - + diffusers - + ... 
- + onnx - + image_classification - + language_models - + ... - + ... - -.. toctree:: - :caption: PyTorch Examples in Quark for This Release - :maxdepth: 1 - - Diffusion Model Quantization - AMD Quark Extension for Brevitas Integration - Integration with AMD Pytorch-light (APL) - Language Model Pruning - Language Model PTQ - Language Model QAT - Language Model Evaluation - Vision Model Quantization using FX Graph Mode diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_faq.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_faq.rst deleted file mode 100644 index 2f42bd23..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_pytorch_faq.rst +++ /dev/null @@ -1,37 +0,0 @@ -Frequently Asked Questions (FAQ) -================================ - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -AMD Quark for Pytorch ---------------------- - -Environment Issues -~~~~~~~~~~~~~~~~~~ - -**Known Issue**: Windows CPU mode does not support fp16. - -Because of an existing PyTorch `issue `__\ , Windows CPU mode cannot perfectly support fp16. - -C++ Compilation Issues -~~~~~~~~~~~~~~~~~~~~~~ - -**Known Issue**: Stuck in the compilation phase for a long time (over ten minutes), and terminal shows: - -.. code-block:: bash - - [QUARK-INFO]: Configuration checking start. - [QUARK-INFO]: C++ kernel build directory [cache folder path]/torch_extensions/py39... - -**Solution**: - -Delete the cache folder ``[cache folder path]/torch_extensions`` and run AMD Quark again. - -.. 
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_schemes.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_schemes.rst deleted file mode 100644 index 4f61d3ce..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_schemes.rst +++ /dev/null @@ -1,22 +0,0 @@ -Quantization Schemes -==================== - -AMD Quark for PyTorch is capable of handling ``per tensor``, ``per channel`` -, and ``per group`` quantization, supporting both symmetric and asymmetric -methods. - -- **Per Tensor Quantization** means quantizing the tensor with one - scalar. The scaling factor is a scalar. - -- **Per Channel Quantization** means that for each dimension, typically - the channel dimension of a tensor, the values in the tensor are - quantized with different quantization parameters. The scaling factor - is a 1-D tensor, with the length of the quantization axis. For the - input tensor with shape ``(D0, ..., Di, ..., Dn)`` and ``ch_axis=i``, - the scaling factor is a 1-D tensor of length ``Di``. - -- **Per Group Quantization** means dividing the tensor into smaller - blocks that are independently quantized. The scaling factor has the - same dimension with the input tensor. For the input tensor with shape - ``(D0, ..., Di, ..., Dn)``, ``ch_axis=i``, and ``group_size=m``, - the scaling factor has the shape of ``(D0, ..., Di/m, ..., Dn)``. 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_strategies.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_strategies.rst deleted file mode 100644 index f455c6a2..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_strategies.rst +++ /dev/null @@ -1,74 +0,0 @@ -Quantization Strategies -======================= - -AMD Quark for PyTorch offers three distinct quantization strategies tailored to meet the requirements of various hardware backends: - -- **Post Training Weight-Only Quantization**: The weights are quantized ahead of time, but the activations are not quantized (using the original float data type) during inference. - -- **Post Training Static Quantization**: Quantizes both the weights and activations in the model. To achieve the best results, this process necessitates calibration with a dataset that accurately represents the actual data, which allows for precise determination of the optimal quantization parameters for activations. - -- **Post Training Dynamic Quantization**: Quantizes the weights ahead of time, while the activations are quantized dynamically at runtime. This method allows for a more flexible approach, especially when the activation distribution is not well-known or varies significantly during inference. - -Here is an example that covers the different quantization strategies: - -.. code:: python - - # 1. Set model - from transformers import AutoModelForCausalLM, AutoTokenizer - model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m") - model.eval() - tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m") - - # 2. Set quantization configuration - from quark.torch.quantization.config.type import Dtype, ScaleType, RoundType, QSchemeType - from quark.torch.quantization.config.config import Config, QuantizationSpec, QuantizationConfig - from quark.torch.quantization.observer.observer import PerChannelMinMaxObserver, PerTensorMinMaxObserver - - # 2-1. 
For weight-only quantization, use the following lines (enabled by default in this example). - DEFAULT_UINT4_PER_GROUP_ASYM_SPEC = QuantizationSpec(dtype=Dtype.uint4, - observer_cls=PerChannelMinMaxObserver, - symmetric=False, - scale_type=ScaleType.float, - round_method=RoundType.half_even, - qscheme=QSchemeType.per_group, - ch_axis=0, - is_dynamic=False, - group_size=32) - DEFAULT_W_UINT4_PER_GROUP_CONFIG = QuantizationConfig(weight=DEFAULT_UINT4_PER_GROUP_ASYM_SPEC) - quant_config = Config(global_quant_config=DEFAULT_W_UINT4_PER_GROUP_CONFIG) - - # 2-2. For dynamic quantization, please uncomment the following lines. - # INT8_PER_TENSOR_DYNAMIC_SPEC = QuantizationSpec(dtype=Dtype.int8, - # qscheme=QSchemeType.per_tensor, - # observer_cls=PerTensorMinMaxObserver, - # symmetric=True, - # scale_type=ScaleType.float, - # round_method=RoundType.half_even, - # is_dynamic=True) - # DEFAULT_W_INT8_A_INT8_PER_TENSOR_DYNAMIC_CONFIG = QuantizationConfig(input_tensors=INT8_PER_TENSOR_DYNAMIC_SPEC, - # weight=INT8_PER_TENSOR_DYNAMIC_SPEC) - # quant_config = Config(global_quant_config=DEFAULT_W_INT8_A_INT8_PER_TENSOR_DYNAMIC_CONFIG) - - # 2-3. For static quantization, please uncomment the following lines. - # FP8_PER_TENSOR_SPEC = QuantizationSpec(dtype=Dtype.fp8_e4m3, - # qscheme=QSchemeType.per_tensor, - # observer_cls=PerTensorMinMaxObserver, - # is_dynamic=False) - # DEFAULT_W_FP8_A_FP8_PER_TENSOR_CONFIG = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC, - # weight=FP8_PER_TENSOR_SPEC) - # quant_config = Config(global_quant_config=DEFAULT_W_FP8_A_FP8_PER_TENSOR_CONFIG) - - # 3. Define calibration dataloader (this step is still needed for weight-only and dynamic quantization) - from torch.utils.data import DataLoader - text = "Hello, how are you?" - tokenized_outputs = tokenizer(text, return_tensors="pt") - calib_dataloader = DataLoader(tokenized_outputs['input_ids']) - - # 4. 
In-place replacement with quantized modules in model - from quark.torch import ModelQuantizer - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, calib_dataloader) - -The strategies share the same user API. -You simply need to set the strategy through the quantization configuration, as demonstrated in the previous example. -For more details about setting quantization configuration, refer to the "Configuring AMD Quark for PyTorch" chapter. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_symmetry.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_symmetry.rst deleted file mode 100644 index 726404e7..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quantization_symmetry.rst +++ /dev/null @@ -1,4 +0,0 @@ -Quantization Symmetry -===================== - -``Symmetric/Asymmetric Quantization`` is primarily used to describe the quantization of integers. ``Symmetric Quantization`` involves scaling the data by a fixed scaling factor, and the zero-point is generally set at zero. ``Asymmetric Quantization`` uses a scaling factor and a zero-point that can shift, allowing the zero of the quantized data to represent a value other than zero. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_save_load.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_save_load.rst deleted file mode 100644 index ec112e97..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_save_load.rst +++ /dev/null @@ -1,61 +0,0 @@ -Save & Load Quantized Models -============================ - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." 
- -Saving ------- - -- Save the network architecture or configurations and parameters of the quantized model. - -- Support both eager and fx-graph model quantization. - -- For eager mode quantization, the model's configurations are stored in JSON file, and parameters including weight, bias, scale, and zero_point are stored in safetensors file. - -- For fx_graph mode quantization, the model's network architecture and parameters are stored in PTH file. - - -Example of Saving in Eager Mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch import save_params - save_params(model, model_type=model_type, export_dir="./save_dir") - -Example of Saving in FX-graph Mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch.export.api import save_params - save_params(model, - model_type=model_type, - args=example_inputs, - export_dir="./save_dir", - quant_mode=QuantizationMode.fx_graph_mode) - -Loading -------- - -- Instantiates a quantized model from saved model files, which were generated using the previous saving function. -- Supports both eager and FX-Graph model quantization. -- Only supports weight-only and static quantization for now. - -Example of Loading in Eager Mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch import load_params - model = load_params(model, json_path=json_path, safetensors_path=safetensors_path) - -Example of Loading in FX-graph Mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code:: python - - from quark.torch.quantization.api import load_params - model = load_params(pth_path=model_file_path, quant_mode=QuantizationMode.fx_graph_mode) diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_torch_best_practices.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_torch_best_practices.rst deleted file mode 100644 index 25cc4e98..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_quark_torch_best_practices.rst +++ /dev/null @@ -1,138 +0,0 @@ -Best Practices for Post-Training Quantization (PTQ) -=================================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark." - -This topic outlines best practices for Post-Training Quantization (PTQ) in AMD Quark PyTorch. It provides guidance on fine-tuning your quantization strategy to address accuracy degradation issues. The model ``meta-llama/Llama-3.1-8B-Instruct`` and code files from ``Quark/examples/torch/language_modeling/llm_ptq`` are used as an example to demonstrate the methodology in the following image. - - -.. figure:: ../_static/best_practice.png - :align: center - :width: 85% - - **Figure 1. Best Practices for AMD Quark Torch Quantization** - -Exclude Outlier Layers ----------------------- - -Outlier layers can significantly degrade accuracy during quantization. Excluding these layers can enhance the performance of the quantized model. In AMD Quark, you can exclude specific layers using the following commands: - -.. 
code-block:: bash - - cd Quark/examples/torch/language_modeling/llm_ptq/ - exclude_layers="*lm_head *layers.0.mlp.down_proj" - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_fp8_a_fp8 \ - --exclude_layers $exclude_layers \ - -Apply Quantization Algorithms ------------------------------ - -AMD Quark supports various quantization algorithms specifically designed for Large Language Models (LLMs). You can experiment with the following algorithms to enhance accuracy: - -- **AWQ (Activation-aware Weight Quantization)** - -AWQ determines optimal scaling factors for smooth through grid search and is widely used in low-bit weight-only quantization (for example, W4 quantization with group size 128). The algorithm can be used in the following command: - - -.. code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_uint4_per_group_asym \ - --group_size 128 \ - --dataset pileval_for_awq_benchmark \ - --quant_algo awq - -- **GPTQ** - -This method is primarily used for low-bit weight-only quantization (for example, W4/W3 per-channel). It quantizes weights column by column, minimizing second-order approximation errors. - -.. code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_uint4_per_group_asym \ - --dataset wikitext_for_gptq_benchmark \ - --quant_algo gptq - -- **SmoothQuant** - -SmoothQuant reduces activation outliers by shifting the quantization challenge from activations to weights. The parameter :math:`\alpha` controls the degree of merging. If you find the accuracy is not good after using SmoothQuant, consider fine-tuning the value of :math:`\alpha` in ``./models/llama/smooth_config.json``. - -.. 
code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_int8_a_int8_per_tensor_sym \ - --pre_quantization_optimization smoothquant - -- **AutoSmoothQuant** - -AutoSmoothQuant enhances SmoothQuant by automatically selecting the optimal :math:`\alpha` values for each layer, guided by the Mean Squared Error (MSE) loss across blocks. - -.. code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_int8_a_int8_per_tensor_sym \ - --dataset pileval_for_awq_benchmark \ - --quant_algo autosmoothquant - - -- **QuaRot** - -QuaRot eliminates activation outliers using a rotation technique (Hadamard transform). AMD Quark supports QuaRot algorithm that can be used as follows: - -.. code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_int8_a_int8_per_tensor_sym \ - --pre_quantization_optimization quarot - - - -- **Rotation** - -QuaRot employs an online Hadamard transform in its algorithm, requiring kernel support for hardware deployment. Inspired by QuaRot and QServer, AMD Quark introduces the "Rotation" method, which enhances accuracy without requiring kernel modifications. - -.. code-block:: bash - - python3 quantize_quark.py --model_dir meta-llama/Llama-3.1-8B-Instruct \ - --quant_scheme w_int8_a_int8_per_tensor_sym \ - --pre_quantization_optimization rotation - -Try Different Quantization Schemes ----------------------------------- - -Experimenting with various quantization schemes can help improve accuracy. But keep in mind that how to select an appropriate scheme depends on your specific requirements and hardware constraints. - -**Key Quantization Schemes:** - -- **Weight-only vs. Weight-Activation Quantization:** Activation quantization might lead to significant accuracy drop while weight-only quantization with extremely low bit-width might yield better results. 
- -- **Quantization Granularity:** - - - Weight quantization: Options include per-tensor, per-channel, or per-group quantization. - - - Activation quantization: Options include per-tensor or per-token quantization. - -- **Dynamic vs. Static Quantization:** For activation quantization, dynamic quantization often results in better accuracy than static quantization. - -- **Symmetric vs. Asymmetric:** Try experimenting with symmetric or asymmetric quantization based on the model's sensitivity to signed or unsigned values. - -- **Data Types (Dtypes):** AMD Quark supports several data types, including INT4, INT8, FP8, MX-FPX, FP16, and BFloat16. Choose the proper data type that best balances accuracy and efficiency for your model. - -- **KV Cache Quantization:** Skipping KV cache quantization typically results in better performance. Applying this approach to the entire KV cache or specific parts of it might lead to better accuracy. - -If accuracy issues persist after applying the above methods, consider trying :doc:`AMD Quark's debug tool ` to identify outlier layers and exclude them from quantization. - -Try QAT -------- - -Quantization-Aware Training (QAT) often delivers superior performance compared to PTQ, as demonstrated in models such as ChatGLM-3-6B. Consider using the AMD Quark QAT method. - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_smoothquant.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_smoothquant.rst deleted file mode 100644 index 5d788d93..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_smoothquant.rst +++ /dev/null @@ -1,267 +0,0 @@ -Activation/weight smoothing (SmoothQuant) -========================================= - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. 
Please do not confuse it with other products or technologies that share the name "Quark." - -AMD Quark supports through ``quark.torch`` a pre-processing step called SmoothQuant, introduced in `SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models `__. Other libraries (for example, Brevitas) sometimes refer to SmoothQuant as **activation equalization**. - -The key idea of SmoothQuant is to apply a non-destructive rescaling on the weights and activations in order to balance out the distribution of the two. This means that SmoothQuant can be applied to a model alone, without quantization, and the model outputs are identical to the original output. - -This is, for example, useful when later applying quantization, where the quantization difficulty is effectively then balanced between weights and activations, which typically results in better quantization results than without applying this pre-processing step. - -How does SmoothQuant work? -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Consider a linear layer, say - -.. math:: - - y = xW - -where :math:`x` is an activation of shape ``(batch_size, in_features)`` and :math:`W` is a weight of shape ``(in_features, out_features)``. - -This is equivalent to - -.. math:: - - y = \left(x \frac{1}{s}\right) \times s^TW - -where :math:`s` is called the *scaling factor*, which is a scalar or of shape ``(1, in_features)``. - -Because weights are frozen/fixed at inference time, the scale :math:`s^T` can be fused ahead of time into an updated weight :math:`W' = s^TW`. - -For activations, the scaling factor :math:`\frac{1}{s}` can be fused into a frozen preceding layer (AMD Quark approach), or in the worst case added as a pointwise ``mul`` node in the graph. - -In practice, for transformer-based networks, SmoothQuant is easily applied on the QKV projection, as well as on the first linear of the MLP (Multi-Layer Perceptron) layer, as seen in the following figure. 
SmoothQuant might be applied on some other linear layers, for which special care needs to be taken when fusing the activation scale in the preceding layer: - -* ``Linear1 -> activation -> Linear2``: This works well if the activation is pointwise linear (which may not be the case). - -.. note:: - - Fusing of :math:`\frac{1}{s_2}` into ``Linear1`` weight might compromise its quantization. - -* ``Linear1 -> any linear op -> Linear2``: The fusing of :math:`\frac{1}{s_2}` into ``Linear1`` weight might compromise its quantization. - -SmoothQuant implementation in AMD Quark supports these cases as well. - -.. figure:: ../_static/smoothquant/llama.png - :align: center - :scale: 45 % - - Simplified transformer architecture (based on llama), with SmoothQuant applied. - -If quantization is applied after this pre-processing, effectively the quantized tensors are :math:`W' = s^TW` and :math:`x' = x \frac{1}{s}`, which might have a distribution less sensitive to quantization due to the rescaling. - -The scaling factor is defined as: - -.. math:: - s = \frac{\max(|x|)^\alpha}{\max(|W|)^{(1 - \alpha)}}. - -Typically, the scaling factors are determined by using a calibration dataset that is run through the model in order to collect activation statistics. - -.. tip:: - - SmoothQuant has a hyperparameter ``alpha`` that specifies the balance between the quantization difficulty in weights and in activations. - - * When weight-only quantization is used after smoothing, ``alpha = 0.0`` is recommended to shift all the quantization difficulty from the activations into the weights. - * When activation-only quantization is used after smoothing, ``alpha = 1.0`` is recommended to shift all the quantization difficulty from the weights into the activations. - * When both weights and activations are quantized after smoothing, ``alpha`` must be tuned, but the SmoothQuant paper typically recommends a value between 0.4 and 0.9 depending on the model. 
- -It is possible to verify the idea that SmoothQuant helps lower the output quantization error on a minimal dummy example that uses a single ``Linear`` layer and a single ``LayerNorm`` to fold the activation scaling into. - -.. container:: toggle - - .. container:: header - - // - - .. code-block:: python - - import torch - import torch.nn as nn - import copy - from torch.utils.data import DataLoader - - from quark.torch import ModelQuantizer - from quark.torch.quantization.config.type import Dtype, ScaleType, RoundType, QSchemeType - from quark.torch.quantization.config.config import Config, QuantizationSpec, QuantizationConfig, SmoothQuantConfig - from quark.torch.quantization.observer.observer import PerTensorMinMaxObserver - - in_feat = 32 * 128 - out_feat = 64 * 128 - - class MySubModule(nn.Module): - def __init__(self): - super().__init__() - - self.layer_norm = nn.LayerNorm(in_feat, bias=False) - self.lin1 = nn.Linear(in_feat, out_feat, bias=False) - self.lin1.weight.data = torch.normal(0, 1, (out_feat, in_feat)) - - def forward(self, x): - x = self.layer_norm(x) - x = self.lin1(x) - return x - - class MyModel(nn.Module): - def __init__(self): - super().__init__() - - # We put the Linear + LayerNorm in a ModuleList, which is expected by AMD Quark, - # as the implementation is tailored for multi-layer transformer models. - self.layers = nn.ModuleList([MySubModule() for i in range(1)]) - - def forward(self, x): - for layer in self.layers: - x = layer(x) - return x - - model = MyModel() - model = model.eval() - model_copy = copy.deepcopy(model) - - # Create reference tensor with long tail. - inp = torch.empty(1, in_feat) - inp.cauchy_(sigma=5e-3) - inp = inp + torch.normal(0, 1, (out_feat, in_feat)) - - # Save the reference output. - with torch.no_grad(): - res_orig = model(inp) - - # Quantize the model using smoothquant. 
- quant_spec = QuantizationSpec( - dtype=Dtype.int8, - qscheme=QSchemeType.per_tensor, - observer_cls=PerTensorMinMaxObserver, - symmetric=False, - scale_type=ScaleType.float, - round_method=RoundType.half_even, - is_dynamic=False, - ch_axis=None, - group_size=None - ) - global_config = QuantizationConfig(weight=quant_spec, input_tensors=quant_spec) - quant_config = Config(global_quant_config=global_config) - - pre_quant_optimization = SmoothQuantConfig( - scaling_layers=[{"prev_op": "layer_norm", "layers": ["lin1"], "inp": "lin1"}], - model_decoder_layers="layers", - alpha=0.5, - scale_clamp_min=1e-12, - ) - quant_config.pre_quant_opt_config.append(pre_quant_optimization) - - quantizer = ModelQuantizer(quant_config) - calib_dataloader = DataLoader([{"x": inp}]) - - quant_model_smooth = quantizer.quantize_model(model, calib_dataloader) - quant_model_smooth = quant_model_smooth.eval() - - with torch.no_grad(): - res_quant_smooth = quant_model_smooth(inp) - - # Quantize the model without using smoothquant. - quant_config = Config(global_quant_config=global_config) - - quantizer = ModelQuantizer(quant_config) - - quant_model_nonsmooth = quantizer.quantize_model(model_copy, calib_dataloader) - quant_model_nonsmooth = quant_model_nonsmooth.eval() - - with torch.no_grad(): - res_quant_nonsmooth = quant_model_nonsmooth(inp) - - print("L1 error non-smooth:", (res_orig - res_quant_nonsmooth).abs().mean()) - print("L1 error smooth:", (res_orig - res_quant_smooth).abs().mean()) - -Giving: - -.. code:: - - L1 error non-smooth: 3.3892 - L1 error smooth: 1.5210 - -We see that applying SmoothQuant reduces the output error, compared to the reference non-quantized model. Beware that this may not always be the case though, and **where SmoothQuant is applied, as well as which alpha hyperparameter to use, needs to be tuned.** - -It is easy to check the difference in the weight and activation distribution before and after applying SmoothQuant: - -..
figure:: ../_static/smoothquant/weight.png - :align: center - - Weight quantization is originally easy (weights well spaced over all quantization bins). - -.. figure:: ../_static/smoothquant/activation.png - :align: center - - Activation distribution is originally "hard" (activation distribution is very narrow, does not use many quantization bins). - -As seen in the figures, increasing the weight quantization relative error and decreasing the activation quantization relative error can benefit the model by overall decreasing the output error compared to the reference model. - - -Using SmoothQuant in ``quark.torch`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The implementation of SmoothQuant in AMD Quark is designed for LLMs. One needs to define a pre-processing configuration: - -.. code-block:: python - - from quark.torch.quantization.config.config import SmoothQuantConfig, Config - - smoothquant_config = SmoothQuantConfig( - scaling_layers=[{"prev_op": "layer_norm", "layers": ["lin1"], "inp": "lin1"}], - model_decoder_layers="layers", - alpha=0.5, - scale_clamp_min=1e-12, - ) - - # There may be several pre-quantization optimizations, hence the list. - quant_config = Config(..., pre_quant_opt_config=[smoothquant_config]) - -The key ``scaling_layers`` is a list of dictionaries, each dictionary corresponding to one linear module in the model to apply SmoothQuant on, with: - -* ``prev_op``: The previous operator to fuse the activation scaling factor :math:`\frac{1}{s}` into. -* ``layers``: The list of linear layer (or layers) to apply SmoothQuant on. There may be several in case several layers have a common ``prev_op`` parent layer (for example: ``q_proj``, ``k_proj``, ``v_proj`` in a transformer). -* ``inp``: One of ``layers``. - -The key ``model_decoder_layers`` is the name of a ``ModuleList`` module holding the layers in the model. - -Examples of such configs can be found in ``quark/examples/torch/language_modeling/llm_ptq/models``.
Here is an example for -`Transformers' implementation of OPT `__: - -.. code-block:: json - - { - "name": "smooth", - "alpha": 0.5, - "scale_clamp_min": 1e-3, - "scaling_layers":[ - { - "prev_op": "self_attn_layer_norm", - "layers": ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"], - "inp": "self_attn.q_proj", - }, - { - "prev_op": "self_attn.v_proj", - "layers": ["self_attn.out_proj"], - "inp":"self_attn.out_proj" - }, - { - "prev_op": "final_layer_norm", - "layers": ["fc1"], - "inp": "fc1" - } - ], - "model_decoder_layers": "model.decoder.layers" - } - - - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_bfp16.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_bfp16.rst deleted file mode 100644 index c5be5146..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_bfp16.rst +++ /dev/null @@ -1,93 +0,0 @@ -.. raw:: html - - - -BFP16 (Block floating point) Quantization -========================================= - -Introduction ------------- - -In this tutorial, you learn how to use the BFP16 data type with AMD Quark. - -BFP is short for Block Floating Point. A floating-point number consists of one sign bit, eight exponent bits, and 23 mantissa bits. The main idea of Block Floating Point is a block of numbers sharing one exponent, and the mantissa of each number shifts right accordingly. - -This `paper `__ introduces an attempt to apply BFP to deep neural networks (DNNs). BFP16 is widely used across the AI industry. The definition of BFP16 in AMD Quark is a block consisting of eight numbers, the shared exponent consisting of eight bits, and the rest of each number consisting of one sign bit and seven mantissa bits. - - -How to use BFP16 in AMD Quark ------------------------------ - - -1. Install AMD Quark -~~~~~~~~~~~~~~~~~~~~ - -Follow the steps in the :doc:`installation guide <../install>` to install AMD Quark. - -2. Set the model: -~~~~~~~~~~~~~~~~~ - -.. 
code:: python - - from transformers import AutoModelForCausalLM, AutoTokenizer - model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m") - model.eval() - tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m") - -The model is retrieved from `Hugging Face `__ using their `Transformers `__ -library. The ``facebook/opt-125m`` model is used as an example. - -3. Set the quantization configuration: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: python - - from quark.torch.quantization.config.type import Dtype, ScaleType, RoundType, QSchemeType - from quark.torch.quantization.config.config import Config, QuantizationSpec, QuantizationConfig - from quark.torch.quantization.observer.observer import PerBlockBFPObserver - DEFAULT_BFP16_PER_BLOCK = QuantizationSpec(dtype=Dtype.int8, - symmetric=True, - observer_cls=PerBlockBFPObserver, # for BFP16 the observer_cls is always PerBlockBFPObserver - qscheme=QSchemeType.per_group, # for BFP16 the qscheme is always QSchemeType.per_group - is_dynamic=True, # this controls whether static or dynamic quantization is performed - ch_axis=-1, - scale_type=ScaleType.float, - group_size=8, - round_method=RoundType.half_even) - - DEFAULT_W_BFP16_PER_BLOCK_CONFIG = QuantizationConfig(weight=DEFAULT_BFP16_PER_BLOCK) - quant_config = Config(global_quant_config=DEFAULT_W_BFP16_PER_BLOCK_CONFIG) - -In AMD Quark, the one sign bit and seven mantissa bits are stored as a single ``int8``, so the ``dtype`` is ``Dtype.int8``. The observer class ``PerBlockBFPObserver`` is used for shared exponent calculation. - - -4. Do quantization -~~~~~~~~~~~~~~~~~~ - -To perform quantization, initialize a ``ModelQuantizer`` with the ``quant_config`` constructed above and call the method ``quantize_model``: - -.. 
code:: python - - from quark.torch import ModelQuantizer - from torch.utils.data import DataLoader - import torch - calib_dataloader = DataLoader(torch.randint(0, 1000, (1, 64))) # Using random inputs is for demonstration purpose only - quantizer = ModelQuantizer(quant_config) - quant_model = quantizer.quantize_model(model, calib_dataloader) - -In practice, users should construct meaningful calibration datasets. - -How BFP16 works in AMD Quark ----------------------------- - - -Quantizing a floating-point tensor to a BFP16 tensor consists of three main steps: - -1. Obtaining the shared exponent. -2. Shifting mantissas right accordingly. -3. Performing rounding on the mantissas. - -The maximum exponent in each block is used as the shared exponent. Then, the mantissa of each element is shifted right accordingly. Note that in BFP, the implicit one is included in the mantissa. Finally, rounding is performed and the trailing mantissa bits are removed. Currently, only the rounding method ``half_to_even`` is supported. diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_quarot.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_quarot.rst deleted file mode 100644 index 68b1f185..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_quarot.rst +++ /dev/null @@ -1,80 +0,0 @@ -Rotation-based quantization with QuaRot -======================================= - -QuaRot is a rotation-based quantization method that inserts rotation matrices into a model to reduce outliers. Reducing outliers significantly improves quantization accuracy. To illustrate the idea, consider the vector [1, 10], which has an outlier value of 10. If you rotate it by 45 degrees clockwise, you obtain [7.7782, 6.3640]; the values are closer together, effectively removing the outlier. In rotation-based quantization, this idea is applied to tensors that are much larger than 2×1 vectors. 
Specifically, a rotation matrix is inserted before quantization, and its inverse is applied after quantization. Thus, at a floating-point level, the network remains unchanged, but the quantized network achieves much better accuracy. - -The QuaRot method uses Hadamard matrices for rotations. An :math:`n \times n` Hadamard matrix is an orthogonal matrix of the form :math:`\frac{1}{\sqrt{n}}A`, where the entries of :math:`A` are all :math:`1` or :math:`-1` (see `QuaRot: Outlier-Free 4-Bit Inference in Rotated LLMs `_). Hadamard rotations are a standard choice for rotation matrices, and Hadamard transforms can often be accelerated using hardware-optimized kernels. In 2D, there are four Hadamard rotations: 45 degrees and 135 degrees clockwise, and 45 degrees and 135 degrees counterclockwise. - -QuaRot inserts four fundamental rotations into the model, called R1, R2, R3, and R4 (see `SpinQuant: LLM Quantization with Learned Rotations `_). R1 and R2 are offline rotations incorporated directly into the model's weights. R3 and R4 are online operations. They incur a small performance overhead since new operations are added into the model's computation graph. However, using kernels for fast Hadamard transforms, these operations can be accelerated if necessary. - -R3 and R4 are online operations. R3 is only needed when performing KV cache quantization, and R4 is only needed when performing activation quantization. - -AMD Quark supports the QuaRot method for Llama models by default, and it can be run in one line with the ``quantize_quark.py`` script. For example, to quantize Llama 3-8B, both weights and activations, to int8 per tensor while applying the QuaRot method to perform rotations before quantization, navigate to the ``examples/torch/language_modeling/llm_ptq`` folder and run: - -..
code-block:: bash - - python quantize_quark.py --model_dir meta-llama/Meta-Llama-3-8B --quant_scheme w_int8_a_int8_per_tensor_sym --pre_quantization_optimization quarot - -Here are the results for the perplexity of the quantized model Llama-3-8B, with and without Quarot: - -+----------------------------------------------+---------------------+-------------------------+ -| Quantization Strategy | Algorithm | Perplexity (Wikitext-2) | -+==============================================+=====================+=========================+ -| no quantization | | 6.13908052444458 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_per_tensor static quantization | N/A | 6.622321128845215 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_per_tensor static quantization | QuaRot (R1+R2 only) | 6.181324005126953 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_a_int8_per_tensor static quantization | N/A | 253.269912719726 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_a_int8_per_tensor static quantization | QuaRot | 6.6984167098999 | -+----------------------------------------------+---------------------+-------------------------+ - - -Here is an example of creating a QuaRot configuration file for an LLM such as Qwen, which has a standard decoder-only transformer architecture: - -.. figure:: ../_static/quarot/qwen_architecture.png - :align: center - :scale: 45 % - -The V and O projections in the attention block can be accessed as ``layer.self_attn.v_proj`` and ``layer.self_attn.o_proj``, respectively, for every layer in the list ``model.layers``. However, notice that the number of input features to the down-projection (intermediate-size) is :math:`18944 = 148*2^7`. 
AMD Quark currently only supports :math:`n \times n` Hadamard matrices when :math:`n = m * 2^k`, where :math:`m \in \{4, 12, 20, 40, 36, 52, 60, 108, 140, 156, 172\}` and :math:`k \geq 0`. Therefore, the online R4 rotation cannot be performed in this case. Instead, perform only the offline operations of R1 and R2 by setting the ``online-had`` flag to ``false``. Use the following configuration: - -.. code-block:: json - - { - "name": "quarot", - "online-had": false, - "backbone": "model", - "model_decoder_layers": "model.layers", - "v_proj": "self_attn.v_proj", - "o_proj":"self_attn.o_proj", - "self_attn": "self_attn" - } - - -Here are the results for the perplexity of the quantized model Qwen2-7B, with and without QuaRot: - -+----------------------------------------------+---------------------+-------------------------+ -| Quantization Strategy | Algorithm | Perplexity (Wikitext-2) | -+==============================================+=====================+=========================+ -| no quantization | | 7.891325950622559 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_per_tensor static quantization | N/A | 8.883856773376465 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_per_tensor static quantization | QuaRot (R1+R2 only) | 7.948962688446045 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_a_int8_per_tensor static quantization | N/A | 172.43882751464844 | -+----------------------------------------------+---------------------+-------------------------+ -| w_int8_a_int8_per_tensor static quantization | QuaRot (R1+R2 only) | 123.24969482421875 | -+----------------------------------------------+---------------------+-------------------------+ - -To further improve W8A8 quantization, we might combine QuaRot with SmoothQuant. - - -..
raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_rotation.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_rotation.rst deleted file mode 100644 index 729e8b7e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_tutorial_rotation.rst +++ /dev/null @@ -1,68 +0,0 @@ -.. raw:: html - - - -Quantizing with Rotation and SmoothQuant -======================================== - -Introduction ------------- - -Weight INT8 and Activation INT8 symmetric post-training quantization (W8A8) is one of the most common quantization methods supported by current hardware. It is highly compatible with hardware acceleration, facilitating efficient deployment on various platforms. - -The following are the four most common quantization strategies for W8A8: - -- Weight INT8 (per tensor) activation INT8 (per tensor) static quantization -- Weight INT8 (per channel) activation INT8 (per tensor) static quantization -- Weight INT8 (per channel) activation INT8 (per tensor) dynamic quantization -- Weight INT8 (per channel) activation INT8 (per token) dynamic quantization - -AMD Quark-Torch now offers two pre-optimizations that are suitable for W8A8 quantization: - -- Activation/weight smoothing (SmoothQuant). For more details, see :doc:`here `. -- `Rotation `_ (R1 in `SpinQuant `_ with Hadamard matrix) - -Sometimes, these two methods are combined by smoothing ``Linear-Linear`` patterns (Smooth_fc_fc) in decoder layers and rotating ``RMSNorm-Linear`` patterns. - -Results -------- - -In this example, ``meta-llama/Meta-Llama-3.1-8B-Instruct`` is used. All linear layers, excluding lm_head, are quantized using the pre-trained Float16 model (original Float16 model perplexity: 7.2155).
- -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| Quantization Strategy | Smooth(alpha=0.85) | Smooth(alpha=0.5) | Smooth_fc_fc(alpha=0.5) + Rotation | -+====================================================================+====================+===================+==========================================+ -| w_int8_per_tensor_a_int8_per_tensor static quantization | - | 19.42 | **8.58** | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_channel_a_int8_per_tensor static quantization | **8.37** | 15.95 | 8.40 | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_channel_a_int8_per_tensor dynamic quantization | **9.08** | 23.35 | 9.22 | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_channel_a_int8_per_token dynamic quantization | 7.35 | 7.29 | **7.27** | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_tensor_a_int8_per_tensor_kv_cache_int8_per_tensor | - | 20.51 | **8.58** | -| static quantization | | | | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_channel_a_int8_per_tensor_kv_cache_int8_per_tensor | **8.38** | 16.87 | 8.42 | -| static quantization | | | | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| 
w_int8_per_channel_a_int8_per_tensor_kv_cache_int8_per_tensor | **9.09** | 23.46 | 9.26 | -| dynamic quantization | | | | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ -| w_int8_per_channel_a_int8_per_token_kv_cache_int8_per_token | 7.35 | 7.29 | **7.28** | -| dynamic quantization | | | | -+--------------------------------------------------------------------+--------------------+-------------------+------------------------------------------+ - -'-' means perplexity > 30 - -Expanding with more models --------------------------- - -Examples are provided for some typical large language models (LLMs). However, if you want to try these strategies with new models, you may need to follow several steps. - -For Smooth, you can set hyperparameters and layer patterns manually with code or a JSON file. In addition, scripts are provided for generating the configuration automatically. - -For Rotation, you simply need to enable the rotation option. AMD Quark-Torch supports analyzing the model structure with the ``torch.compile`` graph, helping you identify if there are any pattern layers that could be rotated. This feature is user-friendly. \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_user_guide_config_description.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_user_guide_config_description.rst deleted file mode 100644 index 1866dddd..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_pytorch_user_guide_config_description.rst +++ /dev/null @@ -1,209 +0,0 @@ -Configuring PyTorch Quantization -================================ - -This topic describes the steps on how to set the quantization configuration in AMD Quark for PyTorch. - -Configuration of quantization in ``AMD Quark for PyTorch`` is set using Python ``dataclass`` because it is rigorous and helps you avoid typos. 
The class ``Config`` in ``quark.torch.quantization.config.config`` is provided for configuration. There are several steps to set up the configuration: - -- **Step 1**: Configure :py:class:`.QuantizationSpec` for ``torch.Tensors``. Specify attributes such as ``dtype``, ``observer_cls``, etc. -- **Step 2**: Establish ``QuantizationConfig`` for ``nn.Module``. Define the ``QuantizationSpec`` of ``input_tensors``, ``output_tensors``, ``weight``, and ``bias``. -- **Step 3** [Optional]: Set ``AlgoConfig`` for the model. -- **Step 4**: Set up the overall ``Config`` for the model. This includes: - - -.. toctree:: - :hidden: - :maxdepth: 1 - - Calibration Methods - Calibration Datasets - Quantization Strategies - Quantization Schemes - Quantization Symmetry - -Step 1: Configuring ``QuantizationSpec`` for torch.Tensors -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The class :py:class:`.QuantizationSpec` aims to describe the quantization specification for each tensor, including dtype, observer_cls, qscheme, is_dynamic, symmetric, etc. For example: - -.. 
code-block:: python - - from quark.torch.quantization.config.config import QuantizationSpec - from quark.torch.quantization.config.type import Dtype, QSchemeType, ScaleType, RoundType - from quark.torch.quantization.observer.observer import PlaceholderObserver, PerTensorMinMaxObserver, PerGroupMinMaxObserver - - BFLOAT16_SPEC = QuantizationSpec(dtype=Dtype.bfloat16, observer_cls=PlaceholderObserver) - - FP8_PER_TENSOR_SPEC = QuantizationSpec(dtype=Dtype.fp8_e4m3, - qscheme=QSchemeType.per_tensor, - observer_cls=PerTensorMinMaxObserver, - is_dynamic=False) - - INT8_PER_TENSOR_SPEC = Int8PerTensorSpec(observer_method="min_max", - symmetric=True, - scale_type=ScaleType.float, - round_method=RoundType.half_even, - is_dynamic=False).to_quantization_spec() - - UINT4_PER_GROUP_ASYM_SPEC = QuantizationSpec(dtype=Dtype.uint4, - observer_cls=PerGroupMinMaxObserver, - symmetric=False, - scale_type=ScaleType.float, - round_method=RoundType.half_even, - qscheme=QSchemeType.per_group, - ch_axis=1, - is_dynamic=False, - group_size=128) - -Details about each parameter of :py:class:`.QuantizationSpec`, as well as about each utility class (such as :py:class:`.Int8PerTensorSpec`, which makes it easier to define a quantization spec), are available in :doc:`the API documentation <../autoapi/quark/torch/quantization/config/config/index>`. - -Step 2: Establishing ``QuantizationConfig`` for nn.Module -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The class :py:class:`.QuantizationConfig` is used to describe the global, layer-type-wise, or layer-wise quantization information for each ``nn.Module``, such as ``nn.Linear``. For example: - -..
code-block:: python - - from quark.torch.quantization.config.config import QuantizationConfig - - W_FP8_A_FP8_PER_TENSOR_CONFIG = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC, - weight=FP8_PER_TENSOR_SPEC) - - W_INT8_A_INT8_PER_TENSOR_CONFIG = QuantizationConfig(input_tensors=INT8_PER_TENSOR_SPEC, - weight=INT8_PER_TENSOR_SPEC) - - W_UINT4_PER_GROUP_CONFIG = QuantizationConfig(weight=UINT4_PER_GROUP_ASYM_SPEC) - -Details about each parameter of :py:class:`.QuantizationConfig` are available in :doc:`the API documentation <../autoapi/quark/torch/quantization/config/config/index>`. - -Step 3: [Optional] Setting ``AlgoConfig`` for the model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you want to use AMD Quark's advanced algorithms such as AWQ, you should set up the required configuration. - -You should possess a thorough understanding of the methods and hyperparameters associated with the algorithms before configuring them. Each algorithm supports only certain ``QuantizationSpec`` settings. Ensure compatibility before running. - -Here are example algorithm configurations for Llama2-7b: - -..
code-block:: python - - from quark.torch.algorithm.awq.awq import AwqProcessor - from quark.torch.algorithm.awq.smooth import SmoothQuantProcessor - from quark.torch.algorithm.gptq.gptq import GptqProcessor - from quark.torch.quantization.config.config import AWQConfig, SmoothQuantConfig, GPTQConfig - - ALGORITHM_CONFIG=AWQConfig( - scaling_layers=[ - {'prev_op': 'input_layernorm', 'layers': ['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj'], 'inp': 'self_attn.q_proj', 'module2inspect': 'self_attn'}, - {'prev_op': 'self_attn.v_proj', 'layers': ['self_attn.o_proj'], 'inp': 'self_attn.o_proj'}, - {'prev_op': 'post_attention_layernorm', 'layers': ['mlp.gate_proj', 'mlp.up_proj'], 'inp': 'mlp.gate_proj', 'module2inspect': 'mlp', 'help': 'linear 1'}, - {'prev_op': 'mlp.up_proj', 'layers': ['mlp.down_proj'], 'inp': 'mlp.down_proj', 'help': 'linear 2'}], - model_decoder_layers='model.layers') - - ALGORITHM_CONFIG=SmoothQuantConfig( - alpha=0.5, - scale_clamp_min=0.001, - scaling_layers=[ - {'prev_op': 'input_layernorm', 'layers': ['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj'], 'inp': 'self_attn.q_proj', 'module2inspect': 'self_attn'}, - {'prev_op': 'self_attn.v_proj', 'layers': ['self_attn.o_proj'], 'inp': 'self_attn.o_proj'}, - {'prev_op': 'post_attention_layernorm', 'layers': ['mlp.gate_proj', 'mlp.up_proj'], 'inp': 'mlp.gate_proj', 'module2inspect': 'mlp', 'help': 'linear 1'}, - {'prev_op': 'mlp.up_proj', 'layers': ['mlp.down_proj'], 'inp': 'mlp.down_proj', 'help': 'linear 2'}], - model_decoder_layers='model.layers') - - ALGORITHM_CONFIG = GPTQConfig( - damp_percent=0.01, - desc_act=True, - static_groups=True, - true_sequential=True, - inside_layer_modules=['self_attn.k_proj', 'self_attn.v_proj', 'self_attn.q_proj', 'self_attn.o_proj', 'mlp.up_proj', 'mlp.gate_proj', 'mlp.down_proj'], - model_decoder_layers='model.layers' - ) - - ALGORITHM_CONFIG = RotationConfig( - scaling_layers = { - "first_layer": [ - {"prev_modules": ["model.embed_tokens"], 
- "norm_module": "model.layers.layer_id.input_layernorm", - "next_modules": ["model.layers.layer_id.self_attn.q_proj", "model.layers.layer_id.self_attn.k_proj", "model.layers.layer_id.self_attn.v_proj"]}, - {"prev_modules": ["model.layers.layer_id.self_attn.o_proj"], - "norm_module": "model.layers.layer_id.post_attention_layernorm", - "next_modules": ["model.layers.layer_id.mlp.up_proj", "model.layers.layer_id.mlp.gate_proj"]}], - "middle_layers": [ - {"prev_modules": ["model.layers.pre_layer_id.mlp.down_proj"], - "norm_module": "model.layers.layer_id.input_layernorm", - "next_modules": ["model.layers.layer_id.self_attn.q_proj", "model.layers.layer_id.self_attn.k_proj", "model.layers.layer_id.self_attn.v_proj"]}, - {"prev_modules": ["model.layers.layer_id.self_attn.o_proj"], - "norm_module": "model.layers.layer_id.post_attention_layernorm", - "next_modules": ["model.layers.layer_id.mlp.up_proj", "model.layers.layer_id.mlp.gate_proj"]}], - "last_layer": [ - {"prev_modules": ["model.layers.layer_id.mlp.down_proj"], - "norm_module": "model.norm", - "next_modules": ["lm_head"]}] - } - ) - - ALGORITHM_CONFIG = QuaRotConfig( - scaling_layers = { - "first_layer": [ - {"prev_modules": ["model.embed_tokens"], - "norm_module": "model.layers.layer_id.input_layernorm", - "next_modules": ["model.layers.layer_id.self_attn.q_proj", "model.layers.layer_id.self_attn.k_proj", "model.layers.layer_id.self_attn.v_proj"]}, - {"prev_modules": ["model.layers.layer_id.self_attn.o_proj"], - "norm_module": "model.layers.layer_id.post_attention_layernorm", - "next_modules": ["model.layers.layer_id.mlp.up_proj", "model.layers.layer_id.mlp.gate_proj"]}], - "middle_layers": [ - {"prev_modules": ["model.layers.pre_layer_id.mlp.down_proj"], - "norm_module": "model.layers.layer_id.input_layernorm", - "next_modules": ["model.layers.layer_id.self_attn.q_proj", "model.layers.layer_id.self_attn.k_proj", "model.layers.layer_id.self_attn.v_proj"]}, - {"prev_modules": 
["model.layers.layer_id.self_attn.o_proj"], - "norm_module": "model.layers.layer_id.post_attention_layernorm", - "next_modules": ["model.layers.layer_id.mlp.up_proj", "model.layers.layer_id.mlp.gate_proj"]}], - "last_layer": [ - {"prev_modules": ["model.layers.layer_id.mlp.down_proj"], - "norm_module": "model.norm", - "next_modules": ["lm_head"]}] - } - ) - -For AWQ, AMD Quark for PyTorch only supports ``AWQ`` with quantization data type as ``uint4/int4`` and ``per group``, running on ``Linux`` with the ``GPU mode`` for now. More details are available in the :py:class:`.AWQConfig` documentation. - - -For SmoothQuant, more details are available in the :py:class:`.SmoothQuantConfig` documentation. A high-level explanation about SmoothQuant is available in :doc:`Activation/weight smoothing (SmoothQuant) documentation `. - -For GPTQ, AMD Quark for PyTorch only supports ``GPTQ`` with quantization -data type as ``uint4/int4`` and ``per group``, running on ``Linux`` with -the ``GPU mode`` for now. More details are available in the :py:class:`.GPTQConfig` documentation. - - -Step 4: Setting up the overall ``Config`` for the model. --------------------------------------------------------- - -In :py:class:`.quark.torch.quantization.config.config.Config`, you should set instances for all information of quantization (all instances are optional except ``global_quant_config``). - -For example: - -.. 
code-block:: python - - # Example 1: W_INT8_A_INT8_PER_TENSOR - quant_config = Config(global_quant_config=W_INT8_A_INT8_PER_TENSOR_CONFIG) - - # Example 2: W_UINT4_PER_GROUP with advanced algorithm - quant_config = Config(global_quant_config=W_UINT4_PER_GROUP_CONFIG, algo_config=ALGORITHM_CONFIG) - EXCLUDE_LAYERS = ["lm_head"] # For language models - quant_config = replace(quant_config, exclude=EXCLUDE_LAYERS) - - # Example 3: W_FP8_A_FP8_PER_TENSOR with KV_CACHE_FP8 - quant_config = Config(global_quant_config=W_FP8_A_FP8_PER_TENSOR_CONFIG) - KV_CACHE_CFG = { - "*v_proj": - QuantizationConfig(input_tensors=quant_config.global_quant_config.input_tensors, - weight=quant_config.global_quant_config.weight, - output_tensors=FP8_PER_TENSOR_SPEC), - "*k_proj": - QuantizationConfig(input_tensors=quant_config.global_quant_config.input_tensors, - weight=quant_config.global_quant_config.weight, - output_tensors=FP8_PER_TENSOR_SPEC), - } - quant_config = replace(quant_config, layer_quant_config=KV_CACHE_CFG) - -More details are available in the :py:class:`.quark.torch.quantization.config.config.Config` documentation. \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_mi_gpus_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_mi_gpus_index.rst deleted file mode 100644 index 14e9904e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_mi_gpus_index.rst +++ /dev/null @@ -1,21 +0,0 @@ -Quark for AMD Instinct Accelerators -=================================== - -Depending on the GPU to be used, different quantization schemes may or may not have accelerated support in the underlying hardware. - -On all GPUs supported by PyTorch, quantized models can be evaluated using fake quantization (quantize-dequantize), effectively using a higher widely supported precision for compute (e.g., ``float16``). - -.. 
note:: - - As an example, AMD Instinct MI300 supports ``float8`` compute, which means that linear layers quantized in ``float8`` for both the activation and weights may use ``float8 @ float8 -> float16`` computation. - - On the other hand, Instinct MI210 and Instinct MI250 GPUs (CDNA2 architecture) do not support ``float8`` computations, and only ``QDQ`` can be used for this specific ``dtype`` and hardware. - -Below are some references on how you can leverage Quark to seamlessly run accelerated quantized models on AMD Instinct GPUs: - -.. toctree:: - :caption: Resources - :maxdepth: 1 - - FP8 (OCP fp8_e4m3) Quantization & Json_SafeTensors_Export with KV Cache <../../pytorch/example_quark_torch_llm_ptq> - Evaluation of Quantized Models <../../pytorch/example_quark_torch_llm_eval_perplexity> diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_index.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_index.rst deleted file mode 100644 index 8772bb7d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_index.rst +++ /dev/null @@ -1,44 +0,0 @@ -Quark for Ryzen AI NPU -====================== - -This section provides guidance on leveraging AMD Quark to deploy quantized models on the Ryzen AI Neural Processing Unit (NPU). -By utilizing the `Ryzen AI Software `_, developers can seamlessly run optimized models trained in PyTorch or TensorFlow on Ryzen AI-enabled processors. -Ryzen AI Software has integrated tools and libraries like AMD Quark, ONNX Runtime and the Vitis AI Execution Providers (EP) that facilitates efficient inference across various accelerators, including CPU and integrated GPU (iGPU), in addition to the NPU. - -.. 
image:: ../../_static/quark_ryzenai_overview.png - :alt: Quark Ryzen AI Overview - :align: center - :width: 80% - -Development Flow Steps ----------------------- - -- Trained Models: Trained models in popular frameworks such as PyTorch / TensorFlow are exported to ONNX format, to leverage ONNX Runtime to run on Ryzen AI supported processors. - -- Model Quantization: Use AMD Quark quantizer tools to convert your ONNX model into a quantized version, using the following quantization schemes: - - For CNN models: INT8 or BF16 - - For Transformer models: BF16 - - For LLMs: INT4 or BF16 - -- Deployment and Inference: Deploy the quantized model on Ryzen AI-enabled hardware through ONNX Runtime and Vitis AI Execution Provider. - -AMD Quark provides advanced tools for model quantization. This documentation will help you navigate the capabilities of Quark to run with Ryzen AI. - -Here you will find references on how you can leverage Quark to seamlessly run quantized models on the Ryzen AI NPU. -Ryzen AI leverages ONNX models to represent models and execute them through ONNX Runtime. - -To help you get started, we also have examples at the :ref:`ONNX Examples ` page! - -.. toctree:: - :caption: Resources - :maxdepth: 1 - - Quick Start for Ryzen AI - Best Practice for Ryzen AI in AMD Quark ONNX - Auto-Search for Ryzen AI ONNX Model Quantization <../../onnx/example_quark_onnx_ryzenai> - Quantizing LLMs for ONNX Runtime GenAI - FP32/FP16 to BF16 Model Conversion - Power-of-Two Scales (XINT8) Quantization - Float Scales (A8W8 and A16W8) Quantization - -Quark also delivers a plethora of post-processing tools that might be of use for Ryzen AI. Refer to the :doc:`ONNX Tools <../../onnx/tools>` to learn more! 
diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_ryzen_ai_best_practice.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_ryzen_ai_best_practice.rst deleted file mode 100644 index 36a37433..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_ryzen_ai_best_practice.rst +++ /dev/null @@ -1,157 +0,0 @@ -Best Practice for Ryzen AI in AMD Quark ONNX -============================================ - -This topic outlines the best practice for Post-Training Quantization (PTQ) in AMD Quark ONNX. It provides guidance on fine-tuning your quantization strategy to meet target quantization accuracy. - - -.. figure:: ../../_static/best_practice_in_quark_onnx.png - :align: center - :width: 85% - - **Figure 1. Best Practices for Quark ONNX Quantization** - -Pip Requirements ----------------- - -Install the necessary python packages: - -.. code-block:: bash - - python -m pip install -r requirements.txt - -Prepare model -------------- - -Download the ONNX float model from the `onnx/models `__ repo directly: - -.. code-block:: bash - - wget -P models https://github.com/onnx/models/raw/new-models/vision/classification/resnet/model/resnet50-v1-12.onnx - -Prepare Calibration Data ------------------------- - -You can provide a folder containing PNG or JPG files as calibration data folder. For example, you can download images from https://github.com/microsoft/onnxruntime-inference-examples/tree/main/quantization/image_classification/cpu/test_images as a quick start. Specifically, you can provide the preprocessing code at line 63 in ``quantize_quark.py`` - -.. 
code-block:: bash - - mkdir calib_data - wget -O calib_data/daisy.jpg https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/test_images/daisy.jpg?raw=true - - -Quantization ------------- - -- **XINT8** - -XINT8 uses symmetric INT8 activation and weights quantization with power-of-two scales. Typically, the calibration method uses MinMSE. Refer to the following sections, such as **ADAROUND** and **ADAQUANT**, for methods to improve quantization accuracy based on this configuration. - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config XINT8 - -- **A8W8** - -A8W8 uses symmetric INT8 activation and weights quantization with float scales. Typically, the calibration method uses MinMax. Refer to the following sections, such as **ADAROUND** and **ADAQUANT**, for methods to improve quantization accuracy based on this configuration. - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config A8W8 - -- **A16W8** - -A16W8 uses symmetric INT16 activation and symmetric INT8 weights quantization with float scales. Typically, the calibration method uses MinMax. - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config A16W8 - -- **BF16** - -BFLOAT16 (BF16) is a 16-bit floating-point format designed for machine learning. It has the same exponent size as FP32, allowing a wide dynamic range, but with reduced precision to save memory and speed up computations. - -.. 
code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config BF16 - -- **BFP16** - -Block Floating Point (BFP) quantization reduces computational complexity by grouping numbers to share a common exponent, thereby preserving accuracy efficiently. BFP offers both reduced storage requirements and high quantization precision. - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config BFP16 - -- **CLE** - -The CLE (Cross Layer Equalization) algorithm is a quantization technique that balances weights across layers by scaling them proportionally, aiming to reduce accuracy loss and improve robustness in low-bit quantized neural networks. Consider XINT8 as the example: - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config XINT8 \ - --cle - -- **ADAROUND** - -ADAROUND (Adaptive Rounding) is a quantization algorithm that optimizes the rounding of weights by minimizing the reconstruction error, ensuring better accuracy retention for neural networks in post-training quantization. Consider XINT8 as the example: - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config XINT8 \ - --adaround \ - --learning_rate 0.1 \ - --num_iters 3000 - -- **ADAQUANT** - -ADAQUANT (Adaptive Quantization) is a post-training quantization algorithm that optimizes quantization parameters by minimizing layer-wise reconstruction errors, enabling improved accuracy for low-bit quantized neural networks. 
Consider XINT8 as the example: - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config XINT8 \ - --adaquant \ - --learning_rate 0.00001 \ - --num_iters 10000 - -- **Exclude Nodes** - -Excluding some nodes means that these nodes are not quantized. This method can improve quantization accuracy. Consider XINT8 as the example: - -.. code-block:: bash - - python quantize_quark.py --input_model_path models/resnet50-v1-12.onnx \ - --calib_data_path calib_data \ - --output_model_path models/resnet50-v1-12_quantized.onnx \ - --config XINT8 \ - --exclude_nodes "resnetv17_conv0_fwd; resnetv17_stage1_conv0_fwd" - -.. raw:: html - - diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_a8w8_and_a16w8_quantize.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_a8w8_and_a16w8_quantize.rst deleted file mode 100644 index 707d0405..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_a8w8_and_a16w8_quantize.rst +++ /dev/null @@ -1,206 +0,0 @@ -Float Scales (A8W8 and A16W8) Quantization -========================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark". - -Introduction ------------- - -A8W8 and A16W8 are two commonly used quantization configurations for the Ryzen AI NPU. - - A8W8: Uses symmetric INT8 activation, symmetric INT8 weight, and symmetric INT32 bias quantization with float scales. 
- - A16W8: Uses symmetric INT16 activation, symmetric INT8 weight, and symmetric INT32 bias quantization with float scales. - -As the activation bit width increases, the quantized model's accuracy improves. This means A16W8 generally offers better accuracy than A8W8. However, A8W8 provides better performance compared to A16W8. - -Please choose the appropriate quantization configuration based on your specific needs. This guide explains how to quantize a float model using the A8W8 or A16W8 configuration and provides strategies to improve accuracy. - -How to Quantize a Float Model with A8W8/A16W8 Config ----------------------------------------------------- - -.. figure:: ../../_static/a8w8_and_a16w8_quantize.png - :width: 30% - :align: center - - **Figure 1. How to Quantize a Float Model with A8W8/A16W8 Config** - - -As the Figure 1 shows, you can refer to codes below: - -.. code-block:: python - - from onnxruntime.quantization.calibrate import CalibrationDataReader - from quark.onnx.quantization.config import Config, get_default_config - from quark.onnx import ModelQuantizer - - # Define model paths - # Path to the float model to be quantized - float_model_path = "path/to/float_model.onnx" - # Path where the quantized model will be saved - quantized_model_path = "path/to/quantized_model.onnx" - calib_data_folder = "path/to/calibration_data" - model_input_name = 'model_input_name' - - # Define calibration data reader for static quantization - class CalibDataReader(CalibrationDataReader): - def __init__(self, calib_data_folder: str, model_input_name: str): - self.input_name = model_input_name - self.data = self._load_calibration_data(calib_data_folder) - self.data_iter = None - - # Customize this function to preprocess calibration datasets as needed - def _load_calibration_data(self, data_folder: str): - # Example: Implement the actual data preprocessing here - processed_data = [] - """ - Define preprocessing steps for your dataset. 
- For instance, read images and apply necessary transformations. - """ - return processed_data - - def get_next(self): - if self.data_iter is None: - self.data_iter = iter([{self.input_name: data} for data in self.data]) - return next(self.data_iter, None) - - # Instantiate the calibration data reader - calib_data_reader = CalibDataReader(calib_data_folder, model_input_name) - - # Set up quantization with a specified configuration - # For example, use "A8W8" for Ryzen AI A8W8 quantization - quant_config = get_default_config("A8W8") # Replace "A8W8" with "A16W8" - quantization_config = Config(global_quant_config=quant_config ) - quantizer = ModelQuantizer(quantization_config) - - # Quantize the ONNX model and save to specified path - quantizer.quantize_model(float_model_path, quantized_model_path, calib_data_reader) - -.. note:: - - In the quantization, graph optimization will be automatically performed. - -How to Measure Accuracy (Compare Differences between FP32 and A8W8/A16W8) -------------------------------------------------------------------------- - -- **infer float32 and a8w8/a16w8 quantized models and save results** - -You can refer to the following code to infer the float32 and A8W8/A16W8 quantized models and save the results. - -.. code-block:: python - - import numpy as np - import os - import onnxruntime as ort - - def infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir): - ort_session = ort.InferenceSession(onnx_model_path) - # Assume the model has only one input. 
- input_name = ort_session.get_inputs()[0].name - for index, input_data in enumerate(input_data_loader): - ort_inputs = {input_name: input_data} - ort_outs = ort_session.run(None, ort_inputs) - output_numpy = ort_outs[0] - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, str(index) + ".npy") - np.save(output_file, output_numpy) - print(f"Results saved to {output_dir}.") - - - onnx_model_path = "float32_model.onnx" # Replace with "a8w8_quantized_model.onnx" or "a16w8_quantized_model.onnx" - # input_data_loader is an iterable object that returns a numpy tensor each time. It is user-defined. - output_dir = "baseline_results" # Replace with "quantized_results" - infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir) - -- **calculate differences** - -If you need to compare the differences between float32 and A8W8/A16W8 quantized models after conversion, we support some metrics (cosine similarity, L2 loss, PSNR) for comparing differences between float32 and A8W8/A16W8 quantized inference results. The formats (JPG, PNG and NPY) of inference result in folders are supported. You can use this command to compare: - -.. code-block:: bash - - python -m quark.onnx.tools.evaluate --baseline_results_folder $BASELINE_RESULTS_FOLDER_PATH --quantized_results_folder $QUANTIZED_RESULTS_FOLDER_PATH - - -How to Improve Quantization Accuracy ------------------------------------- - -If the accuracy of A8W8/A16W8 quantized model cannot meet your target, you can improve quantization accuracy with adaround and adaquant finetuning. Here are examples of how to improve quantization accuracy with finetuning. For more detailed information, see :doc:`Quantization Using AdaQuant and AdaRound <../../onnx/accuracy_algorithms/ada>`. - -- **ADAROUND** - -.. 
code:: python - - from quark.onnx import ModelQuantizer, QuantType, QuantFormat, ExtendedQuantFormat, ExtendedQuantType - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=QuantType.QInt8, # Replace with "activation_type=ExtendedQuantType.QInt16," when using A16W8 - weight_type=QuantType.QInt8, - include_fast_ft=True, - extra_options={ - 'ActivationSymmetric': True, - 'AlignSlice': False, - 'FoldRelu': True, - 'AlignConcat': True, - 'AlignEltwiseQuantType': True, - 'FastFinetune': { - 'NumIterations': 1000, - 'LearningRate': 0.1, - 'OptimAlgorithm': 'adaround', - } - } - ) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -- **ADAQUANT** - -.. code:: python - - from quark.onnx import ModelQuantizer, QuantType, ExtendedQuantFormat, ExtendedQuantType - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=QuantType.QInt8, # Replace with "activation_type=ExtendedQuantType.QInt16," when using A16W8 - weight_type=QuantType.QInt8, - include_fast_ft=True, - extra_options={ - 'ActivationSymmetric': True, - 'AlignSlice': False, - 'FoldRelu': True, - 'AlignConcat': True, - 'AlignEltwiseQuantType': True, - 'FastFinetune': { - 'NumIterations': 1000, - 'LearningRate': 1e-6, - 'OptimAlgorithm': 'adaquant', - } - } - ) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - - -.. 
raw:: html - - - -License -------- - -Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_convert_fp32_or_fp16_to_bf16.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_convert_fp32_or_fp16_to_bf16.rst deleted file mode 100644 index 3bcc74cf..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_convert_fp32_or_fp16_to_bf16.rst +++ /dev/null @@ -1,136 +0,0 @@ -FP32/FP16 to BF16 Model Conversion -================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark". - -Introduction ------------- - -BFloat16 (Brain Floating Point 16) is a floating-point format designed for deep learning, offering reduced memory usage and faster computation while maintaining sufficient numerical precision. - -AMD’s latest NPU and GPU devices natively support BF16, enabling more efficient matrix operations and lower latency. This guide explains how to convert an FP32/FP16 model to BF16 using Quark. - -.. figure:: ../../_static/convert_fp32_or_fp16_to_bf16.png - :width: 30% - :align: center - - **Figure 1. How to Convert FP32/FP16 to BF16** - -How to Convert FP32 to BF16 ---------------------------- - -As the Figure 1 shows, you can use this command to convert a float32 model to bfloat16: - -.. 
code-block:: bash - - python -m quark.onnx.tools.convert_fp32_to_bf16 --input $FLOAT32_ONNX_MODEL_PATH --output $BFLOAT16_ONNX_MODEL_PATH --format with_cast - -How to Convert FP16 to BF16 ---------------------------- - -As the Figure 1 shows, you can use this command to convert a float16 model to bfloat16: - -.. code-block:: bash - - python -m quark.onnx.tools.convert_fp16_to_bf16 --input $FLOAT16_ONNX_MODEL_PATH --output $BFLOAT16_ONNX_MODEL_PATH --format with_cast - -.. note:: - - In the conversion, graph optimization and saturation (overflow protection) will be automatically performed, and the ONNX converted from float32/float16 to bfloat16 looks like Figure 2. As you can see, compared to the float32/float16 model on the left, the bfloat16 model on the right includes additional pairs of Cast operations and some graph optimizations, for example merging three MatMul operations into one. - -.. figure:: ../../_static/example_of_converting_fp_to_bf16.png - :width: 90% - :align: center - - **Figure 2. Convert FP32/FP16 Models to BF16** - -How to Measure Accuracy (Compare Differences between FP32/FP16 and BF16) ------------------------------------------------------------------------- - -- **infer float/float16 and bfloat16 models and save results** - -You can refer to the following code to infer the float32/float16 and bfloat16 models and save the results. - -.. code-block:: python - - import numpy as np - import os - import onnxruntime as ort - - def infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir): - ort_session = ort.InferenceSession(onnx_model_path) - # Assume the model has only one input. 
- input_name = ort_session.get_inputs()[0].name - for index, input_data in enumerate(input_data_loader): - ort_inputs = {input_name: input_data} - ort_outs = ort_session.run(None, ort_inputs) - output_numpy = ort_outs[0] - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, str(index) + ".npy") - np.save(output_file, output_numpy) - print(f"Results saved to {output_dir}.") - - - onnx_model_path = "float32_model.onnx" # Replace with "float16_model.onnx" or "bfloat16_model.onnx" - # input_data_loader is an iterable object that returns a numpy tensor each time. It is user-defined. - output_dir = "baseline_results" # Replace with "quantized_results" - infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir) - -- **calculate differences** - -If you need to compare the differences between float32/float16 and bfloat16 models after conversion. We support some metrics (cosine similarity, L2 loss, PSNR) for comparing differences between float32/float16 and bfloat16 inference results. The formats (JPG, PNG and NPY) of inference result in folders are supported. you can use this command to compare: - -.. code-block:: bash - - python -m quark.onnx.tools.evaluate --baseline_results_folder $BASELINE_RESULTS_FOLDER_PATH --quantized_results_folder $QUANTIZED_RESULTS_FOLDER_PATH - -How to Improve BF16 Accuracy ----------------------------- - -If the accuracy of bfloat16 model can not meet your target, you can improve bfloat16 accuracy with adaquant finetuning. Here is a simple example of how to improve BF16 accuracy. **NumIterations** and **LearningRate** are two important parameters for improving accuracy during the finetuning process. Their explanations are as follows. For more detailed information, see :doc:`BF16 Quantization <../../onnx/tutorial_bf16_quantization>`. - - - **NumIterations**: (Int) The number of iterations for finetuning. More iterations can lead to better accuracy but also longer training time. 
The default value is 1000. - - - **LearningRate**: (Float) Learning rate for finetuning. It significantly impacts the improvement of fast finetune, and experimenting with different learning rates might yield better results for your model. The default value is 1e-6. - -.. code:: python - - from quark.onnx import ModelQuantizer, ExtendedQuantType, ExtendedQuantFormat - from onnxruntime.quantization.calibrate import CalibrationMethod - from quark.onnx.quantization.config.config import Config, QuantizationConfig - - quant_config = QuantizationConfig(calibrate_method=CalibrationMethod.MinMax, - quant_format=ExtendedQuantFormat.QDQ, - activation_type=ExtendedQuantType.QBFloat16, - weight_type=ExtendedQuantType.QBFloat16, - include_fast_ft=True, - extra_options={ - 'BF16QDQToCast': True, - 'QuantizeAllOpTypes': True, - 'ForceQuantizeNoInputCheck': True, - 'FastFinetune': { - 'NumIterations': 1000, - 'LearningRate': 1e-6, - 'OptimAlgorithm': 'adaquant', - } - } - ) - - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - - -.. raw:: html - - - -License -------- - -Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_quick_start_for_ryzenai.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_quick_start_for_ryzenai.rst deleted file mode 100644 index 61a31f0d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_quick_start_for_ryzenai.rst +++ /dev/null @@ -1,255 +0,0 @@ -Quick Start for Ryzen AI -======================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. 
When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark". - -Following the :doc:`Basic usage <../../basic_usage>` guideline page, this document will go through all four steps to quantize a model using a minimalistic approach and build on it to show off some advanced features of AMD Quark. Towards the end an evaluation will be performed to assess the quality of the resulting quantization. - - -1. Prepare the original float model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For this quick start guide, we will take the **ResNet-50** as example. **ResNet-50** is a Convolutional Neural Network (CNN) that's commonly used for image classification. It's part of the ResNet (Residual Networks) family of models, which were developed to address challenges with training deep neural networks. ResNet-50 is made up of 50 weight layers, including convolution layers, residual blocks, and fully connected layers. The model has skip connections that allow the model to skip one or more layers, preventing vanishing gradients. Typically, ResNet-50 is trained on over a million images from the **ImageNet** database and applications in real life include medical imaging, anomaly detection, and inventory management. - -To get started, first, download the model from the `onnx/models `__ repo directly: - -.. code-block:: bash - - wget -P models https://github.com/onnx/models/raw/new-models/vision/classification/resnet/model/resnet50-v1-12.onnx - -.. note:: - - In the quantization, graph optimization will be automatically performed. - -2. Prepare calibration data -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Typically, quantization can be performed without calibration data. However, feeding a representative dataset during the calibration stage yields better results. 
Here we are going to show how to quantize models with and without calibration data for learning purposes. - -2.1. Quantization without Calibration Data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Models can be quantized without calibration data. For such cases, AMD Quark provides an API to perform quantization using auto-generated random data. The command line below shows how to quantize a float model without calibration data. Here we use the default quantization config, **A8W8**; you can also use **XINT8**, **A16W8**, and so on. Refer to :doc:`Quark-ONNX Configuration page <../../onnx/user_guide_config_description>` to learn more about the supported data type and quantization configuration. - -.. code-block:: bash - - python -m quark.onnx.tools.random_quantize --input_model_path models/resnet50-v1-12.onnx --quantized_model_path models/resnet50-v1-12_random_quantized.onnx --config A8W8 - -.. note:: - - Since the calibration data is an automatically generated tensor with values in the range [0, 1], errors may occur when models require integer input. In such cases, this tool cannot be used and real calibration data must be provided. Similarly, if you want to achieve good quantization accuracy, you must use calibration data. - -2.2. Quantization with Calibration Data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Here we are going to perform calibration to learn characteristics of the user input and yield better accuracy. Users must provide a representative dataset in this step. ResNet-50 expects a calibration data folder with images in PNG or JPG formats. For example, you can download images from `Microsoft ONNX Runtime test images <https://github.com/microsoft/onnxruntime-inference-examples/tree/main/quantization/image_classification/cpu/test_images>`_. - -.. code-block:: bash - - mkdir calib_data - wget -O calib_data/daisy.jpg https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/test_images/daisy.jpg?raw=true - -Next, implement the calibration data reader API as shown: - -.. 
code-block:: python - - import os - import cv2 - import numpy as np - from torchvision import transforms - - calib_data_folder = "calib_data" - model_input_name = 'data' - - # You can define your preprocess method - def preprocess_image(image_path): - transform = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((224, 224)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = transform(image) - image = image.unsqueeze(0) - image = image.numpy() - return image - - class CalibrationDataReader: - def __init__(self, calib_data_folder: str, model_input_name: str): - super().__init__() - self.input_name = model_input_name - self.processed_data = [] - self.data = self._load_calibration_data(calib_data_folder) - self.index = 0 - - def _load_calibration_data(self, data_folder: str): - for image_filename in os.listdir(data_folder): - if image_filename.lower().endswith(('.jpg', '.png')): - image_path = os.path.join(data_folder, image_filename) - image = preprocess_image(image_path) - self.processed_data.append(image) - return self.processed_data - - def get_next(self): - if self.index < len(self.processed_data): - input_data = {self.input_name: self.processed_data[self.index]} - self.index += 1 - return input_data - return None - - # Instantiate the calibration data reader - calib_data_reader = CalibrationDataReader(calib_data_folder, model_input_name) - -3. Set the quantization configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The code below shows how to quantize a float model with **A8W8**. For more detailed information about basic quantization, please see :doc:`Basic Usage <../../onnx/basic_usage_onnx>`. - -.. 
code-block:: python - - from quark.onnx.quantization.config import Config, get_default_config - from quark.onnx import ModelQuantizer - - # Set up quantization with a specified configuration - # For example, use "A8W8" for Ryzen AI INT8 quantization - a8w8_config = get_default_config("A8W8") - quantization_config = Config(global_quant_config=a8w8_config) - quantizer = ModelQuantizer(quantization_config) - -.. note:: - - The A8W8 configuration is our default setup. To minimize quantization time, accuracy-improvement strategies such as AdaRound or AdaQuant are not applied by default, which may lead to suboptimal accuracy in some cases. For better quantization accuracy, please refer to Section **How to Improve Quantization Accuracy** of :doc:`Float Scales (A8W8 and A16W8) Quantization ` page for details. - -4. Quantize the model -~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - float_model_path = "models/resnet50-v1-12.onnx" - - quantized_model_path = "models/resnet50-v1-12_quantized.onnx" - - # Quantize the ONNX model and save to specified path - quantizer.quantize_model(float_model_path, quantized_model_path, calib_data_reader) - -4.1 Quantize the model with Advanced Features -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -By this point, the model has been quantized and a certain level of performance was observed. Often, the achieved performance is not sufficient and users might be interested in using Advanced Features to improve the results. AMD Quark advanced features include **ADAROUND** and **ADAQUANT**. Compared to basic quantization, the user only needs to update the quantization configuration. For example, the user could replace **A8W8** with **A8W8_ADAROUND** or **A8W8_ADAQUANT**. - -Let's try replacing the corresponding two lines above with the following few lines of code. - -.. 
code-block:: python - - a8w8_adaround_config = get_default_config("A8W8_ADAROUND") - # a8w8_adaquant_config = get_default_config("A8W8_ADAQUANT") - quantization_config = Config(global_quant_config=a8w8_adaround_config) - # quantization_config = Config(global_quant_config=a8w8_adaquant_config) - -For more detailed information about AdaRound and AdaQuant, please see :doc:`Quantization Using AdaQuant and AdaRound <../../onnx/accuracy_algorithms/ada>`. - -5. Evaluation -~~~~~~~~~~~~~ - -Now that the model is quantized, let's measure how good the model performs. Let's take an image in calibration data folder as input and dump the output ``NumPy`` tensor. - -.. code-block:: python - - import os - import numpy as np - import cv2 - import onnx - from torchvision import transforms - from onnxruntime import InferenceSession - - def preprocess_image(image_path): - transform = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((224, 224)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = transform(image) - image = image.unsqueeze(0) - return image - - def load_onnx_model(model_path): - session = InferenceSession(model_path) - return session - - def infer_on_image(session, image): - input_name = session.get_inputs()[0].name - output_name = session.get_outputs()[0].name - result = session.run([output_name], {input_name: image.numpy()}) - return result[0] - - def process_images_and_infer(input_folder, onnx_model_path, output_folder): - if not os.path.exists(output_folder): - os.makedirs(output_folder) - session = load_onnx_model(onnx_model_path) - for image_filename in os.listdir(input_folder): - if image_filename.lower().endswith(('.jpg', '.png')): - image_path = os.path.join(input_folder, image_filename) - print(f"Processing {image_path}...") - image = preprocess_image(image_path) - result = 
infer_on_image(session, image) - output_filename = os.path.splitext(image_filename)[0] + '_output.npy' - output_path = os.path.join(output_folder, output_filename) - np.save(output_path, result) - print(f"Saved result to {output_path}") - - input_folder = "calib_data" - onnx_model_path = "models/resnet50-v1-12.onnx" # Replace with "models/resnet50-v1-12_random_quantized.onnx" or "models/resnet50-v1-12_quantized.onnx" - output_folder = "float_output" # Replace with "random_quantized_output" or "quantized_output" - process_images_and_infer(input_folder, onnx_model_path, output_folder) - -Quark provides a tool to compare the differences between float and quantized models using ``L2 Loss`` and other metrics. For example: - -.. code-block:: bash - - python -m quark.onnx.tools.evaluate --baseline_results_folder float_output --quantized_results_folder random_quantized_output - -6. Results -~~~~~~~~~~ - -As shown in the table below, random quantization results in a very large L2 loss. Using calibration data can significantly reduce the loss, and advanced features can further minimize it. - -.. list-table:: - :header-rows: 1 - - * - - - Float Model - - Quantized Model without Calibration Data - - Quantized Model with A8W8 Config - - Quantized Model with A8W8 + AdaRound Config - - Quantized Model with A8W8 + AdaQuant Config - * - Model Size - - 99 MB - - 25 MB - - 25 MB - - 25 MB - - 25 MB - * - L2 Loss (compared with float model) - - 0 - - 30.26 - - 9.78 - - 1.43 - - 1.15 - -.. raw:: html - - - -License -~~~~~~~ - -Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. 
-SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_uint4_oga.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_uint4_oga.rst deleted file mode 100644 index cc554453..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_uint4_oga.rst +++ /dev/null @@ -1,122 +0,0 @@ -Quantizing LLMs for ONNX Runtime GenAI -====================================== - -This document provides examples of quantizing large language models (LLMs) to **UINT4** using the **AWQ algorithm** via the Quark API, and exporting them to ONNX format using the **ONNX Runtime Gen AI Model Builder**. - -`ONNX Runtime GenAI `__ offers an end-to-end pipeline for working with ONNX models, including inference using ONNX Runtime, logits processing, search and sampling, and key-value (KV) cache management. For detailed documentation, visit the `ONNX Runtime Gen AI Documentation `_. The tool includes a `Model Builder `_ that facilitates exporting models to the ONNX format. - -.. note:: - - For large models, it is recommended to run this workflow on a data center GPU, as it is memory-intensive. Most laptop and desktop GPUs will not have enough memory to process larger parameter models efficiently. If you haven't done so already, and have a GPU, we suggest you create a fresh Quark environment with a PyTorch ROCm or CUDA install for your platform https://pytorch.org/get-started/locally/. - -Preparation ------------ - -Model Preparation -~~~~~~~~~~~~~~~~~ - -To use **Llama2 models**, download the HF Llama2 checkpoint. Access to these checkpoints requires a permission request to Meta. For more information, refer to the `Llama2 page on Hugging Face `_. Once permission is granted, download the checkpoint and save it to the ````. - -Installation -~~~~~~~~~~~~ - -We will use a script from the ``examples/`` directory for Quark. 
This directory is found in the Quark ``.zip`` that can be downloaded at `📥amd_quark.zip release_version `__. - -The ``quantize_quark.py`` script, that we will use, is found in a sub-directory of ``examples/``. It requires some additional dependencies be installed: - -.. code-block:: bash - - cd examples/torch/language_modeling/llm_ptq/ - pip install -r requirements.txt - -Quark UINT4 Quantization with AWQ ---------------------------------- - -**Quantization Configuration**: AWQ / Group 128 / Asymmetric / FP16 activations - -Use the following command to quantize the model: - -.. code-block:: bash - - python3 quantize_quark.py --model_dir \ - --output_dir \ - --quant_scheme w_uint4_per_group_asym \ - --num_calib_data 128 \ - --quant_algo awq \ - --dataset pileval_for_awq_benchmark \ - --seq_len 512 \ - --model_export hf_format \ - --data_type float16 \ - --custom_mode awq - -This will generate a directory containing the safe tensors at the specified ````. - -.. note:: - - To include the ``lm_head`` layer in the quantization process, add the ``--exclude_layers`` flag. This overrides the default behavior of excluding the ``lm_head`` layer. - -.. note:: - - To quantize the model for BF16 activations, use the ``--data_type bfloat16`` flag. - -.. note:: - - To specify a group size other than 128, such as 32, use the ``--group_size 32`` flag. - - -(Optional) Quark UINT4 Quantization with Different Group Sizes per Layer -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Quark supports quantizing layers with different group sizes, providing finer-grained control over the quantization process. This allows you to better balance performance and accuracy. -For example, to quantize the model with 32 group size for lm_head, while 128 group size for the rest, use the following command: - -.. 
code-block:: bash - - python3 quantize_quark.py --model_dir \ - --output_dir \ - --quant_scheme w_uint4_per_group_asym \ - --num_calib_data 128 \ - --quant_algo awq \ - --dataset pileval_for_awq_benchmark \ - --seq_len 512 \ - --model_export hf_format \ - --data_type float16 \ - --exclude_layers \ - --group_size 128 \ - --group_size_per_layer lm_head 32 - -.. note:: - - This is an advanced feature that is **not supported** by the standard AWQ model format. - As a result, the quantized model is stored in the Quark model format, which does **not** require - the ``--custom_mode awq`` argument. - - Support for the Quark model format in **ONNX Runtime GenAI** is coming soon in v0.7 release. - For early access you can try this `feature branch `_. - -Exporting Using ONNX Runtime Gen AI Model Builder -------------------------------------------------- - -Install the ONNX Runtime Gen AI tool package using ``pip``: - -.. code-block:: bash - - pip install onnxruntime-genai - -To export the quantized model to ONNX format, run the following command: - -.. code-block:: bash - - python3 -m onnxruntime_genai.models.builder \ - -i \ - -o \ - -p int4 \ - -e dml - -.. note:: - - The activation data type of the ONNX model depends on the combination of the ``-p`` (precision) and ``-e`` (execution provider) flags. For example: - - - Using ``-p int4 -e dml`` will generate an ONNX model with float16 activations prepared for the DirectML execution provider for hybrid (NPU + iGPU) flow. - - To generate an ONNX model with float32 activations for NPU flow, use the ``-p int4 -e cpu`` flag. 
- diff --git a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_xint8_quantize.rst b/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_xint8_quantize.rst deleted file mode 100644 index 32a2e2a5..00000000 --- a/LLM-examples/RAG-OGA/Dataset/quark_docs_repo_supported_accelerators_ryzenai_tutorial_xint8_quantize.rst +++ /dev/null @@ -1,154 +0,0 @@ -Power-of-Two Scales (Xint8) Quantization -======================================== - -.. note:: - - In this documentation, **AMD Quark** is sometimes referred to simply as **"Quark"** for ease of reference. When you encounter the term "Quark" without the "AMD" prefix, it specifically refers to the AMD Quark quantizer unless otherwise stated. Please do not confuse it with other products or technologies that share the name "Quark". - -Introduction ------------- - -XINT8 is a specialized INT8 quantization configuration for Ryzen AI NPU. It uses symmetric INT8 activation, weight and bias quantization with power-of-two scales. XINT8 is optimized for peak performance on the Ryzen AI NPU, fully leveraging its acceleration capabilities. However, due to the constraints of symmetric quantization and power-of-two scaling, some models may experience accuracy loss. This guide explains how to quantize a float model using the XINT8 configuration and provides strategies to improve its accuracy. - -How to Quantize a Float Model with XINT8 Config ------------------------------------------------ - -.. figure:: ../../_static/xint8_quantize.png - :width: 30% - :align: center - - **Figure 1. How to Quantize a Float Model with XINT8 Config** - - -As the Figure 1 shows, you can refer to codes below: - -.. 
code-block:: python - - from onnxruntime.quantization.calibrate import CalibrationDataReader - from quark.onnx.quantization.config import Config, get_default_config - from quark.onnx import ModelQuantizer - - # Define model paths - # Path to the float model to be quantized - float_model_path = "path/to/float_model.onnx" - # Path where the quantized model will be saved - quantized_model_path = "path/to/quantized_model.onnx" - calib_data_folder = "path/to/calibration_data" - model_input_name = 'model_input_name' - - # Define calibration data reader for static quantization - class CalibDataReader(CalibrationDataReader): - def __init__(self, calib_data_folder: str, model_input_name: str): - self.input_name = model_input_name - self.data = self._load_calibration_data(calib_data_folder) - self.data_iter = None - - # Customize this function to preprocess calibration datasets as needed - def _load_calibration_data(self, data_folder: str): - # Example: Implement the actual data preprocessing here - processed_data = [] - """ - Define preprocessing steps for your dataset. - For instance, read images and apply necessary transformations. - """ - return processed_data - - def get_next(self): - if self.data_iter is None: - self.data_iter = iter([{self.input_name: data} for data in self.data]) - return next(self.data_iter, None) - - # Instantiate the calibration data reader - calib_data_reader = CalibDataReader(calib_data_folder, model_input_name) - - # Set up quantization with a specified configuration - # For example, use "XINT8" for Ryzen AI INT8 quantization - quant_config = get_default_config("XINT8") - quantization_config = Config(global_quant_config=quant_config ) - quantizer = ModelQuantizer(quantization_config) - - # Quantize the ONNX model and save to specified path - quantizer.quantize_model(float_model_path, quantized_model_path, calib_data_reader) - -.. note:: - - In the quantization, graph optimization will be automatically performed. 
- -How to Measure Accuracy (Compare Differences between FP32 and XINT8) --------------------------------------------------------------------- - -- **infer float32 and xint8 quantized models and save results** - -You can refer to the following code to infer the float32 and xint8 quantized models and save the results. - -.. code-block:: python - - import numpy as np - import os - import onnxruntime as ort - - def infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir): - ort_session = ort.InferenceSession(onnx_model_path) - # Assume the model has only one input. - input_name = ort_session.get_inputs()[0].name - for index, input_data in enumerate(input_data_loader): - ort_inputs = {input_name: input_data} - ort_outs = ort_session.run(None, ort_inputs) - output_numpy = ort_outs[0] - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, str(index) + ".npy") - np.save(output_file, output_numpy) - print(f"Results saved to {output_dir}.") - - - onnx_model_path = "float32_model.onnx" # Replace with "xint8_quantized_model.onnx" - # input_data_loader is an iterable object that returns a numpy tensor each time. It is user-defined. - output_dir = "baseline_results" # Replace with "quantized_results" - infer_model_and_save_output(onnx_model_path, input_data_loader, output_dir) - -- **calculate differences** - -If you need to compare the differences between float32 and xint8 quantized models after conversion. We support some metrics (cosine similarity, L2 loss, PSNR) for comparing differences between float32 and xint8 quantized inference results. The formats (JPG, PNG and NPY) of inference result in folders are supported. you can use this command to compare: - -.. 
code-block:: bash - - python -m quark.onnx.tools.evaluate --baseline_results_folder $BASELINE_RESULTS_FOLDER_PATH --quantized_results_folder $QUANTIZED_RESULTS_FOLDER_PATH - - -How to Improve Quantization Accuracy ------------------------------------- - -If the accuracy of the xint8 quantized model cannot meet your target, you can improve quantization accuracy with adaround and adaquant finetuning. Here are examples of how to improve quantization accuracy with finetuning. For more detailed information, see :doc:`Quantization Using AdaQuant and AdaRound <../../onnx/accuracy_algorithms/ada>`. - -- **ADAROUND** - -.. code-block:: python - - quant_config = get_default_config("XINT8_ADAROUND_CONFIG") - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - -- **ADAQUANT** - -.. code-block:: python - - quant_config = get_default_config("XINT8_ADAQUANT_CONFIG") - config = Config(global_quant_config=quant_config) - - quantizer = ModelQuantizer(config) - - quantizer.quantize_model(input_model_path, output_model_path, data_reader) - - -.. raw:: html - - - -License -------- - -Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. -SPDX-License-Identifier: MIT diff --git a/LLM-examples/RAG-OGA/Dataset/relnotes.rst b/LLM-examples/RAG-OGA/Dataset/relnotes.rst deleted file mode 100644 index b1e20f54..00000000 --- a/LLM-examples/RAG-OGA/Dataset/relnotes.rst +++ /dev/null @@ -1,782 +0,0 @@ -.. include:: /icons.txt - -############# -Release Notes -############# - -.. _supported-configurations: - -************************ -Supported Configurations -************************ - -Ryzen AI 1.4 Software supports AMD processors codenamed Phoenix, Hawk Point, Strix, Strix Halo, and Krackan Point. 
These processors can be found in the following Ryzen series: - -- Ryzen 200 Series -- Ryzen 7000 Series, Ryzen PRO 7000 Series -- Ryzen 8000 Series, Ryzen PRO 8000 Series -- Ryzen AI 300 Series, Ryzen AI PRO Series, Ryzen AI Max 300 Series - -For a complete list of supported devices, refer to the `processor specifications `_ page (look for the "AMD Ryzen AI" column towards the right side of the table, and select "Available" from the pull-down menu). - -The rest of this document will refer to Phoenix as PHX, Hawk Point as HPT, Strix and Strix Halo as STX, and Krackan Point as KRK. - - -************************* -Model Compatibility Table -************************* - -The following table lists which types of models are supported on what hardware platforms. - -.. list-table:: - :header-rows: 1 - - * - Model Type - - PHX/HPT - - STX/KRK - * - CNN INT8 - - |checkmark| - - |checkmark| - * - CNN BF16 - - - - |checkmark| - * - NLP BF16 - - - - |checkmark| - * - LLM (OGA) - - - - |checkmark| - - -*********** -Version 1.4 -*********** - -- New Features: - - - `New architecture support for Ryzen AI 300 series processors `_ - - Unified support for LLMs, INT8, and BF16 models in a single release package - - Public release for compilation of BF16 CNN and NLP models on Windows - - `Public release of the LLM Hybrid OGA flow `_ - - `LLM building flow for finetuned LLM `_ - - Support for up to 16 hardware contexts on Ryzen AI 300 series processors - - Vitis AI EP now supports the ONNX Runtime EP context cache feature (for custom handling of pre-compiled models) - - Ryzen AI environment variables converted to VitisAI EP session options - - Improved exception handling and fallback to CPU - -- `New Hybrid execution mode LLMs `_: - - - DeepSeek-R1-Distill-Llama-8B - - DeepSeek-R1-Distill-Qwen-1.5B - - DeepSeek-R1-Distill-Qwen-7B - - Gemma2-2B - - Qwen2-1.5B - - Qwen2-7B - - AMD-OLMO-1B-SFT-DPO - - Mistral-7B-Instruct-v0.1 - - Mistral-7B-Instruct-v0.2 - - Mistral-7B-v0.3 - - 
Llama3.1-8B-Instruct - - Codellama-7B-Instruct - -- :doc:`New BF16 model examples `: - - - Image classification - - Finetuned DistilBERT for text classification - - Text embedding model Alibaba-NLP/gte-large-en-v1.5 - -- New Tools: - - - `Lemonade SDK `_ - - - `Lemonade Server `_: A server interface that uses the standard Open AI API, allowing applications in any language to integrate with Lemonade Server for local LLM deployment and compatibility with existing Open AI apps. - - `Lemonade Python API `_: Offers High-Level API for easy integration of Lemonade LLMs into Python applications and Low-Level API for custom experiments with specific checkpoints, devices, and tools. - - `Lemonade Command Line `_ Interface easily benchmark, measure accuracy, prompt or gather memory usage of your LLM. - - `TurnkeyML `_ – Open-source tool that includes low-code APIs for general ONNX workflows. - - `Digest AI `_ – A Model Ingestion and Analysis Tool in collaboration with the Linux Foundation. - - `GAIA `_ – An open-source application designed for the quick setup and execution of generative AI applications on local PC hardware. 
- -- Quark-torch: - - - Added ROUGE and METEOR evaluation metrics for LLMs - - Support for evaluating ONNX models exported using OGA - - Support for offline evaluation (evaluation without generation) for LLMs - - Support for Hugging Face integration - - Support for Gemma2 quantization using the OGA flow - - Support for Llama-3.2 quantization with FP8 (weights, activation, and KV-cache) for the vision and language components - -- Quark-onnx: - - - Support compatibility with ONNX Runtime versions 1.20.0 and 1.20.1 - - Support for microexponents (MX) data types, including MX4, MX6, and MX9 - - Support for BF16 data type for VAIML - - Support for excluding pre and post-processing from quantization - - Support for mixed precision with any data type - - Support for Quarot rotation R1 algorithm - - Support for microexponents and microscaling AdaQuant - - Support for an auto-search algorithm to automatically find the best accuracy quantized model - - Added tools for evaluating L2, PSNR, VMAF, and cosine - -- ONNX Runtime EP: - - - Support for Chinese characters in the ``filename/cache_dir/cache_key/xclbin`` - - Support for ``int4/uint4`` data type - - Support for configurable failure handling: CPU fallback or exception - - Update for encrypt/decrypt feature - -- Known Issues: - - - Microsoft Windows Insider Program (WIP) users may see warnings or need to restart when running all applications concurrently. - - - NPU driver and workloads will continue to work. - - - Context creation may appear to be limited when some applications do not close contexts quickly. 
- - -*********** -Version 1.3 -*********** - -- New Features: - - - Initial release of the Quark quantizer - - Support for mixed precision data types - - Compatibility with Copilot+ applications - -- Improved support for :doc:`LLMs using OGA ` - -- New EoU Tools: - - - CNN profiling tool for VAI-ML flow - - Idle detection and suspension of contexts - - Rebalance feature for AIE hardware resource optimization - -- NPU and Compiler: - - - New Op Support: - - - MAC - - QResize Bilinear - - LUT Q-Power - - Expand - - Q-Hsoftmax - - A16 Q-Pad - - Q-Reduce-Mean along H/W dimension - - A16 Q-Global-AvgPool - - A16 Padding with non-zero values - - A16 Q-Sqrt - - Support for XINT8/XINT16 MatMul and A16W16/A8W8 Q-MatMul - - - Performance Improvements: - - - Q-Conv, Q-Pool, Q-Add, Q-Mul, Q-InstanceNorm - - Enhanced QDQ support for a range of operations - - Enhanced the tiling algorithm - - Improved graph-level optimization with extra transpose removal - - Enhanced AT/MT fusion - - Optimized memory usage and compile time - - Improved compilation messages - -- Quark for PyTorch: - - - Model Support: - - - Examples of LLM PTQ, such as Llama3.2 and Llama3.2-Vision models - - Example of YOLO-NAS detection model PTQ/QAT - - Example of SDXL v1.0 with weight INT8 activation INT8 - - - PyTorch Quantizer Enhancements: - - - Partial model quantization by user configuration under FX mode - - Quantization of ConvTranspose2d in Eager Mode and FX mode - - Advanced Quantization Algorithms with auto-generated configurations - - Optimized Configuration with DataTypeSpec for ease of use - - Accelerated in-place replacement under Eager Mode - - Loading configuration from file of algorithms and pre-optimizations - -- Quark for ONNX: - - - New Features: - - - Compatibility with ONNX Runtime version 1.18, 1.19 - - Support for int4, uint4, Microscaling data types - - Quantization for arbitrary specified operators - - Quantization type alignment of element-wise operators for mixed precision - - ONNX 
graph cleaning - - Int32 bias quantization - - - ONNX Quantizer Enhancements: - - - Fast fine-tuning support for the MatMul operator, BFP data type, and GPU acceleration - - Improved ONNX quantization of LLM models - - Optimized quantization of FP16 models - - Custom operator compilation process - - Default parameters for auto mixed precision - - Optimized Ryzen AI workflow by aligning with hardware constraints of the NPU - -- ONNX Runtime EP: - - - Support for ONNX Runtime EP shared libraries - - Python dependency removal - - Memory optimization during the compile phase - - Pattern API enhancement with multiple outputs and commutable arguments support - -- Known Issues: - - - Extended compile time for some models with BF16/BFP16 data types - - LLM models with 4K sequence length may revert to CPU execution - - Accuracy drop in some Transformer models using BF16/BFP16 data types, requiring Quark intervention - -*********** -Version 1.2 -*********** - -- New features: - - - Support added for Strix Point NPUs - - Support added for integrated GPU - - Smart installer for Ryzen AI 1.2 - - NPU DPM based on power slider - -- New model support: - - - `LLM flow support `_ for multiple models in both PyTorch and ONNX flow (optimized model support will be released asynchronously) - - SDXL-T with limited performance optimization - -- New EoU tools: - - - `AI Analyzer `_ : Analysis and visualization of model compilation and inference profiling - - Platform/NPU inspection and management tool (`xrt-smi `_) - - `Onnx Benchmarking tool `_ - -- New Demos: - - - NPU-GPU multi-model pipeline application `demo `_ - -- NPU and Compiler - - - New device support: Strix Nx4 and 4x4 Overlay - - New Op support: - - - InstanceNorm - - Silu - - Floating scale quantization operators (INT8, INT16) - - Support new rounding mode (Round to even) - - Performance Improvement: - - - Reduced the model compilation time - - Improved instruction loading - - Improved synchronization in large overlay - - 
Enhanced strided_slice performance - - Enhanced convolution MT fusion - - Enhanced convolution AT fusion - - Enhanced data movement op performance -- ONNX Quantizer updates - - - Improved usability with various features and tools, including weights-only quantization, graph optimization, dynamic shape fixing, and format transformations. - - Improved the accuracy of quantized models through automatic mixed precision and enhanced AdaRound and AdaQuant techniques. - - Enhanced support for the BFP data type, including more attributes and shape inference capability. - - Optimized the NPU workflow by aligning with the hardware constraints of the NPU. - - Supported compilation for Windows and Linux. - - Bugfix: - - - Fixed the problem where per-channel quantization is not compatible with onnxruntime 1.17. - - Fixed the bug of CLE when conv with groups. - - Fixed the bug of bias correction. -- Pytorch Quantizer updates - - - Tiny value quantization protection. - - Higher onnx version support in quantized model exporting. - - Relu6 hardware constrains support. - - Support of mean operation with keepdim=True. -- Resolved issues: - - - NPU SW stack will fail to initialize when the system is out of memory. This could impact camera functionality when Microsoft Effect Pack is enabled. - - If Microsoft Effects Pack is overloaded with other 4+ applications that use NPU to do inference, then camera functionality can be impacted. Can be fixed with a reboot. This will be fixed in the next release. - -*********** -Version 1.1 -*********** - -- New model support: - - - Llama 2 7B with w4abf16 (3-bit and 4-bit) quantization (Beta) - - Whisper base (EA access) - -- New EoU tools: - - - CNN Benchmarking tool on RyzenAI-SW Repo - - Platform/NPU inspection and management tool - -Quantizer -========= - -- ONNX Quantizer: - - - Improved usability with various features and tools, including diverse parameter configurations, graph optimization, shape fixing, and format transformations. 
- - Improved quantization accuracy through the implementation of experimental algorithmic improvements, including AdaRound and AdaQuant. - - Optimized the NPU workflow by distinguishing between different targets and aligning with the hardware constraints of the NPU. - - Introduced new utilities for model conversion. - -- PyTorch Quantizer: - - - Mixed data type quantization enhancement and bug fix. - - Corner bug fixes for add, sub, and conv1d operations. - - Tool for converting the S8S8 model to the U8S8 model. - - Tool for converting the customized Q/DQ to onnxruntime contributed Q/DQ with the "microsoft" domain. - - Tool for fixing a dynamic shapes model to fixed shape model. - -- Bug fixes - - - Fix for incorrect logging when simulating the LeakyRelu alpha value. - - Fix for useless initializers not being cleaned up during optimization. - - Fix for external data not being found when using use_external_data_format. - - Fix for custom Ops not being registered due to GLIBC version mismatch - -NPU and Compiler -================ - -- New op support: - - - Support Channel-wise Prelu. - - Gstiling with reverse = false. -- Fixed issues: - - - Fixed Transpose-convolution and concat optimization issues. - - Fixed Conv stride 3 corner case hang issue. -- Performance improvement: - - - Updated Conv 1x1 stride 2x2 optimization. - - Enhanced Conv 7x7 performance. - - Improved padding performance. - - Enhanced convolution MT fusion. - - Improved the performance for NCHW layout model. - - Enhanced the performance for eltwise-like op. - - Enhanced Conv and eltwise AT fusion. - - Improved the output convolution/transpose-convolution’s performance. - - Enhanced the logging message for EoU. 
- - -ONNX Runtime EP -=============== - -- End-2-End Application support on NPU - - - Enhanced existing support: Provided high-level APIs to enable seamless incorporation of pre/post-processing operations into the model to run on NPU - - Two examples (resnet50 and yolov8) published to demonstrate the usage of these APIs to run end-to-end models on the NPU -- Bug fixes for ONNXRT EP to support customers’ models - -Misc -==== - -- Contains mitigation for the following CVEs: CVE-2024-21974, CVE-2024-21975, CVE-2024-21976 - -************* -Version 1.0.1 -************* - -- Minor fix for Single click installation without given env name. -- Performance improvement in the NPU driver. -- Bug fix in elementwise subtraction in the compiler. -- Runtime stability fixes for minor corner cases. -- Quantizer update to resolve performance drop with default settings. - -*********** -Version 1.0 -*********** -Quantizer -========= - -- ONNX Quantizer - - - Support for ONNXRuntime 1.16. - - Support for the Cross-Layer-Equalization (CLE) algorithm in quantization, which can balance the weights of consecutive Conv nodes to make it more quantize-friendly in per-tensor quantization. - - Support for mixed precision quantization including UINT16/INT16/UINT32/INT32/FLOAT16/BFLOAT16, and support asymmetric quantization for BFLOAT16. - - Support for the MinMSE method for INT16/UINT16/INT32/UINT32 quantization. - - Support for quantization using the INT16 scale. - - Support for unsigned ReLU in symmetric activation configuration. - - Support for converting Float16 to Float32 during quantization. - - Support for converting NCHW model to NHWC model during quantization. - - Support for two more modes for MinMSE for better accuracy. The "All" mode computes the scales with all batches while the "MostCommon" mode computes the scale for each batch and uses the most common scales.
- - Support for the quantization of more operations: - - - PReLU, Sub, Max, DepthToSpace, SpaceToDepth, Slice, InstanceNormalization, and LpNormalization. - - Non-4D ReduceMean. - - Leakyrelu with arbitrary alpha. - - Split by converting it to Slice. - - - Support for op fusing of InstanceNormalization and L2Normalization in NPU workflow. - - Support for converting Clip to ReLU when the minimal value is 0. - - Updated shift_bias, shift_read, and shift_write constraints in the NPU workflow and added an option "IPULimitationCheck" to disable it. - - Support for disabling the op fusing of Conv + LeakyReLU/PReLU in the NPU workflow. - - Support for logging for quantization configurations and summary information. - - Support for removing initializer from input to support models converted from old version pytorch where weights are stored as inputs. - - Added a recommended configuration for the IPU_Transformer platform. - - New utilities: - - - Tool for converting the float16 model to the float32 model. - - Tool for converting the NCHW model to the NHWC model. - - Tool for quantized models with random input. - - - Three examples for quantization models from Timm, Torchvision, and ONNXRuntime modelzoo respectively. - - Bugfixes: - - - Fix a bug that weights are quantized with the "NonOverflow" method when using the "MinMSE" method. - -- Pytorch Quantizer - - - Support of some operations quantization in quantizer: inplace div, inplace sub - - Log and document enhancement to emphasize fast-finetune - - Timm models quantization script example - - Bug fix for operators: clamp and prelu - - QAT Support quantization of operations with multiple outputs - - QAT EOU enhancements: significantly reduces the need for network modifications - - QAT ONNX exporting enhancements: support more configurations - - New QAT examples - -- TF2 Quantizer - - - Support for Tensorflow 2.11 and 2.12. - - Support for the 'tf.linalg.matmul' operator. - - Updated shift_bias constraints for NPU workflow. 
- - Support for dumping models containing operations with multiple outputs. - - Added an example of a sequential model. - - Bugfixes: - - - Fix a bug that Hardsigmoid and Hardswish are not mapped to DPU without Batch Normalization. - - Fix a bug when both align_pool and align_concat are used simultaneously. - - Fix a bug in the sequential model when a layer has multiple consumers. - -- TF1 Quantizer - - - Update shift_bias constraints for NPU workflow. - - Bugfixes: - - - Fix a bug in fast_finetune when the 'input_node' and 'quant_node' are inconsistent. - - Fix a bug that AddV2 op is identified as BiasAdd. - - Fix a bug when the data type of the concat op is not float. - - Fix a bug in split_large_kernel_pool when the stride is not equal to 1. - -ONNXRuntime Execution Provider -============================== - -- Support new OPs, such as PRelu, ReduceSum, LpNormalization, DepthToSpace(DCR). -- Increase the percentage of model operators performed on the NPU. -- Fixed some issues causing model operators allocation to CPU. -- Improved report summary -- Support the encryption of the VOE cache -- End-2-End Application support on NPU - - - Enable running pre/post/custom ops on NPU, utilizing ONNX feature of E2E extensions. - - Two examples published for yolov8 and resnet50, in which preprocessing custom op is added and runs on NPU. - -- Performance: latency improves by up to 18% and power savings by up to 35% by additionally running preprocessing on NPU apart from inference. -- Multiple NPU overlays support - - - VOE configuration that supports both CNN-centric and GEMM-centric NPU overlays. - - Increases number of ops that run on NPU, especially for models which have both GEMM and CNN ops. - - Examples published for use with some of the vision transformer models. - -NPU and Compiler -============================== - -- New operators support - - - Global average pooling with large spatial dimensions - - Single Activation (no fusion with conv2d, e.g.
relu/single alpha PRelu) - -- Operator support enhancement - - - Enlarge the width dimension support range for depthwise-conv2d - - Support more generic broadcast for element-wise like operator - - Support output channel not aligned with 4B GStiling - - Support Mul and LeakyRelu fusion - - Concatenation’s redundant input elimination - - Channel Augmentation for conv2d (3x3, stride=2) - -- Performance optimization - - - PDI partition refine to reduce the overhead for PDI swap - - Enabled cost model for some specific models - -- Fixed asynchronous error in multiple thread scenario -- Fixed known issue on tanh and transpose-conv2d hang issue - -Known Issues -============================== - -- Support for multiple applications is limited to up to eight -- Windows Studio Effects should be disabled when using the Latency profile. To disable Windows Studio Effects, open **Settings > Bluetooth & devices > Camera**, select your primary camera, and then disable all camera effects. - - - -*********** -Version 0.9 -*********** - -Quantizer -========= - -- Pytorch Quantizer - - - Dict input/output support for model forward function - - Keywords argument support for model forward function - - Matmul subroutine quantization support - - Support of some operations in quantizer: softmax, div, exp, clamp - - Support quantization of some non-standard conv2d. - - -- ONNX Quantizer - - - Add support for Float16 and BFloat16 quantization. - - Add C++ kernels for customized QuantizeLinear and DequantizeLinear operations. - - Support saving quantizer version info to the quantized models' producer field. - - Support conversion of ReduceMean to AvgPool in NPU workflow. - - Support conversion of BatchNorm to Conv in NPU workflow. - - Support optimization of large kernel GlobalAvgPool and AvgPool operations in NPU workflow. - - Supports hardware constraints check and adjustment of Gemm, Add, and Mul operations in NPU workflow.
- - Supports quantization for LayerNormalization, HardSigmoid, Erf, Div, and Tanh for NPU. - -ONNXRuntime Execution Provider -============================== - -- Support new OPs, such as Conv1d, LayerNorm, Clip, Abs, Unsqueeze, ConvTranspose. -- Support pad and depad based on NPU subgraph’s inputs and outputs. -- Support for U8S8 models quantized by ONNX quantizer. -- Improve report summary tools. - -NPU and Compiler -================ - -- Supported exp/tanh/channel-shuffle/pixel-unshuffle/space2depth -- Performance uplift of xint8 output softmax -- Improve the partition messages for CPU/DPU -- Improve the validation check for some operators -- Accelerate the speed of compiling large models -- Fix the elew/pool/dwc/reshape mismatch issue and fix the stride_slice hang issue -- Fix str_w != str_h issue in Conv - - -LLM -=== - -- Smoothquant for OPT1.3b, 2.7b, 6.7b, 13b models. -- Huggingface Optimum ORT Quantizer for ONNX and Pytorch dynamic quantizer for Pytorch -- Enabled Flash attention v2 for larger prompts as a custom torch.nn.Module -- Enabled all CPU ops in bfloat16 or float32 with Pytorch -- int32 accumulator in AIE (previously int16) -- DynamicQuantLinear op support in ONNX -- Support different compute primitives for prefill/prompt and token phases -- Zero copy of weights shared between different op primitives -- Model saving after quantization and loading at runtime for both Pytorch and ONNX -- Enabled profiling prefill/prompt and token time using local copy of OPT Model with additional timer instrumentation -- Added demo mode script with greedy, stochastic and contrastive search options - -ASR -=== -- Support Whisper-tiny -- All GEMMs offloaded to AIE -- Improved compile time -- Improved WER - -Known issues -============ - -- Flow control OPs including "Loop", "If", "Reduce" not supported by VOE -- Resizing OP in ONNX opset 10 or lower is not supported by VOE -- Tensorflow 2.x quantizer supports models within tf.keras.model only -- Running quantizer docker
in WSL on Ryzen AI laptops may encounter OOM (Out-of-memory) issue -- Running multiple concurrent models using temporal sharing on the 5x4 binary is not supported -- Only batch sizes of 1 are supported -- Only models with the pretrained weights setting = TRUE should be imported -- Launching multiple processes on 4 1x4 binaries can cause hangs, especially when models have many sub-graphs - -| -| - -*********** -Version 0.8 -*********** - -Quantizer -========= - -- Pytorch Quantizer - - - Pytorch 1.13 and 2.0 support - - Mixed precision quantization support, supporting float32/float16/bfloat16/intx mixed quantization - - Support of bit-wise accuracy cross check between quantizer and ONNX-runtime - - Split and chunk operators were automatically converted to slicing - - Add support for BFP data type quantization - - Support of some operations in quantizer: where, less, less_equal, greater, greater_equal, not, and, or, eq, maximum, minimum, sqrt, Elu, Reduction_min, argmin - - QAT supports training on multiple GPUs - - QAT supports operations with multiple inputs or outputs - -- ONNX Quantizer - - - Provided Python wheel file for installation - - Support OnnxRuntime 1.15 - - Supports setting input shapes of random data reader - - Supports random data reader in the dump model function - - Supports saving the S8S8 model in U8S8 format for NPU - - Supports simulation of Sigmoid, Swish, Softmax, AvgPool, GlobalAvgPool, ReduceMean and LeakyRelu for NPU - - Supports node fusions for NPU - -ONNXRuntime Execution Provider  -============================== - -- Supports for U8S8 quantized ONNX models -- Improve the function of falling back to CPU EP -- Improve AIE plugin framework - - - Supports LLM Demo - - Supports Gemm ASR - - Supports E2E AIE acceleration for Pre/Post ops - - Improve the easy-of-use for partition and  deployment -- Supports  models containing subgraphs -- Supports report summary about OP assignment -- Supports report summary about DPU subgraphs falling back 
to CPU -- Improve log printing and troubleshooting tools. -- Upstreamed to ONNX Runtime Github repo for any data type support and bug fix - -NPU and Compiler -================ - -- Extended the support range of some operators - - - Larger input size: conv2d, dwc - - Padding mode: pad - - Broadcast: add - - Variant dimension (non-NHWC shape): reshape, transpose, add -- Support new operators, e.g. reducemax(min/sum/avg), argmax(min) -- Enhanced multi-level fusion -- Performance enhancement for some operators -- Add quantization information validation -- Improvement in device partition - - - User friendly message - - Target-dependency check - -Demos -===== - -- New Demos link: https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=transformers_2308.zip - - - LLM demo with OPT-1.3B/2.7B/6.7B - - Automatic speech recognition demo with Whisper-tiny - -Known issues -============ -- Flow control OPs including "Loop", "If", "Reduce" not supported by VOE -- Resize OP in ONNX opset 10 or lower not supported by VOE -- Tensorflow 2.x quantizer supports models within tf.keras.model only -- Running quantizer docker in WSL on Ryzen AI laptops may encounter OOM (Out-of-memory) issue -- Run multiple concurrent models by temporal sharing on the Performance optimized overlay (5x4.xclbin) is not supported -- Support batch size 1 only for NPU - - -| -| - -*********** -Version 0.7 -*********** - -Quantizer -========= - -- Docker Containers - - - Provided CPU dockers for Pytorch, Tensorflow 1.x, and Tensorflow 2.x quantizer - - Provided GPU Docker files to build GPU dockers - -- Pytorch Quantizer - - - Supports multiple output conversion to slicing - - Enhanced transpose OP optimization - - Inspector support new IP targets for NPU - -- ONNX Quantizer - - - Provided Python wheel file for installation - - Supports quantizing ONNX models for NPU as a plugin for the ONNX Runtime native quantizer - - Supports power-of-two quantization with both QDQ and QOP 
format - - Supports Non-overflow and Min-MSE quantization methods - - Supports various quantization configurations in power-of-two quantization in both QDQ and QOP format. - - - Supports signed and unsigned configurations. - - Supports symmetry and asymmetry configurations. - - Supports per-tensor and per-channel configurations. - - Supports bias quantization using int8 datatype for NPU. - - Supports quantization parameters (scale) refinement for NPU. - - Supports excluding certain operations from quantization for NPU. - - Supports ONNX models larger than 2GB. - - Supports using CUDAExecutionProvider for calibration in quantization - - Open source and upstreamed to Microsoft Olive Github repo - -- TensorFlow 2.x Quantizer - - - Added support for exporting the quantized model ONNX format. - - Added support for the keras.layers.Activation('leaky_relu') - -- TensorFlow 1.x Quantizer - - - Added support for folding Reshape and ResizeNearestNeighbor operators. - - Added support for splitting Avgpool and Maxpool with large kernel sizes into smaller kernel sizes. - - Added support for quantizing Sum, StridedSlice, and Maximum operators. - - Added support for setting the input shape of the model, which is useful in deploying models with undefined input shapes. 
- - Add support for setting the opset version in exporting ONNX format - -ONNX Runtime Execution Provider -=============================== - -- Vitis ONNX Runtime Execution Provider (VOE) - - - Supports ONNX Opset version 18, ONNX Runtime 1.16.0, and ONNX version 1.13 - - Supports both C++ and Python APIs (Python version 3) - - Supports deploy model with other EPs - - Supports falling back to CPU EP - - Open source and upstreamed to ONNX Runtime Github repo - - Compiler - - - Multiple Level op fusion - - Supports the same multi-output operator like chunk split - - Supports split big pooling to small pooling - - Supports 2-channel writeback feature for Hard-Sigmoid and Depthwise-Convolution - - Supports 1-channel GStiling - - Explicit pad-fix in CPU subgraph for 4-byte alignment - - Tuning the performance for multiple models - -NPU -=== - -- Two configurations - - - Power Optimized Overlay - - - Suitable for smaller AI models (1x4.xclbin) - - Supports spatial sharing, up to 4 concurrent AI workloads - - - Performance Optimized Overlay (5x4.xclbin) - - - Suitable for larger AI models - -Known issues -============ -- Flow control OPs including "Loop", "If", "Reduce" are not supported by VOE -- Resize OP in ONNX opset 10 or lower is not supported by VOE -- Tensorflow 2.x quantizer supports models within tf.keras.model only -- Running quantizer docker in WSL on Ryzen AI laptops may encounter OOM (Out-of-memory) issue -- Running multiple concurrent models by temporal sharing on the Performance optimized overlay (5x4.xclbin) is not supported - - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice.
diff --git a/LLM-examples/RAG-OGA/Dataset/ryzenAI_sw_rst_files.py b/LLM-examples/RAG-OGA/Dataset/ryzenAI_sw_rst_files.py deleted file mode 100644 index 6e3a8fdd..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzenAI_sw_rst_files.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import shutil -from git import Repo - -# --- Configuration --- -REPOS = [ - { - "url": "https://github.com/amd/ryzen-ai-documentation.git", - "branch": "main", - "clone_dir": "ryzen_ai_docs_repo", - "rst_source": "docs" - }, - { - "url": "https://gitenterprise.xilinx.com/AMDNeuralOpt/Quark.git", - "branch": "main", - "clone_dir": "quark_docs_repo", - "rst_source": "docs/source" - }, - { - "url": "https://gitenterprise.xilinx.com/AMDNeuralOpt/Quark.git", - "branch": "main", - "clone_dir": "quark_docs_repo_onnx", - "rst_source": "docs/source/onnx" - }, - { - "url": "https://gitenterprise.xilinx.com/AMDNeuralOpt/Quark.git", - "branch": "main", - "clone_dir": "quark_docs_repo_pytorch", - "rst_source": "docs/source/pytorch" - } -] - -# --- Process each repo config --- -for repo in REPOS: - url = repo["url"] - branch = repo["branch"] - clone_dir = repo["clone_dir"] - rst_source_dir = os.path.join(clone_dir, repo["rst_source"]) - - # Clean previous clone - if os.path.exists(clone_dir): - shutil.rmtree(clone_dir) - - # Clone the repository - print(f"Cloning {url} (branch: {branch})...") - Repo.clone_from(url, clone_dir, branch=branch, depth=1) - print("Cloned successfully.\n") - - # Extract .rst files - print(f"Extracting .rst files from '{rst_source_dir}'...\n") - for root, _, files in os.walk(rst_source_dir): - for fname in files: - if fname.endswith(".rst"): - src_path = os.path.join(root, fname) - rel_path = os.path.relpath(src_path, rst_source_dir) - flat_name = rel_path.replace(os.sep, "_") - dst_path = os.path.join(".", f"{clone_dir}_{flat_name}") - - shutil.copyfile(src_path, dst_path) - print(f" Saved: {dst_path}") - -print("\nAll .rst files have been saved to the current directory.") diff --git 
a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_app_development.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_app_development.rst deleted file mode 100644 index cfcac200..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_app_development.rst +++ /dev/null @@ -1,198 +0,0 @@ -.. include:: /icons.txt - -####################### -Application Development -####################### - -This page captures requirements and recommendations for developers looking to create, package and distribute applications targeting NPU-enabled AMD processors. - - - -.. _driver-compatibility: - -************************************* -VitisAI EP / NPU Driver Compatibility -************************************* - -The VitisAI EP requires a compatible version of the NPU drivers. For each version of the VitisAI EP, compatible drivers are bounded by a minimum version and a maximum release date. NPU drivers are backward compatible with VitisAI EP released up to 3 years before. The maximum driver release date is therefore set to 3 years after the release date of the corresponding VitisAI EP. - -The table below summarizes the driver requirements for the different versions of the VitisAI EP. - -.. list-table:: - :header-rows: 1 - - * - VitisAI EP version - - Minimum NPU Driver version - - Maximum NPU Driver release date - * - 1.4 - - 32.0.203.257 - - March 25th, 2028 - * - 1.3.1 - - 32.0.203.242 - - January 17th, 2028 - * - 1.3 - - 32.0.203.237 - - November 26th, 2027 - * - 1.2 - - 32.0.201.204 - - July 30th, 2027 - -The application must check that NPU drivers compatible with the version of the Vitis AI EP being used are installed. - -.. _apu-types: - -***************** -APU Types -***************** - -The Ryzen AI Software supports different types of NPU-enabled APUs. These APU types are referred to as PHX, HPT, STX and KRK. 
- -To programmatically determine the type of the local APU, it is possible to enumerate the PCI devices and check for an instance with a matching Hardware ID. - -.. list-table:: - :header-rows: 1 - - * - Vendor - - Device - - Revision - - APU Type - * - 0x1022 - - 0x1502 - - 0x00 - - PHX or HPT - * - 0x1022 - - 0x17F0 - - 0x00 - - STX - * - 0x1022 - - 0x17F0 - - 0x10 - - STX - * - 0x1022 - - 0x17F0 - - 0x11 - - STX - * - 0x1022 - - 0x17F0 - - 0x20 - - KRK - -The application must check that it is running on an AMD processor with an NPU, and that the NPU type is supported by the version of the Vitis AI EP being used. - - - -************************************ -Application Development Requirements -************************************ - -ONNX-RT Session -=============== - -The application should only use the Vitis AI Execution Provider if the following conditions are met: - -- The application is running on an AMD processor with an NPU type supported by the version of the Vitis AI EP being used. See :ref:`list ` above in this page. -- NPU drivers compatible with the version of the Vitis AI EP being used are installed. See :ref:`compatibility table ` above in this page. - -|memo| **NOTE**: Sample C++ code implementing the compatibility checks to be performed before using the VitisAI EP is provided here: https://github.com/amd/RyzenAI-SW/tree/main/utilities/npu_check - - -VitisAI EP Provider Options -=========================== - -For INT8 models, the application should detect which type of APU is present (PHX/HPT/STX/KRK) and set the ``xclbin`` provider option accordingly. Refer to the section about :ref:`compilation of INT8 models ` for details about this. - -For BF16 models, the application should set the ``config_file`` provider option to use the same file as the one which was used to precompile the BF16 model. Refer to the section about :ref:`compilation of BF16 models ` for details about this. 
- - -Cache Management -================ - -Cache directories generated by the Vitis AI Execution Provider should not be reused across different versions of the Vitis AI EP or across different versions of the NPU drivers. - -The application should check the version of the Vitis AI EP and of the NPU drivers. If the application detects a version change, it should delete the cache, or create a new cache directory with a different name. - - -Pre-Compiled Models -=================== - -The deployment version of the VitisAI Execution Provider (EP) does not support the on-the-fly compilation of BF16 models. Applications utilizing BF16 models must include pre-compiled versions of these models. The VitisAI EP can then load the pre-compiled models and deploy them efficiently on the NPU. - -Although including pre-compiled versions of INT8 models is not mandatory, it is beneficial as it reduces session creation time and enhances the end-user experience. - -| - -********************************** -Application Packaging Requirements -********************************** - -|excl| **IMPORTANT**: A patched version of the ``%RYZEN_AI_INSTALLATION_PATH%\deployment`` folder is available for download at the following link: `Download Here `_. This patched ``deployment`` folder is designed to replace the one included in the official installation of Ryzen AI 1.4. The following instructions assume that the original ``deployment`` folder has been replaced with the updated version. - -A C++ application built on the Ryzen AI ONNX Runtime requires the following components to be included in its distribution package. - -..
rubric:: For INT8 models - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - -- NPU Binary files (.xclbin) from the ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins`` folder - -- Recommended but not mandatory: pre-compiled models in the form of :ref:`Vitis AI EP cache folders ` or :ref:`Onnx Runtime EP context models ` - -.. rubric:: For BF16 models - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\flexmlrt\\flexmlrt.dll - -- Pre-compiled models in the form of :ref:`Vitis AI EP cache folders ` - -.. 
rubric:: For Hybrid LLMs - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\onnx_custom_ops.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\onnxruntime-genai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\ryzen_mm.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\hybrid-llm\\ryzenai_onnx_utils.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\DirectML.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - -.. rubric:: For NPU-only LLMs - -- DLLs: - - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\npu-llm\\onnxruntime-genai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitis_ai_custom_ops.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_shared.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_vitisai_ep.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\dyn_dispatch_core.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime_providers_vitisai.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\transaction.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\onnxruntime.dll - - %RYZEN_AI_INSTALLATION_PATH%\\deployment\\voe\\xclbin.dll - -- VAIP LLM configuration file: %RYZEN_AI_INSTALLATION_PATH%\\deployment\\npu-llm\\vaip_llm.json - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_examples.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_examples.rst deleted file mode 100644 index 6cb76f49..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_examples.rst +++ /dev/null @@ -1,52 +0,0 @@ -########################## -Examples, Demos, Tutorials -########################## - -This page introduces various demos, examples, and tutorials currently available with the Ryzen™ AI Software. - -************************* -Getting Started Tutorials -************************* - -NPU -~~~ - -- The :doc:`Getting Started Tutorial ` deploys a custom ResNet model demonstrating: - - - Pretrained model conversion to ONNX - - Quantization using AMD Quark quantizer - - Deployment using ONNX Runtime C++ and Python code - -- `Hello World Jupyter Notebook Tutorial `_ - -- New BF16 Model examples: - - - `Image Classification `_ - - `Finetuned DistilBERT for Text Classification `_ - - `Text Embedding Model Alibaba-NLP/gte-large-en-v1.5 `_ - -iGPU -~~~~ - -- `ResNet50 on iGPU `_ - - -************************************ -Other examples, demos, and tutorials -************************************ - -- Refer to `RyzenAI-SW repo `_ - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
- - - diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_getstartex.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_getstartex.rst deleted file mode 100644 index 14616b99..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_getstartex.rst +++ /dev/null @@ -1,414 +0,0 @@ -:orphan: - -######################## -Getting Started Tutorial -######################## - -This tutorial uses a fine-tuned version of the ResNet model (using the CIFAR-10 dataset) to demonstrate the process of preparing, quantizing, and deploying a model using Ryzen AI Software. The tutorial features deployment using both Python and C++ ONNX runtime code. - -.. note:: - In this documentation, "NPU" is used in descriptions, while "IPU" is retained in some of the tool's language, code, screenshots, and commands. This intentional - distinction aligns with existing tool references and does not affect functionality. Avoid making replacements in the code. - -- The source code files can be downloaded from `this link `_. Alternatively, you can clone the RyzenAI-SW repo and change the directory into "tutorial". - -.. code-block:: - - git clone https://github.com/amd/RyzenAI-SW.git - cd tutorial/getting_started_resnet - -| - -The following are the steps and the required files to run the example: - -.. list-table:: - :widths: 20 25 25 - :header-rows: 1 - - * - Steps - - Files Used - - Description - * - Installation - - ``requirements.txt`` - - Install the necessary package for this example. - * - Preparation - - ``prepare_model_data.py``, - ``resnet_utils.py`` - - The script ``prepare_model_data.py`` prepares the model and the data for the rest of the tutorial. - - 1. To prepare the model the script converts pre-trained PyTorch model to ONNX format. - 2. To prepare the necessary data the script downloads and extracts CIFAR-10 dataset. - - * - Pretrained model - - ``models/resnet_trained_for_cifar10.pt`` - - The ResNet model trained using CIFAR-10 is provided in .pt format. 
- * - Quantization - - ``resnet_quantize.py`` - - Convert the model to the NPU-deployable model by performing Post-Training Quantization flow using AMD Quark Quantization. - * - Deployment - Python - - ``predict.py`` - - Run the Quantized model using the ONNX Runtime code. We demonstrate running the model on both CPU and NPU. - * - Deployment - C++ - - ``cpp/resnet_cifar/.`` - - This folder contains the source code ``resnet_cifar.cpp`` that demonstrates running inference using C++ APIs. We additionally provide the infrastructure (required libraries, CMake files and header files) required by the example. - - -| -| - -************************ -Step 1: Install Packages -************************ - -* Ensure that the Ryzen AI Software is correctly installed. For more details, see the :doc:`installation instructions `. - -* Use the conda environment created during the installation for the rest of the steps. This example requires a couple of additional packages. Run the following command to install them: - - -.. code-block:: - - python -m pip install -r requirements.txt - -| -| - - -************************************** -Step 2: Prepare dataset and ONNX model -************************************** - -In this example, we utilize a custom ResNet model finetuned using the CIFAR-10 dataset - -The ``prepare_model_data.py`` script downloads the CIFAR-10 dataset in pickle format (for python) and binary format (for C++). This dataset will be used in the subsequent steps for quantization and inference. The script also exports the provided PyTorch model into ONNX format. The following snippet from the script shows how the ONNX model is exported: - -.. 
code-block:: - - dummy_inputs = torch.randn(1, 3, 32, 32) - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} - tmp_model_path = str(models_dir / "resnet_trained_for_cifar10.onnx") - torch.onnx.export( - model, - dummy_inputs, - tmp_model_path, - export_params=True, - opset_version=13, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - -Note the following settings for the onnx conversion: - -- Ryzen AI supports a batch size of 1, so the dummy input is fixed to a batch size of 1 during model conversion. -- Recommended ``opset_version`` setting 13 is used. - -Run the following command to prepare the dataset and export the ONNX model: - -.. code-block:: - - python prepare_model_data.py - -* The downloaded CIFAR-10 dataset is saved in the current directory at the following location: ``data/*``. -* The ONNX model is generated at models/resnet_trained_for_cifar10.onnx - -| -| - -************************** -Step 3: Quantize the Model -************************** - -Quantizing AI models from floating-point to 8-bit integers reduces computational power and the memory footprint required for inference. This example utilizes Quark for ONNX quantizer workflow. Quark takes the pre-trained float32 model from the previous step (``resnet_trained_for_cifar10.onnx``) and provides a quantized model. - -.. code-block:: - - python resnet_quantize.py - -This generates a quantized model in the QDQ quant format using the default configuration. After the completion of the run, the quantized ONNX model ``resnet_quantized.onnx`` is saved to models/resnet_quantized.onnx - -The :file:`resnet_quantize.py` file uses the ``ModelQuantizer::quantize_model`` function, which applies quantization to the model. - -..
code-block:: - - from quark.onnx.quantization.config import (Config, get_default_config) - from quark.onnx import ModelQuantizer - - # Get quantization configuration - quant_config = get_default_config("XINT8") - config = Config(global_quant_config=quant_config) - - # Create an ONNX quantizer - quantizer = ModelQuantizer(config) - - # Quantize the ONNX model - quantizer.quantize_model(input_model_path, output_model_path, dr) - -The parameters of this function are: - -* **input_model_path**: (String) The file path of the model to be quantized. -* **output_model_path**: (String) The file path where the quantized model is saved. -* **dr**: (Object or None) Calibration data reader that enumerates the calibration data and produces inputs for the original model. In this example, the CIFAR10 dataset is used for calibration during the quantization process. - - -| -| - -************************ -Step 4: Deploy the Model -************************ - -We demonstrate deploying the quantized model using both Python and C++ APIs. - -* :ref:`Deployment - Python ` -* :ref:`Deployment - C++ ` - -.. note:: - During the Python and C++ deployment, the compiled model artifacts are saved in the cache folder named ``/modelcachekey``. Ryzen-AI does not support reusing compiled model artifacts across versions, so if model artifacts exist from a previous software version, be sure to delete the folder ``modelcachekey`` before the deployment steps. - - -.. _dep-python: - -Deployment - Python -=========================== - -The ``predict.py`` script is used to deploy the model. It extracts the first ten images from the CIFAR-10 test dataset and converts them to the .png format. The script then reads all those ten images and classifies them by running the quantized custom ResNet model on CPU or NPU. - -Deploy the Model on the CPU ----------------------------- - -By default, ``predict.py`` runs the model on CPU. - -.. code-block:: - - python predict.py - -Typical output - -..
code-block:: - - Image 0: Actual Label cat, Predicted Label cat - Image 1: Actual Label ship, Predicted Label ship - Image 2: Actual Label ship, Predicted Label airplane - Image 3: Actual Label airplane, Predicted Label airplane - Image 4: Actual Label frog, Predicted Label frog - Image 5: Actual Label frog, Predicted Label frog - Image 6: Actual Label automobile, Predicted Label automobile - Image 7: Actual Label frog, Predicted Label frog - Image 8: Actual Label cat, Predicted Label cat - Image 9: Actual Label automobile, Predicted Label automobile - - -Deploy the Model on the Ryzen AI NPU ------------------------------------- - -To successfully run the model on the NPU, run the following setup steps: - -- Ensure ``RYZEN_AI_INSTALLATION_PATH`` points to ``path\to\ryzen-ai-sw-\``. If you installed Ryzen-AI software using the MSI installer, this variable should already be set. Ensure that the Ryzen-AI software package has not been moved post installation, in which case ``RYZEN_AI_INSTALLATION_PATH`` will have to be set again. - -- By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. However, explicitly passing the xclbin option in provider_options overrides the default setting. - -.. 
code-block:: - - parser = argparse.ArgumentParser() - parser.add_argument('--ep', type=str, default ='cpu',choices = ['cpu','npu'], help='EP backend selection') - opt = parser.parse_args() - - providers = ['CPUExecutionProvider'] - provider_options = [{}] - - if opt.ep == 'npu': - providers = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{ - 'cacheDir': str(cache_dir), - 'cacheKey': 'modelcachekey', - 'xclbin': 'path/to/xclbin' - }] - - session = ort.InferenceSession(model.SerializeToString(), providers=providers, - provider_options=provider_options) - - -Run the ``predict.py`` with the ``--ep npu`` switch to run the custom ResNet model on the Ryzen AI NPU: - - -.. code-block:: - - python predict.py --ep npu - -Typical output - -.. code-block:: - - [Vitis AI EP] No. of Operators : CPU 2 IPU 398 99.50% - [Vitis AI EP] No. of Subgraphs : CPU 1 IPU 1 Actually running on IPU 1 - ... - Image 0: Actual Label cat, Predicted Label cat - Image 1: Actual Label ship, Predicted Label ship - Image 2: Actual Label ship, Predicted Label ship - Image 3: Actual Label airplane, Predicted Label airplane - Image 4: Actual Label frog, Predicted Label frog - Image 5: Actual Label frog, Predicted Label frog - Image 6: Actual Label automobile, Predicted Label truck - Image 7: Actual Label frog, Predicted Label frog - Image 8: Actual Label cat, Predicted Label cat - Image 9: Actual Label automobile, Predicted Label automobile - - -.. _dep-cpp: - -Deployment - C++ -=========================== - -Prerequisites -------------- - -1. Visual Studio 2022 Community edition, ensure "Desktop Development with C++" is installed -2. cmake (version >= 3.26) -3. opencv (version=4.6.0) required for the custom resnet example - -Install OpenCV --------------- - -It is recommended to build OpenCV from the source code and use static build. 
The default installation location is "\install" , the following instruction installs OpenCV in the location "C:\\opencv" as an example. You may first change the directory to where you want to clone the OpenCV repository. - -.. code-block:: bash - - git clone https://github.com/opencv/opencv.git -b 4.6.0 - cd opencv - cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -G "Visual Studio 17 2022" "-DCMAKE_INSTALL_PREFIX=C:\opencv" "-DCMAKE_PREFIX_PATH=C:\opencv" -DCMAKE_BUILD_TYPE=Release -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_WITH_STATIC_CRT=OFF -B build - cmake --build build --config Release - cmake --install build --config Release - -The build files will be written to ``build\``. - -Build and Run Custom Resnet C++ sample --------------------------------------- - -The C++ source files, CMake list files and related artifacts are provided in the ``cpp/resnet_cifar/*`` folder. The source file ``cpp/resnet_cifar/resnet_cifar.cpp`` takes 10 images from the CIFAR-10 test set, converts them to .png format, preprocesses them, and performs model inference. The example has onnxruntime dependencies, that are provided in ``%RYZEN_AI_INSTALLATION_PATH%/onnxruntime/*``. - -Run the following command to build the resnet example. Assign ``-DOpenCV_DIR`` to the OpenCV build directory. - -.. code-block:: bash - - cd getting_started_resnet/cpp - cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -DCMAKE_INSTALL_PREFIX=. -DCMAKE_PREFIX_PATH=. -B build -S resnet_cifar -DOpenCV_DIR="C:/opencv/build" -G "Visual Studio 17 2022" - -This should generate the build directory with the ``resnet_cifar.sln`` solution file along with other project files. Open the solution file using Visual Studio 2022 and build to compile. 
You can also use "Developer Command Prompt for VS 2022" to open the solution file in Visual Studio. - -.. code-block:: bash - - devenv build/resnet_cifar.sln - -Now to deploy our model, we will go back to the parent directory (getting_started_resnet) of this example. After compilation, the executable should be generated in ``cpp/build/Release/resnet_cifar.exe``. We will copy this application over to the parent directory: - -.. code-block:: bash - - cd .. - xcopy cpp\build\Release\resnet_cifar.exe . - -Additionally, we will also need to copy the onnxruntime DLLs from the Vitis AI Execution Provider package to the current directory. The following commands copy the required files in the current directory: - -.. code-block:: bash - - xcopy %RYZEN_AI_INSTALLATION_PATH%\onnxruntime\bin\* /E /I - - -The C++ application that was generated takes 3 arguments: - -#. Path to the quantized ONNX model generated in Step 3 -#. The execution provider of choice (cpu or NPU) -#. vaip_config.json (pass None if running on CPU) - - -Deploy the Model on the CPU -**************************** - -To run the model on the CPU, use the following command: - -.. code-block:: bash - - resnet_cifar.exe models\resnet_quantized.onnx cpu - -Typical output: - -.. 
code-block:: bash - - model name:models\resnet_quantized.onnx - ep:cpu - Input Node Name/Shape (1): - input : -1x3x32x32 - Output Node Name/Shape (1): - output : -1x10 - Final results: - Predicted label is cat and actual label is cat - Predicted label is ship and actual label is ship - Predicted label is ship and actual label is ship - Predicted label is airplane and actual label is airplane - Predicted label is frog and actual label is frog - Predicted label is frog and actual label is frog - Predicted label is truck and actual label is automobile - Predicted label is frog and actual label is frog - Predicted label is cat and actual label is cat - Predicted label is automobile and actual label is automobile - -Deploy the Model on the NPU -**************************** - -To successfully run the model on the NPU: - -- Ensure ``RYZEN_AI_INSTALLATION_PATH`` points to ``path\to\ryzen-ai-sw-\``. If you installed Ryzen-AI software using the MSI installer, this variable should already be set. Ensure that the Ryzen-AI software package has not been moved post installation, in which case ``RYZEN_AI_INSTALLATION_PATH`` will have to be set again. - -- By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. However, explicitly passing the xclbin option in provider_options overrides the default setting. - -The following code block from ``resnet_cifar.cpp`` shows how ONNX Runtime is configured to deploy the model on the Ryzen AI NPU: - -..
code-block:: bash - - auto session_options = Ort::SessionOptions(); - - auto cache_dir = std::filesystem::current_path().string(); - - if(ep=="npu") - { - auto options = - std::unordered_map{ {"cacheDir", cache_dir}, {"cacheKey", "modelcachekey"}, {"xclbin", "path/to/xclbin"}}; - session_options.AppendExecutionProvider_VitisAI(options) - } - - auto session = Ort::Session(env, model_name.data(), session_options); - -To run the model on the NPU, we will pass the npu flag and the vaip_config.json file as arguments to the C++ application. Use the following command to run the model on the NPU: - -.. code-block:: bash - - resnet_cifar.exe models\resnet_quantized.onnx npu - -Typical output: - -.. code-block:: - - [Vitis AI EP] No. of Operators : CPU 2 IPU 398 99.50% - [Vitis AI EP] No. of Subgraphs : CPU 1 IPU 1 Actually running on IPU 1 - ... - Final results: - Predicted label is cat and actual label is cat - Predicted label is ship and actual label is ship - Predicted label is ship and actual label is ship - Predicted label is airplane and actual label is airplane - Predicted label is frog and actual label is frog - Predicted label is frog and actual label is frog - Predicted label is truck and actual label is automobile - Predicted label is frog and actual label is frog - Predicted label is cat and actual label is cat - Predicted label is automobile and actual label is automobile -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_gpu_ryzenai_gpu.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_gpu_ryzenai_gpu.rst deleted file mode 100644 index 89bbbf0d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_gpu_ryzenai_gpu.rst +++ /dev/null @@ -1,56 +0,0 @@ -########################### -DirectML Flow -########################### - -************* -Prerequisites -************* - -- DirectX12 capable Windows OS (Windows 11 recommended) -- Latest AMD `GPU device driver `_ installed -- `Microsoft Olive `_ for model conversion and optimization -- Latest `ONNX Runtime DirectML EP `_ - -You can ensure GPU driver and DirectX version from ``Windows Task Manager`` -> ``Performance`` -> ``GPU`` - -****************************** -Running models on Ryzen AI GPU -****************************** - -Running models on the Ryzen AI GPU is accomplished in two simple steps: - -**Model Conversion and Optimization**: After the model is trained, Microsoft Olive Optimizer can be used to convert the model to ONNX and optimize it for optimal target execution. - -For additional information, refer to the `Microsoft Olive Documentation `_ - - -**Deployment**: Once the model is in the ONNX format, the ONNX Runtime DirectML EP (``DmlExecutionProvider``) is used to run the model on the AMD Ryzen AI GPU. 
- -For additional information, refer to the `ONNX Runtime documentation for the DirectML Execution Provider `_ - - -******** -Examples -******** - -- Optimizing and running `ResNet on Ryzen AI GPU `_ - - -******************** -Additional Resources -******************** - - -- Article on how AMD and Black Magic Design worked together to accelerate `Davinci Resolve Studio `_ workload on AMD hardware: - - - `AI Accelerated Video Editing with DaVinci Resolve 18.6 & AMD Radeon Graphics `_ - -| - -- Blog posts on using the Ryzen AI Software for various generative AI workloads on GPU: - - - `Automatic1111 Stable Diffusion WebUI with DirectML Extension on AMD GPUs `_ - - - `Running Optimized Llama2 with Microsoft DirectML on AMD Radeon Graphics `_ - - - `AI-Assisted Mobile Workstation Workflows Powered by AMD Ryzen™ AI `_ diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_hybrid_oga.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_hybrid_oga.rst deleted file mode 100644 index 673dd3d4..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_hybrid_oga.rst +++ /dev/null @@ -1,209 +0,0 @@ -############################ -OnnxRuntime GenAI (OGA) Flow -############################ - -Ryzen AI Software supports deploying LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API. The OGA API is the lowest-level API available for building LLM applications on a Ryzen AI PC. This documentation covers the Hybrid execution mode for LLMs, which utilizes both the NPU and GPU - -**Note**: Refer to :doc:`npu_oga` for NPU only execution mode. - -************************ -Supported Configurations -************************ - -The Ryzen AI OGA flow supports Strix and Krackan Point processors. Phoenix (PHX) and Hawk (HPT) processors are not supported. 
- - -************ -Requirements -************ - -- Install NPU Drivers and Ryzen AI MSI installer according to the :doc:`inst` -- Install GPU device driver: Ensure GPU device driver https://www.amd.com/en/support is installed -- Install Git for Windows (needed to download models from HF): https://git-scm.com/downloads - -******************** -Pre-optimized Models -******************** - -AMD provides a set of pre-optimized LLMs ready to be deployed with Ryzen AI Software and the supporting runtime for hybrid execution. These models can be found on Hugging Face: - -- https://huggingface.co/amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/chatglm3-6b-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Llama-2-7b-hf-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Llama-2-7b-chat-hf-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Llama-3-8B-awq-g128-int4-asym-fp16-onnx-hybrid/tree/main -- https://huggingface.co/amd/Llama-3.1-8B-awq-g128-int4-asym-fp16-onnx-hybrid/tree/main -- https://huggingface.co/amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid -- https://huggingface.co/amd/Mistral-7B-Instruct-v0.1-hybrid -- https://huggingface.co/amd/Mistral-7B-Instruct-v0.2-hybrid -- https://huggingface.co/amd/Mistral-7B-v0.3-hybrid -- https://huggingface.co/amd/Llama-3.1-8B-Instruct-hybrid -- https://huggingface.co/amd/CodeLlama-7b-instruct-g128-hybrid -- https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid -- 
https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-asym-uint4-g128-lmhead-onnx-hybrid -- https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid -- https://huggingface.co/amd/AMD-OLMo-1B-SFT-DPO-hybrid -- https://huggingface.co/amd/Qwen2-7B-awq-uint4-asym-g128-lmhead-fp16-onnx-hybrid -- https://huggingface.co/amd/Qwen2-1.5B-awq-uint4-asym-global-g128-lmhead-g32-fp16-onnx-hybrid -- https://huggingface.co/amd/gemma-2-2b-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid - - -The steps for deploying the pre-optimized models using Python or C++ are described in the following sections. - -****************************** -Hybrid Execution of OGA Models -****************************** - -Setup -===== - -Activate the Ryzen AI 1.4 Conda environment: - -.. code-block:: - - conda activate ryzen-ai-1.4.0 - -Copy the required files in a local folder to run the LLMs from: - -.. code-block:: - - mkdir hybrid_run - cd hybrid_run - xcopy /Y /E "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\onnxruntime_genai\benchmark" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\amd_genai_prompt.txt" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\onnxruntime-genai.dll" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\onnx_custom_ops.dll" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\ryzen_mm.dll" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\ryzenai_onnx_utils.dll" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\DirectML.dll" . - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime.dll" . - -Download Models from HuggingFace -================================ - -Download the desired models from the list of pre-optimized models on Hugging Face: - -.. code-block:: - - # Make sure you have git-lfs installed (https://git-lfs.com) - git lfs install - git clone - -For example, for Llama-2-7b-chat: - -.. 
code-block:: - - git lfs install - git clone https://huggingface.co/amd/Llama-2-7b-chat-hf-awq-g128-int4-asym-fp16-onnx-hybrid - - -Enabling Performance Mode (Optional) -==================================== - -To run the LLMs in the best performance mode, follow these steps: - -- Go to ``Windows`` → ``Settings`` → ``System`` → ``Power`` and set the power mode to Best Performance. -- Execute the following commands in the terminal: - -.. code-block:: - - cd C:\Windows\System32\AMD - xrt-smi configure --pmode performance - - -Sample C++ Program -================== - -The ``model_benchmark.exe`` test application provides a simple mechanism for running and evaluating Hybrid OGA models using the native OGA C++ APIs. The source code for this application can be used as a reference implementation for how to integrate LLMs using the native OGA C++ APIs. - -The ``model_benchmark.exe`` test application can be used as follows: - -.. code-block:: - - # To see available options and default settings - .\model_benchmark.exe -h - - # To run with default settings - .\model_benchmark.exe -i $path_to_model_dir -f $prompt_file -l $list_of_prompt_lengths - - # To show more informational output - .\model_benchmark.exe -i $path_to_model_dir -f $prompt_file --verbose - - # To run with given number of generated tokens - .\model_benchmark.exe -i $path_to_model_dir -f $prompt_file -l $list_of_prompt_lengths -g $num_tokens - - # To run with given number of warmup iterations - .\model_benchmark.exe -i $path_to_model_dir -f $prompt_file -l $list_of_prompt_lengths -w $num_warmup - - # To run with given number of iterations - .\model_benchmark.exe -i $path_to_model_dir -f $prompt_file -l $list_of_prompt_lengths -r $num_iterations - - -For example, for Llama-2-7b-chat: - -..
code-block:: - - .\model_benchmark.exe -i Llama-2-7b-chat-hf-awq-g128-int4-asym-fp16-onnx-hybrid -f amd_genai_prompt.txt -l "1024" --verbose - -| - -**NOTE**: The C++ source code for the ``model_benchmark.exe`` executable can be found in the ``%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\c`` folder. This source code can be modified and recompiled if necessary using the commands below. - -.. code-block:: - - :: Copy project files - xcopy /E /I "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\c" .\sources - xcopy /E /I "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\onnxruntime_genai\include" .\sources\include - xcopy /E /I "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\onnxruntime_genai\lib" .\sources\lib - - :: Build project - cd sources - cmake -G "Visual Studio 17 2022" -A x64 -S . -B build - cmake --build build --config Release - - :: Copy runtime DLLs - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\onnxruntime-genai.dll" .\build\Release - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\onnx_custom_ops.dll" .\build\Release - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\ryzen_mm.dll" .\build\Release - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\hybrid-llm\ryzenai_onnx_utils.dll" .\build\Release - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\DirectML.dll" .\build\Release - xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime.dll" .\build\Release - - -Sample Python Scripts -===================== - -To run LLMs other than ChatGLM, use the following command: - -.. code-block:: - - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\llama3\run_model.py" --model_dir - -To run ChatGLM, use the following command: - -.. code-block:: - - pip install transformers==4.44.0 - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\chatglm\model-generate-chatglm3.py" --model - - -For example, for Llama-2-7b-chat: - -.. 
code-block:: - - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\llama3\run_model.py" --model_dir Llama-2-7b-chat-hf-awq-g128-int4-asym-fp16-onnx-hybrid - - -*********************** -Using Fine-Tuned Models -*********************** - -It is also possible to run fine-tuned versions of the pre-optimized OGA models. - -To do this, the fine-tuned models must first be prepared for execution with the OGA Hybrid flow. For instructions on how to do this, refer to the page about :doc:`oga_model_prepare`. - -Once a fine-tuned model has been prepared for Hybrid execution, it can be deployed by following the steps described above in this page. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_index.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_index.rst deleted file mode 100644 index 30733847..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_index.rst +++ /dev/null @@ -1,111 +0,0 @@ -########################## -Ryzen AI Software -########################## - -AMD Ryzen™ AI Software includes the tools and runtime libraries for optimizing and deploying AI inference on AMD Ryzen™ AI powered PCs. Ryzen AI software enables applications to run on the neural processing unit (NPU) built in the AMD XDNA™ architecture, as well as on the integrated GPU. This allows developers to build and deploy models trained in PyTorch or TensorFlow and run them directly on laptops powered by Ryzen AI using ONNX Runtime and the Vitis™ AI Execution Provider (EP). - -.. image:: images/rai-sw.png - :align: center - -*********** -Quick Start -*********** - -- :ref:`Supported Configurations ` -- :doc:`inst` -- :doc:`examples` - -************************* -Development Flow Overview -************************* - -The Ryzen AI development flow does not require any modifications to the existing model training processes and methods. The pre-trained model can be used as the starting point of the Ryzen AI flow. 
- -Quantization -============ -Quantization involves converting the AI model's parameters from floating-point to lower-precision representations, such as bfloat16 floating-point or 8-bit integer. Quantized models are more power-efficient, utilize less memory, and offer better performance. - -**AMD Quark** is a comprehensive cross-platform deep learning toolkit designed to simplify and enhance the quantization of deep learning models. Supporting both PyTorch and ONNX models, Quark empowers developers to optimize their models for deployment on a wide range of hardware backends, achieving significant performance gains without compromising accuracy. - -For more details, refer to the :doc:`model_quantization` page. - -Compilation and Deployment -========================== -The AI model is deployed using the ONNX Runtime with either C++ or Python APIs. The Vitis AI Execution Provider included in the ONNX Runtime intelligently determines what portions of the AI model should run on the NPU, optimizing workloads to ensure optimal performance with lower power consumption. - -For more details, refer to the :doc:`modelrun` page. - - -| -| - - -.. toctree:: - :maxdepth: 1 - :hidden: - - relnotes.rst - - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Getting Started on the NPU - - inst.rst - examples.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Running Models on the NPU - - model_quantization.rst - modelrun.rst - app_development.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Running LLMs on the NPU - - llm/overview.rst - llm/high_level_python.rst - llm/server_interface.rst - hybrid_oga.rst - oga_model_prepare.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Running Models on the GPU - - gpu/ryzenai_gpu.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Additional Features - - xrt_smi.rst - ai_analyzer.rst - ryzen_ai_libraries.rst - - -.. 
toctree:: - :maxdepth: 1 - :hidden: - :caption: Additional Topics - - Model Zoo - Licensing Information - - - -.. - ------------ - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_inst.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_inst.rst deleted file mode 100644 index 9f26d368..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_inst.rst +++ /dev/null @@ -1,150 +0,0 @@ -.. include:: /icons.txt - -######################### -Installation Instructions -######################### - - - -************* -Prerequisites -************* - -The Ryzen AI Software supports AMD processors with a Neural Processing Unit (NPU). Consult the release notes for the full list of :ref:`supported configurations `. - -The following dependencies must be present on the system before installing the Ryzen AI Software: - -.. list-table:: - :widths: 25 25 - :header-rows: 1 - - * - Dependencies - - Version Requirement - * - Windows 11 - - build >= 22621.3527 - * - Visual Studio - - 2022 - * - cmake - - version >= 3.26 - * - Anaconda or Miniconda - - Latest version - -| - -|warning| **IMPORTANT**: - -- Visual Studio 2022 Community: ensure that "Desktop Development with C++" is installed - -- Anaconda or Miniconda: ensure that the following path is set in the System PATH variable: ``path\to\anaconda3\Scripts`` or ``path\to\miniconda3\Scripts`` (The System PATH variable should be set in the *System Variables* section of the *Environment Variables* window). - -| - -.. _install-driver: - -******************* -Install NPU Drivers -******************* - -- Download the NPU driver installation package :download:`NPU Driver ` - -- Install the NPU drivers by following these steps: - - - Extract the downloaded ``NPU_RAI1.4_GA_257_WHQL.zip`` zip file. 
- - Open a terminal in administrator mode and execute the ``.\npu_sw_installer.exe`` file. - -- Ensure that the NPU MCDM driver (Version:32.0.203.257, Date:3/12/2025) is correctly installed by opening ``Device Manager`` -> ``Neural processors`` -> ``NPU Compute Accelerator Device``. - - -.. _install-bundled: - -************************* -Install Ryzen AI Software -************************* - -- Download the RyzenAI Software installer :download:`ryzen-ai-1.4.0.exe `. - -- Launch the MSI installer and follow the instructions on the installation wizard: - - - Accept the terms of the Licence agreement - - Provide the destination folder for Ryzen AI installation (default: ``C:\Program Files\RyzenAI\1.4.0``) - - Specify the name for the conda environment (default: ``ryzen-ai-1.4.0``) - - -The Ryzen AI Software packages are now installed in the conda environment created by the installer. - - -.. _quicktest: - - -********************* -Test the Installation -********************* - -The Ryzen AI Software installation folder contains a test to verify that the software is correctly installed. This installation test can be found in the ``quicktest`` subfolder. - -- Open a Conda command prompt (search for "Anaconda Prompt" in the Windows start menu) - -- Activate the Conda environment created by the Ryzen AI installer: - -.. code-block:: - - conda activate - -- Run the test: - -.. code-block:: - - cd %RYZEN_AI_INSTALLATION_PATH%/quicktest - python quicktest.py - - -- The quicktest.py script sets up the environment and runs a simple CNN model. On a successful run, you will see an output similar to the one shown below. This indicates that the model is running on the NPU and that the installation of the Ryzen AI Software was successful: - -.. code-block:: - - [Vitis AI EP] No. of Operators : CPU 2 NPU 398 - [Vitis AI EP] No. of Subgraphs : NPU 1 Actually running on NPU 1 - ... - Test Passed - ... - - -..
note:: - - The full path to the Ryzen AI Software installation folder is stored in the ``RYZEN_AI_INSTALLATION_PATH`` environment variable. - - -************************** -Other Installation Options -************************** - -Linux Installer -~~~~~~~~~~~~~~~ - -Compiling BF16 models requires more processing power than compiling INT8 models. If a larger BF16 model cannot be compiled on a Windows machine due to hardware limitations (e.g., insufficient RAM), an alternative Linux-based compilation flow is supported. More details can be found here: :doc:`rai_linux` - - - -Lightweight Installer -~~~~~~~~~~~~~~~~~~~~~ - -A lightweight installer is available with reduced features. It cannot be used for compiling BF16 models but fully supports compiling and running INT8 models and running LLM models. - -- Download the RyzenAI Software Runtime MSI installer :download:`ryzen-ai-rt-1.4.0.msi `. - -- Launch the MSI installer and follow the instructions on the installation wizard: - - - Accept the terms of the Licence agreement - - Provide the destination folder for Ryzen AI installation - - Specify the name for the conda environment - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_licenses.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_licenses.rst deleted file mode 100644 index f1239ed3..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_licenses.rst +++ /dev/null @@ -1,7 +0,0 @@ -Licensing Information -===================== - -Ryzen AI is released by Advanced Micro Devices, Inc. (AMD) and is subject to the licensing terms listed below. Some components may include third-party software that is subject to additional licenses. 
Review the following links for more information: - -- `AMD End User License Agreement `_ -- `Third Party End User License Agreement `_ diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_high_level_python.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_high_level_python.rst deleted file mode 100644 index 2341d76b..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_high_level_python.rst +++ /dev/null @@ -1,97 +0,0 @@ -.. Heading guidelines -.. # with overline, for parts -.. * with overline, for chapters -.. =, for sections -.. -, for subsections -.. ^, for subsubsections -.. “, for paragraphs - -##################### -High-Level Python SDK -##################### - -A Python environment offers flexibility for experimenting with LLMs, profiling them, and integrating them into Python applications. We use the `Lemonade SDK `_ to get up and running quickly. - -To get started, follow these instructions. - -*************************** -System-level pre-requisites -*************************** - -You only need to do this once per computer: - -1. Make sure your system has the recommended Ryzen AI driver installed as described in :ref:`install-driver`. -2. Download and install `Miniconda for Windows `_. -3. Launch a terminal and call ``conda init``. - - -***************** -Environment Setup -***************** - -To create and set up an environment, run these commands in your terminal: - -.. code-block:: bash - - conda create -n ryzenai-llm python=3.10 - conda activate ryzenai-llm - pip install lemonade-sdk[llm-oga-hybrid] - lemonade-install --ryzenai hybrid - -**************** -Validation Tools -**************** - -Now that you have completed installation, you can try prompting an LLM like this (where ``PROMPT`` is any prompt you like). - -Run this command in a terminal that has your environment activated: - -.. 
code-block:: bash - - lemonade -i amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid oga-load --device hybrid --dtype int4 llm-prompt --max-new-tokens 64 -p PROMPT - -Each example linked in the :ref:`featured-llms` table also has `example commands `_ for validating the speed and accuracy of each model. - -********** -Python API -********** -You can also run this code to try out the high-level Lemonade API in a Python script: - -.. code-block:: python - - from lemonade.api import from_pretrained - - model, tokenizer = from_pretrained( - "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid", recipe="oga-hybrid" - ) - - input_ids = tokenizer("This is my prompt", return_tensors="pt").input_ids - response = model.generate(input_ids, max_new_tokens=30) - - print(tokenizer.decode(response[0])) - -Each example linked in the :ref:`featured-llms` table also has an `example script `_ for streaming the text output of the LLM. - -********** -Next Steps -********** - -From here, you can check out `an example `_ or any of the other :ref:`featured-llms`. - -The examples pages also provide code for: - -#. Additional validation tools for measuring speed and accuracy. -#. Streaming responses with the API. -#. Integrating the API into applications. -#. Launching the server interface from the Python environment. - - - - -.. - ------------ - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. 
diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_overview.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_overview.rst deleted file mode 100644 index 57ead57e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_overview.rst +++ /dev/null @@ -1,211 +0,0 @@ -######## -Overview -######## - -************************************ -OGA-based Flow with Hybrid Execution -************************************ - -Ryzen AI Software supports deploying quantized 4-bit LLMs on Ryzen AI 300-series PCs. This solution uses a hybrid execution mode, which leverages both the NPU and integrated GPU (iGPU), and is built on the OnnxRuntime GenAI (OGA) framework. - -Hybrid execution mode optimally partitions the model such that different operations are scheduled on NPU vs. iGPU. This minimizes time-to-first-token (TTFT) in the prefill-phase and maximizes token generation (tokens per second, TPS) in the decode phase. - -OGA is a multi-vendor generative AI framework from Microsoft that provides a convenient LLM interface for execution backends such as Ryzen AI. - -Supported Configurations -======================== - -- Only Ryzen AI 300-series Strix Point (STX) and Krackan Point (KRK) processors support OGA-based hybrid execution. -- Developers with Ryzen AI 7000- and 8000-series processors can get started using the CPU-based examples linked in the :ref:`featured-llms` table. -- Windows 11 is the required operating system. - - -******************************* -Development Interfaces -******************************* - -The Ryzen AI LLM software stack is available through three development interfaces, each suited for specific use cases as outlined in the sections below. All three interfaces are built on top of native OnnxRuntime GenAI (OGA) libraries, as shown in the :ref:`software-stack-table` diagram below. 
- -The high-level Python APIs, as well as the Server Interface, also leverage the Lemonade SDK, which is multi-vendor open-source software that provides everything necessary for quickly getting started with LLMs on OGA. - -A key benefit of both OGA and Lemonade is that software developed against their interfaces is portable to many other execution backends. - -.. _software-stack-table: - -.. flat-table:: Ryzen AI Software Stack - :header-rows: 1 - :class: center-table - - * - Your Python Application - - Your LLM Stack - - Your Native Application - * - `Lemonade Python API* <#high-level-python-sdk>`_ - - `Lemonade Server Interface* <#server-interface-rest-api>`_ - - :rspan:`1` `OGA C++ Headers <../hybrid_oga.html>`_ - * - :cspan:`1` `OGA Python API* `_ - * - :cspan:`2` `Custom AMD OnnxRuntime GenAI (OGA) `_ - * - :cspan:`2` `AMD Ryzen AI Driver and Hardware `_ - -\* indicates open-source software (OSS). - -High-Level Python SDK -===================== - -The high-level Python SDK, Lemonade, allows you to get started using PyPI installation in approximately 5 minutes. - -This SDK allows you to: - -- Experiment with models in hybrid execution mode on Ryzen AI hardware. -- Validate inference speed and task performance. -- Integrate with Python apps using a high-level API. - -To get started in Python, follow these instructions: :doc:`high_level_python`. - - -Server Interface (REST API) -=========================== - -The Server Interface provides a convenient means to integrate with applications that: - -- Already support an LLM server interface, such as the Ollama server or OpenAI API. -- Are written in any language (C++, C#, Javascript, etc.) that supports REST APIs. -- Benefits from process isolation for the LLM backend. - -To get started with the server interface, follow these instructions: :doc:`server_interface`. - -For example applications that have been tested with Lemonade Server, see the `Lemonade Server Examples `_. 
- - -OGA APIs for C++ Libraries and Python -===================================== - -Native C++ libraries for OGA are available to give full customizability for deployment into native applications. - -The Python bindings for OGA also provide a customizable interface for Python development. - -To get started with the OGA APIs, follow these instructions: :doc:`../hybrid_oga`. - - -.. _featured-llms: - -******************************* -Featured LLMs -******************************* - -The following tables contain a curated list of LLMs that have been validated on Ryzen AI hybrid execution mode. The hybrid examples are built on top of OnnxRuntime GenAI (OGA). - -The comprehensive set of pre-optimized models for hybrid execution used in these examples are available in the `AMD hybrid collection on Hugging Face `_. It is also possible to run fine-tuned versions of the models listed (for example, fine-tuned versions of Llama2 or Llama3). For instructions on how to prepare a fine-tuned OGA model for hybrid execution, refer to :doc:`../oga_model_prepare`. - -.. _ryzen-ai-oga-featured-llms: - -.. 
flat-table:: Ryzen AI OGA Featured LLMs - :header-rows: 2 - :class: llm-table - - * - - - :cspan:`1` CPU Baseline (HF bfloat16) - - :cspan:`3` Ryzen AI Hybrid (OGA int4) - * - Model - - Example - - Validation - - Example - - TTFT Speedup - - Tokens/S Speedup - - Validation - - * - `DeepSeek-R1-Distill-Qwen-7B `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.4x - - 8.4x - - 🟢 - * - `DeepSeek-R1-Distill-Llama-8B `_ - - `Link `__ - - 🟢 - - `Link `__ - - 4.2x - - 7.6x - - 🟢 - * - `Llama-3.2-1B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 1.9x - - 5.1x - - 🟢 - * - `Llama-3.2-3B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 2.8x - - 8.1x - - 🟢 - * - `Phi-3-mini-4k-instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.6x - - 7.8x - - 🟢 - * - `Qwen1.5-7B-Chat `_ - - `Link `__ - - 🟢 - - `Link `__ - - 4.0x - - 7.3x - - 🟢 - * - `Mistral-7B-Instruct-v0.3 `_ - - `Link `__ - - 🟢 - - `Link `__ - - 5.0x - - 8.1x - - 🟢 - * - `Llama-3.1-8B-Instruct `_ - - `Link `__ - - 🟢 - - `Link `__ - - 3.9x - - 8.9x - - 🟢 - -The :ref:`ryzen-ai-oga-featured-llms` table was compiled using validation, benchmarking, and accuracy metrics as measured by the `ONNX TurnkeyML v6.1.0 `_ ``lemonade`` commands in each example link. After this table was created, the Lemonade SDK moved to the new location found `here `_. - -Data collection details: - -* All validation, performance, and accuracy metrics are collected on the same system configuration: - - * System: HP OmniBook Ultra Laptop 14z - * Processor: AMD Ryzen AI 9 HX 375 W/ Radeon 890M - * Memory: 32GB of RAM - -* The Hugging Face ``transformers`` framework is used as the baseline implementation for speedup and accuracy comparisons. - - * The baseline checkpoint is the original ``safetensors`` Hugging Face checkpoint linked in each table row, in the ``bfloat16`` data type. 
- -* All speedup numbers are the measured performance of the model with input sequence length (ISL) of ``1024`` and output sequence length (OSL) of ``64``, on the specified backend, divided by the measured performance of the baseline. -* We assign the 🟢 validation score based on this criterion: all commands in the example guide ran successfully. - - -************************************** -OGA-based Flow with NPU-only Execution -************************************** - -The primary OGA-based flow for LLMs employs a hybrid execution mode which leverages both the NPU and iGPU. AMD also provides support for an OGA-based flow where the iGPU is not used and where the compute-intensive operations are exclusively offloaded to the NPU. - -The OGA-based NPU-only execution mode is supported on STX and KRK platforms. - -To get started with the OGA-based NPU-only execution mode, follow these instructions: :doc:`../npu_oga`. - - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_server_interface.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_server_interface.rst deleted file mode 100644 index 1b4a5e6e..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_llm_server_interface.rst +++ /dev/null @@ -1,89 +0,0 @@ -.. Heading guidelines -.. # with overline, for parts -.. * with overline, for chapters -.. =, for sections -.. -, for subsections -.. ^, for subsubsections -.. “, for paragraphs - -########################### -Server Interface (REST API) -########################### - -The Lemonade SDK offers a server interface that allows your application to load an LLM on Ryzen AI hardware in a process, and then communicate with this process using standard ``REST`` APIs.
This allows applications written in any language (C#, JavaScript, Python, C++, etc.) to easily integrate with Ryzen AI LLMs. - -Server interfaces are used across the LLM ecosystem because they allow for no-code plug-and-play between the higher level of the application stack (GUIs, agents, RAG, etc.) with the LLM and hardware that have been abstracted by the server. - -For example, open source projects such as `Open WebUI <#open-webui-demo>`_ have out-of-box support for connecting to a variety of server interfaces, which in turn allows users to quickly start working with LLMs in a GUI. - -************ -Server Setup -************ - -Lemonade Server can be installed via the Lemonade Server Installer executable by following these steps: - -1. Make sure your system has the recommended Ryzen AI driver installed as described in :ref:`install-driver`. -2. Download and install ``Lemonade_Server_Installer.exe`` from the `latest Lemonade release `_. -3. Launch the server by double-clicking the ``lemonade_server`` shortcut added to your desktop. - -See the `Lemonade Server README `_ for more details. - -************ -Server Usage -************ - -The Lemonade Server provides the following OpenAI-compatible endpoints: - -- POST ``/api/v0/chat/completions`` - Chat Completions (messages to completions) -- POST ``/api/v0/completions`` - Text Completions (prompt to completion) -- GET ``/api/v0/models`` - List available models - -Please refer to the `server specification `_ document in the Lemonade repository for details about the request and response formats for each endpoint. - -The `OpenAI API documentation `_ also has code examples for integrating streaming completions into an application. - -Open WebUI Demo -=============== - -To experience the Lemonade Server, try using it with an OpenAI-compatible application, such as Open WebUI. - -Instructions: -------------- - -1. **Launch Lemonade Server:** Double-click the lemon icon on your desktop. 
See `server setup <#server-setup>`_ for installation instructions. - -2. **Install and Run Open WebUI:** In a terminal, install Open WebUI using the following commands: - -.. code-block:: bash - - conda create -n webui python=3.11 - conda activate webui - pip install open-webui - open-webui serve - -3. **Launch Open WebUI**: In a browser, navigate to ``_. - -4. **Connect Open WebUI to Lemonade Server:** In the top-right corner of the UI, click the profile icon and then: - - - Go to ``Settings`` → ``Connections``. - - Click the ``+`` button to add our OpenAI-compatible connection. - - In the URL field, enter ``http://localhost:8000/api/v0``, and in the key field put ``-``, then press save. - -**Done!** You are now able to run Open WebUI with Hybrid models. Feel free to choose any of the available “-Hybrid” models in the model selection menu. - -********** -Next Steps -********** - -- See `Lemonade Server Examples `_ to find applications that have been tested with Lemonade Server. -- Check out the `Lemonade Server specification `_ to learn more about supported features. -- Try out your Lemonade Server install with any application that uses the OpenAI chat completions API. - - -.. - ------------ - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_model_quantization.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_model_quantization.rst deleted file mode 100644 index ec8fd5a6..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_model_quantization.rst +++ /dev/null @@ -1,78 +0,0 @@ -################## -Model Quantization -################## - -**Model quantization** is the process of mapping high-precision weights/activations to a lower precision format, such as BF16/INT8, while maintaining model accuracy. 
This technique enhances the computational and memory efficiency of the model for deployment on NPU devices. It can be applied post-training, allowing existing models to be optimized without the need for retraining. - -The Ryzen AI compiler supports input models quantized to either INT8 or BF16 format: - -- CNN models: INT8 or BF16 -- Transformer models: BF16 - -Quantization introduces several challenges, primarily revolving around the potential drop in model accuracy. Choosing the right quantization parameters—such as data type, bit-width, scaling factors, and the decision between per-channel or per-tensor quantization—adds layers of complexity to the design process. - -********* -AMD Quark -********* - -**AMD Quark** is a comprehensive cross-platform deep learning toolkit designed to simplify and enhance the quantization of deep learning models. Supporting both PyTorch and ONNX models, Quark empowers developers to optimize their models for deployment on a wide range of hardware backends, achieving significant performance gains without compromising accuracy. - -For more challenging model quantization needs **AMD Quark** supports advanced quantization technique like **Fast Finetuning** that helps recover the lost accuracy of the quantized model. - -Documentation -============= -The complete documentation for AMD Quark for Ryzen AI can be found here: https://quark.docs.amd.com/latest/supported_accelerators/ryzenai/index.html - - -INT8 Examples -============= -**AMD Quark** provides default configrations that support INT8 quantization configuration. For example, `XINT8` uses symmetric INT8 activation and weights quantization with power-of-two scales using the MinMSE calibration method. -The quantization configuration can be customized using the `QuantizationConfig` class. The following example shows how to set up the quantization configuration for INT8 quantization: - -.. 
code-block:: - - quant_config = QuantizationConfig(calibrate_method=PowerOfTwoMethod.MinMSE, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_npu_cnn=True, - extra_options={'ActivationSymmetric': True}) - config = Config(global_quant_config=quant_config) - print("The configuration of the quantization is {}".format(config)) - -The user can use the `get_default_config('XINT8')` function to get the default configuration for INT8 quantization. - -For more details -~~~~~~~~~~~~~~~~ -- `AMD Quark Tutorial `_ for Ryzen AI Deployment -- Running an INT8 model on the NPU using :doc:`Getting Started Tutorial ` -- Advanced quantization techniques `Fast Finetuning and Cross Layer Equalization `_ for INT8 models - - -BF16 Examples -============= -**AMD Quark** provides default configurations that support BFLOAT16 (BF16) model quantization. For example, BF16 is a 16-bit floating-point format designed to have the same exponent size as FP32, allowing a wide dynamic range, but with reduced precision to save memory and speed up computations. -The BFLOAT16 (BF16) model needs to be converted from QDQ nodes to Cast operations to run with the VAIML compiler. AMD Quark supports this conversion with the configuration option `BF16QDQToCast`. - -.. code-block:: - - quant_config = get_default_config("BF16") - quant_config.extra_options["BF16QDQToCast"] = True - config = Config(global_quant_config=quant_config) - print("The configuration of the quantization is {}".format(config)) - -For more details -~~~~~~~~~~~~~~~~ -- `Image Classification `_ using ResNet50 to run a BF16 model on the NPU -- `Finetuned DistilBERT for Text Classification `_ -- `Text Embedding Model Alibaba-NLP/gte-large-en-v1.5 `_ -- Advanced quantization techniques `Fast Finetuning `_ for BF16 models. - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . 
Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelcompat.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelcompat.rst deleted file mode 100644 index 92d04267..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelcompat.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. include:: icons.txt - -################### -Model Compatibility -################### - -The Ryzen AI Software supports deploying quantized models saved in the ONNX format. - -Currently, the NPU supports a subset of the ONNX operators. At runtime, the ONNX graph is automatically partitioned into multiple subgraphs by the Vitis AI ONNX Execution Provider (VAI EP). The subgraph(s) containing operators supported by the NPU are executed on the NPU. The remaining subgraph(s) are executed on the CPU. This graph partitioning and deployment technique across CPU and NPU is fully automated by the VAI EP and is totally transparent to the end-user. - -|memo| **NOTE**: Models with ONNX opset 17 are recommended. If your model uses a different opset version, consider converting it using the `ONNX Version Converter `_. - - -The Ryzen AI compiler supports input models quantized to either INT8 or BF16 format: - -- CNN models: INT8 or BF16 -- Transformer models: BF16 - -BF16 models (CNN or Transformer) require processing power in terms of core count and memory, depending on model size. If a larger model cannot be compiled on a Windows machine due to hardware limitations (e.g., insufficient RAM), an alternative Linux-based compilation flow is supported. More details can be found here: :doc:`rai_linux`. - -The list of the ONNX operators currently supported by the NPU is as follows: - - - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice.
diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelrun.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelrun.rst deleted file mode 100644 index f6bed98d..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_modelrun.rst +++ /dev/null @@ -1,380 +0,0 @@ -.. include:: /icons.txt - -################################ -Model Compilation and Deployment -################################ - -***************** -Introduction -***************** - -The Ryzen AI Software supports compiling and deploying quantized model saved in the ONNX format. The ONNX graph is automatically partitioned into multiple subgraphs by the VitisAI Execution Provider (EP). The subgraph(s) containing operators supported by the NPU are executed on the NPU. The remaining subgraph(s) are executed on the CPU. This graph partitioning and deployment technique across CPU and NPU is fully automated by the VAI EP and is totally transparent to the end-user. - -|memo| **NOTE**: Models with ONNX opset 17 are recommended. If your model uses a different opset version, consider converting it using the `ONNX Version Converter `_ - -Models are compiled for the NPU by creating an ONNX inference session using the Vitis AI Execution Provider (VAI EP): - -.. code-block:: python - - providers = ['VitisAIExecutionProvider'] - session = ort.InferenceSession( - model, - sess_options = sess_opt, - providers = providers, - provider_options = provider_options - ) - - -The ``provider_options`` parameter allows passing special options to the Vitis AI EP. - -.. list-table:: - :widths: 20 35 - :header-rows: 1 - - * - Provider Options - - Description - * - config_file - - Configuration file to pass certain compile-specific options, used for BF16 compilation. - * - xclbin - - NPU binary file to specify NPU configuration, used for INT8 models. - * - cache_dir - - The path and name of the cache directory. - * - cache_key - - The subfolder in the cache directory where the compiled model is stored. 
- * - encryptionKey - - Used for generating an encrypted compiled model. - -Detailed usage of these options is discussed in the following sections of this page. - - -.. _compile-bf16: - -************************** -Compiling BF16 models -************************** - -|memo| **NOTE**: For compiling large BF16 models a machine with at least 32GB of memory is recommended. The machine does not need to have an NPU. It is also possible to compile BF16 models on a Linux workstation. More details can be found here: :doc:`rai_linux` - -When compiling BF16 models, a compilation configuration file must be provided through the ``config_file`` provider options. - -.. code-block:: python - - providers = ['VitisAIExecutionProvider'] - - provider_options = [{ - 'config_file': 'vai_ep_config.json' - }] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - - -By default, the configuration file for compiling BF16 models should contain the following: - -.. code-block:: json - - { - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": {} - } - ] - } - - -Additional options can be specified in the ``vaiml_config`` section of the configuration file, as described below. - -**Performance Optimization** - -The default compilation optimization level is 1. The optimization level can be changed as follows: - -.. code-block:: json - - "vaiml_config": {"optimize_level": 2} - -Supported values: 1 (default), 2 - - -**Automatic FP32 to BF16 Conversion** - -If a FP32 model is used, the compiler will automatically cast it to BF16 if this option is enabled. For better control over accuracy, it is recommended to quantize the model to BF16 using Quark. - -.. 
code-block:: json - - "vaiml_config": {"enable_f32_to_bf16_conversion": true} - -Supported values: false (default), true - - -**Optimizations for Transformer-Based Models** - -By default, the compiler vectorizes the data to optimize performance for CNN models. However, transformers perform best with unvectorized data. To better optimize transformer-based models, set: - -.. code-block:: json - - "vaiml_config": {"preferred_data_storage": "unvectorized"} - -Supported values: "vectorized" (default), "unvectorized" - - -.. _compile-int8: - -************************** -Compiling INT8 models -************************** - -When compiling INT8 models, the NPU configuration must be specified through the ``xclbin`` provider option. This option is not required for BF16 models. - -There are two types of NPU configurations for INT8 models: standard and benchmark. Setting the NPU configuration involves specifying a specific ``.xclbin`` binary file, which is located in the Ryzen AI Software installation tree. - -Depending on the target processor and binary type (standard/benchmark), the following ``.xclbin`` files should be used: - -**For STX/KRK APUs**: - -- Standard binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_Nx4_Overlay.xclbin`` -- Benchmark binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_4x4_Overlay.xclbin`` - -**For PHX/HPT APUs**: - -- Standard binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\phoenix\1x4.xclbin`` -- Benchmark binary: ``%RYZEN_AI_INSTALLATION_PATH%\voe-4.0-win_amd64\xclbins\phoenix\4x4.xclbin`` - -Python example selecting the standard NPU configuration for STX/KRK: - -.. 
code-block:: python - - providers = ['VitisAIExecutionProvider'] - - provider_options = [{ - 'xclbin': '{}\\voe-4.0-win_amd64\\xclbins\\strix\\AMD_AIE2P_Nx4_Overlay.xclbin'.format(os.environ["RYZEN_AI_INSTALLATION_PATH"]) - }] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - -| - -By default, the Ryzen AI Conda environment automatically sets the standard binary for all inference sessions through the ``XLNX_VART_FIRMWARE`` environment variable. However, explicitly passing the xclbin option in the provider options overrides the environment variable. - -.. code-block:: - - > echo %XLNX_VART_FIRMWARE% - C:\Program Files\RyzenAI\1.4.0\voe-4.0-win_amd64\xclbins\strix\AMD_AIE2P_Nx4_Overlay.xclbin - - - -| - -************************************ -Managing Compiled Models -************************************ - -To avoid the overhead of recompiling models, it is very advantageous to save the compiled models and use these pre-compiled versions in the final application. Pre-compiled models can be loaded instantaneously and immediately executed on the NPU. This greatly improves the session creation time and overall end-user experience. - -The RyzenAI Software supports two mechanisms for saving and reloading compiled models: - -- VitisAI EP Cache -- OnnxRuntime EP Context Cache - -.. _vitisai-ep-cache: - -VitisAI EP Cache -================ - -The VitisAI EP includes a built-in caching mechanism. This mechanism is enabled by default. When a model is compiled for the first time, it is automatically saved in the VitisAI EP cache directory. Any subsequent creation of an ONNX Runtime session using the same model will load the precompiled model from the cache directory, thereby reducing session creation time. - -The location of the VitisAI EP cache is specified with the ``cache_dir`` and ``cache_key`` provider options: - -- ``cache_dir`` - Specifies the path and name of the cache directory. 
-- ``cache_key`` - Specifies the subfolder in the cache directory where the compiled model is stored. - -Python example: - -.. code-block:: python - - from pathlib import Path - - providers = ['VitisAIExecutionProvider'] - cache_dir = Path(__file__).parent.resolve() - provider_options = [{'cache_dir': str(cache_dir), - 'cache_key': 'compiled_resnet50'}] - - session = ort.InferenceSession( - "resnet50.onnx", - providers=providers, - provider_options=provider_options - ) - - -In the example above, the cache directory is set to the absolute path of the folder containing the script being executed. Once the session is created, the compiled model is saved inside a subdirectory named ``compiled_resnet50`` within the specified cache folder. - -Default Settings ----------------- -In the current release, if ``cache_dir`` is not set, the default cache location is determined by the type of model: - -- INT8 models - ``C:\temp\%USERNAME%\vaip\.cache`` -- BF16 models - The directory where the script or program is executed - - -Disabling the Cache -------------------- -To ignore cached models and force recompilation, unset the ``XLNX_ENABLE_CACHE`` environment variable before running the application: - -.. code-block:: - - set XLNX_ENABLE_CACHE= - - - -VitisAI EP Cache Encryption ---------------------------- - -The contents of the VitisAI EP cache folder can be encrypted using AES256. Cache encryption is enabled by passing an encryption key through the VAI EP provider options. The same key must be used to decrypt the model when loading it from the cache. The key is a 256-bit value represented as a 64-digit string. - -Python example: - -.. code-block:: python - - session = onnxruntime.InferenceSession( - "resnet50.onnx", - providers=["VitisAIExecutionProvider"], - provider_options=[{ - "config_file":"/path/to/vaip_config.json", - "encryptionKey": "89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2" - }]) - -C++ example: - -.. 
code-block:: cpp - - auto onnx_model_path = "resnet50.onnx" - Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "resnet50"); - auto session_options = Ort::SessionOptions(); - auto options = std::unorderd_map({}); - options["config_file"] = "/path/to/vaip_config.json"; - options["encryptionKey"] = "89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2"; - - session_options.AppendExecutionProvider("VitisAI", options); - auto session = Ort::Experimental::Session(env, model_name, session_options); - -As a result of encryption, the model generated in the cache directory cannot be opened with Netron. Additionally, dumping is disabled to prevent the leakage of sensitive information about the model. - -.. _ort-ep-context-cache: - -OnnxRuntime EP Context Cache -============================ - -The Vitis AI EP supports the ONNX Runtime EP context cache feature. This features allows dumping and reloading a snapshot of the EP context before deployment. Currently, this feature is only available for INT8 models. - -The user can enable dumping of the EP context by setting the ``ep.context_enable`` session option to 1. - -The following options can be used for additional control: - -- ``ep.context_file_path`` – Specifies the output path for the dumped context model. -- ``ep.context_embed_mode`` – Embeds the EP context into the ONNX model when set to 1. - -For further details, refer to the official ONNX Runtime documentation: https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html - - -EP Context Encryption ---------------------- - -By default, the generated context model is unencrypted and can be used directly during inference. If needed, the context model can be encrypted using one of the methods described below. - -User-managed encryption -~~~~~~~~~~~~~~~~~~~~~~~ -After the context model is generated, the developer can encrypt the generated file using a method of choice. 
At runtime, the encrypted file can be loaded by the application, decrypted in memory and passed as a serialized string to the inference session. This method gives complete control to the developer over the encryption process. - -EP-managed encryption -~~~~~~~~~~~~~~~~~~~~~~~ -The Vitis AI EP encryption mechanism can be used to encrypt the context model. This is enabled by passing an encryption key via the ``encryptionKey`` provider option (discussed in the previous section). The model is encrypted using AES256. At runtime, the same encryption key must be provided to decrypt and load the context model. With this method, encryption and decryption are seamlessly managed by the VitisAI EP. - -Python example: - -.. code-block:: python - - # Compilation session - session_options = ort.SessionOptions() - session_options.add_session_config_entry('ep.context_enable', '1') - session_options.add_session_config_entry('ep.context_file_path', 'context_model.onnx') - session_options.add_session_config_entry('ep.context_embed_mode', '1') - session = ort.InferenceSession( - path_or_bytes='resnet50.onnx', - sess_options=session_options, - providers=['VitisAIExecutionProvider'], - provider_options=[{'encryptionKey': '89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2'}] - ) - - # Inference session - session_options = ort.SessionOptions() - session = ort.InferenceSession( - path_or_bytes='context_model.onnx', - sess_options=session_options, - providers=['VitisAIExecutionProvider'], - provider_options=[{'encryptionKey': '89703f950ed9f738d956f6769d7e45a385d3c988ca753838b5afbc569ebf35b2'}] - ) - - -**NOTE**: When compiling with encryptionKey, ensure that any existing cache directory (either the default cache directory or the directory specified by the ``cache_dir`` provider option) is deleted before compiling.
- -| - -************************** -Operator Assignment Report -************************** - - -Vitis AI EP generates a file named ``vitisai_ep_report.json`` that provides a report on model operator assignments across CPU and NPU. This file is automatically generated in the cache directory if no explicit cache location is specified in the code. This report includes information such as the total number of nodes, the list of operator types in the model, and which nodes and operators run on the NPU or on the CPU. Additionally, the report includes node statistics, such as input to a node, the applied operation, and output from the node. - - -.. code-block:: - - { - "deviceStat": [ - { - "name": "all", - "nodeNum": 400, - "supportedOpType": [ - "::Add", - "::Conv", - ... - ] - }, - { - "name": "CPU", - "nodeNum": 2, - "supportedOpType": [ - "::DequantizeLinear", - "::QuantizeLinear" - ] - }, - { - "name": "NPU", - "nodeNum": 398, - "supportedOpType": [ - "::Add", - "::Conv", - ... - ] - ... - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_npu_oga.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_npu_oga.rst deleted file mode 100644 index 134613f2..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_npu_oga.rst +++ /dev/null @@ -1,242 +0,0 @@ -:orphan: - -###################### -OGA NPU Execution Mode -###################### - -Ryzen AI Software supports deploying LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API. The OGA API is the lowest-level API available for building LLM applications on a Ryzen AI PC. This documentation covers the NPU execution mode for LLMs, which utilizes only the NPU. - -**Note**: Refer to :doc:`hybrid_oga` for Hybrid NPU + GPU execution mode.
- - -************************ -Supported Configurations -************************ - -The Ryzen AI OGA flow supports Strix and Krackan Point processors. Phoenix (PHX) and Hawk (HPT) processors are not supported. - - -************ -Requirements -************ -- Install NPU Drivers and Ryzen AI MSI installer according to the :doc:`inst` -- Install Git for Windows (needed to download models from HF): https://git-scm.com/downloads - - -******************** -Pre-optimized Models -******************** - -AMD provides a set of pre-optimized LLMs ready to be deployed with Ryzen AI Software and the supporting runtime for NPU execution. These models can be found on Hugging Face in the following collection: - -- https://huggingface.co/amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama2-7b-chat-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama-3-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama-3.1-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix -- 
https://huggingface.co/amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix - -The steps for deploying the pre-optimized models using C++ and python are described in the following sections. - -*************************** -NPU Execution of OGA Models -*************************** - -Setup -===== - -Activate the Ryzen AI 1.4 Conda environment: - -.. code-block:: - - conda activate ryzen-ai-1.4.0 - -Create a folder to run the LLMs from, and copy the required files: - -.. code-block:: - - mkdir npu_run - cd npu_run - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\npu-llm\exe" .\libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\npu-llm\libs\vaip_llm.json" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\npu-llm\onnxruntime-genai.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_vitis_ai_custom_ops.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_providers_shared.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_vitisai_ep.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\dyn_dispatch_core.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_providers_vitisai.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\transaction.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\xclbin.dll" libs - - -Download Models from HuggingFace -================================ - -Download the desired models from the list of pre-optimized models on Hugging Face: - -.. code-block:: - - # Make sure you have git-lfs installed (https://git-lfs.com) - git lfs install - git clone - -For example, for Llama-2-7b: - -.. code-block:: - - git lfs install - git clone https://huggingface.co/amd/Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix - - -**NOTE**: Ensure the models are cloned in the ``npu_run`` folder. 
- - -Enabling Performance Mode (Optional) -==================================== - -To run the LLMs in the best performance mode, follow these steps: - -- Go to ``Windows`` → ``Settings`` → ``System`` → ``Power`` and set the power mode to Best Performance. -- Execute the following commands in the terminal: - -.. code-block:: - - cd C:\Windows\System32\AMD - xrt-smi configure --pmode performance - - - -Sample C++ Programs -=================== - -The ``run_llm.exe`` test application provides a simple interface to run LLMs. The source code for this application can also be used as a reference for how to integrate LLMs using the native OGA C++ APIs. - -It supports the following command line options:: - - -m: model path - -f: prompt file - -n: max new tokens - -c: use chat template - -t: input prompt token length - -l: max length to be set in search options - -h: help - - -Example usage: - -.. code-block:: - - .\libs\run_llm.exe -m "Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix" -f "Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix\prompts.txt" -t "1024" -n 20 - -| - -The ``model_benchmark.exe`` program can be used to profile the execution of LLMs and report various metrics. It supports the following command line options:: - - -i,--input_folder - Path to the ONNX model directory to benchmark, compatible with onnxruntime-genai. - -l,--prompt_length - List of number of tokens in the prompt to use. - -p,--prompt_file - Name of prompt file (txt) expected in the input model directory. - -g,--generation_length - Number of tokens to generate. Default: 128 - -r,--repetitions - Number of times to repeat the benchmark. Default: 5 - -w,--warmup - Number of warmup runs before benchmarking. Default: 1 - -t,--cpu_util_time_interval - Sampling time interval for peak cpu utilization calculation, in milliseconds. Default: 250 - -v,--verbose - Show more informational output. - -h,--help - Show this help message and exit. - - -For example, for Llama-2-7b: - -..
code-block:: - - .\libs\model_benchmark.exe -i "Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix" -g 20 -p "Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix\prompts.txt" -l "2048,1024,512,256,128" - -| - -**NOTE**: The C++ source code for the ``run_llm.exe`` and ``model_benchmark.exe`` executables can be found in the ``%RYZEN_AI_INSTALLATION_PATH%\npu-llm\cpp`` folder. This source code can be modified and recompiled using the commands below. - -.. code-block:: - - :: Copy project files - xcopy /E /I "%RYZEN_AI_INSTALLATION_PATH%\npu-llm\cpp" .\sources - - :: Build project - cd sources - cmake -G "Visual Studio 17 2022" -A x64 -S . -B build - cmake --build build --config Release - - :: Copy executables in the "libs" folder - xcopy /I build\Release .\libs - - :: Copy runtime dependencies in the "libs" folder - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\npu-llm\libs\vaip_llm.json" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\npu-llm\onnxruntime-genai.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_vitis_ai_custom_ops.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_providers_shared.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_vitisai_ep.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\dyn_dispatch_core.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime_providers_vitisai.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\transaction.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\onnxruntime.dll" libs - xcopy /I "%RYZEN_AI_INSTALLATION_PATH%\deployment\voe\xclbin.dll" libs - -Sample Python Scripts -===================== - -In the model directory, open the ``genai_config.json`` file located in the folder of the downloaded model. Update the value of the "custom_ops_library" key with the path to the ``onnxruntime_vitis_ai_custom_ops.dll``, located in the ``npu_run\libs`` folder: - -.. 
code-block:: - - "session_options": { - ... - "custom_ops_library": "libs\\onnxruntime_vitis_ai_custom_ops.dll", - ... - } - -To run LLMs other than ChatGLM, use the following command: - -.. code-block:: - - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\llama3\run_model.py" --model_dir - -To run ChatGLM, use the following command: - -.. code-block:: - - pip install transformers==4.44.0 - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\chatglm\model-generate-chatglm3.py" -m - -For example, for Llama-2-7b: - -.. code-block:: - - python "%RYZEN_AI_INSTALLATION_PATH%\hybrid-llm\examples\python\llama3\run_model.py" --model_dir Llama-2-7b-hf-awq-g128-int4-asym-bf16-onnx-ryzen-strix - - - -*********************** -Using Fine-Tuned Models -*********************** - -It is also possible to run fine-tuned versions of the pre-optimized OGA models. - -To do this, the fine-tuned models must first be prepared for execution with the OGA NPU-only flow. For instructions on how to do this, refer to the page about :doc:`oga_model_prepare`. - -Once a fine-tuned model has been prepared for NPU-only execution, it can be deployed by following the steps described above in this page. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_oga_model_prepare.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_oga_model_prepare.rst deleted file mode 100644 index 90dbe2ee..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_oga_model_prepare.rst +++ /dev/null @@ -1,115 +0,0 @@ -#################### -Preparing OGA Models -#################### - -This section describes the process for preparing LLMs for deployment on a Ryzen AI PC using the hybrid or NPU-only execution mode. Currently, the flow supports only fine-tuned versions of the models already supported (as listed in :doc:`hybrid_oga` page). For example, fine-tuned versions of Llama2 or Llama3 can be used. 
However, different model families with architectures not supported by the hybrid flow cannot be used. - -Preparing a LLM for deployment on a Ryzen AI PC involves 2 steps: - -1. **Quantization**: The pretrained model is quantized to reduce memory footprint and better map to compute resources in the hardware accelerators -2. **Postprocessing**: During the postprocessing the model is exported to OGA followed by NPU-only or Hybrid execution mode specific postprocess to obtain the final deployable model. - -************ -Quantization -************ - -Prerequisites -============= -Linux machine with AMD (e.g., AMD Instinct MI Series) or Nvidia GPUs - -Setup -===== - -1. Create and activate Conda Environment  - -.. code-block:: - - conda create --name python=3.11 - conda activate - -2. If Using AMD GPUs, update PyTorch to use ROCm  - -.. code-block:: - - pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1 - python -c "import torch; print(torch.cuda.is_available())" # Must return `True` - -3. Download :download:`AMD Quark 0.8 ` and unzip the archive - -4. Install Quark:  - -.. code-block:: - - cd - pip install amd_quark-0.8+<>.whl - -5. Install other dependencies - -.. code-block:: - - pip install datasets - pip install transformers - pip install accelerate - pip install evaluate - - -Some models may require a specific version of ``transformers``. For example, ChatGLM3 requires version 4.44.0. - -Generate Quantized Model -======================== - -Use following command to run Quantization. In a GPU equipped Linux machine the quantization can take about 30-60 minutes. - -.. 
code-block:: - - cd examples/torch/language_modeling/llm_ptq/ - - python quantize_quark.py \ - --model_dir "meta-llama/Llama-2-7b-chat-hf" \ - --output_dir \ - --quant_scheme w_uint4_per_group_asym \ - --num_calib_data 128 \ - --quant_algo awq \ - --dataset pileval_for_awq_benchmark \ - --model_export hf_format \ - --data_type \ - --exclude_layers - - -- To generate OGA model for NPU only execution mode use ``--data_type float32`` -- To generate OGA model for Hybrid execution mode use ``--data_type float16`` -- For a BF16 pretrained model, you can use ``--data_type bfloat16``. - -The quantized model is generated in the folder. - -************** -Postprocessing -************** - -Copy the quantized model to the Windows PC with Ryzen AI installed, activate the Ryzen AI Conda environment, and execute ``model_generate`` command to generate the final model. - -Generate the final model for Hybrid execution mode: - -.. code-block:: - - conda activate ryzen-ai- - - model_generate --hybrid - - -Generate the final model for NPU execution mode: - -.. code-block:: - - conda activate ryzen-ai- - - model_generate --npu - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under `MIT License `_ . Refer to the `LICENSE File `_ for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_rai_linux.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_rai_linux.rst deleted file mode 100644 index 78b56545..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_rai_linux.rst +++ /dev/null @@ -1,70 +0,0 @@ -:orphan: - -########################## -Ryzen AI Software on Linux -########################## - -This guide provides instructions for using Ryzen AI 1.4 on Linux for model compilation, followed by running inference on Windows.
- -************* -Prerequisites -************* -The following is the recommended system configuration for the Ryzen AI Linux installer - -.. list-table:: - :widths: 25 25 - :header-rows: 1 - - * - Dependencies - - Version Requirement - * - Ubuntu - - 22.04 - * - RAM - - 32GB or Higher - * - CPU cores - - >= 8 - * - Python - - 3.10 or Higher - - -************************* -Installation Instructions -************************* - -- Download the Ryzen AI Software Linux installer: :download:`ryzen_ai-1.4.0.tgz `. - -- Extract the .tgz using the following command: - -.. code-block:: - - tar -xvzf ryzen_ai-1.4.0.tgz - -- Run the installer with default settings. This will prompt to read and agree to the EULA: - -.. code-block:: - - cd ryzen_ai-1.4.0 - ./install_ryzen_ai_1_4.sh - -- After reading the EULA, re-run the installer with options to agree to the EULA and create a Python virtual environment: - -.. code-block:: - - ./install_ryzen_ai_1_4.sh -a yes -p -l - -- Activate the virtual environment to start using the Ryzen AI Software: - -.. code-block:: - - source /bin/activate - - -****************** -Usage Instructions -****************** - -The process for model compilation on Linux is similar to that on Windows. Refer to the instructions provided in the :doc:`modelrun` page for complete details. - -Once the model has been successfully compiled on your Linux machine, proceed to copy the entire working directory to a Windows machine that operates on an STX-based system. - -Prior to running the compiled model on the Windows machine, ensure that all required prerequisites are satisfied as listed in the :doc:`inst` page. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_ryzen_ai_libraries.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_ryzen_ai_libraries.rst deleted file mode 100644 index 85948651..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_ryzen_ai_libraries.rst +++ /dev/null @@ -1,87 +0,0 @@ -..
Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. - -##################### -Ryzen AI CVML library -##################### - -The Ryzen AI CVML libraries build on top of the Ryzen AI drivers and execution infrastructure to provide powerful AI capabilities to C++ applications without having to worry about training specific AI models and integrating them to the Ryzen AI framework. - -Each Ryzen AI CVML library feature offers a simple C++ application programming interface (API) that can be easily incorporated into existing applications. - -The Ryzen AI CVML library is distributed through the RyzenAI-SW Github repository: https://github.com/amd/RyzenAI-SW/tree/main/Ryzen-AI-CVML-Library - -************* -Prerequisites -************* -Ensure that the following software tools/packages are installed on the development system. - -1. Visual Studio 2022 Community edition or newer, ensure “Desktop Development with C++” is installed -2. Cmake (version >= 3.18) -3. OpenCV (version=4.8.1 or newer) - -************************************************** -Building sample applications -************************************************** -This section describes the steps to build Ryzen AI CVML library sample applications. - -Navigate to the folder containing Ryzen AI samples -================================================== -Download the Ryzen AI CVML sources, and go to the 'samples' sub-folder of the library. :: - - git clone https://github.com/amd/RyzenAI-SW.git -b main --depth-1 - chdir RyzenAI-SW\Ryzen-AI-CVML-Library\samples - -OpenCV libraries -================ -Ryzen AI CVML library samples make use of OpenCV, so set an environment variable to let the build scripts know where to find OpenCV. :: - - set OPENCV_INSTALL_ROOT= - -Build Instructions -================== -Create a build folder and use CMAKE to build the sample(s). 
:: - - mkdir build-samples - cmake -S %CD% -B %CD%\build-samples -DOPENCV_INSTALL_ROOT=%OPENCV_INSTALL_ROOT% - cmake --build %CD%\build-samples --config Release - -The compiled sample application(s) will be placed in the various build-samples\\Release folder(s) under the 'samples' folder. - -************************************************* -Running sample applications -************************************************* -This section describes how to execute Ryzen AI CVML library sample applications. - -Update the console and/or system PATH -===================================== -Ryzen AI CVML library applications need to be able to find the library files. One way to do this is to add the location of the libraries to the system or console PATH environment variable. - -In this example, the location of OpenCV's runtime libraries is also added to the PATH environment variable. :: - - set PATH=%PATH%;\windows - set PATH=%PATH%;%OPENCV_INSTALL_ROOT%\x64\vc16\bin - -Adjust the aforementioned commands to match the actual location of Ryzen AI and OpenCV libraries, respectively. - -Select an input source/image/video -================================== -Ryzen AI CVML library samples can accept a variety of image and video input formats, or even open the default camera on the system if "0" is specified as an input. - -In this example, a publicly available video file is used for the application's input. :: - - curl -o dancing.mp4 https://videos.pexels.com/video-files/4540332/4540332-hd_1920_1080_25fps.mp4 - -Execute the sample application -============================== -Finally, the previously built sample application can be executed with the selected input source. :: - - build-samples\cvml-sample-depth-estimation\Release\cvml-sample-depth-estimation.exe -i dancing.mp4 - -.. - ------------ - - ##################################### - License - ##################################### - - Ryzen AI is licensed under MIT License. 
Refer to the LICENSE file for the full license text and copyright notice. diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_xrt_smi.rst b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_xrt_smi.rst deleted file mode 100644 index 2266d905..00000000 --- a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_docs_repo_xrt_smi.rst +++ /dev/null @@ -1,327 +0,0 @@ -.. -.. Heading guidelines -.. -.. # with overline, for parts -.. * with overline, for chapters -.. =, for sections -.. -, for subsections -.. ^, for subsubsections -.. “, for paragraphs -.. - -.. include:: /icons.txt - -######################## -NPU Management Interface -######################## - -******************************* -Introduction -******************************* - -The ``xrt-smi`` utility is a command-line interface to monitor and manage the NPU integrated AMD CPUs. - -It is installed in ``C:\Windows\System32\AMD`` and it can be directly invoked from within the conda environment created by the Ryzen AI Software installer. - -The ``xrt-smi`` utility currently supports three primary commands: - -- ``examine`` - generates reports related to the state of the AI PC and the NPU. -- ``validate`` - executes sanity tests on the NPU. -- ``configure`` - manages the performance level of the NPU. - -By default, the output of the ``xrt-smi examine`` and ``xrt-smi validate`` commands goes to the terminal. It can also be written to file in JSON format as shown below: - -.. code-block:: shell - - xrt-smi examine -f JSON -o - -The utility also supports the following options which can be used with any command: - -- ``--help`` - help to use xrt-smi or one of its sub commands -- ``--version`` - report the version of XRT, driver and firmware -- ``--verbose`` - turn on verbosity -- ``--batch`` - enable batch mode (disables escape characters) -- ``--force`` - when possible, force an operation.
Eg - overwrite a file in examine or validate - -The ``xrt-smi`` utility requires `Microsoft Visual C++ Redistributable `_ (version 2015 to 2022) to be installed. - - -******************************* -Overview of Key Commands -******************************* - -.. list-table:: - :widths: 35 65 - :header-rows: 1 - - * - Command - - Description - * - examine - - system config, device name - * - examine --report platform - - performance mode, power - * - examine --report aie-partitions - - hw contexts - * - validate --run latency - - latency test - * - validate --run throughput - - throughput test - * - validate --run gemm - - INT8 GEMM test TOPS. This is a full array test and it should not be run while another workload is running. **NOTE**: This command is not supported on PHX and HPT NPUs. - * - configure --pmode - - set performance mode - - -|memo| **NOTE**: The ``examine --report aie-partition`` report runtime information. These commands should be used when a model is running on the NPU. You can run these commands in a loop to see live updates of the reported data. - - -******************************* -xrt-smi examine -******************************* - -System Information -================== - -Reports OS/system information of the AI PC and confirm the presence of the AMD NPU. - -.. code-block:: shell - - xrt-smi examine - -Sample Command Line Output:: - - - System Configuration - OS Name : Windows NT - Release : 26100 - Machine : x86_64 - CPU Cores : 20 - Memory : 32063 MB - Distribution : Microsoft Windows 11 Enterprise - Model : HP OmniBook Ultra Laptop 14-fd0xxx - BIOS Vendor : HP - BIOS Version : W81 Ver. 
01.01.14 - - XRT - Version : 2.19.0 - Branch : HEAD - Hash : f62307ddadf65b54acbed420a9f0edc415fefafc - Hash Date : 2025-03-12 16:34:48 - NPU Driver Version : 32.0.203.257 - NPU Firmware Version : 1.0.7.97 - - Device(s) Present - |BDF |Name | - |----------------|-----------| - |[00c4:00:01.1] |NPU Strix | - - -Sample JSON Output:: - - - { - "schema_version": { - "schema": "JSON", - "creation_date": "Tue Mar 18 22:43:38 2025 GMT" - }, - "system": { - "host": { - "os": { - "sysname": "Windows NT", - "release": "26100", - "machine": "x86_64", - "distribution": "Microsoft Windows 11 Enterprise", - "model": "HP OmniBook Ultra Laptop 14-fd0xxx", - "hostname": "XCOUDAYD02", - "memory_bytes": "0x7d3f62000", - "cores": "20", - "bios_vendor": "HP", - "bios_version": "W81 Ver. 01.01.14" - }, - "xrt": { - "version": "2.19.0", - "branch": "HEAD", - "hash": "f62307ddadf65b54acbed420a9f0edc415fefafc", - "build_date": "2025-03-12 16:34:48", - "drivers": [ - { - "name": "NPU Driver", - "version": "32.0.203.257" - } - ] - }, - "devices": [ - { - "bdf": "00c4:00:01.1", - "device_class": "Ryzen", - "name": "NPU Strix", - "id": "0x0", - "firmware_version": "1.0.7.97", - "instance": "mgmt(inst=1)", - "is_ready": "true" - } - ] - } - } - } - - - - -Platform Information -==================== - -Reports more detailed information about the NPU, such as the performance mode and power consumption. - -.. code-block:: shell - - xrt-smi examine --report platform - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - Platform - Name : NPU Strix - Performance Mode : Default - - Power : 1.277 Watts - -|memo| **NOTE**: Power reporting is not supported on PHX and HPT NPUs. Power reporting is only available on STX devices and onwards. - -NPU Partitions -============== - -Reports details about the NPU partition and column occupancy on the NPU. - -.. 
code-block:: shell - - xrt-smi examine --report aie-partitions - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - -NPU Context Bindings -==================== - -Reports details about the columns to NPU HW context binding. - -.. code-block:: shell - - xrt-smi examine --report aie-partitions --verbose - -Sample Command Line Output:: - - Verbose: Enabling Verbosity - Verbose: SubCommand: examine - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - AIE Columns - |Column ||HW Context Slot | - |--------||-----------------| - |0 ||[1] | - |1 ||[1] | - |2 ||[1] | - |3 ||[1] | - - - - - -******************************* -xrt-smi validate -******************************* - -Executing a Sanity Check on the NPU -=================================== - -Runs a set of built-in NPU sanity tests which includes latency, throughput, and gemm. - -Note: All tests are run in performance mode. - -- ``latency`` - this test executes a no-op control code and measures the end-to-end latency on all columns -- ``throughput`` - this test loops back the input data from DDR through a MM2S Shim DMA channel back to DDR through a S2MM Shim DMA channel. 
The data movement within the AIE array follows the lowest latency path i.e. movement is restricted to just the Shim tile. -- ``gemm`` - An INT8 GeMM kernel is deployed on all 32 cores by the application. Each core is storing cycle count in the core data memory. The cycle count is read by the firmware. The TOPS application uses the "XBUTIL" tool to capture the IPUHCLK while the workload runs. Once all cores are executed, the cycle count from all cores will be synced back to the host. Finally, the application uses IPUHCLK, core cycle count, and GeMM kernel size to calculate the TOPS. This is a full array test and it should not be run while another workload is running. **NOTE**: This command is not supported on PHX and HPT NPUs. -- ``all`` - All applicable validate tests will be executed (default) - - -.. code-block:: shell - - xrt-smi validate --run all - -|memo| **NOTE**: Some sanity checks may fail if other applications (for example MEP, Microsoft Experience Package) are also using the NPU. - -Sample Command Line Output:: - - - Validate Device : [00c4:00:01.1] - Platform : NPU Strix - Power Mode : Performance - ------------------------------------------------------------------------------- - Test 1 [00c4:00:01.1] : gemm - Details : TOPS: 51.3 - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 2 [00c4:00:01.1] : latency - Details : Average latency: 84.2 us - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 3 [00c4:00:01.1] : throughput - Details : Average throughput: 59891.0 ops - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Validation completed. 
Please run the command '--verbose' option for more details - -******************************* -xrt-smi configure -******************************* - -Managing the Performance Level of the NPU -========================================= - -To set the performance level of the NPU, you can choose from the following modes: powersaver, balanced, performance, turbo, or default. Use the command below: - -.. code-block:: shell - - xrt-smi configure --pmode - -- ``default`` - adapts to the Windows Power Mode setting, which can be adjusted under System -> Power & battery -> Power mode. For finer control of the NPU settings, it is recommended to use the xrt-smi mode setting, which overrides the Windows Power mode and ensures optimal results. -- ``powersaver`` - configures the NPU to prioritize power saving, preserving laptop battery life. -- ``balanced`` - configures the NPU to provide a compromise between power saving and performance. -- ``performance`` - configures the NPU to prioritize performance, consuming more power. -- ``turbo`` - configures the NPU for maximum performance; requires AC power to be plugged in, otherwise uses ``performance`` mode. - -Example: Setting the NPU to high-performance mode - -.. code-block:: shell - - xrt-smi configure --pmode performance - -To check the current performance level, use the following command: - -.. code-block:: shell - - xrt-smi examine --report platform - diff --git a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_model_compilation.pdf b/LLM-examples/RAG-OGA/Dataset/ryzen_ai_model_compilation.pdf deleted file mode 100644 index f2bf6a87..00000000 Binary files a/LLM-examples/RAG-OGA/Dataset/ryzen_ai_model_compilation.pdf and /dev/null differ diff --git a/LLM-examples/RAG-OGA/Dataset/xrt_smi - Copy.rst b/LLM-examples/RAG-OGA/Dataset/xrt_smi - Copy.rst deleted file mode 100644 index 2266d905..00000000 --- a/LLM-examples/RAG-OGA/Dataset/xrt_smi - Copy.rst +++ /dev/null @@ -1,327 +0,0 @@ -.. -.. Heading guidelines -.. -.. 
# with overline, for parts -.. * with overline, for chapters -.. =, for sections -.. -, for subsections -.. ^, for subsubsections -.. “, for paragraphs -.. - -.. include:: /icons.txt - -######################## -NPU Management Interface -######################## - -******************************* -Introduction -******************************* - -The ``xrt-smi`` utility is a command-line interface to monitor and manage the NPU integrated in AMD CPUs. - -It is installed in ``C:\Windows\System32\AMD`` and it can be directly invoked from within the conda environment created by the Ryzen AI Software installer. - -The ``xrt-smi`` utility currently supports three primary commands: - -- ``examine`` - generates reports related to the state of the AI PC and the NPU. -- ``validate`` - executes sanity tests on the NPU. -- ``configure`` - manages the performance level of the NPU. - -By default, the output of the ``xrt-smi examine`` and ``xrt-smi validate`` commands goes to the terminal. It can also be written to file in JSON format as shown below: - -.. code-block:: shell - - xrt-smi examine -f JSON -o - -The utility also supports the following options which can be used with any command: - -- ``--help`` - help to use xrt-smi or one of its sub commands -- ``--version`` - report the version of XRT, driver and firmware -- ``--verbose`` - turn on verbosity -- ``--batch`` - enable batch mode (disables escape characters) -- ``--force`` - when possible, force an operation, e.g. overwrite a file in examine or validate - -The ``xrt-smi`` utility requires `Microsoft Visual C++ Redistributable `_ (version 2015 to 2022) to be installed. - - -******************************* -Overview of Key Commands -******************************* - -.. 
list-table:: - :widths: 35 65 - :header-rows: 1 - - * - Command - - Description - * - examine - - system config, device name - * - examine --report platform - - performance mode, power - * - examine --report aie-partitions - - hw contexts - * - validate --run latency - - latency test - * - validate --run throughput - - throughput test - * - validate --run gemm - - INT8 GEMM test TOPS. This is a full array test and it should not be run while another workload is running. **NOTE**: This command is not supported on PHX and HPT NPUs. - * - configure --pmode - - set performance mode - - -|memo| **NOTE**: The ``examine --report aie-partition`` report runtime information. These commands should be used when a model is running on the NPU. You can run these commands in a loop to see live updates of the reported data. - - -******************************* -xrt-smi examine -******************************* - -System Information -================== - -Reports OS/system information of the AI PC and confirm the presence of the AMD NPU. - -.. code-block:: shell - - xrt-smi examine - -Sample Command Line Output:: - - - System Configuration - OS Name : Windows NT - Release : 26100 - Machine : x86_64 - CPU Cores : 20 - Memory : 32063 MB - Distribution : Microsoft Windows 11 Enterprise - Model : HP OmniBook Ultra Laptop 14-fd0xxx - BIOS Vendor : HP - BIOS Version : W81 Ver. 
01.01.14 - - XRT - Version : 2.19.0 - Branch : HEAD - Hash : f62307ddadf65b54acbed420a9f0edc415fefafc - Hash Date : 2025-03-12 16:34:48 - NPU Driver Version : 32.0.203.257 - NPU Firmware Version : 1.0.7.97 - - Device(s) Present - |BDF |Name | - |----------------|-----------| - |[00c4:00:01.1] |NPU Strix | - - -Sample JSON Output:: - - - { - "schema_version": { - "schema": "JSON", - "creation_date": "Tue Mar 18 22:43:38 2025 GMT" - }, - "system": { - "host": { - "os": { - "sysname": "Windows NT", - "release": "26100", - "machine": "x86_64", - "distribution": "Microsoft Windows 11 Enterprise", - "model": "HP OmniBook Ultra Laptop 14-fd0xxx", - "hostname": "XCOUDAYD02", - "memory_bytes": "0x7d3f62000", - "cores": "20", - "bios_vendor": "HP", - "bios_version": "W81 Ver. 01.01.14" - }, - "xrt": { - "version": "2.19.0", - "branch": "HEAD", - "hash": "f62307ddadf65b54acbed420a9f0edc415fefafc", - "build_date": "2025-03-12 16:34:48", - "drivers": [ - { - "name": "NPU Driver", - "version": "32.0.203.257" - } - ] - }, - "devices": [ - { - "bdf": "00c4:00:01.1", - "device_class": "Ryzen", - "name": "NPU Strix", - "id": "0x0", - "firmware_version": "1.0.7.97", - "instance": "mgmt(inst=1)", - "is_ready": "true" - } - ] - } - } - } - - - - -Platform Information -==================== - -Reports more detailed information about the NPU, such as the performance mode and power consumption. - -.. code-block:: shell - - xrt-smi examine --report platform - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - Platform - Name : NPU Strix - Performance Mode : Default - - Power : 1.277 Watts - -|memo| **NOTE**: Power reporting is not supported on PHX and HPT NPUs. Power reporting is only available on STX devices and onwards. - -NPU Partitions -============== - -Reports details about the NPU partition and column occupancy on the NPU. - -.. 
code-block:: shell - - xrt-smi examine --report aie-partitions - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - -NPU Context Bindings -==================== - -Reports details about the columns to NPU HW context binding. - -.. code-block:: shell - - xrt-smi examine --report aie-partitions --verbose - -Sample Command Line Output:: - - Verbose: Enabling Verbosity - Verbose: SubCommand: examine - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - AIE Columns - |Column ||HW Context Slot | - |--------||-----------------| - |0 ||[1] | - |1 ||[1] | - |2 ||[1] | - |3 ||[1] | - - - - - -******************************* -xrt-smi validate -******************************* - -Executing a Sanity Check on the NPU -=================================== - -Runs a set of built-in NPU sanity tests which includes latency, throughput, and gemm. - -Note: All tests are run in performance mode. - -- ``latency`` - this test executes a no-op control code and measures the end-to-end latency on all columns -- ``throughput`` - this test loops back the input data from DDR through a MM2S Shim DMA channel back to DDR through a S2MM Shim DMA channel. 
The data movement within the AIE array follows the lowest latency path i.e. movement is restricted to just the Shim tile. -- ``gemm`` - An INT8 GeMM kernel is deployed on all 32 cores by the application. Each core is storing cycle count in the core data memory. The cycle count is read by the firmware. The TOPS application uses the "XBUTIL" tool to capture the IPUHCLK while the workload runs. Once all cores are executed, the cycle count from all cores will be synced back to the host. Finally, the application uses IPUHCLK, core cycle count, and GeMM kernel size to calculate the TOPS. This is a full array test and it should not be run while another workload is running. **NOTE**: This command is not supported on PHX and HPT NPUs. -- ``all`` - All applicable validate tests will be executed (default) - - -.. code-block:: shell - - xrt-smi validate --run all - -|memo| **NOTE**: Some sanity checks may fail if other applications (for example MEP, Microsoft Experience Package) are also using the NPU. - -Sample Command Line Output:: - - - Validate Device : [00c4:00:01.1] - Platform : NPU Strix - Power Mode : Performance - ------------------------------------------------------------------------------- - Test 1 [00c4:00:01.1] : gemm - Details : TOPS: 51.3 - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 2 [00c4:00:01.1] : latency - Details : Average latency: 84.2 us - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 3 [00c4:00:01.1] : throughput - Details : Average throughput: 59891.0 ops - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Validation completed. 
Please run the command '--verbose' option for more details - -******************************* -xrt-smi configure -******************************* - -Managing the Performance Level of the NPU -========================================= - -To set the performance level of the NPU, you can choose from the following modes: powersaver, balanced, performance, turbo, or default. Use the command below: - -.. code-block:: shell - - xrt-smi configure --pmode - -- ``default`` - adapts to the Windows Power Mode setting, which can be adjusted under System -> Power & battery -> Power mode. For finer control of the NPU settings, it is recommended to use the xrt-smi mode setting, which overrides the Windows Power mode and ensures optimal results. -- ``powersaver`` - configures the NPU to prioritize power saving, preserving laptop battery life. -- ``balanced`` - configures the NPU to provide a compromise between power saving and performance. -- ``performance`` - configures the NPU to prioritize performance, consuming more power. -- ``turbo`` - configures the NPU for maximum performance; requires AC power to be plugged in, otherwise uses ``performance`` mode. - -Example: Setting the NPU to high-performance mode - -.. code-block:: shell - - xrt-smi configure --pmode performance - -To check the current performance level, use the following command: - -.. code-block:: shell - - xrt-smi examine --report platform - diff --git a/LLM-examples/RAG-OGA/Dataset/xrt_smi.rst b/LLM-examples/RAG-OGA/Dataset/xrt_smi.rst deleted file mode 100644 index 2266d905..00000000 --- a/LLM-examples/RAG-OGA/Dataset/xrt_smi.rst +++ /dev/null @@ -1,327 +0,0 @@ -.. -.. Heading guidelines -.. -.. # with overline, for parts -.. * with overline, for chapters -.. =, for sections -.. -, for subsections -.. ^, for subsubsections -.. “, for paragraphs -.. - -.. 
include:: /icons.txt - -######################## -NPU Management Interface -######################## - -******************************* -Introduction -******************************* - -The ``xrt-smi`` utility is a command-line interface to monitor and manage the NPU integrated AMD CPUs. - -It is installed in ``C:\Windows\System32\AMD`` and it can be directly invoked from within the conda environment created by the Ryzen AI Software installer. - -The ``xrt-smi`` utility currently supports three primary commands: - -- ``examine`` - generates reports related to the state of the AI PC and the NPU. -- ``validate`` - executes sanity tests on the NPU. -- ``configure`` - manages the performance level of the NPU. - -By default, the output of the ``xrt-smi examine`` and ``xrt-smi validate`` commands goes to the terminal. It can also be written to file in JSON format as shown below: - -.. code-block:: shell - - xrt-smi examine -f JSON -o - -The utility also support the following options which can be used with any command: - -- ``--help`` - help to use xrt-smi or one of its sub commands -- ``--version`` - report the version of XRT, driver and firmware -- ``--verbose`` - turn on verbosity -- ``--batch`` - enable batch mode (disables escape characters) -- ``--force`` - when possible, force an operation. Eg - overwrite a file in examine or validate - -The ``xrt-smi`` utility requires `Microsoft Visual C++ Redistributable `_ (version 2015 to 2022) to be installed. - - -******************************* -Overview of Key Commands -******************************* - -.. list-table:: - :widths: 35 65 - :header-rows: 1 - - * - Command - - Description - * - examine - - system config, device name - * - examine --report platform - - performance mode, power - * - examine --report aie-partitions - - hw contexts - * - validate --run latency - - latency test - * - validate --run throughput - - throughput test - * - validate --run gemm - - INT8 GEMM test TOPS. 
This is a full array test and it should not be run while another workload is running. **NOTE**: This command is not supported on PHX and HPT NPUs. - * - configure --pmode - - set performance mode - - -|memo| **NOTE**: The ``examine --report aie-partition`` report runtime information. These commands should be used when a model is running on the NPU. You can run these commands in a loop to see live updates of the reported data. - - -******************************* -xrt-smi examine -******************************* - -System Information -================== - -Reports OS/system information of the AI PC and confirm the presence of the AMD NPU. - -.. code-block:: shell - - xrt-smi examine - -Sample Command Line Output:: - - - System Configuration - OS Name : Windows NT - Release : 26100 - Machine : x86_64 - CPU Cores : 20 - Memory : 32063 MB - Distribution : Microsoft Windows 11 Enterprise - Model : HP OmniBook Ultra Laptop 14-fd0xxx - BIOS Vendor : HP - BIOS Version : W81 Ver. 01.01.14 - - XRT - Version : 2.19.0 - Branch : HEAD - Hash : f62307ddadf65b54acbed420a9f0edc415fefafc - Hash Date : 2025-03-12 16:34:48 - NPU Driver Version : 32.0.203.257 - NPU Firmware Version : 1.0.7.97 - - Device(s) Present - |BDF |Name | - |----------------|-----------| - |[00c4:00:01.1] |NPU Strix | - - -Sample JSON Output:: - - - { - "schema_version": { - "schema": "JSON", - "creation_date": "Tue Mar 18 22:43:38 2025 GMT" - }, - "system": { - "host": { - "os": { - "sysname": "Windows NT", - "release": "26100", - "machine": "x86_64", - "distribution": "Microsoft Windows 11 Enterprise", - "model": "HP OmniBook Ultra Laptop 14-fd0xxx", - "hostname": "XCOUDAYD02", - "memory_bytes": "0x7d3f62000", - "cores": "20", - "bios_vendor": "HP", - "bios_version": "W81 Ver. 
01.01.14" - }, - "xrt": { - "version": "2.19.0", - "branch": "HEAD", - "hash": "f62307ddadf65b54acbed420a9f0edc415fefafc", - "build_date": "2025-03-12 16:34:48", - "drivers": [ - { - "name": "NPU Driver", - "version": "32.0.203.257" - } - ] - }, - "devices": [ - { - "bdf": "00c4:00:01.1", - "device_class": "Ryzen", - "name": "NPU Strix", - "id": "0x0", - "firmware_version": "1.0.7.97", - "instance": "mgmt(inst=1)", - "is_ready": "true" - } - ] - } - } - } - - - - -Platform Information -==================== - -Reports more detailed information about the NPU, such as the performance mode and power consumption. - -.. code-block:: shell - - xrt-smi examine --report platform - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - Platform - Name : NPU Strix - Performance Mode : Default - - Power : 1.277 Watts - -|memo| **NOTE**: Power reporting is not supported on PHX and HPT NPUs. Power reporting is only available on STX devices and onwards. - -NPU Partitions -============== - -Reports details about the NPU partition and column occupancy on the NPU. - -.. code-block:: shell - - xrt-smi examine --report aie-partitions - -Sample Command Line Output:: - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - -NPU Context Bindings -==================== - -Reports details about the columns to NPU HW context binding. - -.. 
code-block:: shell - - xrt-smi examine --report aie-partitions --verbose - -Sample Command Line Output:: - - Verbose: Enabling Verbosity - Verbose: SubCommand: examine - - -------------------------- - [00c5:00:01.1] : NPU Strix - -------------------------- - AIE Partitions - Partition Index: 0 - Columns: [0, 1, 2, 3] - HW Contexts: - |PID |Ctx ID |Status |Instr BO |Sub |Compl |Migr |Err |Prio |GOPS |EGOPS |FPS |Latency | - |-------|--------|--------|----------|-----|-------|------|-----|--------|------|-------|-----|---------| - |20696 |0 |Active |64 KB |57 |56 |0 |0 |Normal |0 |0 |0 |0 | - - AIE Columns - |Column ||HW Context Slot | - |--------||-----------------| - |0 ||[1] | - |1 ||[1] | - |2 ||[1] | - |3 ||[1] | - - - - - -******************************* -xrt-smi validate -******************************* - -Executing a Sanity Check on the NPU -=================================== - -Runs a set of built-in NPU sanity tests which includes latency, throughput, and gemm. - -Note: All tests are run in performance mode. - -- ``latency`` - this test executes a no-op control code and measures the end-to-end latency on all columns -- ``throughput`` - this test loops back the input data from DDR through a MM2S Shim DMA channel back to DDR through a S2MM Shim DMA channel. The data movement within the AIE array follows the lowest latency path i.e. movement is restricted to just the Shim tile. -- ``gemm`` - An INT8 GeMM kernel is deployed on all 32 cores by the application. Each core is storing cycle count in the core data memory. The cycle count is read by the firmware. The TOPS application uses the "XBUTIL" tool to capture the IPUHCLK while the workload runs. Once all cores are executed, the cycle count from all cores will be synced back to the host. Finally, the application uses IPUHCLK, core cycle count, and GeMM kernel size to calculate the TOPS. This is a full array test and it should not be run while another workload is running. 
**NOTE**: This command is not supported on PHX and HPT NPUs. -- ``all`` - All applicable validate tests will be executed (default) - - -.. code-block:: shell - - xrt-smi validate --run all - -|memo| **NOTE**: Some sanity checks may fail if other applications (for example MEP, Microsoft Experience Package) are also using the NPU. - -Sample Command Line Output:: - - - Validate Device : [00c4:00:01.1] - Platform : NPU Strix - Power Mode : Performance - ------------------------------------------------------------------------------- - Test 1 [00c4:00:01.1] : gemm - Details : TOPS: 51.3 - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 2 [00c4:00:01.1] : latency - Details : Average latency: 84.2 us - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Test 3 [00c4:00:01.1] : throughput - Details : Average throughput: 59891.0 ops - Test Status : [PASSED] - ------------------------------------------------------------------------------- - Validation completed. Please run the command '--verbose' option for more details - -******************************* -xrt-smi configure -******************************* - -Managing the Performance Level of the NPU -========================================= - -To set the performance level of the NPU, you can choose from the following modes: powersaver, balanced, performance, or default. Use the command below: - -.. code-block:: shell - - xrt-smi configure --pmode - -- ``default`` - adapts to the Windows Power Mode setting, which can be adjusted under System -> Power & battery -> Power mode. For finer control of the NPU settings, it is recommended to use the xrt-smi mode setting, which overrides the Windows Power mode and ensures optimal results. -- ``powersaver`` - configures the NPU to prioritize power saving, preserving laptop battery life. 
-- ``balanced`` - configures the NPU to provide a compromise between power saving and performance. -- ``performance`` - configures the NPU to prioritize performance, consuming more power. -- ``turbo`` - configures the NPU for maximum performance; requires AC power to be plugged in, otherwise uses ``performance`` mode. - -Example: Setting the NPU to high-performance mode - -.. code-block:: shell - - xrt-smi configure --pmode performance - -To check the current performance level, use the following command: - -.. code-block:: shell - - xrt-smi examine --report platform - diff --git a/LLM-examples/RAG-OGA/README.md b/LLM-examples/RAG-OGA/README.md deleted file mode 100644 index d4421630..00000000 --- a/LLM-examples/RAG-OGA/README.md +++ /dev/null @@ -1,275 +0,0 @@ - - - - -

Ryzen™ AI RAG

-
- -## Introduction -Welcome to this repository, a showcase of an **ONNX Runtime GenAI(OGA)‑based RAG LLM sample application** running on a **Ryzen AI processor**. -This repo provides supplemental code to the AMD Blog [RAG with Hybrid LLM on AMD Ryzen AI Processor](https://www.amd.com/en/developer/resources/technical-articles/2025/rag-with-hybrid-llm-on-amd-ryzen-ai-processors.html). - -## What You’ll Find Here - -- **Retrieval-Augmented Generation (RAG) pipeline** powered by: - - A **hybrid LLM** enables disaggregated inference in which the compute-heavy prefill phase runs on the NPU, while the decode phase executes on the GPU. - - An **embedding model** compiled with **Vitis AI Execution Provider** -- Built using the widely adopted **LangChain** orchestration framework - -## Quick Setup - -Follow these simple steps to get started: - -1. Execute the setup steps outlined below to provision your environment. -2. After setup, this README will guide you through how to run the sample application. - ---- - -## 1. Installation and Setup - -### 1.1 Download the ONNX-Based Llama Model from Hugging Face - -```sh -git clone https://huggingface.co/amd/Llama-3.2-3B-Instruct-onnx-ryzenai-1.7-hybrid /path/to/your/directory/ -``` -replace `/path/to/your/directory/` with actual path where you want to download the model. - -### 1.2 Activate Ryzen AI Environment - -To ensure compatibility with ONNX-based Llama model, activate the ryzen-ai-1.7.0 Conda environment. 
- -Please follow the instructions provided in the official AMD documentation to install Ryzen AI 1.7.0: - -👉 [Ryzen AI 1.7.0 Installation and Conda Environment Creation](https://ryzenai.docs.amd.com/en/latest/inst.html) - -After installation, activate the environment by running: - -```sh -conda activate ryzen-ai-1.7.0 -``` -### 1.3 Install Dependencies - -After activating the environment, install the required Python dependencies by running: -```sh -git clone https://github.com/amd/RyzenAI-SW.git -cd RyzenAI-SW/example/llm/RAG-OGA -pip install -r requirements.txt -``` - -## 2. Demo -To explore the use case, follow the steps below: - -### 2.1 Retrieval-Augmented Generation (RAG) Pipeline - -This example demonstrates a Retrieval-Augmented Generation (RAG) pipeline orchestrated using the LangChain framework. In this setup, documents are indexed into a Facebook AI Similarity Search (FAISS) vector database and retrieved at inference time to enrich user prompts with relevant contextual information. - -The following models are deployed using Ryzen AI 1.7.0: - -- **Embedding Model**: [BGE (BAAI General Embedding)](https://huggingface.co/BAAI/bge-large-en-v1.5), compiled using Vitis AI Execution Provider. - -- **Hybrid LLM**: [Llama3.2-3B-Instruct](https://huggingface.co/amd/Llama-3.2-3B-Instruct-onnx-ryzenai-1.7-hybrid), a quantized ONNX model, running using the OGA (ONNX Runtime GenAI) framework on Ryzen AI 1.7.0. - -By running both critical models on the NPU and/or GPU, this setup enables faster and more efficient inference, delivering a high-performance RAG system optimized for AI PCs. - -

- RAG Diagram -

-

RAG Workflow with LangChain and ONNX

- -### 2.2 🔑 Key Components of the LangChain-Based RAG Pipeline - -This RAG pipeline runs locally on an AMD Ryzen AI PC (with NPU & GPU). It combines LangChain, FAISS, embeddings, and an LLM to deliver fast, on‑device question‑answering. - -#### 🔹 Data Embedding -Documents are preprocessed and converted into dense vector representations using the BGE embedding model. - -#### 🔹 ONNX Inference on AMD NPU -The embedding model is executed using ONNX Runtime on the NPU (Ryzen AI). - -#### 🔹 Vector Store Creation -Document embeddings are stored in a FAISS-based vector database for efficient similarity search. - -#### 🔹 Context Retrieval -The vector database returns the most relevant document chunks based on the embedded query. - -#### 🔹 LLM Prompt Construction -LangChain constructs a prompt using the user’s query, prompt template, and the retrieved context. - -#### 🔹 LLM Response Generation -The retrieved data, along with the user’s query, is fed into a custom LLM, running on a hybrid flow (GPU and NPU), to generate a response from the retrieved data. - -### 2.3 Download, Export to ONNX, and Compile the Embedding Model. - -Run the following command to perform download, export and compile steps: - -```bash -python custom_embedding/export_bge_onnx.py -``` -Note : Please ensure that you have activated your ryzen‑ai‑1.7.0 environment and are in the RyzenAI‑SW/example/llm/RAG‑OGA directory. - -This script generates a static‑shape, non‑quantized FP32 ONNX model that serves as the baseline for further deployment. -The compiled BGE (BAAI General Embedding) ONNX model will be stored in the cache folder named ``modelcachekey_bge``. - - -### 2.4 Run the sample RAG application - -The system supports two modes of query handling. -- ``--direct_llm`` mode, where the user's query is directly sent to the LLM without any document retrieval. 
- -- If ``--direct_llm`` flag is not specified, the query triggers retrieval from a FAISS index, enriching the prompt with relevant context before passing it to the LLM. - -#### Required Setup: Update Paths in rag.py -- Dataset Path: - Replace the placeholder with the dataset provided in this directory used to build the FAISS index. -``` -dataset_path = r"./Dataset" -``` - -- LLM Model Path: - Replace the path to your LLM model that you downloaded in step 1.1 -``` -llm = custom_llm(model_path="path/to/llm") -``` - - -## 2.5 Sample Outputs - -**Case 1: Direct LLM mode (where no retrieval is being done)** -```sh -python rag.py --direct_llm -``` -Ask any question - -**For instance,** -``` -Enter your question: what is NPU and tell me the three important feature of NPU. -Direct_llm mode is on. No retrieval has been performed. -LLM_call invoked: 1 time(s) -Answer: -NPU stands for Net Protein Utilization, which is a measure of the proportion of dietary protein that is actually utilized by the body for growth and maintenance of tissues. The three important features of NPU are: (1) It is a measure of protein quality, indicating the extent to which a protein is effective in promoting growth and maintenance of body tissues. (2) It is influenced by factors such as the protein's amino acid composition, digestibility, and bioavailability. (3) NPU is a critical factor in determining the adequacy of protein intake, as it helps to identify the protein sources that are most effective in meeting the body's protein needs. - -``` - -**Case 2: Retrieval mode** - -In the **Retrieval mode**, documents most similar to the query are retrieved using FAISS, enabling efficient semantic search based on vector similarity. -You can observe how the model behaves differently between direct mode and retrieval mode: - -For instance, -```sh -python rag.py -``` -**Sample Output** - -***Question 1*** -``` -Enter your question: what is NPU and tell me the three important feature of NPU. 
-Retrieval mode is on. -Loading existing FAISS index from disk... -LLM_call invoked: 1 time(s) -Answer: -The NPU (Neural Processing Unit) is a specialized processor designed for neural network processing, specifically for deep learning and artificial intelligence applications. -The three important features of NPU are: -1. **High Performance**: NPU is designed to provide high-performance computing for deep learning workloads, making it an ideal choice for applications that require fast processing of large amounts of data. -2. **Energy Efficiency**: NPU is designed to be energy-efficient, which is critical for mobile devices and other applications where power consumption is a major concern. -3. **Low Latency**: NPU is designed to provide low latency, which is critical for real-time applications such as autonomous vehicles, robotics, and other IoT devices. -``` - -***Question 2*** - -``` -Enter your question: what are the main feature provided by the AMD analyzer, and how does it help in visualizing model execution on Ryzen AI ? -Retrieval mode is on. -Loading existing FAISS index from disk... -LLM_call invoked: 1 time(s) - -Answer: - ## Step 1: Identify the main features of the AMD AI Analyzer -The AMD AI Analyzer is a tool that supports analysis and visualization of model compilation and inference on Ryzen AI. The main features provided by the AMD AI Analyzer include: - -- Graph and operator partitions between the NPU and CPU -- Visualization of graph and operator partitions -- Profiling and visualization of model execution -- Generation of artifacts related to inference profile and graph partitions - -## Step 2: Explain how the AMD AI Analyzer helps in visualizing model execution on Ryzen AI -The AMD AI Analyzer helps in visualizing model execution on Ryzen AI by providing a comprehensive view of the model's performance and execution on the NPU. 
The tool allows users to: - -- Visualize graph and operator partitions to understand how the model is processed by the hardware -- Profile and visualize model execution to identify performance bottlenecks -- Generate artifacts related to inference profile and graph partitions to gain deeper insights into the model's behavior - -## Step 3: Highlight the benefits of using the AMD AI Analyzer -The AMD AI Analyzer provides several benefits, including: - -- Improved understanding of model execution on Ryzen AI -- Identification of performance bottlenecks and optimization opportunities -- Generation of artifacts for further analysis and optimization - -The final answer is: The AMD AI Analyzer provides a comprehensive set of features that help in visualizing model execution on Ryzen AI, including graph and operator partitions, profiling and visualization, and generation of artifacts related to inference profile and graph partitions. These features enable users to gain a deeper understanding of the model's performance and behavior on the NPU, identify performance bottlenecks, and optimize the model for better performance and power efficiency. - - -``` - -***Question 3*** -``` -Enter your question: In the context of Ryzen AI Software's hybrid inference model, how does the integration of automated -operator assignment, encrypted context caching, and hardware-specific xclbin configurations collectively contribute to -optimizing performance, ensuring security, and minimizing compilation overhead across varying model types such as transformers -and CNNs? -Retriveval mode is on. - Loading existing FAISS index from disk... - -Answer: -The integration of automated operator assignment, encrypted context caching, and hardware-specific xclbin configurations collectively contributes to optimizing performance, ensuring security, and minimizing compilation overhead across varying model types such as transformers and CNNs in the following ways: - -1. 
**Automated Operator Assignment**: This feature optimizes the placement of operators in the model, ensuring that the most efficient and effective assignments are made, which leads to improved performance and reduced computational overhead. - -2. **Encrypted Context Caching**: This feature ensures that sensitive model data is protected from unauthorized access, thereby enhancing security. By caching context information, the model can be efficiently transferred and executed across different environments, reducing the need for manual intervention and minimizing compilation overhead. - -3. **Hardware-Specific xclbin Configurations**: These configurations are tailored to the specific capabilities of the target platform, ensuring that INT8 models are optimized for the hardware, which leads to improved performance and reduced power consumption. This also enhances security by protecting sensitive model data from unauthorized access. - -Together, these features work synergistically to optimize performance, ensure security, and minimize compilation overhead across varying model types such as transformers and CNNs. This results in faster inference times, reduced power consumption, and improved overall efficiency, making the Ryzen AI Software's hybrid inference model a powerful tool for AI and machine learning applications -``` - - -## 2.6 Profiling - -The example code also captures key LLM performance metrics, such as Time to First Token (TTFT), Tokens Per Second (TPS), input prompt length, and total generated tokens, providing a clear view of system responsiveness and throughput. - -To enable profiling, run the sample with the ``--profiling`` flag: - -```sh -python rag.py --profiling -``` -**Note:** -Actual numbers may vary depending on the LLM used, model version, and specific system configuration. 
- -**Sample output:** -``` ---- Aggregated Profiling Summary --- - -Q1: - Avg Input Tokens : 1607 - Avg Output Tokens : 339 - Avg TTFT(Sec) : 1.640761 - Avg TPS : 31.16 - -Q2: - Avg Input Tokens : 1171 - Avg Output Tokens : 354 - Avg TTFT(Sec) : 1.16953 - Avg TPS : 32.74 - -Q3: - Avg Input Tokens : 1458 - Avg Output Tokens : 1 - Avg TTFT(Sec) : 1.393054 - Avg TPS : 0.0 -``` - - - - - - diff --git a/LLM-examples/RAG-OGA/custom_embedding/custom_embedding.py b/LLM-examples/RAG-OGA/custom_embedding/custom_embedding.py deleted file mode 100644 index 780f9f4c..00000000 --- a/LLM-examples/RAG-OGA/custom_embedding/custom_embedding.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import List, Dict, Any -from langchain_core.embeddings import Embeddings -import onnxruntime as ort -from transformers import AutoTokenizer -import numpy as np -import time - -class custom_embeddings(Embeddings): - def __init__(self, model_path: str, tokenizer_name: str): - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - self.session = ort.InferenceSession( - model_path, - providers=["VitisAIExecutionProvider"], - provider_options=[{ - "config_file": "custom_embedding/vaiml_config.json", - "cache_dir": "./", - "cacheKey": "modelcachekey_bge" - }] - ) - print("NPU session created successfully.") - self.profile: Dict[str, Any] = {} - - def _embed(self, texts: List[str]) -> List[List[float]]: - embeddings = [] - total_input_tokens = 0 - start = time.time() - - for text in texts: - inputs = self.tokenizer( - text, - max_length=512, - padding="max_length", - truncation=True, - return_tensors="np", - return_token_type_ids=False - ) - input_ids = inputs["input_ids"] - total_input_tokens += np.count_nonzero(input_ids) - onnx_inputs = { - "input_ids": input_ids.astype(np.int64), - "attention_mask": inputs["attention_mask"].astype(np.int64) - } - - outputs = self.session.run(None, onnx_inputs) - embedding = outputs[1][0] # pooler_output - embeddings.append(embedding.tolist()) - - end = 
time.time() - - self.profile["embedding_time_sec"] = round(end - start, 4) - self.profile["input_token_length"] = total_input_tokens - return embeddings - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - return self._embed(texts) - - def embed_query(self, text: str) -> List[float]: - return self._embed([text])[0] - - def get_profile(self) -> Dict[str, Any]: - return self.profile diff --git a/LLM-examples/RAG-OGA/custom_embedding/export_bge_onnx.py b/LLM-examples/RAG-OGA/custom_embedding/export_bge_onnx.py deleted file mode 100644 index a56eb0f9..00000000 --- a/LLM-examples/RAG-OGA/custom_embedding/export_bge_onnx.py +++ /dev/null @@ -1,47 +0,0 @@ -from transformers import AutoTokenizer, AutoModel -import onnxruntime as ort -import torch - -# Load model and tokenizer -model_name = "BAAI/bge-large-en-v1.5" -tokenizer = AutoTokenizer.from_pretrained(model_name) -model = AutoModel.from_pretrained(model_name) - -# Prepare dummy inputs with fixed sequence length of 512 -dummy_text = "exporting to onnx " * 64 # Ensure enough tokens -dummy_inputs = tokenizer( - dummy_text, - return_tensors="pt", - max_length=512, - padding="max_length", - truncation=True -) - -# Export to ONNX with static shape (1, 512) -torch.onnx.export( - model, - (dummy_inputs["input_ids"], dummy_inputs["attention_mask"]), - "bge-large-en-v1.5.onnx", - input_names=["input_ids", "attention_mask"], - output_names=["last_hidden_state", "pooler_output"], - dynamic_axes=None, # Static input only - opset_version=17 -) - -print("ONNX model exported with static input shape (1, 512).") - -# Create VitisAI session -# Compile the bge ONNX model and store it to cache named "modelcachekey_bge" folder. -# Will use this cached compiled model for RAG implementation. 
-model_path = "bge-large-en-v1.5.onnx" -session = ort.InferenceSession( - model_path, - providers=["VitisAIExecutionProvider"], - provider_options=[{ - "config_file": "custom_embedding/vaiml_config.json", - "cache_dir": "./", - "cacheKey": "modelcachekey_bge" - }] -) - -print("NPU session created successfully.") diff --git a/LLM-examples/RAG-OGA/custom_embedding/vaiml_config.json b/LLM-examples/RAG-OGA/custom_embedding/vaiml_config.json deleted file mode 100644 index 1ae4ca30..00000000 --- a/LLM-examples/RAG-OGA/custom_embedding/vaiml_config.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "preferred_data_storage": "unvectorized" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/LLM-examples/RAG-OGA/custom_llm/__init__.py b/LLM-examples/RAG-OGA/custom_llm/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/LLM-examples/RAG-OGA/custom_llm/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/LLM-examples/RAG-OGA/custom_llm/custom_llm.py b/LLM-examples/RAG-OGA/custom_llm/custom_llm.py deleted file mode 100644 index 51d51310..00000000 --- a/LLM-examples/RAG-OGA/custom_llm/custom_llm.py +++ /dev/null @@ -1,102 +0,0 @@ -import warnings -import time -from typing import Any, Dict, List, Optional -from pydantic import PrivateAttr -import onnxruntime_genai as og -from langchain_core.language_models.llms import LLM - -warnings.filterwarnings("ignore", category=UserWarning, module="langchain") -warnings.filterwarnings("ignore", category=DeprecationWarning) - - -class custom_llm(LLM): - _model: og.Model = PrivateAttr() - _tokenizer: og.Tokenizer = PrivateAttr() - _tokenizer_stream: og.Tokenizer = PrivateAttr() - profile: Dict[str, Any] = {} - - def __init__(self, model_path: str, **kwargs: 
Any): - super().__init__(**kwargs) - self._model = og.Model(model_path) - self._tokenizer = og.Tokenizer(self._model) - self._tokenizer_stream = self._tokenizer.create_stream() - self.profile = {} - - - def _prepare_generator(self, prompt: str) -> og.Generator: - input_tokens = self._tokenizer.encode(prompt) - self.profile["input_token_length"] = len(input_tokens) - - params = og.GeneratorParams(self._model) - search_options = { - "max_length": min(2048, len(input_tokens) + 1024), - "temperature": 0.5, - "top_k": 40, - "top_p": 0.9 - } - params.set_search_options(**search_options) - - generator = og.Generator(self._model, params) - return generator, input_tokens - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str: - if not hasattr(self, "_call_count"): - self._call_count = 0 - self._call_count += 1 - print(f"LLM_call invoked: {self._call_count} time(s)") - - generator, input_tokens = self._prepare_generator(prompt) - response_tokens = [] - - # === TTFT: append prompt + generate first token === - ttft_start = time.time() - generator.append_tokens(input_tokens) - generator.generate_next_token() - ttft_end = time.time() - - first_token = generator.get_next_tokens()[0] - response_tokens.append(first_token) - self.profile["ttft_sec"] = f"{(ttft_end - ttft_start):.6f}" - - # === Generation Time: for tokens after the first === - total_gen_time = 0.0 - token_count_after_first = 0 - - while not generator.is_done(): - token_start = time.time() - generator.generate_next_token() - token_end = time.time() - - token = generator.get_next_tokens()[0] - response_tokens.append(token) - - total_gen_time += (token_end - token_start) - token_count_after_first += 1 - - decoded_tokens = [self._tokenizer_stream.decode(t) for t in response_tokens] - response = "".join(decoded_tokens) - total_tokens = len(response_tokens) - - # Safe TPS calculation - if token_count_after_first > 0 and total_gen_time > 0: - tps = token_count_after_first / total_gen_time - 
else: - tps = 0.0 - - self.profile["generation_time_sec"] = round(total_gen_time, 6) - self.profile["tps"] = round(tps, 2) - self.profile["output_word_count"] = len(response.strip().split()) - self.profile["output_token_count"] = total_tokens - - return response.strip() - - def get_profile(self) -> Dict[str, Any]: - return self.profile - - @property - def _llm_type(self) -> str: - return "onnx-llama-unified" - - @property - def _identifying_params(self) -> Dict[str, Any]: - return {"model_name": "onnx-llama"} diff --git a/LLM-examples/RAG-OGA/gradio_app.py b/LLM-examples/RAG-OGA/gradio_app.py deleted file mode 100644 index 88b88444..00000000 --- a/LLM-examples/RAG-OGA/gradio_app.py +++ /dev/null @@ -1,108 +0,0 @@ -# gradio_app.py - -import os -import gradio as gr -from glob import glob -from langchain_core.prompts import PromptTemplate -from langchain_community.vectorstores import FAISS -from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader -from langchain_core.documents import Document -from langchain_text_splitters import TokenTextSplitter -from custom_llm.custom_llm import custom_llm -from custom_embedding.custom_embedding import custom_embeddings -# from custom_embedding.huggingface_bge_profile import HuggingFaceBGEWithProfile -from langchain_community.embeddings import HuggingFaceEmbeddings - -def gradio_launch_app(dataset_path, model_path, use_npu_model_path="bge-large-en-v1.5.onnx"): - faiss_index_path = "faiss_index" - llm = custom_llm(model_path=model_path) - embedding_model = None - retriever = None - - retrieval_template = PromptTemplate.from_template( - """<|system|> Using the information contained in the context, give a comprehensive answer to the question. Respond only to the question asked, response should be concise and relevant to the question. Provide the number of the source document when relevant. <|user|> Context: {context} --- Now here is the question you need to answer. 
Question: {question} <|assistant|>""" - ) - - direct_template = PromptTemplate.from_template( - """Answer the following question clearly and accurately in one paragraph.\nQuestion: {question}\nAnswer:""" - ) - - def setup_vectorstore(): - nonlocal embedding_model, retriever - # embedding_model = ( - # custom_embeddings(model_path=use_npu_model_path, tokenizer_name="BAAI/bge-large-en-v1.5") - # if use_npu else HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5",encode_kwargs={"normal_embeddings": True}) - # ) - embedding_model = custom_embeddings(model_path=use_npu_model_path, tokenizer_name="BAAI/bge-large-en-v1.5") - - if os.path.exists(os.path.join(faiss_index_path, "index.faiss")): - vectorstore = FAISS.load_local( - folder_path=faiss_index_path, - embeddings=embedding_model, - allow_dangerous_deserialization=True - ) - else: - documents = [] - for ext in [".pdf", ".txt", ".md", ".docx", ".rst"]: - for file_path in glob(os.path.join(dataset_path, f"*{ext}")): - try: - loader = { - ".pdf": PyPDFLoader, - ".txt": lambda p: TextLoader(p, encoding="utf-8"), - ".md": lambda p: TextLoader(p, encoding="utf-8"), - ".rst": lambda p: TextLoader(p, encoding="utf-8"), - ".docx": UnstructuredWordDocumentLoader, - }[ext](file_path) - pages = loader.load() - content = "\n".join([p.page_content for p in pages]) - documents.append(Document(page_content=content, metadata={"source": file_path})) - except Exception as e: - print(f"Failed to load {file_path}: {e}") - splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=50) - split_docs = splitter.split_documents(documents) - vectorstore = FAISS.from_documents(split_docs, embedding_model) - vectorstore.save_local(faiss_index_path) - retriever = vectorstore.as_retriever(search_type='similarity',search_kwargs={"k": 3}) - - # def answer_query(query, use_direct_llm, use_npu): - def answer_query(query, use_direct_llm): - nonlocal retriever, embedding_model - if not query.strip(): - return "Please enter a valid question." 
- if retriever is None or embedding_model is None: - setup_vectorstore() - if not use_direct_llm: - retrieved_docs = retriever.invoke(query) - question_tokens = llm._tokenizer.encode(query) - context_chunks = [] - total_tokens = len(question_tokens) + 100 - for doc in retrieved_docs: - doc_tokens = llm._tokenizer.encode(doc.page_content) - if total_tokens + len(doc_tokens) <= 2048: - context_chunks.append(doc.page_content) - total_tokens += len(doc_tokens) - if not context_chunks: - return "No relevant context found." - return (retrieval_template | llm).invoke({"context": "\n\n".join(context_chunks), "question": query}) - return (direct_template | llm).invoke({"question": query}) - - def checkbox_handler(use_direct_llm,use_npu): - if use_direct_llm and use_npu: - return False, True - return use_direct_llm,use_npu - with gr.Blocks(title="SmartAsk", css="""#submit-btn { background-color: #ff6a00; color: white; font-weight: bold; width: 150px; margin: 16px auto; }""") as demo: - gr.Markdown("

RAG: SmartAsk

Ask questions using your document dataset.

") - with gr.Row(): - with gr.Column(scale=1): - use_direct_llm = gr.Checkbox(label="Use Direct LLM (Skip Retrieval)",interactive=True) - # use_npu = gr.Checkbox(label="Use NPU for Embedding Model",interactive=True) - with gr.Column(scale=4): - query = gr.Textbox(label="Enter your question", lines=2) - submit_btn = gr.Button("Submit", elem_id="submit-btn") - answer = gr.Textbox(label="Answer", lines=12) - # use_direct_llm.change(fn=checkbox_handler,inputs=[use_direct_llm,use_npu],outputs=[use_direct_llm,use_npu]) - # use_npu.change(fn=checkbox_handler,inputs=[use_direct_llm,use_npu],outputs=[use_direct_llm,use_npu]) - # submit_btn.click(fn=answer_query, inputs=[query, use_direct_llm, use_npu], outputs=answer) - submit_btn.click(fn=answer_query, inputs=[query, use_direct_llm], outputs=answer) - demo.launch() - diff --git a/LLM-examples/RAG-OGA/image/RAG_Diagram.png b/LLM-examples/RAG-OGA/image/RAG_Diagram.png deleted file mode 100644 index 2d638765..00000000 Binary files a/LLM-examples/RAG-OGA/image/RAG_Diagram.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/RAG_Flow.png b/LLM-examples/RAG-OGA/image/RAG_Flow.png deleted file mode 100644 index 9f8f359f..00000000 Binary files a/LLM-examples/RAG-OGA/image/RAG_Flow.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/ai_analyzer.png b/LLM-examples/RAG-OGA/image/ai_analyzer.png deleted file mode 100644 index 467952b6..00000000 Binary files a/LLM-examples/RAG-OGA/image/ai_analyzer.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/eval_comparision.png b/LLM-examples/RAG-OGA/image/eval_comparision.png deleted file mode 100644 index 18dc8756..00000000 Binary files a/LLM-examples/RAG-OGA/image/eval_comparision.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e1.png b/LLM-examples/RAG-OGA/image/gradio_e1.png deleted file mode 100644 index 515e42c4..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e1.png and /dev/null differ diff --git 
a/LLM-examples/RAG-OGA/image/gradio_e10.png b/LLM-examples/RAG-OGA/image/gradio_e10.png deleted file mode 100644 index 393cdf8b..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e10.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e2.png b/LLM-examples/RAG-OGA/image/gradio_e2.png deleted file mode 100644 index e0d7a27f..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e2.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e3.png b/LLM-examples/RAG-OGA/image/gradio_e3.png deleted file mode 100644 index 5752e538..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e3.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e4.png b/LLM-examples/RAG-OGA/image/gradio_e4.png deleted file mode 100644 index 733f7e7d..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e4.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e5.png b/LLM-examples/RAG-OGA/image/gradio_e5.png deleted file mode 100644 index 22475bf2..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e5.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e6.png b/LLM-examples/RAG-OGA/image/gradio_e6.png deleted file mode 100644 index f8248043..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e6.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e7.png b/LLM-examples/RAG-OGA/image/gradio_e7.png deleted file mode 100644 index d7808038..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e7.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e8.png b/LLM-examples/RAG-OGA/image/gradio_e8.png deleted file mode 100644 index 29056bbb..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e8.png and /dev/null differ diff --git a/LLM-examples/RAG-OGA/image/gradio_e9.png b/LLM-examples/RAG-OGA/image/gradio_e9.png deleted file mode 100644 index d1eb34b6..00000000 Binary files a/LLM-examples/RAG-OGA/image/gradio_e9.png and 
/dev/null differ diff --git a/LLM-examples/RAG-OGA/profiling.py b/LLM-examples/RAG-OGA/profiling.py deleted file mode 100644 index 09cf2215..00000000 --- a/LLM-examples/RAG-OGA/profiling.py +++ /dev/null @@ -1,87 +0,0 @@ -import time -from typing import Dict -from statistics import mean - -def run_profiling(llm, embed_model, retriever, chain, questions: Dict[str, str], runs: int = 5): - summary = {} - - # Extract PromptTemplate from chain - prompt_template = chain.first if hasattr(chain, "first") else None - - for qid, question in questions.items(): - llm_metrics = { - "ttft_sec": [], - "tps": [], - "input_token_length": [], - "output_token_count": [] - } - embed_metrics = { - "embedding_time_sec": [], - "input_token_length": [] - } - - for _ in range(runs): - # --- Embedding Pass --- - if hasattr(embed_model, "embed_query"): - start = time.time() - tokens = embed_model.embed_query(question) - end = time.time() - if hasattr(embed_model, "get_profile"): - profile = embed_model.get_profile() - embed_metrics["embedding_time_sec"].append( - float(profile.get("embedding_time_sec", end - start)) - ) - embed_metrics["input_token_length"].append( - int(profile.get("input_token_length", len(tokens))) - ) - - # --- Retrieve context chunks --- - retrieved_docs = retriever.invoke(question) - question_tokens = llm._tokenizer.encode(question) - total_tokens = len(question_tokens) + 100 # buffer for formatting - context_chunks = [] - for doc in retrieved_docs: - doc_tokens = llm._tokenizer.encode(doc.page_content) - if total_tokens + len(doc_tokens) <= 2048: - context_chunks.append(doc.page_content) - total_tokens += len(doc_tokens) - else: - break - context_str = "\n\n".join(context_chunks) - - - input_dict = {"context": context_str, "question": question} - if prompt_template: - formatted_prompt_text = prompt_template.format(**input_dict) - else: - formatted_prompt_text = question - - # --- Tokenize final prompt --- - prompt_tokens = llm._tokenizer.encode(formatted_prompt_text) 
- llm_metrics["input_token_length"].append(len(prompt_tokens)) - - # --- Run LLM --- - chain.invoke(input_dict) - - # --- Collect profile - llm_profile = llm.get_profile() - llm_metrics["ttft_sec"].append(float(llm_profile.get("ttft_sec", 0))) - llm_metrics["tps"].append(float(llm_profile.get("tps", 0))) - llm_metrics["output_token_count"].append(int(llm_profile.get("output_token_count", 0))) - - summary[qid] = { - "Avg Input Tokens": round(mean(llm_metrics["input_token_length"])), - "Avg Output Tokens": round(mean(llm_metrics["output_token_count"])), - "Avg TTFT(Sec)": round(mean(llm_metrics["ttft_sec"]), 6), - "Avg TPS": round(mean(llm_metrics["tps"]), 2), - - } - - return summary - -def print_profiling_summary(summary: Dict[str, Dict[str, float]]): - print("\n--- Aggregated Profiling Summary ---") - for qid, metrics in summary.items(): - print(f"\n{qid}:") - for k, v in metrics.items(): - print(f" {k:<30}: {v}") diff --git a/LLM-examples/RAG-OGA/rag.py b/LLM-examples/RAG-OGA/rag.py deleted file mode 100644 index 2f640dc9..00000000 --- a/LLM-examples/RAG-OGA/rag.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -import warnings -import argparse -from glob import glob -from langchain_core.prompts import PromptTemplate -from langchain_community.vectorstores import FAISS -from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader -from langchain_core.documents import Document -from langchain_text_splitters import TokenTextSplitter -from custom_llm.custom_llm import custom_llm -from custom_embedding.custom_embedding import custom_embeddings -# from custom_embedding.huggingface_bge_profile import HuggingFaceBGEWithProfile -from profiling import run_profiling, print_profiling_summary -from gradio_app import gradio_launch_app -from langchain_community.embeddings import HuggingFaceEmbeddings - -warnings.filterwarnings("ignore", category=DeprecationWarning) - -# --- Argument Parser --- -parser = 
argparse.ArgumentParser(description="Run LLM with or without retrieval") -parser.add_argument("--direct_llm", action="store_true", help="Skip retrieval and send query directly to LLM") -# parser.add_argument("--embed_npu", action="store_true", help="Use ONNX embedding model on VitisAI NPU") -parser.add_argument("--profiling", action="store_true", help="Enable profiling: prints TTFT, TPS, tokens, and word counts") -parser.add_argument("--gradio", action="store_true", help="Launch Gradio web app") -args = parser.parse_args() - - -if args.gradio: - gradio_launch_app(dataset_path=r"./Dataset", model_path= r"./model") #update this path - exit() - -# --- Paths --- -dataset_path = r"./Dataset" #update this path -faiss_index_path = "./faiss_index" - -# --- Embedding Model --- -print("Using ONNX embedding model on VitisAI NPU...") -embedding_model = custom_embeddings(model_path="bge-large-en-v1.5.onnx", tokenizer_name="BAAI/bge-large-en-v1.5") - -# --- Load or Build FAISS Index --- -if os.path.exists(os.path.join(faiss_index_path, "index.faiss")) and os.path.exists(os.path.join(faiss_index_path, "index.pkl")): - vectorstore = FAISS.load_local(folder_path=faiss_index_path, embeddings=embedding_model, allow_dangerous_deserialization=True) -else: - print("Loading files and building index...") - supported_exts = [".pdf", ".txt", ".md", ".docx", ".rst"] - documents = [] - for ext in supported_exts: - for file_path in glob(os.path.join(dataset_path, f"*{ext}")): - try: - loader = { - ".pdf": PyPDFLoader, - ".txt": lambda p: TextLoader(p, encoding="utf-8"), - ".md": lambda p: TextLoader(p, encoding="utf-8"), - ".rst": lambda p: TextLoader(p, encoding="utf-8"), - ".docx": UnstructuredWordDocumentLoader - }[ext](file_path) - pages = loader.load() - full_text = "\n".join(page.page_content for page in pages) - documents.append(Document(page_content=full_text, metadata={"source": file_path})) - except Exception as e: - print(f"Failed to load {file_path}: {e}") - print(f"Loaded 
{len(documents)} documents.") - splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=50) - split_docs = splitter.split_documents(documents) - vectorstore = FAISS.from_documents(split_docs, embedding_model) - vectorstore.save_local(faiss_index_path) - print("FAISS index saved to disk.") - -retriever = vectorstore.as_retriever(search_type='similarity',search_kwargs={"k": 3}) -print("Number of vectors:", vectorstore.index.ntotal) - -llm = custom_llm(model_path=r"./model") # update this with LLM model path - -template = PromptTemplate.from_template("""<|system|> -Using the information contained in the context, -give a comprehensive answer to the question. -Respond only to the question asked, response should be concise and relevant to the question. -Provide the number of the source document when relevant. -<|user|> -Context: -{context} ---- -Now here is the question you need to answer. -Question: {question} -<|assistant|>""") - - -chain = template | llm - -if args.profiling: - questions = { - "Q1": "In the context of Ryzen AI Software's hybrid inference model, how does the integration of automated operator assignment, encrypted context caching, and hardware-specific xclbin configurations collectively contribute to optimizing performance, ensuring security, and minimizing compilation overhead across varying model types such as transformers and CNNs?", - "Q2": "In the context of model quantization using AMD Quark, how do different frameworks vary in capabilities, deployment support, and quantization strategies across platforms and formats?", - "Q3": "What specialized hardware component, inspired by the brain's neural architecture, is designed to perform parallel processing of AI workloads with low precision arithmetic and high energy efficiency?" 
- } - print("\nRunning profiling on predefined questions...\n") - results = run_profiling(llm, embedding_model, retriever, chain, questions, runs=1) - print_profiling_summary(results) - exit() - -query = input("\nEnter your question: ") - -if not args.direct_llm: - print("Retrieval mode is on.\nLoading existing FAISS index from disk...") - retrieved_docs = retriever.invoke(query) - question_tokens = llm._tokenizer.encode(query) - max_total_tokens = 2048 - buffer_tokens = 100 - context_chunks, total_tokens = [], len(question_tokens) + buffer_tokens - for doc in retrieved_docs: - doc_tokens = llm._tokenizer.encode(doc.page_content) - if total_tokens + len(doc_tokens) <= max_total_tokens: - context_chunks.append(doc.page_content) - total_tokens += len(doc_tokens) - else: - break - context_str = "\n\n".join(context_chunks) - response = chain.invoke({"context": context_str, "question": query}) -else: - direct_template = PromptTemplate.from_template("""Answer the following question clearly and accurately in one paragraph.\nQuestion: {question}\nAnswer:""") - chain = direct_template | llm - print("\nDirect_llm mode is on. 
No retrieval has been performed.") - response = chain.invoke({"question": query}) - -print("\nAnswer:\n", response) diff --git a/LLM-examples/RAG-OGA/requirements.txt b/LLM-examples/RAG-OGA/requirements.txt deleted file mode 100644 index a7a257f3..00000000 --- a/LLM-examples/RAG-OGA/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -langchain==0.3.27 -langchain-core==0.3.78 -langchain-community==0.3.30 -argparse==1.4.0 -pydantic==2.11.10 -unstructured==0.18.15 -python-docx==1.2.0 -faiss-cpu==1.12.0 -sentence-transformers==5.1.1 -gradio==5.35.0 -tiktoken==0.11.0 -websockets==10.4 -torch==2.8.0 diff --git a/LLM-examples/RAG-OGA/update_genai_config.py b/LLM-examples/RAG-OGA/update_genai_config.py deleted file mode 100644 index 0676f48f..00000000 --- a/LLM-examples/RAG-OGA/update_genai_config.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import json - -# Get the absolute path to the custom ops DLL -custom_ops_path = os.path.abspath("./onnx_custom_ops.dll") - -print(custom_ops_path) - -# Path to the config file -config_path = "./model/genai_config.json" # Adjust if needed - -# Load the existing config -with open(config_path, "r") as f: - config = json.load(f) - -# Inject the full path to the DLL into the config -config["model"]["decoder"]["session_options"]["custom_ops_library"] = custom_ops_path - -# Save the updated config -with open(config_path, "w") as f: - json.dump(config, f, indent=2) - -print(f"Updated config with custom ops path: {custom_ops_path}") diff --git a/LLM-examples/README.md b/LLM-examples/README.md deleted file mode 100644 index ed616e9e..00000000 --- a/LLM-examples/README.md +++ /dev/null @@ -1,23 +0,0 @@ - - - - -

Ryzen™ AI LLM Examples

-
- -# Ryzen AI LLM OGA (Onnx Runtime Generate API) Flow - -Ryzen AI Software supports deploying LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API. - -Refer to [OnnxRuntime GenAI (OGA)](oga_api/README.md) or https://ryzenai.docs.amd.com/en/latest/hybrid_oga.html for more details. - -## LLM flow examples - -- [LLMs on RyzenAI with ONNX Runtime GenAI API](oga_api) -- [ONNX Runtime GenAI(OGA)-based RAG LLM](RAG-OGA) -- [Running Vision Language Model (VLM) on RyzenAI NPU](VLM) -- [Running GPT-OSS-20B with chat template](oga_inference) - -# Copyright - -Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. diff --git a/LLM-examples/VLM/run_vision.py b/LLM-examples/VLM/run_vision.py deleted file mode 100644 index 692150ff..00000000 --- a/LLM-examples/VLM/run_vision.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License - -import argparse -import os -import glob -import time -import json -from pathlib import Path -from PIL import Image -import numpy as np -import tempfile - -import onnxruntime_genai as og -# og.set_log_options(enabled=True, model_input_values=True, model_output_values=True) - -def preprocess_images(image_paths, size=(896, 896)): - resized_paths = [] - for path in image_paths: - img = Image.open(path).convert("RGB") - img = img.resize(size) - - # Save to a temporary file (OGA only accepts file paths) - tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") - img.save(tmp.name, format="JPEG") - resized_paths.append(tmp.name) - - # Load with OGA - return og.Images.open(*resized_paths) - -def _find_dir_contains_sub_dir(current_dir: Path, target_dir_name): - curr_path = Path(current_dir).absolute() - target_dir = glob.glob(target_dir_name, root_dir=curr_path) - if target_dir: - return Path(curr_path / target_dir[0]).absolute() - else: - if curr_path.parent == curr_path: - # Root dir - return None - return 
_find_dir_contains_sub_dir(curr_path / '..', target_dir_name) - - -def _complete(text, state): - return (glob.glob(text + "*") + [None])[state] - - -def run(args: argparse.Namespace): - print("Loading model...") - config = og.Config(args.model_path) - if args.execution_provider != "follow_config": - config.clear_providers() - if args.execution_provider != "cpu": - print(f"Setting model to {args.execution_provider}...") - config.append_provider(args.execution_provider) - model = og.Model(config) - print("Model loaded") - - tokenizer = og.Tokenizer(model) - processor = model.create_multimodal_processor() - stream = processor.create_stream() - - interactive = not args.non_interactive - - while True: - if interactive: - try: - import readline - readline.set_completer_delims(" \t\n;") - readline.parse_and_bind("tab: complete") - readline.set_completer(_complete) - except ImportError: - # Not available on some platforms. Ignore it. - pass - image_paths = [ - image_path.strip() - for image_path in input( - "Image Path (comma separated; leave empty if no image): " - ).split(",") - ] - else: - if args.image_paths: - image_paths = args.image_paths - else: - image_paths = [str(_find_dir_contains_sub_dir(Path(__file__).parent, "test") / "test_models" / "images" / "australia.jpg")] - - image_paths = [image_path for image_path in image_paths if image_path] - - images = None - if len(image_paths) == 0: - print("No image provided") - else: - for i, image_path in enumerate(image_paths): - if not os.path.exists(image_path): - raise FileNotFoundError(f"Image file not found: {image_path}") - print(f"Using image: {image_path}") - - images = preprocess_images(image_paths, size=(896, 896)) - - if interactive: - text = input("Prompt: ") - else: - if args.prompt: - text = args.prompt - else: - text = "What is shown in this image?" 
- - # Construct the "messages" argument passed to apply_chat_template - tok_cfg_path = Path(args.model_path) / "tokenizer_config.json" - with open(tok_cfg_path, "r", encoding="utf-8") as f: - tok_cfg = json.load(f) - - template_str = tok_cfg.get("chat_template") - if not template_str: - raise RuntimeError("No chat_template found in tokenizer_config.json") - - # Optional: ensure bos_token is defined - bos = tok_cfg.get("bos_token") - if not bos: - # If your tokenizer doesn’t define a bos_token, strip it from the template. - # Alternatively, set bos to your known BOS token (e.g., ""). - template_str = template_str.replace("{{ bos_token }}", "") - messages = [] - if model.type == "phi3v": - # Combine all image tags and text into one user message - content = "".join([f"<|image_{i+1}|>\n" for i in range(len(image_paths))]) + text - messages.append({"role": "user", "content": content}) - else: - # Gemma3-style multimodal: structured content - content_list = [{"type": "image"} for _ in image_paths] - content_list.append({"type": "text", "text": text}) - messages.append({"role": "user", "content": content_list}) - - # Apply the chat template using the tokenizer - message_json = json.dumps(messages) - prompt = tokenizer.apply_chat_template(message_json, template_str=template_str, add_generation_prompt=True) - # num_images = len(image_paths) - # image_block = "".join(["\n" for _ in range(num_images)]) - # user_text = "describe the image" - # prompt = ( "user\n" - # f"{image_block}{user_text}\n" - # "\n" - # "model\n" - # ) - - print("Processing images and prompt...") - inputs = processor(prompt, images=images) - - for name in inputs.keys(): - tensor = inputs[name] - - - print("Generating response...") - print() - params = og.GeneratorParams(model) - params.set_search_options(max_length=7024) - params.set_search_options(past_present_share_buffer=True) - - generator = og.Generator(model, params) - generator.set_inputs(inputs) - start_time = time.time() - - while not 
generator.is_done(): - generator.generate_next_token() - - new_token = generator.get_next_tokens()[0] - print(stream.decode(new_token), end="", flush=True) - - print() - total_run_time = time.time() - start_time - print(f"Total Time : {total_run_time:.2f}") - - for _ in range(3): - print() - - # Delete the generator to free the captured graph before creating another one - del generator - - if not interactive: - break - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-m", "--model_path", type=str, required=True, help="Path to the folder containing the model" - ) - parser.add_argument( - "-e", "--execution_provider", type=str, required=False, default='follow_config', choices=["cpu", "cuda", "dml", "follow_config"], help="Execution provider to run the ONNX Runtime session with. Defaults to follow_config that uses the execution provider listed in the genai_config.json instead." - ) - parser.add_argument( - "--image_paths", nargs='*', type=str, required=False, help="Path to the images, mainly for CI usage" - ) - parser.add_argument( - '-pr', '--prompt', required=False, help='Input prompts to generate tokens from, mainly for CI usage' - ) - parser.add_argument( - '--non-interactive', action=argparse.BooleanOptionalAction, required=False, help='Non-interactive mode, mainly for CI usage' - ) - args = parser.parse_args() - run(args) \ No newline at end of file diff --git a/LLM-examples/llm-sft-deploy/inference.py b/LLM-examples/llm-sft-deploy/inference.py deleted file mode 100644 index 25269b4b..00000000 --- a/LLM-examples/llm-sft-deploy/inference.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright(C) [2025] Advanced Micro Devices, Inc. All rights reserved. 
- -import torch -import os -from transformers import AutoModelForCausalLM, AutoTokenizer -import datasets -import json -import evaluate -from tqdm import tqdm -import random -import numpy as np -import onnxruntime_genai as og -import argparse -from quark_utils import * - -random.seed(0) -np.random.seed(1234) -torch.manual_seed(1234) - -def volve_alpaca_eval(inference_file_name): - bertscore = evaluate.load("bertscore") - eval_set = datasets.load_dataset("bengsoon/volve_alpaca")["test"].select(range(50)) - with open(inference_file_name, "r") as f: - generated_outputs = json.load(f) - - predictions= [] - references = [] - for dataset_ex, generated in zip(eval_set, generated_outputs): - response = generated["output"] - predictions.append(response) - references.append(dataset_ex["output"]) - - bertscore = bertscore.compute(predictions=predictions, references=references, lang="en") - bertscore_f1_avg = sum(bertscore["f1"])/len(bertscore["f1"]) - - print(f"bert f1 avg: {bertscore_f1_avg}") - -def volve_alpaca_generate(model, tokenizer, inference_file_name): - eval_set = datasets.load_dataset("bengsoon/volve_alpaca")["test"].select(range(50)) - def generate(model, tokenizer, prompt): - inputs = tokenizer(prompt, return_tensors="pt").to("cuda") - output_toks = model.generate(**inputs,max_new_tokens=512) - return tokenizer.decode(output_toks[0], skip_special_tokens=True).replace(prompt, "").strip() - - def format_prompt(example): - instruction = example["instruction"] - input_ = example["input"] - TEMPLATE = """<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 
- - ### Instruction: - {instruction} - - - ### Input: - {input} - - - ### Response: - - """ - formatted_input = TEMPLATE.format(instruction=instruction, input=input_) - return formatted_input - - eval_set = eval_set.map(lambda ex: {"formatted_prompt": format_prompt(ex)}) - outputs = [] - for example in tqdm(eval_set): - response = generate(model, tokenizer, example["formatted_prompt"]) - response = response.split("Response:")[1].strip() - outputs.append( - { - "prompt": example["formatted_prompt"], - "reference_output": example["output"], - "output": response - } - ) - with open(inference_file_name, "w") as f: - json.dump(outputs, f, indent=2) - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument('--fp', action="store_true", help="eval full precision, unquantized model") - parser.add_argument('--quark_safetensors', action="store_true", help="eval quark quantized safetensors") - parser.add_argument('--model_dir', type=str, help="model path dir of finetuned model") - parser.add_argument ('--inference_filename', type=str, help="predictions saving file name") - parser.add_argument('--quant_model_dir', type=str, help="model path dir of quantized model") - args = parser.parse_args() - - ### evaluating quark quantized model (safetensors) #### - if args.quark_safetensors: - base_model_name= "meta-llama/Llama-3.2-1B" - tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True) - tokenizer.pad_token = tokenizer.eos_token - base_model = AutoModelForCausalLM.from_pretrained(base_model_name, trust_remote_code=True).to("cuda") - - quark_model = import_hf_model(base_model, model_info_dir=args.quant_model_dir) - volve_alpaca_generate(quark_model, tokenizer, args.inference_filename) - - ### evaluating fp model (not quark quantized) ### - elif args.fp: - model = AutoModelForCausalLM.from_pretrained(args.model_dir, trust_remote_code=True).to("cuda") - tokenizer = AutoTokenizer.from_pretrained(args.model_dir, 
trust_remote_code=True) - tokenizer.pad_token = tokenizer.eos_token - volve_alpaca_generate(model, tokenizer, args.inference_filename) - - volve_alpaca_eval(args.inference_filename) diff --git a/LLM-examples/llm-sft-deploy/inference_oga.py b/LLM-examples/llm-sft-deploy/inference_oga.py deleted file mode 100644 index d6223730..00000000 --- a/LLM-examples/llm-sft-deploy/inference_oga.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright(C) [2025] Advanced Micro Devices, Inc. All rights reserved. - -import torch -import os -from transformers import AutoModelForCausalLM, AutoTokenizer -from datasets import load_dataset -import datasets -import json -import evaluate -from tqdm import tqdm -import random -import numpy as np -import onnxruntime_genai as og -import argparse - -random.seed(0) -np.random.seed(1234) -torch.manual_seed(1234) - -def volve_alpaca_eval(inference_file_name): - bertscore = evaluate.load("bertscore") - eval_set = datasets.load_dataset("bengsoon/volve_alpaca")["test"].select(range(50)) - with open(inference_file_name, "r") as f: - generated_outputs = json.load(f) - - predictions= [] - references = [] - for dataset_ex, generated in zip(eval_set, generated_outputs): - response = generated["output"] - predictions.append(response) - references.append(dataset_ex["output"]) - - bertscore = bertscore.compute(predictions=predictions, references=references, lang="en") - bertscore_f1_avg = sum(bertscore["f1"])/len(bertscore["f1"]) - - print(f"bert f1 avg: {bertscore_f1_avg}") - -def volve_alpaca_generate_oga(model, tokenizer, inference_file_name, max_new_tokens=512): - eval_set = datasets.load_dataset("bengsoon/volve_alpaca")["test"].select(range(50)) - - def generate(model, tokenizer, prompt): - inputs = tokenizer.encode(prompt) - search_options = {} - params = og.GeneratorParams(model) - search_options["max_length"] = len(inputs) + max_new_tokens - params.set_search_options(**search_options) - generator = og.Generator(model, params) - 
generator.append_tokens(inputs) - tokens = [] - response = '' - while not generator.is_done(): - generator.generate_next_token() - new_token = generator.get_next_tokens()[0] - tokens.append(new_token) - response += tokenizer.decode(new_token) - del generator - return response - - def format_prompt(example): - instruction = example["instruction"] - input_ = example["input"] - TEMPLATE = """<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. - - ### Instruction: - {instruction} - - - ### Input: - {input} - - - ### Response: - - """ - formatted_input = TEMPLATE.format(instruction=instruction, input=input_) - return formatted_input - - eval_set = eval_set.map(lambda ex: {"formatted_prompt": format_prompt(ex)}) - outputs = [] - for example in tqdm(eval_set): - response = generate(model, tokenizer, example["formatted_prompt"]) - outputs.append( - { - "prompt": example["formatted_prompt"], - "reference_output": example["output"], - "output": response - } - ) - with open(inference_file_name, "w") as f: - json.dump(outputs, f, indent=2) - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument('--model_dir', type=str, required=True, help="model path dir") - parser.add_argument ('--inference_filename', type=str, required=True, help="predictions saving file name") - args = parser.parse_args() - - ### evaluating oga model - onnx_model_path = args.model_dir - oga_model = og.Model(onnx_model_path) - tokenizer = og.Tokenizer(oga_model) - volve_alpaca_generate_oga(oga_model, tokenizer, args.inference_filename) - volve_alpaca_eval(args.inference_filename) diff --git a/LLM-examples/llm-sft-deploy/quark_utils.py b/LLM-examples/llm-sft-deploy/quark_utils.py deleted file mode 100644 index 1fe80f00..00000000 --- a/LLM-examples/llm-sft-deploy/quark_utils.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright(C) [2025] Advanced Micro Devices, 
Inc. All rights reserved. - -from pathlib import Path -from typing import List, Optional, Dict, Any -import torch -import json -from accelerate.utils.modeling import find_tied_parameters -from torch import nn -from transformers import AutoTokenizer -from quark.torch.quantization.config.config import Config -from quark.torch.export.config.config import ExporterConfig -from quark.torch import ModelExporter -from quark.torch.export.api import ModelImporter -from safetensors.torch import load_file as safe_load_file -import sys -import os - -PT_WEIGHTS_NAME = "model_state_dict.pth" -SAFE_WEIGHTS_NAME = "model.safetensors" -SAFE_WEIGHTS_INDEX_NAME = "model.safetensors.index.json" -def import_hf_model(model: nn.Module, model_info_dir: str): - ''' - Load the model file, perform preprocessing and post-processing, load weights into the model. - ''' - print("Start importing hf_format quantized model ...") - importer = ModelImporter(model_info_dir=model_info_dir) - model_config = importer.get_model_config() - model_state_dict = _load_hf_state_dict(model_info_dir) - model = importer.import_model(model, model_config, model_state_dict) - _untie_parameters(model, model_state_dict) - model.load_state_dict(model_state_dict) - print("hf_format quantized model imported successfully.") - return model - -def _load_hf_state_dict(model_info_dir: str) -> Dict[str, torch.Tensor]: - ''' - Load the state dict from safetensor file by load_file of safetensors.torch. 
- ''' - model_state_dict: Dict[str, torch.Tensor] = {} - safetensors_dir = Path(model_info_dir) - safetensors_path = safetensors_dir / SAFE_WEIGHTS_NAME - safetensors_index_path = safetensors_dir / SAFE_WEIGHTS_INDEX_NAME - if safetensors_path.exists(): - model_state_dict = safe_load_file(str(safetensors_path)) - # is_shard - elif safetensors_index_path.exists(): - with open(str(safetensors_index_path), "r") as file: - safetensors_indices = json.load(file) - safetensors_files = [value for _, value in safetensors_indices["weight_map"].items()] - safetensors_files = list(set(safetensors_files)) - for filename in safetensors_files: - filepath = safetensors_dir / filename - model_state_dict.update(safe_load_file(str(filepath))) - else: - raise FileNotFoundError(f"Neither {str(safetensors_path)} nor {str(safetensors_index_path)} were found. Please check that the model path specified {str(safetensors_dir)} is correct.") - return model_state_dict - -def _untie_parameters(model: nn.Module, model_state_dict: Dict[str, Any]) -> None: - ''' - Some parameters share weights, such as embedding and lm_head, and when exporting with `PretrainedModel.save_pretrained` - only one of them will be exported, so need to copy the parameters. 
- ''' - # TODO: Only embedding for now, need to solve other cases, such as encoder-decoder tied - tied_param_groups = find_tied_parameters(model) - if len(tied_param_groups) > 0: - if len(tied_param_groups) > 1 or "lm_head.weight" not in tied_param_groups[0]: - raise ValueError( - f"Your have tied_param_groups: {tied_param_groups}, temporarily does not support the case where tied_param is not 'lm_head and embedding'" - ) - missing_key: List[str] = [] - tied_param_value: Optional[torch.Tensor] = None - for tied_param_name in tied_param_groups[0]: - if tied_param_name in model_state_dict.keys(): - tied_param_value = model_state_dict[tied_param_name] - else: - missing_key.append(tied_param_name) - if tied_param_value is not None: - for tied_param_key in missing_key: - model_state_dict[tied_param_key] = tied_param_value - else: - raise ValueError("Cannot assign a value to tied_params because tied_param_value is None") diff --git a/LLM-examples/llm-sft-deploy/requirements.txt b/LLM-examples/llm-sft-deploy/requirements.txt deleted file mode 100644 index 97a7da9e..00000000 --- a/LLM-examples/llm-sft-deploy/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -transformers==4.48.0 -datasets==3.2.0 -trl==0.13.0 -evaluate -peft==0.14.0 -bert_score -wandb -nltk -onnxruntime_genai==0.6.0 -numpy==1.26.0 \ No newline at end of file diff --git a/LLM-examples/llm-sft-deploy/train.py b/LLM-examples/llm-sft-deploy/train.py deleted file mode 100644 index 1d7f67af..00000000 --- a/LLM-examples/llm-sft-deploy/train.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright(C) [2025] Advanced Micro Devices, Inc. All rights reserved. 
- -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, TrainerCallback -import datasets as ds -from datasets import load_dataset, Dataset -from trl import SFTConfig, SFTTrainer -import argparse -from transformers.utils import logging -import os -import wandb -from huggingface_hub import login -from peft import get_peft_model, LoraConfig -from tqdm import tqdm -from accelerate import PartialState -device_string = PartialState().process_index -import random -import numpy as np -from peft import PeftModel - -hf_token = os.getenv("HUGGINGFACE_API_TOKEN") -if hf_token: - login(hf_token) -else: - raise ValueError("HF API KEY is not set..") - -class LoggingCallback(TrainerCallback): - def on_step_end(self, args, state, control, **kwargs): - lr = kwargs["optimizer"].param_groups[0]["lr"] - if state.global_step % args.logging_steps == 0: - print("----------------------------------------------------------------") - print(f"Step {state.global_step} / {state.max_steps} completed, lr = {lr}") - -def format_prompt(dataset): - TEMPLATE = """<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 
- - ### Instruction: - {instruction} - - - ### Input: - {input} - - - ### Response: - {output} - <|end_of_text|> - """ - - formatted_dataset = [] - for example in tqdm(dataset): - formatted_text="" - instruction = example["instruction"] - input_ = example["input"] - output = example["output"] - formatted_input = TEMPLATE.format(instruction=instruction, input=input_, output=output) - formatted_dataset.append({"text": formatted_input}) - - formatted_dataset = Dataset.from_list(formatted_dataset) - return formatted_dataset - -def finetune_model(args, base_model, tokenizer, training_dataset): - - if args.hf_dir == "local": - args.hf_dir = "./ft_model/" - push_to_hub = False - hub_private_repo = False - report_to = None - else: - push_to_hub = True - hub_private_repo=True - report_to = "wandb" - - training_arguments = SFTConfig( - output_dir = "./results", - num_train_epochs = args.eps, - per_device_train_batch_size = args.bs_per_device, - gradient_accumulation_steps = args.grad_acc, - save_steps = -1, - save_total_limit=1, - weight_decay=0.1, - logging_steps = 50, - learning_rate = args.lr, - max_grad_norm = args.max_grad_norm, - max_seq_length=args.max_seq_length, - warmup_ratio = 0.03, - lr_scheduler_type = args.scheduler, - report_to = report_to, - push_to_hub=push_to_hub, - hub_model_id=args.hf_dir, - hub_private_repo=hub_private_repo, - hub_strategy="checkpoint" - ) - - if args.lora: - if args.lora_all: - peft_config = LoraConfig( - lora_alpha = 32, - r = 16, - bias = "none", - task_type = "CAUSAL_LM", - target_modules= ["v_proj", "k_proj", "q_proj", "o_proj", "gate_proj", "down_proj", "up_proj"], - lora_dropout=0.05 - ) - elif args.lora_qv: - peft_config = LoraConfig( - lora_alpha = 32, - r = 16, - bias = "none", - task_type = "CAUSAL_LM", - target_modules= ["v_proj","q_proj"], - lora_dropout=0.1 - ) - elif args.lora_kv: - peft_config = LoraConfig( - lora_alpha = 32, - r = 16, - bias = "none", - task_type = "CAUSAL_LM", - target_modules = ["k_proj", "v_proj"], - 
lora_dropout=0.1 - ) - elif args.lora_kvq: - peft_config = LoraConfig( - lora_alpha = 32, - r = 16, - bias = "none", - task_type = "CAUSAL_LM", - target_modules = ["k_proj", "v_proj", "q_proj"], - lora_dropout=0.1 - ) - - peft_model = get_peft_model(base_model, peft_config) - peft_model.print_trainable_parameters() - - sft_trainer = SFTTrainer( - model = base_model, - train_dataset = training_dataset, - peft_config = peft_config, - tokenizer = tokenizer, - args = training_arguments, - callbacks = [LoggingCallback] - ) - else: - sft_trainer = SFTTrainer( - model = base_model, - train_dataset = training_dataset, - tokenizer = tokenizer, - args = training_arguments, - callbacks = [LoggingCallback] - ) - - sft_trainer.train() - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str, default="meta-llama/Llama-3.2-1B", help="model to perform training with") - parser.add_argument("--hf_dir", type=str, default="username/TEST", help="example: /") - parser.add_argument("--bs_per_device", type=int, default=1, help="batch size per device") - parser.add_argument("--lr", type=float, default=2e-5, help="learning rate") - parser.add_argument("--eps", type=int, default=5, help="num epochs") - parser.add_argument("--scheduler", type=str, default="linear", help="lr scheduler type") - parser.add_argument("--max_grad_norm", type=float, default=0.3) - parser.add_argument("--grad_acc", type=int, default=8) - parser.add_argument("--max_seq_length", type=int, default=2048) - parser.add_argument("--lora_all", action="store_true", help="all layers") - parser.add_argument("--lora_qv", action="store_true", help="qv") - parser.add_argument("--lora_kv", action="store_true", help="kv") - parser.add_argument("--lora_kvq", action="store_true", help="qkv") - parser.add_argument("--lora", action="store_true", help="lora") - parser.add_argument("--wandb", action="store_true", help="log training to wandb") - 
parser.add_argument("--adapter_model_dir", default="volve-adapter") - parser.add_argument("--merge_model", action="store_true", help="merge adapter after training") - - args = parser.parse_args() - - base_model_name = args.model_name - #load model and tokenizer - base_model = AutoModelForCausalLM.from_pretrained(base_model_name, trust_remote_code = True).to("cuda") - tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code = True) - tokenizer.pad_token = tokenizer.eos_token - - if args.merge_model: - adapter_model_dir = args.adapter_model_dir - merged_model = PeftModel.from_pretrained(base_model, adapter_model_dir).to("cuda") - merged_model = merged_model.merge_and_unload() - merged_model.save_pretrained("llama3_1b_ddr_merged") - tokenizer.save_pretrained("llama3_1b_ddr_merged") - else: - #load dataset - training_dataset_name = "bengsoon/volve_alpaca" - training_dataset = load_dataset(training_dataset_name, split = "train") - - #format dataset - print("formatting dataset.....") - formatted_dataset = format_prompt(training_dataset) - - #initialize wandb - if args.wandb: - if torch.cuda.current_device() == 0: - wandb.init( - project="npu-application", - config= { - "learning_rate": args.lr, - "batch_size_per_dev": args.bs_per_device, - "epochs": args.eps, - "optimizer": "AdamW", - "lr_scheduler_type": args.scheduler, - "max_grad_norm": args.max_grad_norm - } - ) - - finetune_model(args, base_model, tokenizer, formatted_dataset) - - - diff --git a/LLM-examples/oga_api/CMakeLists.txt b/LLM-examples/oga_api/CMakeLists.txt deleted file mode 100644 index d3fe71c9..00000000 --- a/LLM-examples/oga_api/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -cmake_minimum_required(VERSION 3.15) -project(oga_api) - -set(CMAKE_CXX_STANDARD 17) - -# Output directory setup -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) - -# Include headers -include_directories(${CMAKE_SOURCE_DIR}/include) - -# Add library search path 
-link_directories(${CMAKE_SOURCE_DIR}/libs) - -# Executable source -add_executable(example main.cpp) - -# Link the .lib file (drop the extension) -target_link_libraries(example onnxruntime-genai) - -# Copy DLLs after build -file(GLOB DLLS "${CMAKE_SOURCE_DIR}/libs/*.dll") -add_custom_command(TARGET example POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${DLLS} - $) diff --git a/LLM-examples/oga_api/include/.gitkeep b/LLM-examples/oga_api/include/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/LLM-examples/oga_api/libs/.gitkeep b/LLM-examples/oga_api/libs/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/LLM-examples/oga_api/main.cpp b/LLM-examples/oga_api/main.cpp deleted file mode 100644 index 39d29824..00000000 --- a/LLM-examples/oga_api/main.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include "ort_genai.h" - -std::string apply_llama2_chat_template(const std::string& user_input, const std::string& system_prompt = "You are a helpful assistant.") { - return "[INST] <>\n" + system_prompt + "\n<>\n\n" + user_input + " [/INST]"; -} - -int main(int argc, char* argv[]) { - std::string model_path; - - // Parse arguments - for (int i = 1; i < argc; ++i) { - std::string arg = argv[i]; - if ((arg == "-m" || arg == "--model") && i + 1 < argc) { - model_path = argv[++i]; - } - } - - if (model_path.empty()) { - std::cerr << "Usage: " << argv[0] << " -m " << std::endl; - return 1; - } - - std::cout << "Initializing ORT GenAI..." << std::endl; - OgaHandle handle; - - std::cout << "Loading Model from: " << model_path << std::endl; - auto model = OgaModel::Create(model_path.c_str()); - if (!model) { - std::cerr << "Failed to load model from: " << model_path << std::endl; - return -1; - } - std::cout << "Model loaded." << std::endl; - - std::cout << "Creating Tokenizer..." << std::endl; - auto tokenizer = OgaTokenizer::Create(*model); - std::cout << "Tokenizer created." 
<< std::endl; - - auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer); - auto params = OgaGeneratorParams::Create(*model); - params->SetSearchOption("max_length", 1024); - - std::cout << "Creating Generator..." << std::endl; - auto generator = OgaGenerator::Create(*model, *params); - std::cout << "Generator created." << std::endl; - - std::cout << "--------------------------------" << std::endl; - - std::string input; - std::cout << "Enter prompt: "; - std::getline(std::cin, input); - - std::string prompt = apply_llama2_chat_template(input); - - auto sequences = OgaSequences::Create(); - tokenizer->Encode(prompt.c_str(), *sequences); - generator->AppendTokenSequences(*sequences); - - std::cout << "Generating response:\n"; - while (!generator->IsDone()) { - generator->GenerateNextToken(); - auto token_id = generator->GetSequenceData(0)[generator->GetSequenceCount(0) - 1]; - std::cout << tokenizer_stream->Decode(token_id) << std::flush; - } - - std::cout << std::endl; - return 0; -} diff --git a/LLM-examples/oga_inference/model_chat.py b/LLM-examples/oga_inference/model_chat.py deleted file mode 100644 index 95fdc680..00000000 --- a/LLM-examples/oga_inference/model_chat.py +++ /dev/null @@ -1,454 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -# Modifications Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. 
- -import onnxruntime_genai as og -import argparse -import os -import json -import time - -# Default settings -DEFAULT_MODEL_MAX_CONTEXT = 2048 -DEFAULT_OUTPUT_RESERVE = 256 - -def get_tools_list(input_tools): - tools_list = [] - try: - tools_list = json.loads(input_tools) - except json.JSONDecodeError: - raise ValueError("Invalid JSON format for tools list, expected format: '[{\"name\": \"fn1\"},{\"name\": \"fn2\"}]'") - if len(tools_list) == 0: - raise ValueError("Tools list cannot be empty") - return tools_list - -def create_prompt_tool_input(tools_list): - tool_input = str(tools_list[0]) - for tool in tools_list[1:]: - tool_input += ',' + str(tool) - return tool_input - -def get_json_grammar(input_tools): - tools_list = get_tools_list(input_tools) - prompt_tool_input = create_prompt_tool_input(tools_list) - if len(tools_list) == 1: - return prompt_tool_input, json.dumps(tools_list[0]) - else: - output = '{ "anyOf": [' + json.dumps(tools_list[0]) - for tool in tools_list[1:]: - output += ',' + json.dumps(tool) - output += '] }' - return prompt_tool_input, output - -def get_lark_grammar(input_tools): - tools_list = get_tools_list(input_tools) - prompt_tool_input = create_prompt_tool_input(tools_list) - if len(tools_list) == 1: - output = ("start: TEXT | fun_call\n" "TEXT: /[^{](.|\\n)*/\n" " fun_call: <|tool_call|> %json " + json.dumps(convert_tool_to_grammar_input(tools_list[0]))) - return prompt_tool_input, output - else: - return prompt_tool_input, "start: TEXT | fun_call \n TEXT: /[^{](.|\n)*/ \n fun_call: <|tool_call|> %json {\"anyOf\": [" + ','.join([json.dumps(tool) for tool in tools_list]) + "]}" - -def convert_tool_to_grammar_input(tool): - param_props = {} - required_params = [] - for param_name, param_info in tool.get("parameters", {}).items(): - param_props[param_name] = { - "type": param_info.get("type", "string"), - "description": param_info.get("description", "") - } - required_params.append(param_name) - output_schema = { - "description": 
tool.get('description', ''), - "type": "object", - "required": ["name", "parameters"], - "additionalProperties": False, - "properties": { - "name": { "const": tool["name"] }, - "parameters": { - "type": "object", - "properties": param_props, - "required": required_params, - "additionalProperties": False - } - } - } - if len(param_props) == 0: - output_schema["required"] = ["name"] - return output_schema - -def load_prompt_from_file(file_path): - """Load entire file content as a single prompt.""" - if not os.path.exists(file_path): - raise FileNotFoundError(f"Prompt file not found: {file_path}") - - with open(file_path, "r", encoding="utf-8") as f: - content = f.read().strip() - - if not content: - raise ValueError("Prompt file is empty") - - return content - -def calculate_chat_template_overhead(tokenizer, jinja_path, template_str): - """Calculate the number of tokens added by the chat template.""" - minimal_message = '[{"role": "user", "content": "X"}]' - - if os.path.exists(jinja_path) and template_str: - templated = tokenizer.apply_chat_template( - messages=minimal_message, - add_generation_prompt=True, - template_str=template_str - ) - else: - templated = tokenizer.apply_chat_template( - messages=minimal_message, - add_generation_prompt=True - ) - - templated_tokens = tokenizer.encode(templated) - minimal_content_tokens = tokenizer.encode("X") - overhead = len(templated_tokens) - len(minimal_content_tokens) - - return overhead - -def truncate_text_to_token_length(text, tokenizer, max_tokens, verbose=False): - """Truncates text to fit within a specified token length.""" - if max_tokens is None or max_tokens <= 0: - return text - - tokens = tokenizer.encode(text) - original_length = len(tokens) - - if len(tokens) <= max_tokens: - if verbose: - print(f"Text has {original_length} tokens (no truncation needed)") - return text - - truncated_tokens = tokens[:max_tokens] - truncated_text = tokenizer.decode(truncated_tokens) - - if verbose: - print(f"Truncated text from 
{original_length} to {len(truncated_tokens)} tokens") - - return truncated_text - -def print_input_prompt(prompt_string, tokens, description="Input"): - """Print the full input prompt clearly formatted.""" - print() - print("=" * 80) - print(f"{description} PROMPT (Full chat-templated input)") - print("=" * 80) - print(prompt_string) - print("=" * 80) - print(f"Total tokens: {len(tokens)}") - print("=" * 80) - print() - -def print_timing_stats(input_tokens_count, new_tokens_count, prompt_time, run_time): - """Print timing statistics, handling edge cases like zero time.""" - stats = [f"Prompt length: {input_tokens_count}", f"New tokens: {new_tokens_count}"] - - if prompt_time > 0: - stats.append(f"Time to first: {prompt_time:.2f}s") - stats.append(f"Prompt tokens per second: {input_tokens_count/prompt_time:.2f} tps") - else: - stats.append("Time to first: <0.01s") - - if run_time > 0: - stats.append(f"New tokens per second: {new_tokens_count/run_time:.2f} tps") - elif new_tokens_count > 0: - stats.append("New tokens per second: N/A (too fast to measure)") - - print(", ".join(stats)) - -def main(args): - if args.verbose: print("Loading model...") - if args.timings: - started_timestamp = 0 - first_token_timestamp = 0 - - config = og.Config(args.model_path) - if args.execution_provider != "follow_config": - config.clear_providers() - if args.execution_provider != "cpu": - if args.verbose: print(f"Setting model to {args.execution_provider}") - config.append_provider(args.execution_provider) - model = og.Model(config) - - if args.verbose: print("Model loaded") - - tokenizer = og.Tokenizer(model) - tokenizer_stream = tokenizer.create_stream() - if args.verbose: print("Tokenizer created") - if args.verbose: print() - - search_options = {name:getattr(args, name) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature', 'repetition_penalty'] if name in args} - search_options['batch_size'] = 1 - - if args.verbose: print(search_options) - - 
system_prompt = args.system_prompt - guidance_type = "" - prompt_tool_input = "" - guidance_input = "" - if args.guidance_type != "none": - guidance_type = args.guidance_type - if not args.guidance_info: - raise ValueError("Guidance information is required if guidance type is provided") - if guidance_type == "json_schema" or guidance_type == "lark_grammar": - tools_list = args.guidance_info - if guidance_type == "json_schema": - prompt_tool_input, guidance_input = get_json_grammar(tools_list) - elif guidance_type == "lark_grammar": - prompt_tool_input, guidance_input = get_lark_grammar(tools_list) - elif guidance_type == "regex": - guidance_input = args.guidance_info - else: - raise ValueError("Guidance Type can only be [json_schema, regex, or lark_grammar]") - - params = og.GeneratorParams(model) - params.set_search_options(**search_options) - if guidance_type: - params.set_guidance(guidance_type, guidance_input) - if args.verbose: - print("Guidance type is set to:", guidance_type) - print("Guidance input is:", guidance_input) - - generator = og.Generator(model, params) - if args.verbose: print("Generator created") - if guidance_type == "json_schema" or guidance_type == "lark_grammar": - messages = f"""[{{"role": "system", "content": "{system_prompt}", "tools": "{prompt_tool_input}"}}]""" - else: - messages = f"""[{{"role": "system", "content": "{system_prompt}"}}]""" - - # Apply Chat Template - load template string - template_str = "" - jinja_path = os.path.join(args.model_path, "chat_template.jinja") - if os.path.exists(jinja_path): - with open(jinja_path, "r", encoding="utf-8") as f: - template_str = f.read() - tokenizer_input_system_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=False, template_str=template_str) - else: - tokenizer_input_system_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=False) - - input_tokens = tokenizer.encode(tokenizer_input_system_prompt) - if guidance_type: - 
input_tokens = input_tokens[:-1] - system_prompt_length = len(input_tokens) - - if args.verbose: - print_input_prompt(tokenizer_input_system_prompt, input_tokens, "SYSTEM") - print(f"System prompt tokens: {system_prompt_length}") - - generator.append_tokens(input_tokens) - - # Calculate chat template overhead once - chat_template_overhead = calculate_chat_template_overhead(tokenizer, jinja_path, template_str) - if args.verbose: - print(f"Chat template overhead: {chat_template_overhead} tokens") - - # Get settings - model_max_context = getattr(args, 'max_context', DEFAULT_MODEL_MAX_CONTEXT) - output_reserve = getattr(args, 'output_reserve', DEFAULT_OUTPUT_RESERVE) - - # Calculate the maximum possible user content that fits - max_possible_content = model_max_context - system_prompt_length - chat_template_overhead - output_reserve - - if max_possible_content <= 0: - raise ValueError(f"Model max context ({model_max_context}) is too small. " - f"System prompt ({system_prompt_length}) + template ({chat_template_overhead}) + " - f"output reserve ({output_reserve}) = {system_prompt_length + chat_template_overhead + output_reserve} tokens, " - f"leaving no room for user content.") - - # Check if prompt file is provided - if hasattr(args, 'prompt_file') and args.prompt_file: - text = load_prompt_from_file(args.prompt_file) - - if args.verbose: - print(f"Loaded prompt from file: {args.prompt_file}") - print(f"Original prompt length: {len(text)} characters") - - if args.timings: started_timestamp = time.time() - - # Get requested user content limit - requested_limit = getattr(args, 'input_prompt_length', None) - - # Determine actual limit (cap to max possible) - if requested_limit is not None: - if requested_limit > max_possible_content: - actual_limit = max_possible_content - print(f"\n⚠️ Requested {requested_limit} tokens, but capped to {actual_limit} tokens") - print(f" (Model max: {model_max_context} - system: {system_prompt_length} - " - f"template: 
{chat_template_overhead} - output reserve: {output_reserve} = {max_possible_content})\n") - else: - actual_limit = requested_limit - else: - # No limit specified, use max possible - actual_limit = max_possible_content - - # Truncate user content - original_text = text - text = truncate_text_to_token_length(text, tokenizer, actual_limit, args.verbose) - - actual_content_tokens = len(tokenizer.encode(text)) - - # Print token budget summary - print(f"\n{'='*55}") - print(f"TOKEN BUDGET") - print(f"{'='*55}") - print(f"Model max context: {model_max_context:>6} tokens") - print(f"Output reserve: {output_reserve:>6} tokens") - print(f"System prompt: {system_prompt_length:>6} tokens") - print(f"Chat template overhead: {chat_template_overhead:>6} tokens") - print(f"Max user content: {max_possible_content:>6} tokens") - print(f"{'='*55}") - if requested_limit is not None: - print(f"Requested (-ipl): {requested_limit:>6} tokens") - print(f"Actual user content: {actual_content_tokens:>6} tokens") - print(f"{'='*55}\n") - - # Properly escape the text for JSON - escaped_text = json.dumps(text) - messages = f"""[{{"role": "user", "content": {escaped_text}}}]""" - - # Apply Chat Template AFTER truncation - user_prompt = "" - if os.path.exists(jinja_path): - user_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True, template_str=template_str) - else: - user_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True) - - input_tokens = tokenizer.encode(user_prompt) - - # Print the full chat-templated input prompt - print_input_prompt(user_prompt, input_tokens, "USER") - - # Print summary - total_input = system_prompt_length + len(input_tokens) - remaining_for_output = model_max_context - total_input - print(f"TOTAL INPUT: {system_prompt_length} (system) + {len(input_tokens)} (user msg) = {total_input} tokens") - print(f"AVAILABLE FOR OUTPUT: {remaining_for_output} tokens") - print() - - generator.append_tokens(input_tokens) 
- - if args.verbose: print("Running generation loop ...") - if args.timings: - first = True - new_tokens = [] - - print("Output: ", end='', flush=True) - - try: - while not generator.is_done(): - generator.generate_next_token() - if args.timings: - if first: - first_token_timestamp = time.time() - first = False - - new_token = generator.get_next_tokens()[0] - print(tokenizer_stream.decode(new_token), end='', flush=True) - if args.timings: new_tokens.append(new_token) - except KeyboardInterrupt: - print(" --control+c pressed, aborting generation--") - print() - print() - - if args.timings: - prompt_time = first_token_timestamp - started_timestamp - run_time = time.time() - first_token_timestamp - print_timing_stats(len(input_tokens), len(new_tokens), prompt_time, run_time) - - return - - # Interactive mode - while True: - text = input("Prompt (Use quit() to exit): ") - if not text: - print("Error, input cannot be empty") - continue - - if text == "quit()": - break - - if args.timings: started_timestamp = time.time() - - escaped_text = json.dumps(text) - messages = f"""[{{"role": "user", "content": {escaped_text}}}]""" - - user_prompt = "" - if os.path.exists(jinja_path): - user_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True, template_str=template_str) - else: - user_prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True) - input_tokens = tokenizer.encode(user_prompt) - - print_input_prompt(user_prompt, input_tokens, "USER") - - generator.append_tokens(input_tokens) - - if args.verbose: print("Running generation loop ...") - if args.timings: - first = True - new_tokens = [] - - print("Output: ", end='', flush=True) - - try: - while not generator.is_done(): - generator.generate_next_token() - if args.timings: - if first: - first_token_timestamp = time.time() - first = False - - new_token = generator.get_next_tokens()[0] - print(tokenizer_stream.decode(new_token), end='', flush=True) - if args.timings: 
new_tokens.append(new_token) - except KeyboardInterrupt: - print(" --control+c pressed, aborting generation--") - print() - print() - - if args.timings: - prompt_time = first_token_timestamp - started_timestamp - run_time = time.time() - first_token_timestamp - print_timing_stats(len(input_tokens), len(new_tokens), prompt_time, run_time) - - if args.rewind: - generator.rewind_to(system_prompt_length) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") - parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)') - parser.add_argument('-pr', '--prompt_file', type=str, help='Path to .txt file containing the prompt') - - # Token limit settings - parser.add_argument('-ipl', '--input_prompt_length', type=int, default=None, - help='Desired user content tokens (1-2048). Automatically capped if it exceeds ' - 'what fits in model context. 
If not specified, uses maximum available.') - parser.add_argument('-mc', '--max_context', type=int, default=DEFAULT_MODEL_MAX_CONTEXT, - help=f'Model max context length (default: {DEFAULT_MODEL_MAX_CONTEXT})') - parser.add_argument('-or', '--output_reserve', type=int, default=DEFAULT_OUTPUT_RESERVE, - help=f'Tokens to reserve for output (default: {DEFAULT_OUTPUT_RESERVE})') - - parser.add_argument('-e', '--execution_provider', type=str, required=False, default='follow_config', choices=["cpu", "cuda", "dml", "follow_config"], help="Execution provider") - parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt') - parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt') - parser.add_argument('-ds', '--do_sample', action='store_true', help='Do random sampling') - parser.add_argument('-p', '--top_p', type=float, help='Top p probability to sample with') - parser.add_argument('-k', '--top_k', type=int, help='Top k tokens to sample from') - parser.add_argument('-t', '--temperature', type=float, help='Temperature to sample with') - parser.add_argument('-re', '--repetition_penalty', type=float, help='Repetition penalty') - parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print verbose output') - parser.add_argument('-tm', '--timings', action='store_true', default=False, help='Print timing information') - parser.add_argument('-gtype', '--guidance_type', type=str, default="none", choices=["none", "json_schema", "regex", "lark_grammar"], help='Guidance type') - parser.add_argument('-ginfo', '--guidance_info', type=str, default='', help='Guidance information') - parser.add_argument('-s', '--system_prompt', type=str, default='You are a helpful AI assistant.', help='System prompt') - parser.add_argument('-r', '--rewind', action='store_true', default=False, help='Rewind to system prompt after each generation') - args = parser.parse_args() 
- main(args) \ No newline at end of file diff --git a/README.md b/README.md index 3c25a3e5..42745181 100644 --- a/README.md +++ b/README.md @@ -1,79 +1,97 @@ - +
- +

Ryzen™ AI Software

-

Ryzen™ AI Software

## Introduction -AMD Ryzen™ AI Software includes the tools and runtime libraries for optimizing and deploying AI inference on your [AMD Ryzen™ AI](https://www.amd.com/en/products/processors/consumer/ryzen-ai.html) based PC. It enables developers to quickly build and run a variety of AI applications for Ryzen™ AI. It is designed with high efficiency and ease-of-use in mind, unleashing the full potential of AI acceleration on Ryzen™ AI. +AMD Ryzen™ AI Software includes the tools and runtime libraries for optimizing and deploying AI inference on your [AMD Ryzen™ AI](https://www.amd.com/en/products/processors/consumer/ryzen-ai.html) based PC. It enables developers to quickly build and run a variety of AI applications for Ryzen™ AI, taking advantage of the neural processing unit (NPU), integrated GPU, and CPU. -This repository contains the demos, examples and tutorials, demonstrating usage and capabilities of the Ryzen™ AI Software. It is a subset of the Ryzen™ AI Software release. +This repository contains the demos, examples, and tutorials demonstrating the usage and capabilities of the Ryzen™ AI Software, along with the source for the documentation site. -Follow the instructions at [Ryzen™ AI Software](https://ryzenai.docs.amd.com/en/latest/inst.html) for installation. +Follow the instructions at [Ryzen™ AI Software Installation](https://ryzenai.docs.amd.com/en/latest/inst.html) to get set up. -## Git LFS and Instructions to clone: +## Documentation + +**Full documentation:** [ryzenai.docs.amd.com](https://ryzenai.docs.amd.com) + +The documentation source lives in the [`docs/`](docs/) directory. + +## Git LFS and Instructions to clone + +Due to the presence of large files in some examples/tutorials, Git Large File Storage (LFS) is configured in this repository. Follow the instructions below to ensure Git LFS is properly set up: + +- Install Git LFS from the [official website](https://git-lfs.com/). 
+- After installation, set up Git LFS on your machine: - Due to the presence of large files in some examples/tutorials, Git Large File Storage (LFS) has been configured in this repository. Follow the instructions below to ensure Git LFS is properly set up: - - Install Git LFS by downloading it from the [official website](https://git-lfs.com/) - - After installation, run the following command in your terminal to set up Git LFS on your machine: ``` - git lfs install +git lfs install ``` - - Clone the repository (or a fork of it): + +- Clone the repository (or a fork of it): + ``` git clone https://github.com/amd/RyzenAI-SW.git ``` -- Pull the actual LFS files: + +- Pull the LFS files: + ``` git lfs pull ``` -To run the demos and examples in this repository, please follow the instructions of README.md in each directory. - - ## Getting Started Tutorials -- [Getting started tutorial with a fine-tuned ResNet model](CNN-examples/getting_started_resnet) -- [Hello world jupyter notebook tutorial](CNN-examples/hello_world) -- [Getting started ResNet50 example on iGPU](CNN-examples/iGPU/getting_started) +- [Getting started with a fine-tuned ResNet model](docs/vision/getstartex.mdx) +- [Hello World tutorial](docs/vision/hello-world.mdx) +- [ResNet50 on iGPU](docs/vision/igpu-getting-started.mdx) ## LLM Flow -- [LLMs on RyzenAI with ONNX Runtime GenAI API](LLM-examples/oga_api) -- [ONNX Runtime GenAI(OGA)‑based RAG LLM](LLM-examples/RAG-OGA) -- [Running Vision Language Model (VLM) on RyzenAI NPU](LLM-examples/VLM) -- [Running GPT-OSS-20B with chat template](LLM-examples/oga_inference) +- [LLMs on Ryzen AI with the ONNX Runtime GenAI (OGA) API](docs/llms/oga-cpp-api.mdx) +- [ONNX Runtime GenAI (OGA)-based RAG LLM](docs/llms/rag-oga.mdx) +- [Vision Language Model (VLM) on Ryzen AI NPU](docs/llms/vlm.mdx) +- [GPT-OSS-20B with chat template](docs/llms/oga-inference.mdx) ## Examples -- BF16 Model Examples - - [Finetuned DistilBERT for Text 
Classification](Transformer-examples/DistilBERT_text_classification_bf16) - - [Image classification](CNN-examples/image_classification) -- [Object detection with Yolov8 models](CNN-examples/object_detection) -- [Automatic Speech Recognition: Step by Step guide to run Whisper-base on NPU](Transformer-examples/ASR/Whisper-AI) +- BF16 model examples + - [Finetuned DistilBERT for Text Classification](docs/llms/distilbert-example.mdx) + - [Image classification](docs/vision/image-classification.mdx) +- [Object detection with YOLOv8](docs/vision/yolov8m.mdx) +- [Super-Resolution](docs/vision/super_resolution.mdx) +- [Nemotron OCR v2 on AMD Ryzen AI NPU](docs/vision/nemotron-ocr-v2.mdx) + +## Windows ML Examples +- [Running ResNet with Windows ML](docs/windows-ml/resnet.mdx) +- [Running Transformer models with Windows ML](docs/windows-ml/googlebert.mdx) +- [Running CLIP with Windows ML](docs/windows-ml/clip.mdx) ## Demos -- [NPU-GPU pipeline on RyzenAI](Demos/NPU-GPU-Pipeline) -- [Automatic Speech Recognition using OpenAI Whisper](Demos/ASR/Whisper) +- [NPU-GPU pipeline on Ryzen AI](docs/vision/npu-gpu-pipeline.mdx) +- [Automatic Speech Recognition using OpenAI Whisper](docs/audio/whisper-asr.mdx) +- [Automatic Speech Recognition using NVIDIA Parakeet TDT optimized for AMD Ryzen AI](docs/audio/parakeet-tdt.mdx) ## Other Tutorials -- [AMD Quark Quantization](CNN-examples/quark_quantization) -- [Run Ryzen AI CVML library application](Ryzen-AI-CVML-Library) -- [Torchvision models End-to-End inference with Ryzen AI](CNN-examples/torchvision_inference) - +- [AMD Quark Quantization](docs/tools/quark-quantization.mdx) +- [Run Ryzen AI CVML library application](docs/vision/cvml.mdx) +- [Torchvision models end-to-end inference with Ryzen AI](docs/vision/torchvision.mdx) ## Benchmarking -- [ONNX benchmark utilities](onnx-benchmark) - +- [ONNX benchmark utilities](docs/tools/onnx-benchmark.mdx) ## Reference -- [Ryzen™ AI Developer Guide](https://ryzenai.docs.amd.com/en/latest) +- 
[Ryzen™ AI Developer Guide](https://ryzenai.docs.amd.com/en/latest) - [ONNX Runtime Vitis-AI EP](https://onnxruntime.ai/docs/execution-providers/Vitis-AI-ExecutionProvider.html) - [AMD AI Developer Forum](https://community.amd.com/t5/ai/ct-p/amd_ai) +- [AMD Developer Community Discord](https://discord.gg/amd-dev) + +## License + +Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE). diff --git a/Ryzen-AI-CVML-Library/LICENSE.txt b/Ryzen-AI-CVML-Library/LICENSE.txt deleted file mode 100644 index 4da0d9b3..00000000 --- a/Ryzen-AI-CVML-Library/LICENSE.txt +++ /dev/null @@ -1,300 +0,0 @@ -ADVANCED MICRO DEVICES, INC. -SOFTWARE LICENSE AGREEMENT - -SOFTWARE: AMD CVML SDK - -IMPORTANT-READ CAREFULLY: DO NOT INSTALL, COPY, OR USE THE ENCLOSED SOFTWARE, -SAMPLE CODE, DOCUMENTATION (ONLINE OR ELECTRONIC DOCUMENTATION ASSOCIATED WITH -THE SOFTWARE, IF ANY), OR ANY PORTION THEREOF, (COLLECTIVELY "SOFTWARE") UNTIL -YOU HAVE CAREFULLY READ AND AGREED TO THE FOLLOWING TERMS AND CONDITIONS. -THIS IS A LEGAL AGREEMENT ("AGREEMENT") BETWEEN YOU (EITHER AN INDIVIDUAL OR -AN ENTITY) ("YOU") AND ADVANCED MICRO DEVICES, INC. ("AMD"). IF YOU ARE -ENTERING INTO THIS AGREEMENT ON BEHALF OF A COMPANY OR OTHER LEGAL ENTITY, YOU -REPRESENT THAT YOU HAVE THE LEGAL AUTHORITY TO BIND THE ENTITY TO THIS -AGREEMENT, IN WHICH CASE "YOU" WILL MEAN THE ENTITY YOU REPRESENT. - -IF YOU DO NOT AGREE TO THE TERMS OF THIS AGREEMENT, DO NOT INSTALL, COPY OR -USE THIS SOFTWARE. BY INSTALLING, COPYING OR USING THE SOFTWARE YOU AGREE TO -ALL THE TERMS AND CONDITIONS OF THIS AGREEMENT. - -1. DEFINITIONS -1.1 "Documentation" means install scripts and online or electronic - documentation associated, included, or provided in connection with - Software, or any portion thereof. - -1.2 "Derivative Works" means any work, revision, modification or adaptation - made to or derived from the Sample Code in whole or in part as - integrated into or combined with your products. 
- -1.3 "Free Software License" means an open source or other license that - requires, as a condition of use, modification or distribution, that any - resulting software must be (a) disclosed or distributed in source code - form; (b) licensed for the purpose of making derivative works; or (c) - redistributable at no charge. - -1.4 "Intellectual Property Rights" means all copyrights, trademarks, trade - secrets, patents, mask works, and all related, similar, or other - intellectual property rights recognized in any jurisdiction worldwide, - including all applications and registrations with respect thereto. - -1.5 "Object Code" means machine readable computer programming code files, - which are not in a human readable form. - -1.6 "Sample Code" means the header files and Source Code identified within - the Software as sample code. - -1.7 "Source Code" means computer programming code in the human readable - form and related system level documentation, including all associated - comments, symbols and any procedural code such as job control language. - -2. LICENSES. Subject to the terms and conditions of this Agreement, AMD - hereby grants You a non-exclusive, revocable, non-transferable, - non-assignable, limited, copyright license to: - (a) install, use, and copy the Software for internal use only in Object - Code form at Your sites solely for the purpose of evaluating the - Software for use with AMD products as used with Your products; - (b) create Derivative Works of the Software for evaluation of the - Software together with Your Products; - (c) distribute and sublicense the Derivative Works in Object Code form - only to your customers and end users when incorporated within Your - products for use with AMD products. Such distribution may be made - through multiple tiers of distribution, only subject to an end user - license agreement that meets the requirements in Section 2.1. - -2.1 End User License Agreement. 
Distribution of Software by You will be - pursuant to an enforceable end user license agreement ("End User - License Agreement") with terms and conditions that at a minimum are - substantially similar to those set forth in Section 3 and the - following: (a) prohibition on transfer or duplication of the Software - (except for reasonable backup); (b) prohibitions on reverse engineering - (unless allowed by law for interoperability), disassembly or - de-compilation of the Software; (c) disclaimer, to the extent permitted - by applicable law, of You and Your licensors' liability for any - damages, whether punitive, direct, incidental, indirect, special or - consequential damages, arising from the use of, or distribution of the - Software; (d) requirement that the end user comply fully with all - relevant export laws and regulations of the United States and other - applicable export and import laws; and (e) notification to the end user - that the Software is subject to a restricted license and can only be - used in conjunction with the intended AMD products. You will be - financially responsible for all claims and damages to AMD caused by a - breach of this Section 2.1. AMD is a third party beneficiary of any End - User License Agreement. - -3. RESTRICTIONS. Except for the limited license expressly granted in - Section 2 herein, You have no other rights in the Software, whether - express, implied, arising by estoppel, or otherwise. 
Except as - expressly set forth in Section 2 You may not: - a) modify or create derivative works of the Software; - b) distribute, publish, display, sublicense, assign or otherwise - transfer the Software; - c) decompile, reverse engineer, disassemble or otherwise reduce the - Software in Object Code to a human-perceivable form (except as - allowed by applicable law but then only to the limited extent of - such law); - d) alter or remove any copyright, trademark or patent notice(s) in the - Software; - e) use AMD's trademarks, logos, and/or product names in Your product - name or advertising in a way that suggests Your product comes from - or is endorsed by AMD; - f) use the Software in applications intended to support or sustain - life, or in which the failure of Your product could create a - situation where personal injury, death, or severe property or - environmental damage may occur; - g) use the Software to: (i) develop inventions directly derived from - Confidential Information to seek patent protection (ii) assist in - the analysis of Your patents and patent applications or (iii) modify - Your existing patents or patent applications; - h) use the Software in a way that requires that the Software or any - portion thereof to be licensed under any under license in violation - of Section 2.1, including but not limited to a Free Software - License. - -4. OWNERSHIP. The Software is licensed and not sold by AMD and all - Intellectual Property Rights in and to the Software, is and remains the - sole and exclusive property of AMD or its licensors, and You have no - right, title or interest therein except as expressly set forth in this - Agreement. - -5. FEEDBACK; DERIVATIVE WORKS. You have no obligation to give AMD any - suggestions, comments or other feedback ("Feedback") relating to the - Software. However, AMD may use and include any Feedback that it - receives from You to improve the Software or other AMD products, - software and technologies. 
Accordingly, for any Feedback You provide - to AMD, You grant AMD and its affiliates and subsidiaries a worldwide, - non-exclusive, irrevocable, royalty-free, perpetual license to, - directly or indirectly, use, reproduce, license, sublicense, - distribute, make, have made, sell and otherwise commercialize the - Feedback in the Software or other AMD products, software and - technologies. You further agree not to provide any Feedback that (a) - You know is subject to any Intellectual Property Rights of any third - party or (b) is subject to Free Software License. You grant AMD a - worldwide, irrevocable, perpetual, transferable, royalty-free license - with the right to sublicense, to make, have made use, disclose, - reproduce, modify, create derivative works of, license, sell, offer for - sale, and otherwise distribute the Derivative Works. - -6. SUPPORT AND UPDATES. AMD is under no obligation to provide any kind of - support under this Agreement. Furthermore, You are solely responsible - for providing support to Your end users for Your products. AMD may, in - its sole discretion, provide to You updates to the Software, and such - updates will be covered as Software under this Agreement. - -7. THIRD PARTY MATERIALS. The Software may include third party - technologies for which You must obtain licenses from parties other than - AMD (collectively, "Third Party Materials"). These third party - technologies are not licensed as part of the Software licensed under - this Agreement and are used at Your sole discretion. You are solely - responsible for obtaining all applicable Intellectual Property Rights - to use the Third Party Technologies. 
In addition, the Software may - include code or content subject to an open source or similar license - ("Open Source Component"), which includes any license that requires, as - a condition of use, modification or distribution, that the resulting - software must be (a) disclosed in source code form; (b) licensed for - purpose of making derivative works; or (c) redistributable at no - charge. To the extent that an Open Source Component is subject to a - license that is inconsistent with this Agreement, then such Open Source - Component shall be governed solely by the applicable license. - -8. Disclaimer OF Warranty. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT - WARRANTY OF ANY KIND. AMD AND ITS LICENSORS DISCLAIM ALL WARRANTIES, - EXPRESS, IMPLIED, OR STATUTORY, INCLUDING BUT NOT LIMITED TO THE - IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, TITLE, AND NON-INFRINGEMENT, THAT THE SOFTWARE WILL RUN - UNINTERRUPTED OR ERROR-FREE OR WARRANTIES ARISING FROM CUSTOM OF TRADE - OR COURSE OF USAGE. THE ENTIRE RISK ASSOCIATED WITH THE USE OF THE - SOFTWARE IS ASSUMED BY YOU INCLUDING, WITHOUT LIMITATION, THE RISK OF - DATA CORRUPTION OR LOSS. Some jurisdictions do not allow the exclusion - of implied warranties, so the above exclusion may not apply to You. - -9 Limitation of Liability. AMD AND ITS LICENSORS WILL NOT, UNDER ANY - CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT, INCIDENTAL, - INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES INCLUDING LOSS OF USE, - PROFITS, OR DATA ARISING FROM USE OF THE SOFTWARE, DERIVATIVE WORKS, OR - THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN ADVISED OF THE - POSSIBILITY OF SUCH DAMAGES. In no event shall AMD's total liability - for all damages, losses, and causes of action (whether in contract, - tort (including negligence) or otherwise) exceed the amount of $100 - USD. - -10. Indemnity. 
You agree to defend, indemnify and hold harmless AMD and its - licensors, and any of their directors, officers, employees, affiliates - or agents from and against any and all loss, damage, liability and - other expenses (including reasonable attorneys' fees), resulting from - (a) Your use, distribution or sublicense of the Software or Derivative - Works, - (b) violation of the terms and conditions of this Agreement by - You or any sublicense, or - (c) Your failure to obtain and comply with - any third party licenses that may be required pursuant to Section 7 - herein. The parties agree that these limitations are an essential - element in setting consideration herein. - -11. CONFIDENTIALITY. You shall protect the Software and any information - related thereto (collectively, "Confidential Information") by using the - same degree of care, but no less than a reasonable degree of care, to - prevent the unauthorized use, dissemination or publication of the - Confidential Information as You use to protect your own confidential - information of a like nature. You shall not disclose any Confidential - Information disclosed hereunder to any third party and shall limit - disclosure of Confidential Information to only those of your employees - and contractors with a need to know and who are bound by - confidentiality obligations with You at least as restrictive as those - contained in this Agreement. You shall be responsible for your - employees and contractors' adherence to the terms of this Agreement. 
- You may disclose Confidential Information in accordance with an - authorized judicial or other governmental order, provided that You - either (a) give AMD prompt written notice prior to such disclosure - (except where prohibited by applicable law) to allow AMD a reasonable - opportunity to seek a protective order or equivalent or (b) obtain - written assurance from the applicable judicial or governmental entity - that it will afford the Confidential Information the highest level of - protection afforded under applicable law or regulation, in which case - You agree to disclose only that portion of Confidential Information - required by such applicable judicial or governmental entity. - -12. TERMINATION AND SURVIVAL. Either party may terminate this Agreement - upon thirty (30) days prior written notice to the other party. This - Agreement will terminate immediately without notice from AMD or - judicial resolution upon Your breach or Your sublicensee's breach of - the terms of this Agreement. The termination of this Agreement shall: - (i) immediately result in the termination of all rights granted by You - to distribute the Software through multiple tiers of distribution - under Section 2; and - (ii) have no effect on any sublicenses previously granted by You to end - users under Subsection 2.1 which are compliant with all terms and - conditions of this Agreement, and such sublicenses shall survive - in accordance with their terms. Upon termination or expiration of - this Agreement, all provisions survive except for Section 2, and - You will cease using and destroy or return to AMD, at AMD's - discretion, all copies of the Confidential Information, including - but not limited to all copies of the Software. - -13. EXPORT RESTRICTIONS. You shall adhere to all applicable U.S. - import/export laws and regulations, as well as the import/export - control laws and regulations of other countries as applicable. 
You - further agree to not export, re-export, or transfer, directly or - indirectly, any product, technical data, software or source code - received from AMD under this license, or the direct product of such - technical data or software to any country for which the United States - or any other applicable government requires an export license or other - governmental approval without first obtaining such licenses or - approvals; or in violation of any applicable laws or regulations of the - United States or the country where the technical data or software was - obtained. You acknowledge the technical data and software received - will not, in the absence of authorization from U.S. or local law and - regulations as applicable, be used by or exported, re-exported or - transferred to: - (i) any sanctioned or embargoed country, or to nationals or residents - of such countries; - (ii) any restricted end-user as identified on any applicable - government end-user list; or - (iii) any party where the end-use involves nuclear, chemical/biological - weapons, rocket systems, or unmanned air vehicles. For the most - current Country Group listings, or for additional information - about the EAR or Your obligations under those regulations, please - refer to the U.S. Bureau of Industry and Security's website at - http://www.bis.doc.gov/. - -14. NOTICE TO U.S. GOVERNMENT END USERS. The Software and related - documentation are "commercial items", as that term is defined at 48 - C.F.R. 2.101, consisting of "commercial computer software" and - "commercial computer software documentation", as such terms are used in - 48 C.F.R. 12.212 and 48 C.F.R. 227.7202, respectively. Consistent - with 48 C.F.R. 12.212 or 48 C.F.R. 227.7202-1 through 227.7202-4, as - applicable, the commercial computer software and commercial computer - software documentation are being licensed to U.S. 
Government end users - (a) only as commercial items and - (b) with only those rights as are granted to all other end users - pursuant to the terms and conditions set forth in this Agreement. - Unpublished rights are reserved under the copyright laws of the - United States. - -15. GENERAL PROVISIONS. This Agreement is made under and shall be - construed according to the laws of the State of Texas, excluding - conflicts of law rules, and is deemed to have been executed and - performed in Austin, Texas. Each party submits to the jurisdiction of - the state and federal courts of Travis County and the Western District - of Texas for the purposes of this Agreement. Licensee acknowledges - that its breach of this Agreement may cause irreparable damage and - agrees that AMD shall be entitled to seek injunctive relief under this - Agreement, as well as such further relief as may be granted by a court - of competent jurisdiction. You may not assign this Agreement without - the prior written consent of AMD and any assignment without such - consent will be null and void. The parties do not intend that any - agency or partnership relationship be created between them by this - Agreement. Each provision of this Agreement shall be interpreted in - such a manner as to be effective and valid under applicable law. - However, if any provision of this Agreement becomes or is declared - unenforceable by any court of competent jurisdiction, such provision - shall be deemed deleted and the remainder of this Agreement shall - remain in full force and effect. This Agreement sets forth the entire - agreement and understanding between the Parties with respect to the - Software and supersedes and merges all prior oral and written - agreements, discussions and understandings between them regarding the - subject matter of this Agreement. 
No waiver or modification of any - provision of this Agreement shall be binding unless made in writing and - signed by an authorized representative of each Party. - -ADVANCED MICRO DEVICES, INC. -SOFTWARE LICENSE AGREEMENT diff --git a/Ryzen-AI-CVML-Library/cmake/RyzenAILibraryConfig.cmake b/Ryzen-AI-CVML-Library/cmake/RyzenAILibraryConfig.cmake deleted file mode 100644 index f76ba970..00000000 --- a/Ryzen-AI-CVML-Library/cmake/RyzenAILibraryConfig.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. -# - -# get absolute library path -get_filename_component(PREFIX ${RyzenAILibrary_DIR}/.. ABSOLUTE) - -# set default PLATFORM to windows if not specified -if (NOT DEFINED PLATFORM) -if(WIN32) - set(PLATFORM windows) -else() - set(PLATFORM linux) -endif() -endif() - -# update include directories -set(RyzenAILibrary_INCLUDE_DIRS ${PREFIX}/include) -include_directories(${RyzenAILibrary_INCLUDE_DIRS}) - -# collect all available library files -link_directories(${PREFIX}/${PLATFORM}) -if (${PLATFORM} MATCHES "windows") - file(GLOB RyzenAILibrary_LIBS ${PREFIX}/${PLATFORM}/*.lib) -else() - file(GLOB RyzenAILibrary_LIBS ${PREFIX}/${PLATFORM}/*.so) - add_link_options(-Wl,--as-needed) -endif() - -# generate output variables for find_package -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(RyzenAILibrary DEFAULT_MSG RyzenAILibrary_LIBS RyzenAILibrary_INCLUDE_DIRS) diff --git a/Ryzen-AI-CVML-Library/include/cvml-api-common.h b/Ryzen-AI-CVML-Library/include/cvml-api-common.h deleted file mode 100644 index d76287f8..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-api-common.h +++ /dev/null @@ -1,75 +0,0 @@ -// -// Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. 
-// - -#ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_API_COMMON_H_ -#define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_API_COMMON_H_ - -#include - -#ifndef _WIN32 -#define CVML_SDK_EXPORT -#define CVML_SDK_EXPORT_CORE -#define CVML_SDK_NO_EXPORT -#define CVML_SDK_DEPRECATED -#define CVML_SDK_DEPRECATED_EXPORT -#define CVML_SDK_DEPRECATED_NO_EXPORT -#else - -#ifdef CVML_SDK_STATIC_DEFINE -#define CVML_SDK_EXPORT -#define CVML_SDK_EXPORT_CORE -#define CVML_SDK_NO_EXPORT -#else - -#ifndef CVML_SDK_EXPORT_CORE -#ifdef cvml_sdk_EXPORTS_0 -/* We are building this core library */ -#define CVML_SDK_EXPORT_CORE __declspec(dllexport) -#else -/* We are using this core library */ -#define CVML_SDK_EXPORT_CORE __declspec(dllimport) -#endif -#endif - -#ifndef CVML_SDK_EXPORT -#ifdef cvml_sdk_EXPORTS -/* We are building this library */ -#define CVML_SDK_EXPORT __declspec(dllexport) -#else -/* We are using this library */ -#define CVML_SDK_EXPORT __declspec(dllimport) -#endif -#endif - -#ifndef CVML_SDK_NO_EXPORT -#define CVML_SDK_NO_EXPORT -#endif -#endif - -#ifndef CVML_SDK_DEPRECATED -#define CVML_SDK_DEPRECATED __attribute__((__deprecated__)) -#endif - -#ifndef CVML_SDK_DEPRECATED_EXPORT -#define CVML_SDK_DEPRECATED_EXPORT CVML_SDK_EXPORT CVML_SDK_DEPRECATED -#endif - -#ifndef CVML_SDK_DEPRECATED_NO_EXPORT -#define CVML_SDK_DEPRECATED_NO_EXPORT CVML_SDK_NO_EXPORT CVML_SDK_DEPRECATED -#endif - -#endif - -#define AMD_CVML_INTERFACE(TypeName) \ - public: \ - virtual ~TypeName(); \ - \ - protected: \ - TypeName(); \ - TypeName(const TypeName&) = delete; \ - TypeName& operator=(const TypeName&) = delete; \ - TypeName(TypeName&&) noexcept = delete; \ - TypeName& operator=(TypeName&&) noexcept = delete; - -#endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_API_COMMON_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-context.h b/Ryzen-AI-CVML-Library/include/cvml-context.h deleted file mode 100644 index 13c799e3..00000000 --- 
a/Ryzen-AI-CVML-Library/include/cvml-context.h +++ /dev/null @@ -1,235 +0,0 @@ -/*! - * Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. - * - * @file - * - * Definitions for SDK contexts and associated structures/functions. - */ - -#ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_CONTEXT_H_ -#define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_CONTEXT_H_ - -#include "cvml-api-common.h" -#include "cvml-logger.h" - -namespace amd { -namespace cvml { - -/** - * Maximum number of different platforms the SDK can support. - */ -static const uint32_t MAX_SUPPORTED_PLATFORMS = 10; - -/** - * Structure of platforms supported by the SDK. - * - * @see \a amd::cvml::Context - */ -struct SupportedPlatformInformation { - /** - * Structure describing a single supported platform. - */ - struct SupportedPlatform { - /// Device ID of supported AMD APU - /// @deprecated Always returns -1 - int64_t device_id; - - /// Required minimum Vulkan driver version on supported AMD APU - int64_t required_gpu_minimal_vulkan_driver_version; - } platform[MAX_SUPPORTED_PLATFORMS]; ///< Array of supported platforms. - - /// Total number of supported AMD APU platforms. - /// @deprecated Always returns amd::cvml::MAX_SUPPORTED_PLATFORMS - /// @see \a amd::cvml::MAX_SUPPORTED_PLATFORMS - uint32_t supported_platform_count; - - /// Whether supported platform checking is enforced. - bool checking_enforced; -}; - -/** - * Execution context for Ryzen AI CVML library features. - * - * An appropriate context must be created by calling, - * - * amd::cvml::CreateContext() - * - * before using any features in the Ryzen AI CVML library and provided - * to the feature constructor(s). - * - * The context can be shared by multiple features of the SDK. - */ -class CVML_SDK_EXPORT_CORE Context { - AMD_CVML_INTERFACE(Context); - - public: - /** - * Releases all resources for the context and destroys it. 
- */ - virtual void Release() = 0; - - /** - * Sets the verbosity of the log. - * - * @param level SDK feature log level - */ - virtual void SetLogLevel(Logger::LogLevels level) = 0; - - /** - * Gets the pointer to the logger object. - * - * @return Pointer to logger object - */ - virtual Logger* GetLogger() const = 0; - - /** - * Get the Supported Platform Information object. - * - * @param info Pointer to structure for receiving platform information - * @return true on success, false on failure - */ - static bool GetSupportedPlatformInformation(amd::cvml::SupportedPlatformInformation* info); - - /** - * Defines the inference backends that can be supported by the SDK. - * - * These are provided to the \a SetInferenceBackend API function. - */ - enum InferenceBackend { - AUTO, ///< Allow the SDK to select the hardware for inference operations - GPU, ///< Use GPU hardware for inference operations - NPU, ///< Use NPU hardware for inference operations - CPU, ///< Use CPU hardware for inference operations - dGPU ///< Use discrete GPU hardware, if available, for inference operations - }; - - /** - * Defines the source streaming mode for feature processing. - */ - enum StreamingMode { - ONE_SHOT, ///< Features should expect to process independent images. - ONLINE_STREAMING, ///< Input images are part of real-time streaming content. - OFFLINE_STREAMING ///< Features are intended to process offline streaming content. - }; - - /** - * Specifies the inference backend for subsequently created features. - * - * This function does not affect any features that were instantiated - * via the context before its call. If a feature is unable to support - * a specified inference backend, it will refuse to construct and an - * exception will be thrown instead. 
- * - * @param inference_backend Desired hardware inference backend - * @return true if backend updated - */ - bool SetInferenceBackend(InferenceBackend inference_backend); - - /** - * Returns the inference backend selection strategy for newly created features. - * - * @return Current hardware inference backend selection - */ - InferenceBackend GetInferenceBackend() const; - - /** - * Returns the current streaming mode. - * - * See \a amd::cvml::Context::SetStreamingMode for more details. - * - * @return Currently configured streaming mode. - */ - StreamingMode GetStreamingMode() const; - - /** - * Set the streaming mode for the context. - * - * The requested streaming mode is used to configure new features - * that are constructed against the context. Any features that - * were created before are not affected by changing streaming - * mode changes. - * - * See \a amd::cvml::Context::StreamingMode - * - * @param mode Desired streaming mode. - */ - void SetStreamingMode(StreamingMode mode); - - /** - * Get current CVML 'nice' setting. - * - * See \a amd::cvml::Context::SetNiceMode() for more details. - * - * @return true if 'nice' request is currently enabled - */ - bool GetNiceMode(); - - /** - * Set current CVML 'nice' settings. - * - * If enabled, the CVML 'nice' mode directs underlying inference - * engines to run in a lower scheduling priority or more power - * efficient mode if possible. The setting is applied to all - * CVML features that were instantiated with the current context - * and may be changed at any time. - * - * This setting provides a hint to the underlying inference - * execution framework but does not guarantee lower priority - * execution or more power efficient inference. Applications may - * enable this setting of the use case is tolerant to occasionally - * longer inference latencies as a tradeoff for potentially reducing - * power consumption. 
- * - * @param nice_mode Set to true to enable 'niceness' for subsequent features - */ - void SetNiceMode(bool nice_mode); - - /** - * Return if NPU is available on platform - * - * @return true if NPU available - */ - static bool IsNPUAvailable(); - - /** - * Return if iGPU is available on platform - * - * @return true if iGPU available - */ - static bool IsiGPUAvailable(); - - /** - * Return if dGPU is available on platform - * - * @return true if dGPU available - */ - static bool IsdGPUAvailable(); - - /** - * Get detected NPU driver version. - * On Linux, NPU driver will return 1 if legacy driver detected - * - * @return NPU driver version, or 0 if not detected - */ - uint32_t GetNPUDriverVersion(); - - public: - class Impl; - Impl* impl_; ///< Pointer to context implementation -}; - -/** - * Create a Ryzen AI context. - * - * @param log_level Sets the log level. Default value is kINFO - * @param logger External logger for the context. Default value is nullptr - * @see \a amd::cvml::Logger - * @return Pointer to the created Context - */ -CVML_SDK_EXPORT_CORE amd::cvml::Context* CreateContext( - Logger::LogLevels log_level = Logger::LogLevels::kINFO, Logger* logger = nullptr); - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_CONTEXT_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-depth-estimation.h b/Ryzen-AI-CVML-Library/include/cvml-depth-estimation.h deleted file mode 100644 index c0cddbcc..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-depth-estimation.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. 
- */ - -#ifndef EDGEML_FEATURES_DEPTH_ESTIMATION_INCLUDE_CVML_DEPTH_ESTIMATION_H_ -#define EDGEML_FEATURES_DEPTH_ESTIMATION_INCLUDE_CVML_DEPTH_ESTIMATION_H_ - -#include "cvml-api-common.h" -#include "cvml-context.h" -#include "cvml-image.h" -#include "cvml-types.h" - -namespace amd { -namespace cvml { - -/** - * Depth Estimation feature class. - * - * Based on the provided images, the feature calculates a relative depth map - * for each invocation of the \a GenerateDepthMap() function. Appropriate - * resize and normalization is done during pre/post processing by the - * \a GenerateDepthMap() function to generate a depth map for each frame. - * - * Example - * - * // create Ryzen AI context - * auto context = amd::cvml::CreateContext(); - * - * // create depth estimation feature - * amd::cvml::DepthEstimation feature(context); - * - * // iterate over input frames - * for (auto frame ... ) { - * // encapsulate input image - * amd::cvml::Image input( ... ); - * - * // encapsulate output image - * amd::cvml::Image output( ... ); - * - * // generate depth map - * feature.GenerateDepthMap(input, &output); - * } - */ -class CVML_SDK_EXPORT DepthEstimation { - AMD_CVML_INTERFACE(DepthEstimation); - - public: - /** - * Constructor for DepthEstimation feature. - * - * Creates a Depth Estimation feature object against the provided CVML context. - * This function throws exceptions on errors. - * @param context CVML context - */ - explicit DepthEstimation(Context* context); - - /** - * Generate depth map from an image. - * - * This function throws exceptions on errors. - * - * Each call of this function returns a depth map of - * floating point values representing the relative depth of the pixels - * corresponding to the width/height of the uncropped image frame. 
- * - * @param input Reference to the Image input - * @param output Pointer to the Image output as a floating point grayscale buffer - * @return true if the output Image has been populated with inference information - */ - bool GenerateDepthMap(const Image& input, Image* output); - - /** - * Set the image type of the depth map output data. - * - * This function throws exceptions on errors. - * - * @param t The desired ImageType of the depth map output data - * \n Valid values are: kGrayScaleFloat16, kGrayScaleFloat32. - */ - [[deprecated("Output type is determined by the provided output image buffer")]] - void SetOutputType(ImageType t); - - /** - * Get the image type of the depth map output data. - * - * @return Image type of the depth estimation output (ImageType) - */ - [[deprecated("Output type is determined by the provided output image buffer")]] - ImageType GetOutputType() const; - - class Impl; - - protected: - Impl* impl_; ///< Implementation of depth estimation interface. -}; - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_DEPTH_ESTIMATION_INCLUDE_CVML_DEPTH_ESTIMATION_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-face-detector.h b/Ryzen-AI-CVML-Library/include/cvml-face-detector.h deleted file mode 100644 index b80dd2af..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-face-detector.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. - */ - -#ifndef EDGEML_FEATURES_FACE_DETECTOR_INCLUDE_CVML_FACE_DETECTOR_H_ -#define EDGEML_FEATURES_FACE_DETECTOR_INCLUDE_CVML_FACE_DETECTOR_H_ - -#include "cvml-context.h" -#include "cvml-image.h" -#include "cvml-types.h" - -namespace amd { -namespace cvml { - -/** - * Face Detection feature class. - * - * The FaceDetector class offers an interface for efficient face detection in various model types. 
- * - * Supporting Fast and Precise model types, the class is initialized with a specified - * CVML context, allowing users to switch between models during runtime. Key features include: - * - * - Detecting faces and their landmarks in input images - * - Adjusting the detection threshold - * - Tracking detected faces across consecutive frames - * - * By utilizing the detect and tracking functions, the FaceDetector class allows precise face - * detection and seamless integration in applications. - * - * Example: - * - * // create Ryzen AI context - * auto context = amd::cvml::CreateContext(); - * - * // create face detector feature - * amd::cvml::FaceDetector feature(context); - * - * // encapsulate input image - * amd::cvml::Image input( ... ); - * - * // detect faces in the input image - * auto faces = feature.Detect(input); - */ - -class CVML_SDK_EXPORT FaceDetector { - AMD_CVML_INTERFACE(FaceDetector); - - public: - /** - * Fast - Optimized for performance - * Precise - Optimized for acccuracy - */ - enum class FDModelType { Fast, Precise }; - - /** - * Constructor for FaceDetector class. - * - * Constructs a FaceDetector object with a specified CVML context and face detection model type. - * Face detection model types include Fast, and Precise. - * It throws exceptions if any errors occur during processing. - * @param context CVML context - * @param model_type Face detection model type (default: FDModelType::Precise) - */ - explicit FaceDetector(Context* context, FDModelType model_type = FDModelType::Precise); - - /** - * Main face detection function. - * This function is the main entry point for face detection in the provided image. - * It should be called for each frame in an application or video stream. - * The function detects faces and associated landmarks in the input image. - * It throws exceptions if any errors occur during processing. - * @param img Input image of type amd::cvml::Image. - * @return Array of detected faces of type amd::cvml::Face. 
- */ - Array Detect(const Image& img) const; - - /** - * Set the detection threshold for face detection. - * - * This function sets the minimum confidence score required for - * faces to be included in the detection output. Faces with a confidence - * score below the specified threshold will be ignored. - * - * The default threshold is 0.5 - */ - void SetDetectionThreshold(float detection_threshold); - - /** - * Set the face detection model type. - * - * This function allows the user to switch between different face detection - * models while the application is running. The available model types are: - * Fast - Optimized for performance. - * Precise - Optimized for accuracy. - * Changing the model type can affect the speed and accuracy of the face - * detection results. The optimal model type may vary depending on the - * requirements of the application and the specific use case. - * @param model_type required model type - */ - void SetModelType(FDModelType model_type); - - /** - * Get tracking IDs for face detection. - * - * This function provides consistent face IDs for detected faces in consecutive frames. - * IDs in the array will be in the same order as the Face Array from the last Detect call. - * Array will be empty if Detect has not been called before. - */ - Array GetTrackedIDs() const; - - /** - * Get the Transformation Matrix from World Space to Camera space - * based on the provided face. - * - * @param face face object used to map world coordinates to image points - * @return Array flattened 3x4 [R|t] matrix represented as [r_00, r01, r02, t0, r_10, ...] 
- * where translations are in meters - */ - Array GetTransformationMatrix(const Face& face) const; - - /** - * Get position of the middle point between both eyes in - * Camera space based on the provided face in centimeters - * - * @param face face object used to map world coordinates to image points - * @return Point3d position of head in camera space - */ - Point3d GetHeadPosition(const Face& face) const; - - /** - * Get the distance from the middle point between both eyes to the camera in centimeters - * - * @param face face to get the head distance from camera for - * @return double distance of the camera in meters - */ - double GetHeadDistanceFromCamera(const Face& face) const; - - /** - * Set camera focal length for more accurate transofrmation matrix calculation - * - * @param fx focal length of the camera in the x axis - * @param fy focal length of the camera in the y axis - */ - void SetFocalLength(double fx, double fy); - - class Impl; - - protected: - Impl* impl_; ///< Implementation of face detector interface. -}; - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_FACE_DETECTOR_INCLUDE_CVML_FACE_DETECTOR_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-face-mesh.h b/Ryzen-AI-CVML-Library/include/cvml-face-mesh.h deleted file mode 100644 index bf2110e9..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-face-mesh.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. - */ -#ifndef EDGEML_FEATURES_FACE_MESH_INCLUDE_CVML_FACE_MESH_H_ -#define EDGEML_FEATURES_FACE_MESH_INCLUDE_CVML_FACE_MESH_H_ - -#include "cvml-api-common.h" -#include "cvml-context.h" -#include "cvml-face-detector.h" -#include "cvml-image.h" -#include "cvml-types.h" - -namespace amd { -namespace cvml { - -/** - * Face Mesh Detection feature class. - * - * The FaceMesh class enables the generation of high-quality 3D face meshes from input images using - * different model types. 
It simplifies creating instances of face mesh objects and estimating their - * 3D landmarks with the following features: - * - Allowing configuration of the maximum number of faces for mesh creation - * - Enabling/disabling the computation of head poses - * - Generating face meshes for the largest detected face, a specific Face object, or a region of - * interest (ROI) in the image - * - * Example: - * - * // create Ryzen AI context - * auto context = amd::cvml::CreateContext(); - * - * // create face detector feature - * amd::cvml::FaceDetector fd(context); - * - * // create face mesh feature - * amd::cvml::FaceMesh feature(context); - * - * // encapsulate input image - * amd::cvml::Image input( ... ); - * - * // detect faces in the input image - * auto faces = feature.Detect(input); - * - * // generate mesh of first detected face - * auto mesh = face_mesh_->CreateMesh(input, faces[0]); - */ -class CVML_SDK_EXPORT FaceMesh { - AMD_CVML_INTERFACE(FaceMesh); - - public: - /** - * Struct containing output of FaceMesh - */ - struct CVML_SDK_EXPORT Mesh { - Array landmarks_; /// Array of 3D landmarks - }; - - /// Face Mesh Implementaiton class - class Impl; - - /** - * Constructor. - * Creates a FaceMesh instance using the specified CVML context. - * @param context: Pointer to the CVML context used for initializing the object - */ - explicit FaceMesh(Context* context); - - /** - * Set the maximum number of faces for mesh creation. This function is used to - * determine the face mesh for the largest face in an image by limiting the - * number of faces generated during the mesh creation process. - * @param max_num_faces Maximum number of faces for mesh generation. - */ - void SetMaxNumFaces(int max_num_faces) const; - - /** - * Generate face mesh for the largest detected face in the image. - * @param img Input image to generate the face mesh for. - * @return An Array of Mesh objects containing 3D landmarks and mesh transformation matrix. 
- */ - Array CreateMesh(const Image& img) const; - - /** - * Generate face mesh for the given Face object in the image. - * Useful when skipping face detection step. - * @param img Input image to generate the face mesh for. - * @param face Face structure obtained from face detection. - * @return A Mesh object containing 3D landmarks and mesh transformation matrix. - */ - Mesh CreateMesh(const Image& img, const Face& face) const; - - /** - * Generate face mesh for the given region of interest (ROI) in the image. - * Useful when skipping face detection step. - * @param img Input image to generate the face mesh for. - * @param roi Image region of interest containing the face. - * @return A Mesh object containing 3D landmarks and mesh transformation matrix. - */ - Mesh CreateMesh(const Image& img, const Rect_i& roi) const; - - ///////////////// Head Pose API ///////////////// - - /** - * Get the Transformation Matrix from World Space to Camera space - * based on the provided face. - * - * @param mesh mesh object used to map world coordinates to image points - * @return Array flattened 3x4 [R|t] matrix represented as [r_00, r01, r02, t0, r_10, ...] 
- */ - Array GetTransformationMatrix(const Mesh& mesh) const; - - /** - * Get position of the middle point between both eyes in - * Camera space based on the provided mesh in centimeters - * - * @param face face object used to map world coordinates to image points - * @return Point3d position of head in camera space - */ - Point3d GetHeadPosition(const Mesh& mesh) const; - - /** - * Get the distance from the middle point between both eyes to the camera in centimeters - * - * @param face_mesh mesh to get the head distance from camera for - * @return double distance of the camera in meters - */ - double GetHeadDistanceFromCamera(const Mesh& face_mesh) const; - - /** - * Set camera focal length for more accurate transofrmation matrix calculation - * - * @param fx_pxl focal length of the camera in the x axis - * @param fy_pxl focal length of the camera in the y axis - */ - void SetFocalLengthInPixels(double fx_pxl, double fy_pxl); - - /** - * Set the Focal Length and sensor width of the camera - * - * @param focal_length_mm focal length in mm - * @param sensor_width_mm camera sensor width in mm - */ - void SetFocalLengthInMillimeters(double focal_length_mm, double sensor_width_mm); - - /** - * Get the x,y image coordinates for the center of the left eye - * and the right eye - * - * @param face_mesh mesh to calculate the left eye coordinates for - * @param left_eye_center pointer to left eye point - * @param right_eye_center pointer to right eye point - * @return bool if both centers were computed sucessfully - */ - bool GetEyeCenterCoordinates(const Mesh& face_mesh, Point2f* left_eye_center, - Point2f* right_eye_center) const; - - private: - Impl* impl_; ///< Implementation of face mesh interface. 
-}; -/** - * Interface class for face mesh array - */ -template class CVML_SDK_EXPORT Array; -} // namespace cvml -} // namespace amd -#endif // EDGEML_FEATURES_FACE_MESH_INCLUDE_CVML_FACE_MESH_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-image.h b/Ryzen-AI-CVML-Library/include/cvml-image.h deleted file mode 100644 index 491f58cb..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-image.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. - */ - -#ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_IMAGE_H_ -#define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_IMAGE_H_ - -#include - -#include "cvml-types.h" - -namespace amd { -namespace cvml { - -class Context; - -/** - * The class representing an image - */ -class CVML_SDK_EXPORT_CORE Image { - public: - /** - * An enumeration of image formats - */ - enum Format { - kGrayScale, - kRGBA, - kRGBAp, - kBGRA, - kBGRAp, - kRGB, - kRGBp, - kBGR, - kBGRp, - kNV12, - kNV21, - kYUV420p, - kYUYV422, - kYUV444p, - kP010, - kUNDEFINED = -1 - }; - - /** - * Defines supported image data types. - */ - enum DataType { - kUint8, - kInt8, - kFloat16, - kFloat32, - kFixedPoint16I8F2LE, /**< P010 little endian */ - kFixedPoint16I8F2BE, /**< P010 big endian */ - kUint16, - }; - - /** - * A valid set of flags used to describe the image. 
- */ - enum Flags { - /** - * A hint that indicates the image will be used as an image source - * Potentially helpful for accelerating the processing - */ - kSource = 1 << 0, - - /** - * A hint that indicates the image will be used as an image target - * Potentially helpful for accelerating the processing - */ - kTarget = 1 << 1, - - /** - * Indicate the image will be created by importing a device local memory - * (for example, vulkan device local memory) - */ - kDeviceMemoryImport = 1 << 2, - - /** - * Indicate the image will be created on device local memory (for example, vulkan - * device local memory) and the image can be later exported via Image::Export(). - */ - kDeviceMemoryExport = 1 << 3, - }; - - /** - * Initializing constructor for image objects. - * - * @param format Image format - * @param data_type Image data type - * @param width The pixel width of the image - * @param height The pixel height of the image - * @param buffer (optional) A pointer to the image data. - * If buffer is null, the data will be allocated by the CVML context specified via Map() - * If buffer not null: - * - If Flags::kDeviceMemoryImport is not specified, the buffer is expected to be a host buffer. - * - If Flags::kDeviceMemoryImport is specified, the buffer shall point to a HANDLE/fd - * to a device local memory (for example, vulkan device local memory). - * @param stride (optional) if stride is not specified, image will be stored continuously. - * @param flags (optional): bit mask of Flags specifying the valid usage of the image. - * See Image::Flags for more information. Defaults to both source and target. - * If not specified, the default value is Flags::kSource | Flags::kTarget - */ - Image(Format format, DataType data_type, uint32_t width, uint32_t height, - uint8_t* buffer = nullptr, uint32_t stride = 0, - uint32_t flags = Flags::kSource | Flags::kTarget); - - /** - * Move constructor. 
- * Constructs the image by transferring ownership of the resources from the source - * image (other), leaving the source in a valid but unspecified state. - * - * @param other Source image from which to move - */ - Image(Image&& other) noexcept; - - /** - * Move assignment operator. - * Transfer ownership of the resources from the source image (other) to the - * destination image, leaving the source in a valid but unspecified state. - * - * @param other Source image from which to move - * @return Reference to the destination image (this) - */ - Image& operator=(Image&& other) noexcept; - - /** - * @deprecated - * Construct an Image class wrapper of host buffer - * Using this throws an exception. Use the other constructor. - */ - [[deprecated]] Image(ImageType img_type, uint32_t width, uint32_t height, uint32_t stride, - uint8_t* data_buf); - - /** - * @deprecated - * Returns the image type - * Using this throws an exception. Use GetFormat()/GetDataType(). - */ - [[deprecated("Use GetFormat()/GetDataType().")]] ImageType GetImageType() const; - - /** - * Get CPU pointer to the buffer. - * If the Image object was constructed without an existing buffer pointer, - * a new CPU buffer is allocated to back the object and returned by this - * function. - * @return Underlying CPU pointer for the Image buffer object - */ - uint8_t* GetBuffer() const; - - /** - * @deprecated - * Map CVML Image buffer using the specified CVML context. - * This function is deprecated. Use GetBuffer() instead. - * - * @param context CVML context to be associated with the image - * @param flags Flags for the operation - * @return Address of the mapped CVML Image buffer - */ - uint8_t* Map(Context* context, uint32_t flags = 0); - - /** - * Get the width of the image. - * @return The width of the image. - */ - uint32_t GetWidth() const; - - /** - * Get the height of the image. - * @return The height of the image. - */ - uint32_t GetHeight() const; - - /** - * Get format for this image. 
- * @return The format that the image was created with. - */ - Format GetFormat() const; - - /** - * Get the stride of the image. - * @return The stride (bytes per row) that the image was created with. - */ - uint32_t GetStride() const; - - /** - * Get data type for this image. - * @return The data type that the image was created with. - */ - DataType GetDataType() const; - - /** - * Get the usage flag bit mask for this image. - * - * @return The bit mask of flags that the image was created with. - */ - uint32_t GetFlags() const; - - /** - * Export the image so that it can be imported in a different device context - * (for example, vulkan context). To make an image exportable, the image must - * be created with Flags::kExport in constructor. - * - * @param handle Pointer to a handle the image wil be exported to. - * For windows, the pointer shall point to windows HANDLE struct. - * For linux, the pointer shall point to file desriptor (int). - * @return true on success, false on failure. - */ - bool Export(void* handle); - - virtual ~Image(); - Image(const Image&) = delete; - Image& operator=(const Image&) = delete; - - class Impl; - Impl* impl_; ///< Implementation of Image interface. -}; - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_IMAGE_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-logger.h b/Ryzen-AI-CVML-Library/include/cvml-logger.h deleted file mode 100644 index 5647fd81..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-logger.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. - */ - -#ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_LOGGER_H_ -#define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_LOGGER_H_ - -#include - -#include "cvml-api-common.h" - -namespace amd { -namespace cvml { - -/** - * Base class for capturing log messages from the SDK. 
- * - * To customize the target of log messages from the SDK, create a new C++ - * class derived from amd::cvml::Logger and implement its \a LogStr member - * function to direct formatted log messages to the target of choice. For - * example, a derived Logger class may choose to capture all log messages - * to a file on the file system or send them to another process or device. - */ -class CVML_SDK_EXPORT_CORE Logger { - AMD_CVML_INTERFACE(Logger); - - public: - /** - * Log levels to set the log output verbosity. - * Logger will print all the log messages if the log level is greater than - * or equal to the level which is already set. - */ - enum LogLevels { - kVERBOSE = 0, ///< To print all types of log messages - kDEBUG = 1, ///< To print debug type messages and the levels above kDEBUG - kINFO = 2, ///< To print information type messages and the levels above kINFO - kWARNING = 3, ///< To print warning messages and the levels above kWARNING - kERROR = 4, ///< To print error messages only - kDISABLED = 5, ///< To disable logging - }; - - public: - /** - * Set the required log level - * - * @param level A valid value from Logger::LogLevels - */ - void SetLogLevel(Logger::LogLevels level) { level_ = level; } - - /** - * Get the log level - * - * @return level A valid value from Logger::LogLevels - */ - Logger::LogLevels GetLogLevel() { return level_; } - - /** - * Write an entry into the log with a std::string message as input - * - * @param log_level Type of the log message - * @param msg Message of std::string type that needs to be logged - */ - void Log(amd::cvml::Logger::LogLevels log_level, const std::string& msg); - - /** - * Write an entry into the log with a C type string as input - * - * @param log_level Type of the log message - * @param msg C type string message that needs to be logged - */ - void Log(amd::cvml::Logger::LogLevels log_level, const char* msg); - - /** - * Output the actual log message. 
- * - * This capability is must be implemented by a derived class. - * - * @param msg C type string message to be logged - */ - virtual void LogStr(const char* msg) = 0; - - protected: - /// Currently configured log level - Logger::LogLevels level_ = Logger::LogLevels::kINFO; -}; - -// \deprecated -// This definition is retained for backward compatibility only. -using ICvmlLogger = Logger; - -// \deprecated -// This definition is retained for backward compatibility only. -using CvmlLogger = Logger; - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_LOGGER_H_ diff --git a/Ryzen-AI-CVML-Library/include/cvml-types.h b/Ryzen-AI-CVML-Library/include/cvml-types.h deleted file mode 100644 index 0b44b294..00000000 --- a/Ryzen-AI-CVML-Library/include/cvml-types.h +++ /dev/null @@ -1,377 +0,0 @@ -/*! - * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. - * - * @file - * - * Defines common types and structures for the CVML SDK. - */ - -#ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_TYPES_H_ -#define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_TYPES_H_ - -#include - -#include -#include - -#include "cvml-api-common.h" - -namespace amd { -namespace cvml { - -/** - * @deprecated - * Image types represent colour space and - * number of bits per pixel. - */ -enum ImageType { - kRgbUint8 = 1, - kRgbFloat16 = 2, - kRgbFloat32 = 3, - kGrayScaleUint8 = 4, - kGrayScaleFloat16 = 5, - kGrayScaleFloat32 = 6, - kRgbInt8 = 7, - kGrayScaleInt8 = 8, - kNV12Float32 = 9, - kNV12Uint8 = 10, - kRgbaUint8 = 11, - kRgbaFloat16 = 12, - kRgbaFloat32 = 13, - kRgbaInt8 = 14, - kNV12Uint16 = 15, -}; - -/** - * Structure for describing rectangular regions. - */ -template -struct CVML_SDK_EXPORT_CORE Rect { - /** - * Default constructor. - */ - Rect() : x_(0), y_(0), width_(0), height_(0) {} - - /** - * Initializing constructor. 
- * - * @param x X cordinate of top left corner - * @param y Y cordinate of top left corner - * @param width Rectange width - * @param height Rectange height - */ - Rect(_Tp x, _Tp y, _Tp width, _Tp height) : x_(x), y_(y), width_(width), height_(height) {} - - /// X cordinate of top left corner - _Tp x_; - - /// Y cordinate of top left corner - _Tp y_; - - /// Rectange width - _Tp width_; - - /// Rectange height - _Tp height_; -}; - -typedef Rect Rect_i; -typedef Rect Rect_f; -typedef Rect Rect_d; -typedef Rect Rect_u; - -/** - * Structure for 2-dimensional Point values. - */ -template -struct CVML_SDK_EXPORT_CORE Point { - /** - * Default constructor. - */ - Point() : x_(0), y_(0) {} - - /** - * Initializing constructor. - * - * @param x X coordinate - * @param y Y coordinate - */ - Point(_Tp x, _Tp y) : x_(x), y_(y) {} - - /// X cordinate of top left corner - _Tp x_; - - /// Y cordinate of top left corner - _Tp y_; -}; - -typedef Point Point2i; -typedef Point Point2f; -typedef Point Point2d; - -/** - * Structure for 3-dimensional Point values. - */ -template -struct CVML_SDK_EXPORT_CORE Point3 { - /** - * Default constructor. - */ - Point3() : x_(0), y_(0), z_(0) {} - - /** - * Initializing constructor. - * - * @param x X coordinate - * @param y Y coordinate - * @param z Z coordinate - */ - Point3(_Tp x, _Tp y, _Tp z) : x_(x), y_(y), z_(z) {} - - /// X cordinate of top left corner - _Tp x_; - - /// Y cordinate of top left corner - _Tp y_; - - /// Z cordinate of top left corner - _Tp z_; -}; - -typedef Point3 Point3i; -typedef Point3 Point3f; -typedef Point3 Point3d; - -/** - * Structure for quadrilaterals at arbitrary angles. - */ -template -struct CVML_SDK_EXPORT_CORE Quad { - /** - * Default constructor. - */ - Quad() = default; - - /** - * Initializing constructor using Points. 
- * - * @param top_left coordinates of top left point - * @param top_right coordinates of top right point - * @param bottom_left coordinates of bottom left point - * @param bottom_right coordinates of bottom right point - */ - Quad(Point<_Tp> top_left, Point<_Tp> top_right, Point<_Tp> bottom_left, Point<_Tp> bottom_right) - : top_left_(top_left), - top_right_(top_right), - bottom_left_(bottom_left), - bottom_right_(bottom_right) {} - - /** - * Initializing contructor using explict x and y values. - * - * @param x_tl top left x value - * @param y_tl top left y value - * @param x_tr top right x value - * @param y_tr top right y value - * @param x_bl bottom left x value - * @param y_bl bottom left y value - * @param x_br bottom right x value - * @param y_br bottom right y value - */ - Quad(_Tp x_tl, _Tp y_tl, _Tp x_tr, _Tp y_tr, _Tp x_bl, _Tp y_bl, _Tp x_br, _Tp y_br) - : top_left_(Point<_Tp>(x_tl, y_tl)), - top_right_(Point<_Tp>(x_tr, y_tr)), - bottom_left_(Point<_Tp>(x_bl, y_bl)), - bottom_right_(Point<_Tp>(x_br, y_br)) {} - - /// coordinates of top left point - Point<_Tp> top_left_; - /// coordinates of top left point - Point<_Tp> top_right_; - /// coordinates of top left point - Point<_Tp> bottom_left_; - /// coordinates of top left point - Point<_Tp> bottom_right_; -}; - -/// Alias to older 'BoundingQuad' template definition. -template -using BoundingQuad = Quad; - -typedef Quad Quadi; -typedef Quad Quadf; -typedef Quad Quadd; - -/** - * Fixed size array template class for multiple instances of class T. - */ -template -class CVML_SDK_EXPORT_CORE Array { - public: - /** - * Default constructor. - */ - Array() : v_(nullptr), size_(0) {} - - /** - * Move constructor. - * - * @param other Source array - */ - Array(Array&& other) noexcept : v_(std::move(other.v_)), size_(std::exchange(other.size_, 0)) { - other.v_ = nullptr; - other.size_ = 0; - } - - /** - * Move assignment operator. 
- * - * @param other Source array - * @return Reference to updated object - */ - Array& operator=(Array&& other) noexcept { - if (this != &other) { - if (v_) delete[] v_; - size_ = other.size_; - v_ = other.v_; - other.v_ = nullptr; - other.size_ = 0; - } - return *this; - } - - /** - * Constructor that initilize required number of classes T. - * - * This function throws exceptions on errors. - * - * @param size Desired size of the array - */ - explicit Array(size_t size) : v_{new T[size]}, size_(size) {} - - /** - * Copy constructor. - * - * This function throws exceptions on errors. - * - * @param other Source array - */ - Array(const Array& other) : v_{new T[other.size()]}, size_(other.size()) { - for (size_t i = 0; i < other.size(); i++) // copy elements - v_[i] = other[i]; - } - - /** - * Assignment operator. - * - * This operator throws exceptions on errors. - * - * @param other Source array - * @return Reference to updated object - */ - Array& operator=(const Array& other) { - if (&other != this) { - T* p = new T[other.size()]; - for (size_t i = 0; i != other.size(); ++i) p[i] = other[i]; - if (v_) delete[] v_; // delete old elements - v_ = p; - size_ = other.size(); - } - return *this; - } - - /** - * Read only operator[] for const objects. - * - * This operator throws exceptions on out-of-range subscripts. - * - * @param i Index to array - * @return Array value - */ - const T& operator[](size_t i) const { - if (i >= size_) throw std::runtime_error("Invalid subscript access"); - return v_[i]; - } - - /** - * operator[] for subscript access. - * - * This operator throws exceptions on out-of-range subscripts. - * - * @param i Index to array - * @return Reference to array entry - */ - T& operator[](size_t i) { - if (i >= size_) throw std::runtime_error("Invalid subscript access"); - return v_[i]; - } - - /** - * Returns the size of the array. 
- * - * @return Current size of the array - */ - size_t size() const { return size_; } - - /** - * Destructor - */ - ~Array() { - if (v_) delete[] v_; - v_ = nullptr; - } - - private: - T* v_; ///< Internal array storage - size_t size_; ///< Current size of the array -}; - -/** - * Structure representing face location and landmarks for a single person. - */ -struct CVML_SDK_EXPORT_CORE Face { - /// Constructor - Face() : confidence_score_(0.f) {} - - /// Destructor - virtual ~Face() {} - - /// Face bounding box - Rect_i face_; - - /// Face detection confidence score - float confidence_score_; - - /// Image coordinates of landmarks - /// Facial landmarks are used to localize and represent important regions of the face, such as: - /// mouth, eyes, eyebrows, nose - Array landmarks_; - - /// Get bounding box - Rect_i GetROI() { return face_; } -}; - -/** - * Structure representing landmarks and bounding box for a single person. - */ -struct Person { - /// Bounding box for this person - Rect_i person_; - - /// Person detection confidence score - float confidence_score_; - - /// Detected landmarks for this person - Array landmarks_; - - /// Detected landmark scores for this person - Array landmark_scores_; - - /// Get bounding box - Rect_i GetROI() { return person_; } -}; - -} // namespace cvml -} // namespace amd - -#endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_TYPES_H_ diff --git a/Ryzen-AI-CVML-Library/linux/cvml-depth-estimation.graphlib b/Ryzen-AI-CVML-Library/linux/cvml-depth-estimation.graphlib deleted file mode 100644 index 15cebf82..00000000 --- a/Ryzen-AI-CVML-Library/linux/cvml-depth-estimation.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39d0ce7204e348e83f0a7d57763b1e1527f74767974d9a21be5a1c0d5cf370d5 -size 133191680 diff --git a/Ryzen-AI-CVML-Library/linux/cvml-face-detector.graphlib b/Ryzen-AI-CVML-Library/linux/cvml-face-detector.graphlib deleted file mode 100644 index 580f4970..00000000 
--- a/Ryzen-AI-CVML-Library/linux/cvml-face-detector.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:159262523471828c6899848285c4a59a1e6227f41242a8b9b17d8217b74d90c7 -size 5314560 diff --git a/Ryzen-AI-CVML-Library/linux/cvml-facemesh.graphlib b/Ryzen-AI-CVML-Library/linux/cvml-facemesh.graphlib deleted file mode 100644 index e1919835..00000000 --- a/Ryzen-AI-CVML-Library/linux/cvml-facemesh.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9df570453d20ed757cdf2dc8258f9b592531c2850c75a5a0a71e6870402dfcf0 -size 8069120 diff --git a/Ryzen-AI-CVML-Library/linux/libcvml-depth-estimation.so b/Ryzen-AI-CVML-Library/linux/libcvml-depth-estimation.so deleted file mode 100644 index 8d0fd0c7..00000000 --- a/Ryzen-AI-CVML-Library/linux/libcvml-depth-estimation.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b0f9f6bfdcda450988f8b57aff5659cc48eff0213a8a12bf2e5f0b0551fe347 -size 49632 diff --git a/Ryzen-AI-CVML-Library/linux/libcvml-face-detector.so b/Ryzen-AI-CVML-Library/linux/libcvml-face-detector.so deleted file mode 100644 index fff8d158..00000000 --- a/Ryzen-AI-CVML-Library/linux/libcvml-face-detector.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66198759f8f2193fe5974311bfaae1dece58e3d551005e00f114cbb035ed7f6f -size 2677376 diff --git a/Ryzen-AI-CVML-Library/linux/libcvml-facemesh.so b/Ryzen-AI-CVML-Library/linux/libcvml-facemesh.so deleted file mode 100644 index 9cdfd9b7..00000000 --- a/Ryzen-AI-CVML-Library/linux/libcvml-facemesh.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e8f87e3b85f7abaf42ce875d22c47148e18bf062d4f78b77a4970e594e46fa8 -size 149280 diff --git a/Ryzen-AI-CVML-Library/linux/libcvml-sdk.so b/Ryzen-AI-CVML-Library/linux/libcvml-sdk.so deleted file mode 100644 index 6e6ba5ed..00000000 --- a/Ryzen-AI-CVML-Library/linux/libcvml-sdk.so +++ 
/dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d409388e4dc3f8ab2340e258ecbf4127b552d60babc59563350ffd46ae324da8 -size 21992112 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfig.cmake b/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfig.cmake deleted file mode 100644 index 7d529981..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfig.cmake +++ /dev/null @@ -1,26 +0,0 @@ - -####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### -####### Any changes to this file will be overwritten by the next CMake run #### -####### The input file was PROJECT_CONFIG_FILE ######## - -get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) - -macro(set_and_check _var _file) - set(${_var} "${_file}") - if(NOT EXISTS "${_file}") - message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") - endif() -endmacro() - -macro(check_required_components _NAME) - foreach(comp ${${_NAME}_FIND_COMPONENTS}) - if(NOT ${_NAME}_${comp}_FOUND) - if(${_NAME}_FIND_REQUIRED_${comp}) - set(${_NAME}_FOUND FALSE) - endif() - endif() - endforeach() -endmacro() - -#################################################################################### -include("${CMAKE_CURRENT_LIST_DIR}/onnxruntimeTargets.cmake") diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfigVersion.cmake b/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfigVersion.cmake deleted file mode 100644 index 70cd1b43..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeConfigVersion.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# This is a basic version file for the Config-mode of find_package(). 
-# It is used by write_basic_package_version_file() as input file for configure_file() -# to create a version-file which can be installed along a config.cmake file. -# -# The created file sets PACKAGE_VERSION_EXACT if the current version string and -# the requested version string are exactly the same and it sets -# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, -# but only if the requested major version is the same as the current one. -# The variable CVF_VERSION must be set before calling configure_file(). - - -set(PACKAGE_VERSION "1.22.0") - -if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) - set(PACKAGE_VERSION_COMPATIBLE FALSE) -else() - - if("1.22.0" MATCHES "^([0-9]+)\\.") - set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") - if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) - string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") - endif() - else() - set(CVF_VERSION_MAJOR "1.22.0") - endif() - - if(PACKAGE_FIND_VERSION_RANGE) - # both endpoints of the range must have the expected major version - math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") - if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR - OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) - OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) - set(PACKAGE_VERSION_COMPATIBLE FALSE) - elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR - AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) - OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) - set(PACKAGE_VERSION_COMPATIBLE TRUE) - else() - set(PACKAGE_VERSION_COMPATIBLE FALSE) - endif() - else() - if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) - set(PACKAGE_VERSION_COMPATIBLE TRUE) - 
else() - set(PACKAGE_VERSION_COMPATIBLE FALSE) - endif() - - if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) - set(PACKAGE_VERSION_EXACT TRUE) - endif() - endif() -endif() - - -# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: -if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") - return() -endif() - -# check that the installed version has the same 32/64bit-ness as the one which is currently searching: -if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8") - math(EXPR installedBits "8 * 8") - set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") - set(PACKAGE_VERSION_UNSUITABLE TRUE) -endif() diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets-release.cmake b/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets-release.cmake deleted file mode 100644 index 09a159af..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets-release.cmake +++ /dev/null @@ -1,19 +0,0 @@ -#---------------------------------------------------------------- -# Generated CMake target import file for configuration "Release". -#---------------------------------------------------------------- - -# Commands may need to know the format version. -set(CMAKE_IMPORT_FILE_VERSION 1) - -# Import target "onnxruntime::onnxruntime" for configuration "Release" -set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) -set_target_properties(onnxruntime::onnxruntime PROPERTIES - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib64/libonnxruntime.so.1.22.0" - IMPORTED_SONAME_RELEASE "libonnxruntime.so.1" - ) - -list(APPEND _cmake_import_check_targets onnxruntime::onnxruntime ) -list(APPEND _cmake_import_check_files_for_onnxruntime::onnxruntime "${_IMPORT_PREFIX}/lib64/libonnxruntime.so.1.22.0" ) - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets.cmake b/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets.cmake deleted file mode 100644 index 3a373d09..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/cmake/onnxruntime/onnxruntimeTargets.cmake +++ /dev/null @@ -1,106 +0,0 @@ -# Generated by CMake - -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) - message(FATAL_ERROR "CMake >= 2.8.3 required") -endif() -if(CMAKE_VERSION VERSION_LESS "2.8.3") - message(FATAL_ERROR "CMake >= 2.8.3 required") -endif() -cmake_policy(PUSH) -cmake_policy(VERSION 2.8.3...3.29) -#---------------------------------------------------------------- -# Generated CMake target import file. -#---------------------------------------------------------------- - -# Commands may need to know the format version. -set(CMAKE_IMPORT_FILE_VERSION 1) - -# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
-set(_cmake_targets_defined "") -set(_cmake_targets_not_defined "") -set(_cmake_expected_targets "") -foreach(_cmake_expected_target IN ITEMS onnxruntime::onnxruntime) - list(APPEND _cmake_expected_targets "${_cmake_expected_target}") - if(TARGET "${_cmake_expected_target}") - list(APPEND _cmake_targets_defined "${_cmake_expected_target}") - else() - list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") - endif() -endforeach() -unset(_cmake_expected_target) -if(_cmake_targets_defined STREQUAL _cmake_expected_targets) - unset(_cmake_targets_defined) - unset(_cmake_targets_not_defined) - unset(_cmake_expected_targets) - unset(CMAKE_IMPORT_FILE_VERSION) - cmake_policy(POP) - return() -endif() -if(NOT _cmake_targets_defined STREQUAL "") - string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") - string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") - message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") -endif() -unset(_cmake_targets_defined) -unset(_cmake_targets_not_defined) -unset(_cmake_expected_targets) - - -# Compute the installation prefix relative to this file. -get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) -get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) -get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) -get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) -if(_IMPORT_PREFIX STREQUAL "/") - set(_IMPORT_PREFIX "") -endif() - -# Create imported target onnxruntime::onnxruntime -add_library(onnxruntime::onnxruntime SHARED IMPORTED) - -set_target_properties(onnxruntime::onnxruntime PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/onnxruntime" -) - -# Load information for each installed configuration. 
-file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/onnxruntimeTargets-*.cmake") -foreach(_cmake_config_file IN LISTS _cmake_config_files) - include("${_cmake_config_file}") -endforeach() -unset(_cmake_config_file) -unset(_cmake_config_files) - -# Cleanup temporary variables. -set(_IMPORT_PREFIX) - -# Loop over all imported files and verify that they actually exist -foreach(_cmake_target IN LISTS _cmake_import_check_targets) - if(CMAKE_VERSION VERSION_LESS "3.28" - OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target} - OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}") - foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") - if(NOT EXISTS "${_cmake_file}") - message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file - \"${_cmake_file}\" -but this file does not exist. Possible reasons include: -* The file was deleted, renamed, or moved to another location. -* An install or uninstall procedure did not complete successfully. -* The installation package was faulty and contained - \"${CMAKE_CURRENT_LIST_FILE}\" -but not all the files it references. -") - endif() - endforeach() - endif() - unset(_cmake_file) - unset("_cmake_import_check_files_for_${_cmake_target}") -endforeach() -unset(_cmake_target) -unset(_cmake_import_check_targets) - -# This file does not depend on other imported targets which have -# been exported from the same project but in a separate export set. - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) -cmake_policy(POP) diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so b/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so deleted file mode 120000 index a64790aa..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so +++ /dev/null @@ -1 +0,0 @@ -libonnxruntime.so.1 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1 b/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1 deleted file mode 120000 index b56b9bac..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1 +++ /dev/null @@ -1 +0,0 @@ -libonnxruntime.so.1.22.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1.22.0 b/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1.22.0 deleted file mode 100644 index 58b08f59..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime.so.1.22.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime_providers_shared.so b/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime_providers_shared.so deleted file mode 100644 index d1856781..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/libonnxruntime_providers_shared.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4706ea02be3999167f000bb1ff7391c86e95d2d96f78eb33225c14b172a8451 -size 14632 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/cpu/pkgconfig/libonnxruntime.pc b/Ryzen-AI-CVML-Library/linux/onnx/cpu/pkgconfig/libonnxruntime.pc deleted file mode 100644 index 51172926..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/cpu/pkgconfig/libonnxruntime.pc +++ /dev/null @@ -1,13 +0,0 @@ -prefix=/usr/local -bindir=${prefix}/bin -mandir=${prefix}/share/man -docdir=${prefix}/share/doc/onnxruntime -libdir=${prefix}/lib64 -includedir=${prefix}/include/onnxruntime - -Name: onnxruntime -Description: ONNX runtime -URL: https://github.com/microsoft/onnxruntime 
-Version: 1.22.0 -Libs: -L${libdir} -lonnxruntime -Cflags: -I${includedir} diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin deleted file mode 100644 index fa754b93..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin deleted file mode 100644 index f4c6cbe4..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin deleted file mode 100644 index cf7a3136..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin deleted file mode 100644 index 180436e5..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin deleted file mode 100644 index 1d1b40f1..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libcpu-runner_without_symbol.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libcpu-runner_without_symbol.so deleted file mode 100644 index ce406d9c..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libcpu-runner_without_symbol.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:6c7fd4f91097310498bea445516d7dc6b444962fb6db538d7f3718cb53348692 -size 16770480 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so deleted file mode 120000 index c0455457..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so +++ /dev/null @@ -1 +0,0 @@ -libglog.so.1 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.0.6.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.0.6.0 deleted file mode 100644 index 9a8dc640..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.0.6.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.1 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.1 deleted file mode 120000 index f662722a..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libglog.so.1 +++ /dev/null @@ -1 +0,0 @@ -libglog.so.0.6.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libgraph-engine.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libgraph-engine.so deleted file mode 100644 index 546831d5..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libgraph-engine.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cb8a9d3aa60059c94461de24bb1f8c3392052794a9f8b8470473e34d9c16428 -size 717985 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so deleted file mode 120000 index a64790aa..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so +++ /dev/null @@ -1 +0,0 @@ -libonnxruntime.so.1 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1 deleted file mode 120000 index 15331b1a..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1 
+++ /dev/null @@ -1 +0,0 @@ -libonnxruntime.so.1.20.1 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1.20.1 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1.20.1 deleted file mode 100644 index 5678c288..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime.so.1.20.1 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_shared.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_shared.so deleted file mode 100644 index dd9d4f14..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_shared.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a159552c0f6ab5454053b4436540aba9974d285f736ed29cada1a45219bea7d -size 14104 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_vitisai.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_vitisai.so deleted file mode 100644 index 87456a4f..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_providers_vitisai.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2efb85885a774057950de4453ecc8b954158ae032a2acae447134ab355c56f22 -size 190712 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so deleted file mode 120000 index 1c746796..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so +++ /dev/null @@ -1 +0,0 @@ -libonnxruntime_vitisai_ep.so.1 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1 deleted file mode 120000 index 511b90ad..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1 +++ /dev/null @@ -1 +0,0 @@ 
-libonnxruntime_vitisai_ep.so.1.0.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1.0.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1.0.0 deleted file mode 100644 index 5d8f2f71..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libonnxruntime_vitisai_ep.so.1.0.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so deleted file mode 120000 index 9f5e671b..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so +++ /dev/null @@ -1 +0,0 @@ -libtarget-factory.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3 deleted file mode 120000 index 46871628..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3 +++ /dev/null @@ -1 +0,0 @@ -libtarget-factory.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3.5.0 deleted file mode 100644 index 4305eaac..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libtarget-factory.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so deleted file mode 120000 index 8b837155..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so +++ /dev/null @@ -1 +0,0 @@ -libunilog.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3 deleted file mode 120000 index 0b4d95b5..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3 +++ /dev/null @@ -1 +0,0 @@ -libunilog.so.3.5.0 \ No newline at end of 
file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3.5.0 deleted file mode 100644 index 392a78a3..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libunilog.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-core.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-core.so deleted file mode 100644 index 036ba42f..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-core.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e5dd73443f5ea0971acf620a3b3659e5fca550a4c2cb36754bfc1482da7af18b -size 7484633 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-encryption.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-encryption.so deleted file mode 100644 index 9dd98a48..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-encryption.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aad961994fe96bbf0a8c0b8969947bf7e5c2db86b039d0b16ac048fc263f9c9a -size 6001457 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_add_fix.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_add_fix.so deleted file mode 100644 index 5ddacae6..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_add_fix.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c06dc9f9498ba7e11dea852f3a09afbd8861d3153feedfda6ab123861cd7b5cf -size 46105 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_batchnorm_to_scale.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_batchnorm_to_scale.so deleted file mode 100644 index 36e414ab..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_batchnorm_to_scale.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:5c42f7e806e5f602dd1cc54bdd2754f269c39c6f831fa53b5ee826fa71be52ac -size 54401 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_transpose.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_transpose.so deleted file mode 100644 index 6f458d1a..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_const_fold_transpose.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a99ebfc165d5d2d26ddf0d17001036768af0b4240f7192a16cb9e6d59d41c730 -size 54177 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_ending_blacklist_ops_to_unknown_op.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_ending_blacklist_ops_to_unknown_op.so deleted file mode 100644 index a15784fe..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_ending_blacklist_ops_to_unknown_op.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a616cc9d243556823d8d0a1ee58dbe02128a61df175b3bebe474cfba6c296579 -size 42713 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_instancenorm_to_groupnorm.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_instancenorm_to_groupnorm.so deleted file mode 100644 index 98f81311..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_instancenorm_to_groupnorm.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d6fd29b5855995a6305906173abf808072234e7304a2d9328ff13c8b86b860f7 -size 83809 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_pad.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_pad.so deleted file mode 100644 index 40b5ea2d..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_pad.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:f9fb68d1afb74ae184ffb42d08eb6a56689731646c21402cc70a6424ad4ceb9f -size 58217 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_split_to_xir_op.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_split_to_xir_op.so deleted file mode 100644 index 13503ac6..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_convert_split_to_xir_op.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79ea3be7522d1c7b60aceeae36a9f62047ff769bf11f05a2c1b31bc3a82d252f -size 1994961 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_create_const_op.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_create_const_op.so deleted file mode 100644 index 34712f6f..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_create_const_op.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21cce6184faf9869e03a8539cdc06ea33ad625d52d828e5fcd733fa2b27454d6 -size 226121 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_dynamic_input_batch.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_dynamic_input_batch.so deleted file mode 100644 index 58d6b1d8..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_dynamic_input_batch.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a5eca938e81f3e5e9d26c0a2ab4666f89a6062a9ce660b8effe30382e9fef2b -size 37729 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_fuse_transpose.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_fuse_transpose.so deleted file mode 100644 index 8163cac7..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_fuse_transpose.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca8d449807c138fe5df1cc3a5e8a942fa82f40be3f49c96a355be9dd7289bb4c -size 762897 diff --git 
a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_init.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_init.so deleted file mode 100644 index 0754328c..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_init.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a888eef1a039d2571c48fb72a999e272b339a2729f7db0d72262198aef9f6e5c -size 36705 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_layout_transform_via_adding_transpose.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_layout_transform_via_adding_transpose.so deleted file mode 100644 index 17374850..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_layout_transform_via_adding_transpose.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:099ba4f8f06555de3247c524752b848d8ec4c5776fa1197c59b9f48cfaa9dc04 -size 565081 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_level1_dpu.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_level1_dpu.so deleted file mode 100644 index 7e1cbe32..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_level1_dpu.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d45cf9e5d7e517a2e4df4e14dada3cad86c7947cbca2e1cd2a91676453061b0e -size 3144281 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_add_into_conv_bias.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_add_into_conv_bias.so deleted file mode 100644 index 770258b2..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_add_into_conv_bias.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ac57d430edc2d510a3e127ce766b12dec4f679f80dd6e46af78044a490a351f -size 46185 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_fix.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_fix.so 
deleted file mode 100644 index e347b610..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_fix.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec6e6fcbd64db717fba6e2d4d8c2822591d697c67231b3c01663915d74d82ada -size 36721 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_hard_sigmoid.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_hard_sigmoid.so deleted file mode 100644 index ad486c96..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_hard_sigmoid.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c9bd948839b17ac442dd878b78c8ab91ee8069628abce8c1b5b3ab1b179cc85 -size 54441 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_mul.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_mul.so deleted file mode 100644 index 8b453732..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_mul.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a311885aba693335a00b025ac077d8db34b59da62ad2d8097e86337fb60f74e -size 50929 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_pad.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_pad.so deleted file mode 100644 index 71e02281..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_merge_pad.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c9c7b5d00ac1d4c7b980c9cc812c960160a494a024490168ddaed440aba7dce3 -size 59745 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_extra_q_dq.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_extra_q_dq.so deleted file mode 100644 index 056d757c..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_extra_q_dq.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:3281a59aa748e2a5eabbe9b8d17ab52919f2f25c9e6fc783a6e86f44804dbcd5 -size 36409 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_identity.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_identity.so deleted file mode 100644 index f687a71e..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_identity.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15185c423aeeb593b531707411e619674528061994b9669879e1758191a6eb2e -size 41977 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_isolated_node.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_isolated_node.so deleted file mode 100644 index 56f9e70a..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_remove_isolated_node.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f24b31fd626ce68da504749cf9e692a43c0106d9ee739bd4b194be8c80b44c9f -size 530689 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_to_xir_ops.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_to_xir_ops.so deleted file mode 100644 index a8b4f686..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_to_xir_ops.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70ac18cbfd21898247aa5dfc73dd9ffc71bb36b1a5a91d5c2c27194ab43e582d -size 2324633 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_vaiml.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_vaiml.so deleted file mode 100644 index 7e32f0e2..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip-pass_vaiml.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:530bdb129dc481c5fc985183b16776d778149f7cd5110ba37922b0651d1bd5a9 -size 33321 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip_custom_op_DPU.so 
b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip_custom_op_DPU.so deleted file mode 100644 index b1e8b228..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvaip_custom_op_DPU.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:95dd7ec5780d6fd50760e6c2f782a6844ec15cfd51a178aec03120429efcd353 -size 2521921 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so deleted file mode 120000 index 7ea3baa8..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so +++ /dev/null @@ -1 +0,0 @@ -libvart-cpu-runner.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3 deleted file mode 120000 index 891e4402..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3 +++ /dev/null @@ -1 +0,0 @@ -libvart-cpu-runner.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3.5.0 deleted file mode 100644 index 38441c4a..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-cpu-runner.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so deleted file mode 120000 index fe716c6d..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so +++ /dev/null @@ -1 +0,0 @@ -libvart-mem-manager.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3 deleted file mode 120000 index a8b4af52..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3 +++ /dev/null @@ -1 +0,0 @@ 
-libvart-mem-manager.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3.5.0 deleted file mode 100644 index cd4432d5..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-mem-manager.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so deleted file mode 120000 index f6dcdb9a..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so +++ /dev/null @@ -1 +0,0 @@ -libvart-runner.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3 deleted file mode 120000 index 8a99f742..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3 +++ /dev/null @@ -1 +0,0 @@ -libvart-runner.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3.5.0 deleted file mode 100644 index 57a76b03..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-runner.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so deleted file mode 120000 index 6480799c..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so +++ /dev/null @@ -1 +0,0 @@ -libvart-trace.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3 deleted file mode 120000 index 8fee3ae3..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3 +++ /dev/null @@ -1 +0,0 @@ -libvart-trace.so.3.5.0 \ No newline at end of file diff --git 
a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3.5.0 deleted file mode 100644 index 3f725c20..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-trace.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so deleted file mode 120000 index 9ecbafc3..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so +++ /dev/null @@ -1 +0,0 @@ -libvart-util.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3 deleted file mode 120000 index 14af522d..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3 +++ /dev/null @@ -1 +0,0 @@ -libvart-util.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3.5.0 deleted file mode 100644 index 4403a4e8..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libvart-util.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-core-without-symbol.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-core-without-symbol.so deleted file mode 100644 index 9c308359..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-core-without-symbol.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:edba2658f125f268cae04d9a577b1a245776951421e16c951c7387f6d95b9301 -size 118569328 diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so deleted file mode 120000 index d06cb889..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so +++ /dev/null @@ -1 +0,0 @@ 
-libxcompiler-xcompiler-core.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3 deleted file mode 120000 index 2c77fd12..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3 +++ /dev/null @@ -1 +0,0 @@ -libxcompiler-xcompiler-core.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3.5.0 deleted file mode 100644 index 065076b0..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxcompiler-xcompiler-core.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so deleted file mode 120000 index 9752e81e..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so +++ /dev/null @@ -1 +0,0 @@ -libxir.so.3 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3 deleted file mode 120000 index 537fb538..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3 +++ /dev/null @@ -1 +0,0 @@ -libxir.so.3.5.0 \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3.5.0 b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3.5.0 deleted file mode 100644 index 162c84c9..00000000 Binary files a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/libxir.so.3.5.0 and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/vaip_config_npu_2_3.json b/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/vaip_config_npu_2_3.json deleted file mode 100644 index 6e8f6372..00000000 --- a/Ryzen-AI-CVML-Library/linux/onnx/ryzen14/vaip_config_npu_2_3.json +++ /dev/null @@ -1,1050 +0,0 @@ -{ - "passes": [ - { - "name": "init", - 
"plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_softmax", - "methodName": "rules" - } - }, - { - "name": "fuse_topk", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_topk", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "vaip_pass_norm_k", - "plugin": "vaip-pass_norm_k", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_dynamic_dispatch", - "plugin": "vaip-pass_level1_dd", - "passDpuParam": { - "subPass": [ - { - "name": "dd_compiler_pass_transformation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_transformation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_2", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen_2", - "methodName": "rules" - } - } - ] - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": 
"convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, 
- { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_VAIML-x2.0", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - 
"plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - 
"debug_mode": { - "stringValue": "function" - }, - "enable_call_aie": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mt_fusion": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "enable_control_optimization": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_MHA", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_MHA", - "plugin": "vaip-pass_convert_MHA", - "enableGc": true - }, - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": 
"vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin" - } - }, - { - "name": "vaiml", - "plugin": "vaip-pass_vaiml", - "pass_vaiml_param": { - "subPass": [ - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": 
"vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": 
"vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - } - ] - } - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "vaiml_model_path": "vaiml_par_0", - "max_num_partitions": 200, - "device_name": "phx", - "debug": true - } - }, - { - "name": "vaip_pass_dd_merge_dqcastgather", - "plugin": "vaip-pass_dd_merge_dqcastgather", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop_onnx", - "plugin": "vaip-pass_dd_merge_qop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop_onnx", - "plugin": "vaip-pass_dd_merge_dqop_onnx", - "enable_gc": true, - "disabled": false - } - ], - "mepTable": [ - { - "modelName": "a3", - "md5sumOnDisk": "73ecb2594935fb9bd02707930610f29e", - "md5sumInMemory": 
"f59151f8b67a7b8f1a8bcc7798558c33", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "VAIML-x2.0-a3" - }, - { - "modelName": "DeepLabV3", - "md5sumOnDisk": "10a644c6da6b1121f807794506b7e5cc", - "md5sumInMemory": "849608d568bbc54380833c9446299989", - "md5sumInMemoryWithIo": "99f39afbf868542fb575023123c98001", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "inceptionv4", - "md5sumOnDisk": "42c48a7086ba34889699862d98844e62", - "md5sumInMemory": "591d2dc2a27f04111e1b14fbf4222d51", - "md5sumInMemoryWithIo": "3d6d80d6c60811089ced3bf6abb42cdc", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "MobileNet_V3", - "md5sumOnDisk": "5bbc61c013f20b6563c62523100fa2ee", - "md5sumInMemory": "0bcec05d638535092032784dcca2cce3", - "md5sumInMemoryWithIo": "1506de83ac617b0903613ca420061e84", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "ResNet50", - "md5sumOnDisk": "c22b773b35f2ce62932578cc5eec867b", - "md5sumInMemory": "b307088b2bf693c51d8ea399247d8139", - "md5sumInMemoryWithIo": "1bea49f637b8f9f0ec80bc44c6d841bb", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "YoloV3", - "md5sumOnDisk": "2657be600da90313ca9d32c03bd11835", - "md5sumInMemory": "7646f3796c4d98cfa53650482148734a", - "md5sumInMemoryWithIo": "14b71beebb0c98f291bdda3e7843cb1a", - "target": "VAIML-x2.0-Procyon" - } - ], - "target": "RyzenAI_vision_config_1", - "targets": [ - { - "name": "RyzenAI_vision_config_1", - "pass": [ - "init", - "vaiml" - ] - }, - { - "name": "RyzenAI_shell_config_1", - "xclbin": "AMD_AIE2P_2x4x1_Overlay.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU", - "vaip_pass_dd_merge_qop", - "vaip_pass_dd_merge_dqop", - "vaip_pass_dd_merge_qop_onnx", - "vaip_pass_dd_merge_dqop_onnx" - ], - "target_opts": { - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 3 - }, - "profile": { - "uintValue": 0 - }, - "enable_fast_pm": { - "boolValue": true - }, - 
"enable_weights_prefetch": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mergesync": { - "boolValue": true - } - } - }, - "graph_engine_qos_priority": 640 - }, - { - "name": "RyzenAI_vision_config_2", - "xclbin": "1x4.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 16 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - } - } - }, - { - "name": "RyzenAI_vision_config_3", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - } - } - } - }, - { - "name": "RyzenAI_vision_config_3_mha", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "RyzenAI_transformer_config_2", - "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch" - ] - }, - { - "name": "RyzenAI_xcompiler_and_dd_config", - "xclbin": "2x4x2_pss_pst_model_mha_qdq.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - 
} - } - } - }, - { - "name": "RyzenAI_transformer_cxx_pss_pst", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch_pss_pst" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "VAIML-x2.0-a3", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ] - }, - { - "name": "VAIML-x2.0-Procyon", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "enable_qdq_force_xint": { - "boolValue": true - } - } - } - } - ], - "enable_cache_file_io_in_mem": true -} diff --git a/Ryzen-AI-CVML-Library/samples/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/CMakeLists.txt deleted file mode 100644 index db9675dd..00000000 --- a/Ryzen-AI-CVML-Library/samples/CMakeLists.txt +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# To compile all the Ryzen AI CVML sample applications, execute the following -# commands from the samples folder, -# -# For Windows -# > mkdir build -# > cmake -S %CD% -B %CD%\build -DOPENCV_INSTALL_ROOT=%OPENCV_INSTALL_ROOT% -# > cmake --build %CD%\build --config Release -# -# For Linux -# > mkdir build -# > cmake -S $PWD -B $PWD/build -DOPENCV_INSTALL_ROOT=$OPENCV_INSTALL_ROOT -# > cmake --build $PWD/build --config Release -# -# where OPENCV_INSTALL_ROOT is set to the location of the OpenCV library files. 
-# - -# -# Build the sample apps inside Ryzen AI CVML release -# -cmake_minimum_required(VERSION 3.18) -project(cvml-sdk-samples) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED) - -# -# Please set opencv install root path, using the following line as an example, -# -file(TO_CMAKE_PATH "$ENV{OPENCV_INSTALL_ROOT}" OPENCV_INSTALL_ROOT) -find_package(OpenCV PATHS ${OPENCV_INSTALL_ROOT}) -if (NOT OpenCV_FOUND) - # Try default OpenCV config, workaround for older OpenCV versions - if (WIN32) - set(OpenCV_RUNTIME vc16) - set(OpenCV_ARCH x64) - set(OpenCV_DIR "${OPENCV_INSTALL_ROOT}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib") - message(STATUS "First attempt to find OpenCV failed. Trying again with RUNTIME=${OpenCV_RUNTIME}, ARCH=${OpenCV_ARCH}") - else () - set(OpenCV_DIR "${OPENCV_INSTALL_ROOT}/lib") - message(STATUS "First attempt to find OpenCV failed. Trying again with OpenCV_DIR=${OPENCV_INSTALL_ROOT}/lib") - endif () - find_package(OpenCV REQUIRED PATHS ${OPENCV_INSTALL_ROOT}) -endif() - -# -# Find Ryzen AI CVML library (also sets include directories) -# -if (NOT DEFINED AMD_CVML_SDK_ROOT) - set(AMD_CVML_SDK_ROOT ..) 
-endif() -find_package(RyzenAILibrary REQUIRED PATHS ${AMD_CVML_SDK_ROOT}) - -set(CMAKE_CXX_FLAGS "-D_UNICODE -DUNICODE") -if (MSVC) - add_compile_options(/EHsc) -endif () - -# Add the subdirectory of the samples to build -add_subdirectory(common-sample-utils) -file(GLOB SAMPLE_SRC_DIRS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cvml-sample-*) -# Filter out any existing internal samples -file(GLOB INTERNAL_SAMPLE_SRC_DIRS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *internal*) -list(REMOVE_ITEM SAMPLE_SRC_DIRS ${INTERNAL_SAMPLE_SRC_DIRS}) -FOREACH(ITER ${SAMPLE_SRC_DIRS}) - add_subdirectory(${ITER}) -ENDFOREACH() diff --git a/Ryzen-AI-CVML-Library/samples/common-sample-utils/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/common-sample-utils/CMakeLists.txt deleted file mode 100644 index ea12c960..00000000 --- a/Ryzen-AI-CVML-Library/samples/common-sample-utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2021 Advanced Micro Devices, Inc. All rights reserved. - -add_subdirectory(src) \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/samples/common-sample-utils/include/common-sample-utils.h b/Ryzen-AI-CVML-Library/samples/common-sample-utils/include/common-sample-utils.h deleted file mode 100644 index e99f97e0..00000000 --- a/Ryzen-AI-CVML-Library/samples/common-sample-utils/include/common-sample-utils.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. 
- */ - -#ifndef SAMPLES_COMMON_SAMPLE_UTILS_INCLUDE_COMMON_SAMPLE_UTILS_H_ -#define SAMPLES_COMMON_SAMPLE_UTILS_INCLUDE_COMMON_SAMPLE_UTILS_H_ - -#include -#include -#include - -#include "cvml-context.h" -#include "opencv2/opencv.hpp" - -namespace amd { -namespace cvml { -namespace sample { -namespace utils { - -/** - * camera resolution - */ -typedef struct CamRes { - uint32_t width; - uint32_t height; -} CamRes; - -/** - * Sets up the camera with the specified camera id according to the preferred resolution list - * @param camera_index: camera to open - * @param res_list: a list of resolutions that can be used, the first resolution in the list will be - * tried first - * @param camera: opencv camera handle if camera openeed successfully - * @return camera open successfully or not - */ -bool SetupCamera(int camera_index, const std::vector& res_list, cv::VideoCapture* camera); - -/** - * Create a folder with timestamp corresponding to the current local time of the system - * - * @return folder named with timestamp - */ -std::string CreateFolderWithTimestamp(); - -/** - * Get a string of timestamp corresponding to the current local time of the system - * - * @return string of timestamp - */ -std::string GetTimestamp(); - -/** - * Local class definition for passing information to \a RunFeature - * callbacks. To use, inherit a local class/struct from this and provide - * it to the \a RunFeature function. - * - * This class also contains additional flags/configuration parameters - * to modify the behavior of the RunFeature() function. 
- */ -class RunFeatureClass { - public: - /// destructor - virtual ~RunFeatureClass() {} - - /// Press the specified key to save a frame shot, not case sensitive - char frame_shot_key_{'s'}; - - /// Repeatedly iterate on image or video until window is closed - bool repeat_image_video_{false}; - - /// Scaling factor for display window, auto-scaling if zero - float disp_window_scale_{0.0f}; - - /// Specify window title to enable showing input frame - const char* input_window_name_{nullptr}; - - /// Concatenate input/output images beside each other - bool side_by_side_{false}; - - // Called for specific run feature code in each feature - virtual cv::Mat Feature(const cv::Mat& input_frame_rgb) { return input_frame_rgb; } - - /** - * The input extension is used to establish streaming mode. If a camera index is - * passed then the streaming mode is set to be online. - * - * @param src_path: Uses the same semantics as RunFeature's input. - * Input file name, or "" if camera desired - * @param context: CVML context being used in the sample - */ - void SetContextStreamingModeBySrc(amd::cvml::Context* context, const std::string& src_path); - - /** - * Opens video source and executes the feature. - * - * This function throws exceptions on errors. - * - * The provided callback function is called for each frame of - * camera/video/still with an OpenCV RGB input buffer. It should - * return an output/processed RGB buffer. - * - * The input extension is used to differentiate between video clips - * and still images. To select a camera input, provide the desired - * camera index as the input. If an empty input string is provided, - * the function attempts to open camera index 0. 
- * - * @param input: Input file name, or "" if camera desired - * @param output_file: Output file name - * @param window_title: Optional application window title, can be empty - * @param supported_res: Pointer to Camera resolution list supported by feature, can be NULL if - * using default value - */ - virtual void RunFeature(const std::string& input, const std::string& output_file, - const std::string& window_title, - std::vector* supported_res = nullptr); - - protected: - bool is_camera_{false}; ///< Whether input is camera - cv::VideoCapture video_input_; ///< OpenCV video capture device - cv::VideoWriter video_output_; ///< Video writer for main output - - bool open_output_file_{false}; ///< Whether to attempt opening output file - std::string output_file_{}; ///< Output file name for \a RunFeatureStreaming - std::string output_window_name_{}; ///< Output window name for \a RunFeatureStreaming - - cv::Mat frame_rgb_; ///< Input RGB frame data - double stream_fps_{0}; ///< Video/camera input frame rate - - protected: - /** - * Helper function to run feature against video files. - * - * @param input_file Video file name - */ - virtual void RunFeatureVideoFile(const std::string& input_file); - - /** - * Helper function to run feature against streaming inputs. - * - * @return true if user exit - */ - virtual bool RunFeatureStreaming(); - - /** - * Helper function to run feature against a video file. - */ - /** - * Fill local frame_rgb with next input frame. - * - * @param frame_id Zero-based frame id - * @return true if local's frame_rgb is ready to be processed - */ - virtual bool GetSingleVideoFrame(uint32_t frame_id); -}; - -/** - * Function to print the supported platform details. - */ -void GetPlatformInformation(); - -/** - * Parse command line arguments for RunFeature. 
- * - * @param argc Number of command line arguments - * @param argv Array of command line arguments - * @param input_str Pointer to input device/file string - * @param output_file Pointer to output file string - * @param arg_help Optional replacement string of argument option help text - * @return true if the application should continue - */ -bool ParseArguments(int argc, char** const argv, std::string* input_str, std::string* output_file, - const char* arg_help = nullptr); - -/** - * Render rectangle into the frame. - * - * @param image Pointer to target image buffer - * @param rect OpenCV rectangle definition - * @param color Rectangle color, as an RGBA scalar - */ -void PutRectangle(cv::Mat* image, const cv::Rect& rect, const cv::Scalar& color); - -/** - * Specify extra flags for PutText's override_x parameter. - */ -enum PUTTEXT { - /// Specify X center for PutText() - OVERRIDE_CENTER = 0x0, - - /// Specify absolute X offset for PutText() - OVERRIDE_ABSOLUTE = 1 << (sizeof(int) * 8 - 2) -}; - -/** - * Render text strings into the frame. 
- * To center text around a point, set override_x = PUTTEXT - * - * @param image Target image buffer - * @param display_text String of text to render - * @param row Zero-based row number to render text, assuming text console - * @param text_color Color of text to render - * @param override_x If non-zero, change behavior based on PUTTEXT flags - * @param text_height If non-zero, specifies text height as a percentage of the frame height - * @param fill_background Whether or not an opaque background should be added - * @param background_color Color of background, if specified - * @return End X value of the rendered text - */ -int PutText(cv::Mat* image, const std::string& display_text, const int text_row, - cv::Scalar text_color, const int override_x, const int text_height, - const bool fill_background = false, cv::Scalar background_color = cv::Scalar(0, 0, 0)); - -} // namespace utils -} // namespace sample -} // namespace cvml -} // namespace amd - -#endif // SAMPLES_COMMON_SAMPLE_UTILS_INCLUDE_COMMON_SAMPLE_UTILS_H_ diff --git a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/CMakeLists.txt deleted file mode 100644 index 4f234574..00000000 --- a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. 
- -list(APPEND CMAKE_MODULE_PATH ${OPENCV_INSTALL_ROOT} ${AMD_CVML_SDK_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/../../..") - -find_package(OpenCV REQUIRED) - -# -# Find Ryzen AI CVML library (also sets include directories) -# -find_package(RyzenAILibrary REQUIRED PATHS ${AMD_CVML_SDK_ROOT}) - -file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include ${OpenCV_INCLUDE_DIRS}) - -add_library(common-sample-utils STATIC ${SOURCES}) - -target_include_directories(common-sample-utils -PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include) - -target_link_libraries(common-sample-utils - ${OpenCV_LIBS} - ${RyzenAILibrary_LIBS} -) diff --git a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp b/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp deleted file mode 100644 index 2b6e64b3..00000000 --- a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. - */ - -#include "common-sample-utils.h" - -#ifdef _WIN32 -#include -#include -#include -#include -#include - -#pragma comment(lib, "Mfplat.lib") -#pragma comment(lib, "Mf.lib") -#pragma comment(lib, "mfreadwrite.lib") -#pragma comment(lib, "mfuuid.lib") -#endif - -using amd::cvml::sample::utils::CamRes; - -namespace amd { -namespace cvml { -namespace sample { -namespace utils { - -#ifdef _WIN32 -/** - * camera supported media type - */ -struct MediaTypeInfo { - GUID type; /// Image type - UINT32 width; - UINT32 height; /// Resolution - UINT32 fps; /// Frame rate -}; - -/** - * Helper function to enumerate camera supported image type and resolution. 
- * - * @param camera_index: selected camera index - * @return Enumeration of image type and resolution - */ -std::vector EnumerateCameraImageTypes(int camera_index) { - std::vector formats; - - // Initialize Media Foundation - HRESULT hr = MFStartup(MF_VERSION); - if (FAILED(hr)) { - std::cout << "Failed to initialize Media Foundation" << std::endl; - return formats; - } - - // Enumerate video capture devices - IMFAttributes* pAttributes = nullptr; - IMFActivate** ppDevices = nullptr; - UINT32 devicecount = 0; - - hr = MFCreateAttributes(&pAttributes, 1); - if (FAILED(hr) || pAttributes == nullptr) { - std::cerr << "Failed to create source resolver" << std::endl; - MFShutdown(); - return formats; - } - - hr = pAttributes->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, - MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID); - if (FAILED(hr)) { - std::cerr << "Failed to set device capture attribute" << std::endl; - pAttributes->Release(); - MFShutdown(); - return formats; - } - - hr = MFEnumDeviceSources(pAttributes, &ppDevices, &devicecount); - pAttributes->Release(); - if (FAILED(hr) || ppDevices == nullptr || devicecount == 0 || - camera_index >= static_cast(devicecount)) { - std::cerr << "No valid video capture devices found" << std::endl; - if (ppDevices) { - for (UINT32 i = 0; i < devicecount; i++) { - ppDevices[i]->Release(); - } - CoTaskMemFree(ppDevices); - } - MFShutdown(); - return formats; - } - - // Activate the selected device - IMFMediaSource* pMediaSource = nullptr; - hr = ppDevices[camera_index]->ActivateObject(IID_PPV_ARGS(&pMediaSource)); - for (UINT32 i = 0; i < devicecount; i++) { - ppDevices[i]->Release(); - } - CoTaskMemFree(ppDevices); - if (FAILED(hr) || pMediaSource == nullptr) { - std::cerr << "Failed to activate media source" << std::endl; - MFShutdown(); - return formats; - } - - IMFSourceReader* pSourceReader = nullptr; - hr = MFCreateSourceReaderFromMediaSource(pMediaSource, nullptr, &pSourceReader); - pMediaSource->Release(); - if (FAILED(hr) 
|| pSourceReader == nullptr) { - std::cerr << "Failed to create source reader" << std::endl; - MFShutdown(); - return formats; - } - - // Enumerate available formats - DWORD dwStreamIndex = 0, mediaTypeIndex = 0; - while (true) { - IMFMediaType* pType = nullptr; - hr = pSourceReader->GetNativeMediaType(dwStreamIndex, mediaTypeIndex, &pType); - if (hr == MF_E_NO_MORE_TYPES) { - mediaTypeIndex = 0; - dwStreamIndex++; - hr = pSourceReader->GetNativeMediaType(dwStreamIndex, mediaTypeIndex, &pType); - if (hr == MF_E_INVALIDREQUEST || hr == MF_E_NO_MORE_TYPES) break; - } - if (FAILED(hr)) break; - GUID subtype; - hr = pType->GetGUID(MF_MT_SUBTYPE, &subtype); - if (SUCCEEDED(hr)) { - // Get the resolution - UINT32 width = 0, height = 0; - hr = MFGetAttributeSize(pType, MF_MT_FRAME_SIZE, &width, &height); - if (SUCCEEDED(hr)) { - // Get the frame rate - UINT32 numerator = 0, denominator = 0; - hr = MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &numerator, &denominator); - if (SUCCEEDED(hr) && denominator != 0) { - UINT32 fps = numerator / denominator; - if (fps <= 30) { - formats.push_back({subtype, width, height, fps}); - } - } - } - } - pType->Release(); - mediaTypeIndex++; - } - - pSourceReader->Release(); - MFShutdown(); - - return formats; -} -#endif - -bool SetupCamera(int camera_index, const std::vector& res_list, cv::VideoCapture* camera) { - // list certain API preferences before CAP_ANY to try them first - // regardless of opencv's ordering - static const int camera_api_preference[] = { -#ifdef _WIN32 - cv::CAP_DSHOW, cv::CAP_MSMF, -#else - cv::CAP_V4L2, -#endif - cv::CAP_ANY}; - - if (camera == nullptr) { - return false; - } - -#ifdef _WIN32 - std::vector camera_format = EnumerateCameraImageTypes(camera_index); -#endif - - for (auto api : camera_api_preference) { - try { - camera->open(camera_index, api); - if (camera->isOpened()) { - break; - } - } catch (std::exception& e) { - std::cout << "SetupCamera exception(" << api << "): " << e.what() << std::endl; - } - 
} - - if (camera->isOpened() != true) { - std::cout << "Failed to open camera device with id:" << camera_index << std::endl; - return false; - } - - bool result = false; -#ifdef _WIN32 - int selected_index = -1; - UINT32 highest_fps = 0; - GUID selected_type = GUID_NULL; -#endif - - for (auto res : res_list) { -#ifdef _WIN32 - // Nested loop to compare camera_format resolution and match exactly, or as closely as possible - for (size_t i = camera_format.size() - 1; i > 0; i--) { - // Check for resolution match and either highest fps or MJPG format - if (camera_format[i].width == res.width && camera_format[i].height == res.height && - camera_format[i].fps <= 30 && - (camera_format[i].fps > highest_fps || - (camera_format[i].type == MFVideoFormat_MJPG && selected_type != MFVideoFormat_MJPG))) { - selected_index = static_cast(i); - highest_fps = camera_format[i].fps; - selected_type = camera_format[i].type; - } - } - if (selected_index >= 0) { - break; - } - } - if (selected_index >= 0) { - camera->set(cv::CAP_PROP_FRAME_WIDTH, camera_format[selected_index].width); - camera->set(cv::CAP_PROP_FRAME_HEIGHT, camera_format[selected_index].height); - camera->set(cv::CAP_PROP_FPS, camera_format[selected_index].fps); - if (selected_type == MFVideoFormat_MJPG) { // select MJPG if it was available from previous for - // loop, else use default in OpenCV camera API - camera->set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G')); - std::cout << "Selecting MJPG format." 
<< std::endl; - } - auto w = camera->get(cv::CAP_PROP_FRAME_WIDTH); - auto h = camera->get(cv::CAP_PROP_FRAME_HEIGHT); - auto fps = camera->get(cv::CAP_PROP_FPS); - if (w != camera_format[selected_index].width || h != camera_format[selected_index].height) { - std::cout << "Camera doesn't support " << camera_format[selected_index].width << "x" - << camera_format[selected_index].height << std::endl; - } else { - std::cout << "Camera enabled at " << w << "x" << h << "@" << fps << std::endl; - result = true; - } -#else - camera->set(cv::CAP_PROP_FRAME_WIDTH, res.width); - camera->set(cv::CAP_PROP_FRAME_HEIGHT, res.height); - camera->set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G')); - camera->set(cv::CAP_PROP_FPS, 30); - auto w = camera->get(cv::CAP_PROP_FRAME_WIDTH); - auto h = camera->get(cv::CAP_PROP_FRAME_HEIGHT); - auto fps = camera->get(cv::CAP_PROP_FPS); - if (w != res.width || h != res.height) { - std::cout << "Camera doesn't support " << res.width << "x" << res.height << std::endl; - } else { - std::cout << "Camera enabled at " << w << "x" << h << "@" << fps << std::endl; - result = true; - } -#endif - } - - if (!result) { - std::cout << "No supported resolution for camera." << std::endl; - camera->release(); - } - std::cout << "Selected " << camera->get(cv::CAP_PROP_FRAME_WIDTH) << "x" - << camera->get(cv::CAP_PROP_FRAME_HEIGHT) << "@" << camera->get(cv::CAP_PROP_FPS) - << std::endl; - return result; -} - -} // namespace utils -} // namespace sample -} // namespace cvml -} // namespace amd diff --git a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-utils.cpp b/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-utils.cpp deleted file mode 100644 index 9df2a551..00000000 --- a/Ryzen-AI-CVML-Library/samples/common-sample-utils/src/common-sample-utils.cpp +++ /dev/null @@ -1,515 +0,0 @@ -/* - * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. 
- */ - -#include "common-sample-utils.h" - -#include -#include -#include -#include - -using amd::cvml::sample::utils::CamRes; - -namespace amd { -namespace cvml { -namespace sample { -namespace utils { - -/** - * Helper function to determine output display scale factor. - * - * @param user: User structure for execution flags, etc. - * @param frame_out: Reference to unscaled output buffer - * @return Desired width/height scale factor - */ -static double CalculateDispScaling(const float disp_window_scale, const cv::Mat& frame_out) { - if (disp_window_scale != 0.0f) { - return static_cast(disp_window_scale); - } - const double TARGET_WIDTH = 960; - const double TARGET_HEIGHT = 960; - double scale_width = 1.0; - double scale_height = 1.0; - - if (frame_out.cols > TARGET_WIDTH) { - scale_width = TARGET_WIDTH / frame_out.cols; - } - if (frame_out.rows > TARGET_HEIGHT) { - scale_height = TARGET_HEIGHT / frame_out.rows; - } - - // use the smaller of two scale factors - return scale_width < scale_height ? scale_width : scale_height; -} - -bool RunFeatureClass::GetSingleVideoFrame(uint32_t frame_id) { - (void)frame_id; - - if (video_input_.isOpened()) { - // video capture device input - cv::Mat tmp; - if (!video_input_.read(tmp)) { - return false; - } - cv::cvtColor(tmp, frame_rgb_, cv::COLOR_BGR2RGB); - } - - // in single frame case, frame_rgb_ has already been pre-loaded - return true; -} - -void RunFeatureClass::SetContextStreamingModeBySrc(amd::cvml::Context* context, - const std::string& src_path) { - // assume camera index if a number is provided - const std::string input_str = src_path.empty() ? 
"0" : src_path; - std::string ext = static_cast(input_str).extension().string(); - if (ext.length() == 0 && std::isdigit(input_str[0])) { - context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONLINE_STREAMING); - } else { - // check if we can treat the input as an image - auto frame_rgb = cv::imread(input_str); - if (!frame_rgb.empty()) { - context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONE_SHOT); - } else { - // assume the input is a video file - context->SetStreamingMode(amd::cvml::Context::StreamingMode::OFFLINE_STREAMING); - } - } -} - -bool RunFeatureClass::RunFeatureStreaming() { - bool user_exit = false; - - uint32_t frame_id; // frame counter - - // set FPS to be the same as the input device/file, or 30FPS - if (video_input_.isOpened()) { - stream_fps_ = video_input_.get(cv::CAP_PROP_FPS); - if (is_camera_) stream_fps_ = 30.0f; - } - - // ms time for stream_fps_ - std::chrono::milliseconds test_fps_period_ = - std::chrono::milliseconds(static_cast(1000 / stream_fps_)); - - // record start time - // std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); - std::chrono::steady_clock::time_point start_time; - - // - // Iterate over frames - // No special handling for frame_id overflow, but this can handle 2^32 frames - // so is good enough for a sample application. 
- // - for (frame_id = 1; GetSingleVideoFrame(frame_id - 1); ++frame_id) { - if (frame_id == 2) start_time = std::chrono::steady_clock::now(); - - // Run feature and measure effective fps (execution time of a single feature call) - std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); - cv::Mat frame_out = Feature(frame_rgb_); - std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - - bool window_shown = false; // whether or not an output window was shown - - if (!frame_out.empty()) { - // open requested output file - if (open_output_file_ && !video_output_.isOpened()) { - // only attempt to open once - open_output_file_ = false; - // fourcc encoding format for output video(s) - int fourcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v'); - // set output resolution the same as first output frame - cv::Size output_size(frame_out.cols * (side_by_side_ ? 2 : 1), frame_out.rows); - bool result = video_output_.open(output_file_, fourcc, stream_fps_, output_size); - if (!result) { - std::cout << "Failed to open output file: " << output_file_ << std::endl; - } - } - - // - // Optionally put input/output side by side - // - if (side_by_side_) { - cv::Mat tmp; - - cv::resize(frame_out, tmp, cv::Size(frame_rgb_.cols, frame_rgb_.rows)); - cv::hconcat(frame_rgb_, tmp, frame_out); - } - - cv::cvtColor(frame_out, frame_out, cv::COLOR_RGB2BGR); - - // - // Write output video - // - if (video_output_.isOpened()) { - video_output_.write(frame_out); - } - - // optionally show input window - if (input_window_name_ != nullptr) { - cv::Mat tmp; - cv::cvtColor(frame_rgb_, tmp, cv::COLOR_RGB2BGR); - cv::imshow(input_window_name_, tmp); // Show the input frame - window_shown = true; - } - - // - // Display the output frame at resized width/height - // - if (!output_window_name_.empty()) { - cv::Mat frame_disp{}; // OpenCV display buffer - - double disp_scaling = CalculateDispScaling(disp_window_scale_, frame_out); - cv::resize(frame_out, 
frame_disp, cv::Size(), disp_scaling, disp_scaling); - - // Add fps to window name - std::stringstream window_title_ss; - window_title_ss - << output_window_name_ << " | Inference time: " << std::fixed << std::setprecision(1) - << std::chrono::duration_cast(end - begin).count() << " ms"; - - cv::imshow(output_window_name_.c_str(), frame_disp); - cv::setWindowTitle(output_window_name_.c_str(), - window_title_ss.str()); // Update fps in window - window_shown = true; - } - } // if (!frame_out.empty()) - - if (window_shown) { - char c = static_cast(cv::pollKey()); - if (tolower(c) == tolower(frame_shot_key_)) { // save frame shot - cv::imwrite(GetTimestamp() + "_frame_" + std::to_string(frame_id) + ".png", frame_out); - } - - // - // Quit if window was closed - // OpenCV throws an exception if the window is invalid, so catch it here. - // - try { - if (input_window_name_ != nullptr && - cv::getWindowProperty(input_window_name_, cv::WND_PROP_AUTOSIZE) == -1) { - user_exit = true; - break; - } - if (!output_window_name_.empty() && - cv::getWindowProperty(output_window_name_.c_str(), cv::WND_PROP_AUTOSIZE) == -1) { - user_exit = true; - break; - } - } catch (std::exception& e) { - (void)e; // ignore the error - user_exit = true; - break; - } - } - - // Rough simulation of test FPS. Figure out how much time should have - // passed based on how many iterations have executed, and inject some - // additional delay if ahead of schedule. 
- auto current_time = std::chrono::steady_clock::now(); - auto elapsed_time = - std::chrono::duration_cast(current_time - start_time); - - // extra sleep to simulate expected FPS - if (elapsed_time < test_fps_period_ * (frame_id - 1)) { - // std::cout << "Sleep triggered, printout" << std::endl; - std::this_thread::sleep_for(test_fps_period_ * frame_id - elapsed_time); - } - } - - // final clean up if not repeating - if (user_exit == true || repeat_image_video_ == false) { - // clean up OpenCV windows - cv::destroyAllWindows(); - - // perform cleanup and prepare to exit - if (video_output_.isOpened()) { - video_output_.release(); - std::cout << "Output file saved: " << output_file_ << std::endl; - } - } - - return user_exit; -} - -void RunFeatureClass::RunFeatureVideoFile(const std::string& input_file) { - std::cout << "Opening video file: " << input_file << std::endl; - bool user_exit = true; - do { - video_input_ = cv::VideoCapture(input_file); - if (!video_input_.isOpened()) { - std::cout << "Failed to open video file: " << input_file << std::endl; - } else { - user_exit = RunFeatureStreaming(); - - video_input_.release(); - } - } while (user_exit == false && repeat_image_video_ == true); -} - -void RunFeatureClass::RunFeature(const std::string& input, const std::string& output_file, - const std::string& window_title, - std::vector* supported_res) { - // attempt to open output file later if name specified - open_output_file_ = output_file.size() > 0; - if (open_output_file_) { - std::filesystem::path output_path(output_file); - output_file_ = std::filesystem::absolute(output_path).string(); - } - output_window_name_ = window_title; - - // default frame rate - stream_fps_ = 30; - - // - // Determine input type based on the incoming string. - // Default to camera index 0 if no input specified. - // e.g., - // still jpeg - "image.jpg" - // video clip - "clip.mp4" - // camera 2 - "2" - // - const std::string input_str = input.empty() ? 
"0" : input; - std::string ext = static_cast(input_str).extension().string(); - bool is_image{false}; - bool is_video{false}; - is_camera_ = false; - - // assume camera index if a number is provided - if (ext.length() == 0 && std::isdigit(input_str[0])) { - is_camera_ = true; - } else { - // check if we can treat the input as an image - frame_rgb_ = cv::imread(input_str); - if (!frame_rgb_.empty()) { - cv::cvtColor(frame_rgb_, frame_rgb_, cv::COLOR_BGR2RGB); - is_image = true; - } else { - // assume the input is a video file - is_video = true; - } - } - - if (is_camera_) { - // - // Camera - // - // preferred camera resolution list - const std::vector camera_res_list = {{1920, 1080}, {1280, 720}}; - // requested camera index - int camera_index = static_cast(std::strtod(input_str.c_str(), nullptr)); - - std::cout << "Opening camera index: " << camera_index << std::endl; - if (amd::cvml::sample::utils::SetupCamera( - camera_index, supported_res == nullptr ? camera_res_list : *supported_res, - &video_input_)) { - RunFeatureStreaming(); - video_input_.release(); - } - } else if (is_image) { - // - // Still image file, contents read earlier - // - std::cout << "Image file read: " << input_str << std::endl; - if (repeat_image_video_) { - RunFeatureStreaming(); - } else { - cv::Mat frame_out = Feature(frame_rgb_); - cv::cvtColor(frame_out, frame_out, cv::COLOR_RGB2BGR); - - if (output_file_.size() > 0) { - cv::imwrite(output_file_, frame_out); - std::cout << "Output file saved: " << output_file_ << std::endl; - } - } - } else if (is_video) { - // - // Video file - // - RunFeatureVideoFile(input_str); - } -} - -std::string CreateFolderWithTimestamp() { - std::string file_save_path = GetTimestamp(); - namespace fs = std::filesystem; - if (fs::create_directories(file_save_path)) - return file_save_path; - else - return {}; -} - -std::string GetTimestamp() { - std::string timestamp{}; - std::stringstream mon_s, day_s, hour_s, min_s, sec_s; -#ifdef _WIN32 - struct tm ltm; - 
time_t now = time(0); - localtime_s(<m, &now); -#else - time_t now = time(&now); - struct tm ltm; - localtime_r(&now, <m); -#endif - mon_s << std::setw(2) << std::setfill('0') << (ltm.tm_mon + 1); - day_s << std::setw(2) << std::setfill('0') << ltm.tm_mday; - hour_s << std::setw(2) << std::setfill('0') << ltm.tm_hour; - min_s << std::setw(2) << std::setfill('0') << ltm.tm_min; - sec_s << std::setw(2) << std::setfill('0') << ltm.tm_sec; - timestamp = mon_s.str() + day_s.str() + hour_s.str() + min_s.str() + sec_s.str(); - return timestamp; -} - -void GetPlatformInformation() { - // print supported platform information - amd::cvml::SupportedPlatformInformation info{}; - amd::cvml::Context::GetSupportedPlatformInformation(&info); - - if (info.supported_platform_count > 0) { - std::cout << "Required minimam Vulkan driver version: 0x" << std::hex - << info.platform[0].required_gpu_minimal_vulkan_driver_version << std::endl; - } -} - -bool ParseArguments(int argc, char** const argv, std::string* input_str, std::string* output_file, - const char* arg_help) { - if (input_str == nullptr || output_file == nullptr || argv == nullptr) { - return false; - } - - for (int i = 1; i < argc; ++i) { - if (std::string(argv[i]) == "-i" && ((i + 1) < argc)) { - *input_str = argv[++i]; - } else if (std::string(argv[i]) == "-o" && ((i + 1) < argc)) { - *output_file = argv[++i]; - } else { - std::string app_name{"sample"}; - - try { - std::filesystem::path app_path = argv[0]; - app_name = app_path.stem().string(); - } catch (std::exception& e) { - // do nothing - (void)e; - } - if (arg_help == nullptr) { - std::cout << "Usage: " << app_name - << " [-i input] [-o file]\n" - " -i\tSpecify an input image/video file or camera device index\n" - " -o\tSpecify output image/video file name\n"; - } else { - // use argument help override string - std::cout << "Usage: " << app_name << " " << arg_help; - } - std::cout - << "\n" - " Opens the specified input device and runs the feature against it. 
Results are\n" - " displayed in an output window and optionally saved to a file. If no arguments\n" - " are provided, the application attempts to capture input from camera index 0\n" - << std::endl; - return false; - } - } - return true; -} - -void PutRectangle(cv::Mat* image, const cv::Rect& rect, const cv::Scalar& color) { - if (image == nullptr) { - return; - } - - auto alpha = color[3]; - if (alpha == 0 || alpha == 255 || image->type() != CV_8UC3) { - // simple rectangle - cv::rectangle(*image, rect, color, -1); - } else { - // alpha blend - auto x_min = (std::clamp)(rect.x, 0, image->cols); - auto y_min = (std::clamp)(rect.y, 0, image->rows); - auto x_max = (std::clamp)(rect.x + rect.width, 0, image->cols); - auto y_max = (std::clamp)(rect.y + rect.height, 0, image->rows); - auto r = color[0] * alpha / 255.0; - auto g = color[1] * alpha / 255.0; - auto b = color[2] * alpha / 255.0; - auto alpha_1 = (255.0 - alpha) / 255.0; - for (auto y = y_min; y < y_max; ++y) { - // image type checked above, 3 bytes / pixel - auto ptr = image->ptr(y) + x_min * 3; - for (auto x = x_min; x < x_max; ++x) { - *ptr = static_cast(*ptr * alpha_1 + r); - ++ptr; - *ptr = static_cast(*ptr * alpha_1 + g); - ++ptr; - *ptr = static_cast(*ptr * alpha_1 + b); - ++ptr; - } - } - } -} - -int PutText(cv::Mat* image, const std::string& display_text, const int text_row, - cv::Scalar text_color, const int override_x, const int text_height, - const bool fill_background, cv::Scalar background_color) { - static int TEXT_HEIGHT = 30; // hard coded text height, because getTextSize isn't reliable - static int TEXT_BOX_OFFSET = 5; // offset for background box - static int TEXT_PADDING = 3; // space between rows of text - static const int TEXT_THICKNESS = 2; - static const double TEXT_SCALE = 1.0; - - if (image == nullptr || text_row < 0) { - // silently return - return -1; - } - - double text_scale = TEXT_SCALE; - int text_h; - - if (text_height == 0) { - // default to 1.0 text scaling - text_h = 
static_cast(TEXT_HEIGHT * text_scale); - } else { - // update text scale based on desired height and image - text_h = text_height * image->rows / 100; - text_scale = static_cast(text_h) / TEXT_HEIGHT; - } - - // cppcheck-suppress knownConditionTrueFalse - int text_font = text_scale > 1.0 ? cv::FONT_HERSHEY_DUPLEX : cv::FONT_HERSHEY_PLAIN; - int text_box_offset = static_cast(TEXT_BOX_OFFSET * text_scale + 0.5); - int text_padding = static_cast(TEXT_PADDING * text_scale + 0.5); - int text_thickness = static_cast(TEXT_THICKNESS * text_scale); - - // calculate text height/width - auto text_size = cv::getTextSize(display_text, text_font, text_scale, text_thickness, nullptr); - - // constant left starting point for english text - int org_x = TEXT_PADDING; - - if (override_x > 0) { - if (override_x & PUTTEXT::OVERRIDE_ABSOLUTE) { - org_x = override_x & ~(PUTTEXT::OVERRIDE_ABSOLUTE); - } else { - // handle text centering - org_x = override_x - text_size.width / 2; - } - } - - cv::Point2i origin = cv::Point2i(org_x, (text_h + text_padding) * (text_row + 1)); - - if (fill_background) { - // draw rectangle on frame for each text - cv::Rect rectangle(origin - cv::Point2i(0, text_h - text_box_offset), - cv::Size(text_size.width, text_h)); - PutRectangle(image, rectangle, background_color); - } - - // actually display the text - cv::putText(*image, display_text, origin, text_font, text_scale, text_color, text_thickness); - return org_x + text_size.width; -} - -} // namespace utils -} // namespace sample -} // namespace cvml -} // namespace amd diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/CMakeLists.txt deleted file mode 100644 index 1025ba17..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. 
- -set(PROJECT_NAME cvml-sample-depth-estimation) -project(${PROJECT_NAME}) - -list(APPEND CMAKE_MODULE_PATH ${OPENCV_INSTALL_ROOT} ${AMD_CVML_SDK_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/../..") - -# -# Find OpenCV and set include directories -# -find_package(OpenCV REQUIRED) -include_directories(${OpenCV_INCLUDE_DIRS}) - -# -# Find Ryzen AI CVML library (also sets include directories) -# -find_package(RyzenAILibrary REQUIRED) - -# -# Additional include folders for the sample -# -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../common-sample-utils/include) - -# -# Define source files, application and link libraries -# -file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -add_executable(${PROJECT_NAME} ${SOURCES}) - -target_link_libraries(${PROJECT_NAME} - ${OpenCV_LIBS} - ${RyzenAILibrary_LIBS} - common-sample-utils -) - -# -# Installation rules. -# -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${PROJECT_NAME}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${PROJECT_NAME}) diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/main.cpp b/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/main.cpp deleted file mode 100644 index 4e3b088c..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-depth-estimation/main.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. - */ -#include -#include - -#include -#include - -#include "opencv2/opencv.hpp" - -using amd::cvml::DepthEstimation; -using amd::cvml::Image; -using amd::cvml::ImageType; - -/** - * Declare local class for sample variables and functions. 
- */ -class DepthEstimationSample : public amd::cvml::sample::utils::RunFeatureClass { - public: - amd::cvml::DepthEstimation* depth_estimation_{nullptr}; ///< Depth Estimation feature - std::string input_str_{}; ///< frame source: image or video or camera - // cppcheck-suppress duplInheritedMember - std::string output_file_{}; ///< Output file path/name - - /** - * Post process depth map for opencv visualization. - * - * @param depth_map Pointer to depth map - * @return Postprocessed depth-map - */ - cv::Mat DepthEstimationCvmlToOpenCV(const Image* depth_map); - - /** - * Run Depth Estimation on single frame - * - * @param frame_rgb Incoming RGB frame - * @return Output RGB frame - */ - cv::Mat Feature(const cv::Mat& frame_rgb) override; -}; - -cv::Mat DepthEstimationSample::Feature(const cv::Mat& frame_rgb) { - cv::Mat frame_out = frame_rgb.clone(); // OpenCV output buffer - cv::Mat display_buffer; // display buffer for model output - - if (depth_estimation_ == nullptr) { - // return empty output - return frame_out; - } - - Image input_frame_amd_image(amd::cvml::Image::Format::kRGB, amd::cvml::Image::DataType::kUint8, - frame_rgb.cols, frame_rgb.rows, frame_rgb.data); - - // Create destination output - amd::cvml::Image output_img(amd::cvml::Image::Format::kGrayScale, - amd::cvml::Image::DataType::kFloat32, frame_rgb.cols, frame_rgb.rows, - nullptr); - - // Depth Estimation - bool depth_map_generated = - depth_estimation_->GenerateDepthMap(input_frame_amd_image, &output_img); - if (!depth_map_generated) { - std::cout << "Failed to generate depth map" << std::endl; - throw std::runtime_error("Failed to generate depth map!"); - } - return DepthEstimationCvmlToOpenCV(&output_img); -} - -cv::Mat DepthEstimationSample::DepthEstimationCvmlToOpenCV(const Image* depth_map) { - cv::Mat depth_map_or_mat_raw; - cv::Mat frame_out; - - if (depth_map != nullptr) { - float* depth_map_or_p = - reinterpret_cast(reinterpret_cast(depth_map->GetBuffer())); - if (depth_map_or_p == 
nullptr) { - throw std::runtime_error("Failed to get depth map data!"); - } - depth_map_or_mat_raw = - cv::Mat{static_cast(depth_map->GetHeight()), static_cast(depth_map->GetWidth()), - CV_32FC1, depth_map_or_p}; - cv::Mat tmp; - depth_map_or_mat_raw.convertTo(tmp, CV_8U, 255); - cv::cvtColor(tmp, frame_out, cv::COLOR_GRAY2RGB); - cv::applyColorMap(frame_out, frame_out, cv::COLORMAP_SPRING); - } - return frame_out; -} - -/** - * Main entry point of the sample application. - * - * @param argc: Number of command line arguments - * @param argv: Array of command line arguments - * @return 0 on success - */ -int main(int argc, char** argv) { - DepthEstimationSample de_sample; - - // show both input and output images - de_sample.side_by_side_ = true; - - // parse command line arguments - if (!amd::cvml::sample::utils::ParseArguments(argc, argv, &de_sample.input_str_, - &de_sample.output_file_)) { - return -1; - } - - try { - // create CVML SDK context for the feature - auto context = amd::cvml::CreateContext(); - if (!context) { - std::cerr << "Failed to create context" << std::endl; - } else { - // select backend (optional) - context->SetInferenceBackend(amd::cvml::Context::InferenceBackend::AUTO); - - // initialize depth estimation class - amd::cvml::DepthEstimation depth_estimation(context); - - // execute main sample application loop with the created feature - de_sample.depth_estimation_ = &depth_estimation; - - // run the feature against input frames and local_data - de_sample.RunFeature(de_sample.input_str_, de_sample.output_file_, "AMD Depth Estimation"); - } - - // release previously created context - if (context) { - context->Release(); - } - } catch (std::exception& e) { - std::cerr << "Sample application error: " << e.what() << std::endl; - } - return 0; -} diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/CMakeLists.txt deleted file mode 100644 index 
1f6ab5a3..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. - -set(FEATURE_NAME cvml-sample-face-detection) -project(${FEATURE_NAME}) - -list(APPEND CMAKE_MODULE_PATH ${OPENCV_INSTALL_ROOT} ${AMD_CVML_SDK_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/../..") - -find_package(OpenCV REQUIRED) -find_package(RyzenAILibrary REQUIRED) - -file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -include_directories(${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../common-sample-utils/include) - -add_executable(${PROJECT_NAME} ${SOURCES}) - -target_link_libraries(${PROJECT_NAME} - ${OpenCV_LIBS} - ${RyzenAILibrary_LIBS} - common-sample-utils -) - -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME}) diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/README.md b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/README.md deleted file mode 100644 index b5c9d67a..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# `cvml-sample-face-detection` - -This sample demonstrates the implementation of AMD's Face Detection feature. -It detects faces along with five landmarks(two eyes, nose and two edges of the mouth) and confidence scores in the input image, video, or camera. -Then visually represents the results in output images or video. - -## Usage - -```sh -cvml-sample-face-detection.exe [-i path_to_image/video] [-o output image/video filename] [-m fd_model] [-h] -Options --i: Run face detection on the given image or video. (Optional) --o: Specify output image or video file name e.g., .mp4 or .jpg. (Optional) --m: Specify face detection model (precise/fast). Fast is the default. (Optional) --h: Show usage. 
-If no arguments are provided, the application attempts to capture input from camera index 0. - -Examples -Run the sample with an image input without output file: -cvml-sample-face-detection.exe -i my_image.jpg - -Run the sample with a video input and save the result to an output video file: -cvml-sample-face-detection.exe -i my_video.mp4 -o output_video.mp4 - -Run the sample to capture the camera feed using the "precise" model and save the result to a video file: -cvml-sample-face-detection.exe -m precise -o output_video.mp4 - -Note -If the user runs the application without any arguments, it will use the camera as an input. - diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/main.cpp b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/main.cpp deleted file mode 100644 index e202800b..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-detection/main.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. - */ -#include -#include - -#include - -#include "opencv2/opencv.hpp" - -/** - * Declare local structure for sample variables. - */ -class FaceDetectionSample : public amd::cvml::sample::utils::RunFeatureClass { - public: - amd::cvml::FaceDetector* face_detector_{nullptr}; /// Face Detection feature - std::string src_path_{}; /// Input path/device - // cppcheck-suppress duplInheritedMember - std::string output_file_{}; /// Output file path/name - amd::cvml::FaceDetector::FDModelType fd_model_{}; /// FD model to use - - /** - * Run Face Detection on single frame - * - * @param frame_rgb Incoming RGB frame - * @return Output RGB frame - */ - cv::Mat Feature(const cv::Mat& frame_rgb) override; -}; - -/** - * Draw detected faces. 
- * - * @param rgb_img Pointer to the target image - * @param faces Detected face structure - * @param bbox_color Draw color for bounding boxes - * @param landmark_color Draw color for landmarks - * @param landmark_size Size of landmark circle - */ -void DrawFaces(cv::Mat* rgb_img, const amd::cvml::Array& faces, - const cv::Scalar& bbox_color = cv::Scalar(0, 255, 0), - const cv::Scalar& landmark_color = cv::Scalar(0, 0, 255), int landmark_size = 2) { - if (rgb_img == nullptr) { - return; - } - - // go through all the detected faces - for (size_t k = 0; k < faces.size(); ++k) { - const amd::cvml::Face& curr_face = faces[k]; - // convert amd::face to cv::Rect - cv::Rect cv_face(curr_face.face_.x_, curr_face.face_.y_, curr_face.face_.width_, - curr_face.face_.height_); - - // draw a bounding box for each face - cv::rectangle(*rgb_img, cv_face, bbox_color); - - // print confidence score - std::ostringstream out_text; - out_text.precision(2); - out_text << std::fixed << curr_face.confidence_score_; - cv::putText(*rgb_img, out_text.str(), cv::Point(cv_face.x, cv_face.y), cv::FONT_HERSHEY_SIMPLEX, - 0.5, cv::Scalar(0, 255, 255), 2); - - // draw landmarks - for (size_t j = 0; j < curr_face.landmarks_.size(); ++j) { - const amd::cvml::Point2i& landmark = curr_face.landmarks_[j]; - cv::Point center(landmark.x_, landmark.y_); - cv::circle(*rgb_img, center, landmark_size, landmark_color, cv::FILLED, cv::FILLED); - } - } -} - -cv::Mat FaceDetectionSample::Feature(const cv::Mat& frame_rgb) { - std::string title{}; - cv::Mat frame_w_faces = frame_rgb; - if (face_detector_ == nullptr) { - // return empty output - return cv::Mat{}; - } - // convert to amd::cvml::Image - amd::cvml::Image input_img(amd::cvml::Image::Format::kRGB, amd::cvml::Image::DataType::kUint8, - frame_rgb.cols, frame_rgb.rows, frame_rgb.data); - // step 1: run face detect on input frame - auto faces = face_detector_->Detect(input_img); - // step 2: draw faces on output frame - DrawFaces(&frame_w_faces, faces); - 
- return frame_w_faces; -} - -void PrintHelpMessage() { - std::cout << "Usage: cvml-sample-face-detection [-i path to image/video] [-o output " - "image/video filename] [-h] [-m fd model]" - << std::endl; - std::cout << " -i\trun face detection on video or image given the path" << std::endl; - std::cout << " -o\tspecify output video or image file name. e.g. . Optional." - << std::endl; - std::cout << " -m\tspecify face detection model. e.g. . Optional. Fast " - "is the default" - << std::endl; - std::cout << " -t\t Enable or disable face tracking. Disabled by default" << std::endl; - std::cout << " -h\tshow usage" << std::endl; - - std::cout << "Example 1: cvml-sample-face-detection -i image.jpg" << std::endl; - std::cout << "Example 2: cvml-sample-face-detection -h" << std::endl; - std::cout << "Example 3: cvml-sample-face-detection -i image.jpg -m precise" << std::endl; -} - -bool ParseArguments(int argc, char** argv, FaceDetectionSample* local_data) { - std::string fd_model_str; - for (int i = 0; i < argc; i++) { - if (std::string(argv[i]) == "-i" && ((i + 1) < argc)) { - local_data->src_path_ = argv[i + 1]; - } else if (std::string(argv[i]) == "-o" && ((i + 1) < argc)) { - local_data->output_file_ = argv[i + 1]; - } else if (std::string(argv[i]) == "-m" && ((i + 1) < argc)) { - fd_model_str = argv[i + 1]; - } else if (std::string(argv[i]) == "-h") { - PrintHelpMessage(); - return false; - } - } - - // choose fd model - if (fd_model_str == "precise") { - local_data->fd_model_ = amd::cvml::FaceDetector::FDModelType::Precise; - std::cout << "Running with precise Retinaface model" << std::endl; - } else { // default - local_data->fd_model_ = amd::cvml::FaceDetector::FDModelType::Fast; - std::cout << "Running with fast Retinaface model" << std::endl; - } - return true; -} - -void SetContextStreamingMode(const std::string& src_path, amd::cvml::Context* context) { - // assume camera index if a number is provided - const std::string input_str = src_path.empty() ? 
"0" : src_path; - std::string ext = static_cast(input_str).extension().string(); - if (ext.length() == 0 && std::isdigit(input_str[0])) { - context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONLINE_STREAMING); - } else { - // check if we can treat the input as an image - auto frame_rgb_ = cv::imread(input_str); - if (!frame_rgb_.empty()) { - context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONE_SHOT); - } else { - // assume the input is a video file - context->SetStreamingMode(amd::cvml::Context::StreamingMode::OFFLINE_STREAMING); - } - } -} - -/** - * Options: - * -i: run face detection on video or image, provide full path to video - * -o: specify output video clip or image file name - * -m specify FD model - precise/fast(default) - * -h: to show usage - */ -int main(int argc, char** argv) { - try { - FaceDetectionSample fd_sample; - if (!ParseArguments(argc, argv, &fd_sample)) { - return -1; - } - - // create CVML SDK context for the feaeture - auto context = amd::cvml::CreateContext(); - if (!context) { - std::cerr << "Failed to create context" << std::endl; - } else { - context->SetInferenceBackend(amd::cvml::Context::InferenceBackend::AUTO); - SetContextStreamingMode(fd_sample.src_path_, context); - - // create the facedetector feature instances - amd::cvml::FaceDetector face_detector(context, fd_sample.fd_model_); - - // execute main sample application loop with the created feature - fd_sample.face_detector_ = &face_detector; - fd_sample.RunFeature(fd_sample.src_path_, fd_sample.output_file_, "AMD Face Detection"); - } - - // release previously created context - if (context) { - context->Release(); - } - } catch (std::exception& e) { - std::cerr << "Sample application error: " << e.what() << std::endl; - } - return 0; -} diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/CMakeLists.txt deleted file mode 100644 index 76be0165..00000000 --- 
a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. - -set(FEATURE_NAME cvml-sample-face-mesh) -project(${FEATURE_NAME}) - -list(APPEND CMAKE_MODULE_PATH ${OPENCV_INSTALL_ROOT} ${AMD_CVML_SDK_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/../..") - -find_package(OpenCV REQUIRED) -find_package(RyzenAILibrary REQUIRED) - -file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -include_directories(${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../common-sample-utils/include) - -add_executable(${PROJECT_NAME} ${SOURCES}) - -target_link_libraries(${PROJECT_NAME} - ${OpenCV_LIBS} - ${RyzenAILibrary_LIBS} - common-sample-utils -) - -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME}) diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/README.md b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/README.md deleted file mode 100644 index 8f525927..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# `cvml-sample-facemesh` - -This sample demonstrates the implementation of AMD's Face Mesh feature. -It detects faces in the input image or video and computes face landmarks and head pose information, and then visually represents the results in output images or video. - -## Usage - -```sh -cvml-sample-facemesh.exe [-i path_to_video/image] [-h] [-o output image/video filename] [-?] -Options --i: Specify an input image or video. --o: Specify output image or video file name e.g., .mp4 or .jpg. --fd: Specify face detection model (precise/fast). Precise is the default. (Optional) --h: Show usage. --?: Show usage. 
- - -Examples: - -Run the sample with a camera input without output file: -cvml-sample-facemesh.exe - -Run the sample with a video input without output file: -cvml-sample-facemesh.exe -i my_video.mp4 - -Run the sample with an image input and save the result to an image file: -cvml-sample-facemesh.exe -i my_image.jpg -o output_image.jpg - -Run the sample to capture the camera feed and save the result to a video file: -cvml-sample-facemesh.exe -o output_video.mp4 - -Note -If the user runs the application without any arguments, it will use the camera as an input. diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/main.cpp b/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/main.cpp deleted file mode 100644 index 467a6f29..00000000 --- a/Ryzen-AI-CVML-Library/samples/cvml-sample-face-mesh/main.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. - */ - -#include -#include -#include - -#include -#include -#include - -#include "opencv2/opencv.hpp" - -namespace ml = amd::cvml; - -/** - * Declare local class for sample variables and functions. 
- */ -class FaceMeshSample : public amd::cvml::sample::utils::RunFeatureClass { - public: - amd::cvml::FaceDetector* face_detector_{nullptr}; /// face detection feature - amd::cvml::FaceDetector::FDModelType fd_model_type_{ - amd::cvml::FaceDetector::FDModelType::Precise}; /// face detection model type - amd::cvml::FaceMesh* face_mesh_{nullptr}; /// face mesh feature - std::string src_path_{}; /// Input path/device - // cppcheck-suppress duplInheritedMember - std::string output_file_{}; /// Output file path/name - /** - * Run Face Mesh on single frame - * - * @param frame_rgb Incoming RGB frame - * @return Output RGB frame - */ - cv::Mat Feature(const cv::Mat& frame_rgb) override; -}; - -/** - * Draw face mesh on the input RGB image with given mesh and head pose information - * - * @param rgb_img Original RGB image - * @param mesh Mesh containing face landmarks - * @param head_pose 3D head pose information - * @return RGB image with face mesh drawn - */ -cv::Mat DrawFaceMesh(const cv::Mat& rgb_img, const ml::FaceMesh::Mesh& mesh) { - // opencv point variables - double point_color[] = {0.0, 0.0, 255.0}; - int scale_size = 360; - - // Going through all the detected landmarks - double highest_point_y = mesh.landmarks_[0].y_; - - for (size_t i = 0; i < mesh.landmarks_.size(); i++) { - auto landmark = mesh.landmarks_[i]; - if (landmark.y_ < highest_point_y) { - highest_point_y = landmark.y_; - } - cv::Point point(static_cast(landmark.x_), static_cast(landmark.y_)); - - cv::circle(rgb_img, point, static_cast(rgb_img.rows / scale_size), - cv::Scalar(point_color[0], point_color[1], point_color[2]), - static_cast(rgb_img.rows / scale_size), cv::FILLED); - } - - return rgb_img; -} - -/** - * Run FaceMesh algorithm on given frame and draw the meshes on the input frame - * - * @param frame_rgb Input image frame in RGB format - * @return Output image frame with face meshes drawn - */ -cv::Mat FaceMeshSample::Feature(const cv::Mat& frame_rgb) { - cv::Mat frame_out = frame_rgb; - 
std::vector meshes; - if (face_mesh_ == nullptr) { - throw std::runtime_error("Incomplete local data"); - } - - // convert to amd::Image - ml::Image amd_img(ml::Image::Format::kRGB, ml::Image::DataType::kUint8, frame_rgb.cols, - frame_rgb.rows, frame_rgb.data); - - // start the clock - auto faces = face_detector_->Detect(amd_img); - - double small_face_size_thr = 0.05; // small face size relative to the frame size - - for (size_t i = 0; i < faces.size(); ++i) { - auto face_width = faces[i].face_.width_; - if (static_cast(face_width) / static_cast(frame_rgb.cols) < - small_face_size_thr) // face is too small - continue; - auto mesh = face_mesh_->CreateMesh(amd_img, faces[i]); - meshes.push_back(mesh); - } - - for (size_t i = 0; i < meshes.size(); ++i) { - frame_out = DrawFaceMesh(frame_out, meshes[i]); - } - - return frame_out; -} - -/** - * Print usage message for the command-line arguments - */ -void PrintUsageMessage() { - std::cout - << "Usage: " - << "cvml-sample-facemesh " - << "[-i path_to_video/image] [-h] [-o output " - "image/video filename] [-?]\n" - "option\n" - " -i\tSpecify an input image or video\n" - " -o\tSpecify output image or video file name e.g .mp4/.jpg\n" - " -fd\tSpecify face detection model (precise/fast). Precise is the default (Optional)\n" - " -h\tShow usage\n" - " -?\tShow usage\n" - "\n" - " Opens the specified input device and runs the feature against it. Results are\n" - " displayed in an output window and optionally saved to a video file. 
If no\n" - " arguments are provided, the application attempts to capture input from\n" - " camera index 0" - << std::endl; -} - -/** - * Parse command-line arguments and update local data class accordingly - * - * @param local_data Pointer to the local data object to store parsed arguments - * @param argc Number of command-line arguments - * @param argv Array of command-line argument strings - * @return true if arguments are parsed successfully, false otherwise - */ -bool ParseArguments(FaceMeshSample* local_data, int argc, char** const argv) { - if (local_data == nullptr || argv == nullptr) { - return false; - } - - for (int i = 0; i < argc; i++) { - if (std::string(argv[i]) == "-i" && ((i + 1) < argc)) { - local_data->src_path_ = std::string(argv[i + 1]); - } else if (std::string(argv[i]) == "-o" && ((i + 1) < argc)) { - local_data->output_file_ = std::string(argv[i + 1]); - } else if (std::string(argv[i]) == "-fd" && ((i + 1) < argc)) { - if (std::string(argv[i + 1]) == "fast") { - local_data->fd_model_type_ = amd::cvml::FaceDetector::FDModelType::Fast; - } else if (std::string(argv[i + 1]) == "precise") { - local_data->fd_model_type_ = amd::cvml::FaceDetector::FDModelType::Precise; - } else { - std::cout << "Invalid Face Detection model type. 
Defaulting to Precise" << std::endl; - } - } else if (std::string(argv[i]) == "-h") { - PrintUsageMessage(); - return false; - } - } - - return true; -} - -/** - * Main function - initializes and runs the FaceMesh sample - */ -int main(int argc, char** const argv) { - FaceMeshSample fm_sample; - try { - // parse arguments - bool parse_ok = ParseArguments(&fm_sample, argc, argv); - if (!parse_ok) return -1; - - // create CVML SDK context for the feaeture - std::shared_ptr context(ml::CreateContext(), [](ml::Context* ctx) { - if (ctx) ctx->Release(); - }); - if (!context) { - std::cerr << "Failed to create context" << std::endl; - return 1; - } - - // select backend (optional) - context->SetInferenceBackend(amd::cvml::Context::InferenceBackend::AUTO); - - fm_sample.SetContextStreamingModeBySrc(context.get(), fm_sample.src_path_); - - // initialize FaceDetector class - ml::FaceDetector fd(context.get(), fm_sample.fd_model_type_); - fm_sample.face_detector_ = &fd; - - // intialize FaceMesh class - ml::FaceMesh fm(context.get()); - fm.SetMaxNumFaces(-1); - fm_sample.face_mesh_ = &fm; - - fm_sample.RunFeature(fm_sample.src_path_, fm_sample.output_file_, "AMD Face Mesh"); - } catch (const std::exception& e) { - std::cerr << e.what() << std::endl; - return 1; - } - return 0; -} diff --git a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.dll b/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.dll deleted file mode 100644 index 41b27846..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f467ad84143b87831f726f5598b422a49a5dd01d95e5891b2e2323b37be4b1f -size 85256 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.graphlib b/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.graphlib deleted file mode 100644 index bbd62f00..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:a8626bfa3fe1311e04b7ec69acd116378f5deee1ade69b02bf07cef2f6381d6c -size 133191680 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.lib b/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.lib deleted file mode 100644 index 496885e0..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-depth-estimation.lib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0747f2ab2804d5cd1504ba7235e522822a827d9816bd46db78ec90e3babb1202 -size 7944 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.dll b/Ryzen-AI-CVML-Library/windows/cvml-face-detector.dll deleted file mode 100644 index acfd4faf..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42386ed9e09a33d1992842eb13c1926936fc1586d0f3a377e32f654401b4f472 -size 2616592 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.graphlib b/Ryzen-AI-CVML-Library/windows/cvml-face-detector.graphlib deleted file mode 100644 index e9c290cb..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bcabe6ca31c7893638dc60149adc8ee3c1c0c37df444760bbb25a5dc4be05fec -size 5314560 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.lib b/Ryzen-AI-CVML-Library/windows/cvml-face-detector.lib deleted file mode 100644 index 05a0c2a1..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-face-detector.lib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54ca58a577afbc6fb445b75e02b342c5db682aad3c108bce8c254aa748a97ec4 -size 9282 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.dll b/Ryzen-AI-CVML-Library/windows/cvml-facemesh.dll deleted file mode 100644 index a06c61c9..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.dll +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:b1c371a64ed61ac88a948c52c28f47c47703c953621fc96d8561c0ad03a6268f -size 121616 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.graphlib b/Ryzen-AI-CVML-Library/windows/cvml-facemesh.graphlib deleted file mode 100644 index 6a666c08..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.graphlib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b41d5f92b51719ee885fc12b98f77b0f739cc7df2105bfd6450ae9d8a6ce8c2 -size 8069120 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.lib b/Ryzen-AI-CVML-Library/windows/cvml-facemesh.lib deleted file mode 100644 index 2c8c81c8..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-facemesh.lib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:27121ecf24d4831bdec0efbf7cbb372b0976a031af38af9661cdbd0e11468fac -size 13382 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-sdk.dll b/Ryzen-AI-CVML-Library/windows/cvml-sdk.dll deleted file mode 100644 index a89b4be2..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-sdk.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:71e0e18682fa64f88290292d77d0279561c383ccbbd70617d0f7e02c1768ba66 -size 4701448 diff --git a/Ryzen-AI-CVML-Library/windows/cvml-sdk.lib b/Ryzen-AI-CVML-Library/windows/cvml-sdk.lib deleted file mode 100644 index 7fb2a8f9..00000000 --- a/Ryzen-AI-CVML-Library/windows/cvml-sdk.lib +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2808611102749ccc62b7a30ee64d6770729e9edad02dd2ddfdfc7a0f9b63fd70 -size 238124 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/cpu/onnxruntime.dll b/Ryzen-AI-CVML-Library/windows/onnx/cpu/onnxruntime.dll deleted file mode 100644 index 06d41b0a..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/cpu/onnxruntime.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:579b636403983254346a5c1d80bd28f1519cd1e284cd204f8d4ff41f8d711559 -size 12418080 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/dml/DirectML.dll b/Ryzen-AI-CVML-Library/windows/onnx/dml/DirectML.dll deleted file mode 100644 index aaec0eb9..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/dml/DirectML.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc44fc08f1edb3cafca24de5361af11ce7ebd327bda2caa7936c632591f93393 -size 12159424 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/dml/onnxruntime_dml.dll b/Ryzen-AI-CVML-Library/windows/onnx/dml/onnxruntime_dml.dll deleted file mode 100644 index 5b29312a..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/dml/onnxruntime_dml.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0275aab0ce420e60a9bc3e2e1ac65a6b1c5236d05645d4d0fb05cf8c63edff12 -size 12633816 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/dml2/DirectML.dll b/Ryzen-AI-CVML-Library/windows/onnx/dml2/DirectML.dll deleted file mode 100644 index c330752f..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/dml2/DirectML.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c9e6d822561c6c41b90e6994b3e8857cf1d66dbfb1e0c4c799c7c89b4e92da1 -size 18527776 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/dml2/onnxruntime_dml2.dll b/Ryzen-AI-CVML-Library/windows/onnx/dml2/onnxruntime_dml2.dll deleted file mode 100644 index e53eeaa9..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/dml2/onnxruntime_dml2.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbdad4dd9c762c4374b61fb95d3f2682dfe351a118ef488712a08ef65782cd2f -size 19114192 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/amd_comgr0604.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/amd_comgr0604.dll deleted file mode 100644 index 97c5b19e..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/amd_comgr0604.dll +++ /dev/null @@ 
-1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:face8405d08135bcf307687b5aedca3061396220f43a0c4bffe134662ff5851d -size 121159576 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc-builtins0604.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc-builtins0604.dll deleted file mode 100644 index 9a8e133d..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc-builtins0604.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c792d7940ad54166f116400f14d393c4e564eadf39428a0b5c56d0b4746accc0 -size 1113496 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc0604.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc0604.dll deleted file mode 100644 index 17479d95..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/hiprtc0604.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0e5913442de957f3faa1133ceedd1a8000831cbc32347bf63f03915b58a6446 -size 1922968 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx.dll deleted file mode 100644 index 9a4c71d1..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b2bea7069c7ebcde2d892699f596e3da490c33407e8480317d6714f84ff0a03 -size 53646848 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_c.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_c.dll deleted file mode 100644 index 3ae0c5fd..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_c.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05356150b248d3c865a6a9221d1b2aa86941def6bccc0742afa77a9bdd6d3364 -size 355328 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_device.dll 
b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_device.dll deleted file mode 100644 index c52beaf7..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_device.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7552644319ad6dd3e9ce37b07847e058cb084babef2bce903e09599c156e7f7a -size 142541824 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_gpu.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_gpu.dll deleted file mode 100644 index d0c8ef9f..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_gpu.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cdbd558f0b82dc40ff9ac3c952ada4262aa1ddb90cd0799e48bc5122a21548d7 -size 101703680 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_onnx.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_onnx.dll deleted file mode 100644 index 0f3a487c..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_onnx.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:748fd344c35daf81628aa899cc201ea301883abe67fa90aa27b8b3affbdd3cda -size 3170304 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_ref.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_ref.dll deleted file mode 100644 index 3d67d2e6..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/migraphx_ref.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1d2617c715a5ea25a8f7ac295271cf062765eb478fc1b826946f0b094d0e8e0d -size 2701312 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime.dll deleted file mode 100644 index 602ab694..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime.dll +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:2c40332e7d98ec35a1bc529ed9e640cd5a800c30202bd3fcaa42810c77659514 -size 19805696 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_migraphx.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_migraphx.dll deleted file mode 100644 index dadb1962..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_migraphx.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:64265a707d3cbd21e0593027e63d27bb4395c74f6bafc26a9abd0e8e4b3f51c4 -size 349184 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_shared.dll b/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_shared.dll deleted file mode 100644 index 448dd12c..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/migraphx_test/onnxruntime_providers_shared.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d84aec033f6f32250e7980a3841dc5460e6d2665c3620aed8186f3ac59787bdb -size 10752 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/1x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/1x4.xclbin deleted file mode 100644 index eb63d209..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/1x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/4x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/4x4.xclbin deleted file mode 100644 index 3b7d0741..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/4x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_shared.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_shared.dll deleted file mode 100644 index 0fef47cd..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_shared.dll +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:2798304dd60cfd0dd354badb549fd40790109eb25f5f03cafd23233d1d172351 -size 22744 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_vitisai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_vitisai.dll deleted file mode 100644 index a931949a..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_providers_vitisai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe67fc51cee909fbea16ab794d270bf11d6c907f527c838888ae7567022bf51a -size 243928 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vai.dll deleted file mode 100644 index ed81825f..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e842bef529f9182dda23cafb045681988ab4e05555a15c9b32847de58c2aeb81 -size 17973976 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vitisai_ep.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vitisai_ep.dll deleted file mode 100644 index 06edb713..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/onnxruntime_vitisai_ep.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8ae118083bf8b500ba1bc216f79743769426b15b9fdc659dffc27157378b04bd -size 64899800 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/vaip_config_npu_1.json b/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/vaip_config_npu_1.json deleted file mode 100644 index 8523b100..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen12_wdf/vaip_config_npu_1.json +++ /dev/null @@ -1,214 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - 
"moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_softmax", - "methodName": "rules" - } - }, - { - "name": "fuse_topk", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_topk", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_to_xir_op", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.convert_to_xir_op", - "methodName": "rules" - } - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": 
"gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.const_fold_batchnorm_to_scale", - "methodName": "rules" - } - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "pyExt": { - "moduleName": "voe.passes.merge_mul", - "methodName": "rules" - } - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_softmax_to_hard_softmax", - "methodName": "rules" - } - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.merge_fix_fix_transpose", - "methodName": "rules" - } - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - }, - 
"minimum_num_of_conv": 2 - } - } - ] -} diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/1x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/1x4.xclbin deleted file mode 100644 index f97583bc..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/1x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/4x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/4x4.xclbin deleted file mode 100644 index 06c4ad01..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/4x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin deleted file mode 100644 index fa754b93..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin deleted file mode 100644 index f4c6cbe4..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG0.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin deleted file mode 100644 index cf7a3136..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_4x4_Overlay_CFG1.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin deleted file mode 100644 index 180436e5..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_8x4x1_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin 
b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin deleted file mode 100644 index 1d1b40f1..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/AMD_AIE2P_Nx4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/dyn_dispatch_core.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/dyn_dispatch_core.dll deleted file mode 100644 index 0ad5a407..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/dyn_dispatch_core.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15aa69c210ae0fa08607b479a3097a0339d9c7ac703e1bab9fe0a630cb9856fc -size 6894800 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime.dll deleted file mode 100644 index d2fb8d83..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c78a5da3391afe6be347955f55d407cdff7769ecb01c103b5ef8381f32fd17e1 -size 19492048 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_shared.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_shared.dll deleted file mode 100644 index 0c055db2..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_shared.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bacc74bee3711e00d2fe51cefee4342d9a602ce5852edca3cc0f0557f8ee01c7 -size 22736 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_vitisai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_vitisai.dll deleted file mode 100644 index e3d48648..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_providers_vitisai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38d336afdb83b4434ef1a0b1d7e29e5dec63e3ea0a45cdfd8b10c0a5d2379ff8 -size 
179920 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vai.dll deleted file mode 100644 index d2fb8d83..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c78a5da3391afe6be347955f55d407cdff7769ecb01c103b5ef8381f32fd17e1 -size 19492048 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitis_ai_custom_ops.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitis_ai_custom_ops.dll deleted file mode 100644 index 9a683ce1..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitis_ai_custom_ops.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a5be1f8b9774f9e9a647899b727eb681e76fe5b8ead40ac5e97635537e2633d0 -size 755408 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitisai_ep.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitisai_ep.dll deleted file mode 100644 index 4c553a73..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/onnxruntime_vitisai_ep.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14c0a6c5afcc300e07116a36e8773b7f287d377b94880643ae1d6a37767914b2 -size 150492368 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/transaction.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/transaction.dll deleted file mode 100644 index faa5430d..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/transaction.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:015e7cc62565b3fdfe3ce45385e18e203869e065632de214baff80841773127e -size 305351376 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_1.json b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_1.json deleted file mode 100644 index 08fd9445..00000000 --- 
a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_1.json +++ /dev/null @@ -1,1050 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_softmax", - "methodName": "rules" - } - }, - { - "name": "fuse_topk", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_topk", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "vaip_pass_norm_k", - "plugin": "vaip-pass_norm_k", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_dynamic_dispatch", - "plugin": "vaip-pass_level1_dd", - "passDpuParam": { - "subPass": [ - { - "name": "dd_compiler_pass_transformation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_transformation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_2", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen_2", - "methodName": "rules" - } - } - ] - } - }, - { - 
"name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - 
"plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 2 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_VAIML-x2.0", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - 
"plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - 
"plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": { - "stringValue": "function" - }, - "enable_call_aie": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mt_fusion": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "enable_control_optimization": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_MHA", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_MHA", - "plugin": "vaip-pass_convert_MHA", - "enableGc": true - }, - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": 
"gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin" - } - }, - { - "name": "vaiml", - "plugin": "vaip-pass_vaiml", - "pass_vaiml_param": { - "subPass": [ - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", 
- "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": 
"merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 2 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - } - ] - } - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "vaiml_model_path": "vaiml_par_0", - "max_num_partitions": 200, - "device_name": "phx", - "debug": true - } - }, - { - "name": "vaip_pass_dd_merge_dqcastgather", - "plugin": "vaip-pass_dd_merge_dqcastgather", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop_onnx", - "plugin": "vaip-pass_dd_merge_qop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop_onnx", - "plugin": "vaip-pass_dd_merge_dqop_onnx", - "enable_gc": true, - "disabled": false - } - ], - "mepTable": [ - { - 
"modelName": "a3", - "md5sumOnDisk": "73ecb2594935fb9bd02707930610f29e", - "md5sumInMemory": "f59151f8b67a7b8f1a8bcc7798558c33", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "VAIML-x2.0-a3" - }, - { - "modelName": "DeepLabV3", - "md5sumOnDisk": "10a644c6da6b1121f807794506b7e5cc", - "md5sumInMemory": "849608d568bbc54380833c9446299989", - "md5sumInMemoryWithIo": "99f39afbf868542fb575023123c98001", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "inceptionv4", - "md5sumOnDisk": "42c48a7086ba34889699862d98844e62", - "md5sumInMemory": "591d2dc2a27f04111e1b14fbf4222d51", - "md5sumInMemoryWithIo": "3d6d80d6c60811089ced3bf6abb42cdc", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "MobileNet_V3", - "md5sumOnDisk": "5bbc61c013f20b6563c62523100fa2ee", - "md5sumInMemory": "0bcec05d638535092032784dcca2cce3", - "md5sumInMemoryWithIo": "1506de83ac617b0903613ca420061e84", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "ResNet50", - "md5sumOnDisk": "c22b773b35f2ce62932578cc5eec867b", - "md5sumInMemory": "b307088b2bf693c51d8ea399247d8139", - "md5sumInMemoryWithIo": "1bea49f637b8f9f0ec80bc44c6d841bb", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "YoloV3", - "md5sumOnDisk": "2657be600da90313ca9d32c03bd11835", - "md5sumInMemory": "7646f3796c4d98cfa53650482148734a", - "md5sumInMemoryWithIo": "14b71beebb0c98f291bdda3e7843cb1a", - "target": "VAIML-x2.0-Procyon" - } - ], - "target": "RyzenAI_vision_config_1", - "targets": [ - { - "name": "RyzenAI_vision_config_1", - "pass": [ - "init", - "vaiml" - ] - }, - { - "name": "RyzenAI_shell_config_1", - "xclbin": "AMD_AIE2P_2x4x1_Overlay.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU", - "vaip_pass_dd_merge_qop", - "vaip_pass_dd_merge_dqop", - "vaip_pass_dd_merge_qop_onnx", - "vaip_pass_dd_merge_dqop_onnx" - ], - "target_opts": { - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 3 - }, - "profile": { - 
"uintValue": 0 - }, - "enable_fast_pm": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mergesync": { - "boolValue": true - } - } - }, - "graph_engine_qos_priority": 640 - }, - { - "name": "RyzenAI_vision_config_2", - "xclbin": "1x4.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 16 - }, - "opt_level": { - "uintValue": 2 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - } - } - }, - { - "name": "RyzenAI_vision_config_3", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - } - } - } - }, - { - "name": "RyzenAI_vision_config_3_mha", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "RyzenAI_transformer_config_2", - "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch" - ] - }, - { - "name": "RyzenAI_xcompiler_and_dd_config", - "xclbin": "2x4x2_pss_pst_model_mha_qdq.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": 
{ - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "RyzenAI_transformer_cxx_pss_pst", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch_pss_pst" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "VAIML-x2.0-a3", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ] - }, - { - "name": "VAIML-x2.0-Procyon", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "enable_qdq_force_xint": { - "boolValue": true - } - } - } - } - ], - "enable_cache_file_io_in_mem": true -} diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_2_3.json b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_2_3.json deleted file mode 100644 index d24eca37..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/vaip_config_npu_2_3.json +++ /dev/null @@ -1,1050 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_softmax", - "methodName": "rules" - } - }, - { - "name": "fuse_topk", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_topk", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": 
"voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "vaip_pass_norm_k", - "plugin": "vaip-pass_norm_k", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_dynamic_dispatch", - "plugin": "vaip-pass_level1_dd", - "passDpuParam": { - "subPass": [ - { - "name": "dd_compiler_pass_transformation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_transformation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_2", - "methodName": "rules" - } - }, - { - "name": "dd_compiler_pass_generation_2", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.dynamic_dispatch_gen_2", - "methodName": "rules" - } - } - ] - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - 
"enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - 
"xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 3 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_VAIML-x2.0", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - 
}, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": { - "stringValue": "function" - }, - "enable_call_aie": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mt_fusion": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "enable_control_optimization": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_MHA", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - 
"subPass": [ - { - "name": "convert_MHA", - "plugin": "vaip-pass_convert_MHA", - "enableGc": true - }, - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": 
"vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin" - } - }, - { - "name": "vaiml", - "plugin": "vaip-pass_vaiml", - "pass_vaiml_param": { - "subPass": [ - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": 
"merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - 
"opt_level": { - "uintValue": 3 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - } - ] - } - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "vaiml_model_path": "vaiml_par_0", - "max_num_partitions": 200, - "device_name": "phx", - "debug": true - } - }, - { - "name": "vaip_pass_dd_merge_dqcastgather", - "plugin": "vaip-pass_dd_merge_dqcastgather", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop_onnx", - "plugin": "vaip-pass_dd_merge_qop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop_onnx", - "plugin": "vaip-pass_dd_merge_dqop_onnx", - "enable_gc": true, - "disabled": false - } - ], - "mepTable": [ - { - "modelName": "a3", - "md5sumOnDisk": "73ecb2594935fb9bd02707930610f29e", - "md5sumInMemory": "f59151f8b67a7b8f1a8bcc7798558c33", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "VAIML-x2.0-a3" - }, - { - "modelName": "DeepLabV3", - "md5sumOnDisk": "10a644c6da6b1121f807794506b7e5cc", - "md5sumInMemory": "849608d568bbc54380833c9446299989", - "md5sumInMemoryWithIo": "99f39afbf868542fb575023123c98001", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "inceptionv4", - "md5sumOnDisk": "42c48a7086ba34889699862d98844e62", - "md5sumInMemory": "591d2dc2a27f04111e1b14fbf4222d51", - "md5sumInMemoryWithIo": "3d6d80d6c60811089ced3bf6abb42cdc", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "MobileNet_V3", - "md5sumOnDisk": "5bbc61c013f20b6563c62523100fa2ee", - 
"md5sumInMemory": "0bcec05d638535092032784dcca2cce3", - "md5sumInMemoryWithIo": "1506de83ac617b0903613ca420061e84", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "ResNet50", - "md5sumOnDisk": "c22b773b35f2ce62932578cc5eec867b", - "md5sumInMemory": "b307088b2bf693c51d8ea399247d8139", - "md5sumInMemoryWithIo": "1bea49f637b8f9f0ec80bc44c6d841bb", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "YoloV3", - "md5sumOnDisk": "2657be600da90313ca9d32c03bd11835", - "md5sumInMemory": "7646f3796c4d98cfa53650482148734a", - "md5sumInMemoryWithIo": "14b71beebb0c98f291bdda3e7843cb1a", - "target": "VAIML-x2.0-Procyon" - } - ], - "target": "RyzenAI_vision_config_1", - "targets": [ - { - "name": "RyzenAI_vision_config_1", - "pass": [ - "init", - "vaiml" - ] - }, - { - "name": "RyzenAI_shell_config_1", - "xclbin": "AMD_AIE2P_2x4x1_Overlay.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU", - "vaip_pass_dd_merge_qop", - "vaip_pass_dd_merge_dqop", - "vaip_pass_dd_merge_qop_onnx", - "vaip_pass_dd_merge_dqop_onnx" - ], - "target_opts": { - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 3 - }, - "profile": { - "uintValue": 0 - }, - "enable_fast_pm": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mergesync": { - "boolValue": true - } - } - }, - "graph_engine_qos_priority": 640 - }, - { - "name": "RyzenAI_vision_config_2", - "xclbin": "1x4.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 16 - }, - "opt_level": { - "uintValue": 3 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - } - } - }, - { - "name": "RyzenAI_vision_config_3", - "xclbin": 
"AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - } - } - } - }, - { - "name": "RyzenAI_vision_config_3_mha", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "RyzenAI_transformer_config_2", - "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch" - ] - }, - { - "name": "RyzenAI_xcompiler_and_dd_config", - "xclbin": "2x4x2_pss_pst_model_mha_qdq.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "RyzenAI_transformer_cxx_pss_pst", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch_pss_pst" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - } - }, - { - "name": "VAIML-x2.0-a3", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ] - }, - { - "name": "VAIML-x2.0-Procyon", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - 
"target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "enable_qdq_force_xint": { - "boolValue": true - } - } - } - } - ], - "enable_cache_file_io_in_mem": true -} diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/xclbin.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/xclbin.dll deleted file mode 100644 index e5d462b3..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen14/xclbin.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:087260e7dc57c457ea691180c7bc76db7b3645c76831650d02cb411a2692131e -size 153808 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/1x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/1x4.xclbin deleted file mode 100644 index 6a386bf9..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/1x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/4x4.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/4x4.xclbin deleted file mode 100644 index 980bc35d..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/4x4.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_2x4x1_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_2x4x1_Overlay.xclbin deleted file mode 100644 index 30fad29a..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_2x4x1_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay.xclbin deleted file mode 100644 index 8f2b28d8..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG0.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG0.xclbin deleted file mode 100644 index 0df45da4..00000000 Binary files 
a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG0.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG1.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG1.xclbin deleted file mode 100644 index ad3cd630..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG1.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG2.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG2.xclbin deleted file mode 100644 index f45ba415..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_4x4_Overlay_CFG2.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_8x4x1_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_8x4x1_Overlay.xclbin deleted file mode 100644 index 199d0899..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_8x4x1_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_Nx4_Overlay.xclbin b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_Nx4_Overlay.xclbin deleted file mode 100644 index 0c9dd50c..00000000 Binary files a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/AMD_AIE2P_Nx4_Overlay.xclbin and /dev/null differ diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/abseil_dll.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/abseil_dll.dll deleted file mode 100644 index 9670e1d6..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/abseil_dll.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7008fe282300f860358695b2ea15a9b1af61262495c4e506c841a1db354f6828 -size 1590728 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/dyn_dispatch_core.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/dyn_dispatch_core.dll deleted file mode 100644 index 
3f3d90e9..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/dyn_dispatch_core.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9427f937d9e76319617205c4ddfeec49a49f70b5a20aea6df507c2cd0731b7fa -size 424810376 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/libutf8_validity.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/libutf8_validity.dll deleted file mode 100644 index 9aaccd00..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/libutf8_validity.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ecd5de2b52aba9efb79ae62b9d03d1866c32408b522a880e1d1e944ef4a126b9 -size 21960 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime.dll deleted file mode 100644 index 8e30a41b..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:247b59ebc996aabceb9445f22ee8a3dd4bf710aafc937721172ce24c3733761e -size 20756480 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_shared.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_shared.dll deleted file mode 100644 index 295734fd..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_shared.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:466c68c301ead11755d15d0ad65e525ab25194d310a14820c8ce74f3e2d8ed7f -size 112640 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_vitisai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_vitisai.dll deleted file mode 100644 index 51a44295..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_providers_vitisai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:149da7cc5bd5e2d5c1be36945c3b324694d7f94aadba82bd3e856d203138c88f -size 399872 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vai.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vai.dll deleted file mode 100644 index 8e30a41b..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vai.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:247b59ebc996aabceb9445f22ee8a3dd4bf710aafc937721172ce24c3733761e -size 20756480 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitis_ai_custom_ops.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitis_ai_custom_ops.dll deleted file mode 100644 index f0d8e4ef..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitis_ai_custom_ops.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d3b107ca036dd7a0cd6a4f21f018c09960b258696426bcf24329ca3ab6fe6180 -size 1211784 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitisai_ep.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitisai_ep.dll deleted file mode 100644 index 1e8741af..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/onnxruntime_vitisai_ep.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf750146dded8508cfd004687685652cf2be992af66de5c26fdcce0e797f457b -size 108299144 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_1.json b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_1.json deleted file mode 100644 index 6257e9ee..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_1.json +++ /dev/null @@ -1,6777 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_MatMulNBits", - "plugin": "vaip-pass_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_MLP", - "plugin": "vaip-pass_mladf_mlp", - 
"enable_gc": true, - "disabled": true - }, - { - "name": "fuse_SSMLP", - "plugin": "vaip-pass_ssmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_dynamic_dispatch_wcr", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_3", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "IConv", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - }, - { - "attribute_name": "is_bias", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14", - "constant_8", - "constant_9", - "constant_10" - ], - "modifiers": [ - "add_stride_inp_zp_attr" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_3", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "QConv2MatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - }, - { - "attribute_name": "is_bias", - "attribute_value": "true" - }, - { - "attribute_name": "from_conv2matmul", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14", - "constant_8", - "constant_9", - "constant_10" - ], - "modifiers": [ - "check_kernel_conv2matmul" - ] - } - } - }, - { - "name": 
"vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_4", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "IConv", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14" - ], - "modifiers": [ - "add_stride_inp_zp_attr" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_gelu_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_quickgelu", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", 
- "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_tanh", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QGemmTanh", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "dq5_s", - "dq5_z", - "q3_s", - "q3_z" - ], - "modifiers": [ - "transpose_weight_gemm" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_gelu_microsoft", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q1_s", - 
"q1_z", - "dq3_s", - "dq3_z", - "b_s", - "b_z", - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_2", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q1" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z" - ], - "modifiers": [ - "transpose_weight_gemm", - "wcr_prefix" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_transpose", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "output" - }, - "op_name": "QMatMulDynamic", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "modifiers": [ - "act_act_matmul_modifier" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - 
"enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_transpose", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "output" - }, - "op_name": "QMatmulDynamicTranspose", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5", - "constant_6", - "constant_7", - "constant_8", - "constant_9" - ], - "modifiers": [ - "QBatchMatMulDynamic_predicate", - "dq_a", - "dq_c", - "input_0", - "input_1" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_2", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "q1" - }, - "op_name": "QMatmulDynamicTranspose", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBatchMatMulDynamic_predicate", - "input_0", - "input_1", - "extract_bmkn" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_1", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": 
"design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_0", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_reshape_softmax", - "input_names": [ - "input" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QMulSoftmax", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_dqsoftmax_1", - "input_names": [ - "input_0" - ], - "output_names": 
"com_microsoft_QuantizeLinear_0" - }, - "op_name": "QMulSoftmax", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QActConstAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b_s", - "b_z", - "a_s", - "a_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd_1", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastBiasAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_2", - "constant_3", - 
"constant_0", - "constant_1", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBroadcastBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd", - "input_names": [ - "input_1", - "input_0" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastBiasAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_2", - "constant_3", - "constant_0", - "constant_1" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBroadcastBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - 
"QBroadcastAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_transpose", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QReshapeTranspose", - "extractor": { - "input_q_params": [ - "in_scale", - "in_zp" - ], - "output_q_params": [ - "out_scale", - "out_zp" - ], - "accessor_attributes": [ - { - "node_binder_name": "transpose", - "attribute_name": "perm", - "dtype": "ints" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "modifiers": [ - "Transpose_predicate", - "add_dummy_inputs", - "int32[16]" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_lpnorm_4", - "input_names": [ - "a" - ], - "output_names": "q1" - }, - "op_name": "L2_Norm", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "q1_s", - "q1_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qreshape", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qsqueeze", - "input_names": [ - 
"input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qunsqueeze", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qelwediv", - "input_names": [ - "input_0" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "qdiv_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qelwemul", - "input_names": [ - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "qmul_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_transpose_noop", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QReshapeTranspose", - "extractor": { - "initializers": [ - "dummy" - ], - 
"accessor_attributes": [ - { - "node_binder_name": "", - "attribute_name": "perm_transpose", - "dtype": "ints" - }, - { - "node_binder_name": "", - "attribute_name": "input_q_params", - "dtype": "floats" - }, - { - "node_binder_name": "", - "attribute_name": "output_q_params", - "dtype": "floats" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "modifiers": [ - "Transpose_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgather", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "infer_qgather_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_dq", - "input_names": [ - "input" - ], - "output_names": "dq" - }, - "op_name": "DeQuantOP", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "in_s", - "in_z" - ], - "input_q_params": [ - "in_s", - "in_z" - ], - "output_q_params": [ - "in_s", - "in_z" - ], - "modifiers": [ - "out_dtype_modifier", - "bfloat16", - "input_only_act" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_q", - "input_names": [ - "input" - ], - "output_names": "dq" - }, - "op_name": "QuantOP", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - 
"attribute_value": "8x4" - } - ], - "initializers": [ - "in_s", - "in_z" - ], - "input_q_params": [ - "in_s", - "in_z" - ], - "output_q_params": [ - "in_s", - "in_z" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psp1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_gelu_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_quickgelu", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": 
"m_qgemm_gelu_microsoft", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q1_s", - "q1_z", - "dq3_s", - "dq3_z", - "b_s", - "b_z", - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_2", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q1" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z" - ], - "modifiers": [ - "transpose_weight_gemm" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { 
- "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_1", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_0", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - 
"a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_wcr_llm", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_channelwise_silu", - "plugin": "vaip-pass_dd_merge_qmatmul_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_silu", - "input_names": [ - "a", - "w" - ], - "output_names": "com_microsoft_QuantizeLinear_3" - }, - "op_name": "QMatMulAddSilu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z", - "constant_19", - "constant_20" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "constant_19", - "constant_20" - ], - "modifiers": [ - "out_dtype_modifier", - "bfloat16" - ] - } 
- } - }, - { - "name": "merge_matmul_channelwise_biasadd", - "plugin": "vaip-pass_dd_merge_qmatmul_int4_bias", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_int4_bias", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q2_s", - "q2_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "merge_matmul_channelwise", - "plugin": "vaip-pass_dd_merge_qmatmul2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_2", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - 
"args": [ - "bf16", - "Transpose" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_lpnorm_3", - "input_names": [ - "a" - ], - "output_names": "q2" - }, - "op_name": "L2_Norm", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "fuse_mul", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "q2_s", - "q2_z", - "dq3_s", - "b" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q2_s", - "q2_z" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate", - "in_dtype_modifier", - "bfloat16", - "out_dtype_modifier", - "bfloat16", - "set_xclbin_name" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - 
"pattern_name": "m_qelwemul", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QELWEMUL_qdq", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16", - "QELWEMUL_qdq_predicate", - "set_xclbin_name" - ] - } - } - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "4x2" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ], - "modifiers": [ - "QMatMul_predicate" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psf", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_qmhagrpb", - "plugin": "vaip-pass_dd_merge_qmhagrpb", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxpzi", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmha", - "plugin": 
"vaip-pass_dd_merge_qmha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhachannel", - "plugin": "vaip-pass_dd_merge_qmhachannel", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhawindow", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlstm", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_0", - "plugin": "vaip-pass_dd_merge_iconv_0", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": "vaip-pass_dd_merge_iconv_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_1", - "plugin": "vaip-pass_dd_merge_qlayernorm_1", - "enable_gc": true, - "disabled": false - }, - { 
- "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qskipadd", - "plugin": "vaip-pass_dd_merge_qskipadd", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_ql2norm", - "plugin": "vaip-pass_dd_merge_ql2norm", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_2", - "plugin": "vaip-pass_dd_merge_qlpnorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro", - "plugin": "vaip-pass_dd_merge_attentionprepro", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": 
true - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_identity", - "plugin": "vaip-pass_dd_merge_identity", - "enable_gc": true, - "disabled": true - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psw", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_demha", - "plugin": "vaip-pass_dd_merge_DeMHA", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_psw", - "plugin": "vaip-pass_dd_merge_attentionprepro_psw", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_QKVProj", - "plugin": "vaip-pass_dd_merge_qmatmul_add_rtr", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu4", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_batch_matmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "intadd" - 
] - }, - { - "name": "vaip_pass_dd_merge_qgemmv", - "plugin": "vaip-pass_dd_merge_qgemmv", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_elementwise_mul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": false, - "args": [ - "intmul" - ] - }, - { - "name": "vaip_pass_dd_merge_qconcat", - "plugin": "vaip-pass_dd_merge_qconcat", - "enable_gc": true, - "disabled": false, - "args": [ - "custom_op" - ] - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "DDMergeShape_psw", - "plugin": "vaip-pass_dd_merge_DDMergeShape_psw", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_identity", - "plugin": "vaip-pass_dd_merge_identity", - "enable_gc": true, - "disabled": true - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psj", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_qmhagrpb", - "plugin": "vaip-pass_dd_merge_qmhagrpb", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxpzi", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmha", - "plugin": "vaip-pass_dd_merge_qmha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhachannel", - "plugin": "vaip-pass_dd_merge_qmhachannel", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhawindow", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": 
"merge_qlstm", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_0", - "plugin": "vaip-pass_dd_merge_iconv_0", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": "vaip-pass_dd_merge_iconv_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_1", - "plugin": "vaip-pass_dd_merge_qlayernorm_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": 
true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qskipadd", - "plugin": "vaip-pass_dd_merge_qskipadd", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_ql2norm", - "plugin": "vaip-pass_dd_merge_ql2norm", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxgan", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxpzi", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psr", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_iconv_0", - "plugin": 
"vaip-pass_dd_merge_iconv_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": "vaip-pass_dd_merge_iconv_3", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgroupnorm_0", - "plugin": "vaip-pass_dd_merge_qgroupnorm_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgroupnorm_1", - "plugin": "vaip-pass_dd_merge_qgroupnorm_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_0", - "plugin": "vaip-pass_dd_merge_qconv2matmul_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_1", - "plugin": "vaip-pass_dd_merge_qconv2matmul_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_2", - "plugin": "vaip-pass_dd_merge_qconv2matmul_2", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add3", - "plugin": "vaip-pass_dd_merge_qmatmul_add3", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qslice", - "plugin": "vaip-pass_dd_merge_qslice", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat", - "plugin": "vaip-pass_dd_merge_qconcat", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgemmv", - "plugin": 
"vaip-pass_dd_merge_qgemmv", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgelu", - "plugin": "vaip-pass_dd_merge_qgelu", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qresize", - "plugin": "vaip-pass_dd_merge_qresize", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_quant", - "plugin": "vaip-pass_dd_merge_quant", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dequant", - "plugin": "vaip-pass_dd_merge_dequant", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_dtype_mzdk5", - "plugin": "vaip-pass_dd_merge_dtype_mzdk5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_shape_mzdk5", - "plugin": "vaip-pass_dd_merge_shape_mzdk5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", 
- "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psh", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_tanh_lpnorm", - "plugin": "vaip-pass_dd_merge_tanh_lpnorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_3_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_3_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul_mxgan", - "plugin": "vaip-pass_dd_merge_qelwemul_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - 
"disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsigmoid", - "plugin": "vaip-pass_dd_merge_qsigmoid", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxgan", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_psh", - "plugin": "vaip-pass_dd_merge_attentionprepro_psh", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_reduce_sum", - "plugin": "vaip-pass_dd_merge_reduce_sum", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_expand", - "plugin": "vaip-pass_dd_merge_expand", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_expand_psh", - "plugin": "vaip-pass_dd_merge_expand_psh", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qeltwise_div", - "plugin": "vaip-pass_dd_merge_qeltwise_div", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dps", - "plugin": "vaip-pass_dd_merge_dps", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul1", - "plugin": "vaip-pass_dd_merge_qmatmul1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psq1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_actconstadd", - "plugin": "vaip-pass_dd_merge_actconstadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": true - }, - { - "name": 
"vaip_pass_dd_merge_gather", - "plugin": "vaip-pass_dd_merge_gather", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pso2", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qconcateops_1_2_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_2", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqsoftmax", - "plugin": "vaip-pass_dd_merge_dqsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pso2_02", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qconcateops_1_2", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_3", - "plugin": "vaip-pass_dd_merge_qconcateops_1_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqsoftmax", - "plugin": "vaip-pass_dd_merge_dqsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - 
"enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_mu0_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_rope_const", - "plugin": "vaip-pass_dd_merge_rope_const", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_const_1", - "plugin": "vaip-pass_dd_merge_rope_const_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act_1", - "plugin": "vaip-pass_dd_merge_rope_act_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act", - "plugin": "vaip-pass_dd_merge_rope_act", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmha_psmu", - "plugin": "vaip-pass_dd_merge_qmha_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": true, - "args": [ - "bf16" - ] - }, - { - "name": "merge_qgemm_gelu", - "plugin": "vaip-pass_dd_merge_qgemm_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm_1", - "plugin": "vaip-pass_dd_merge_qgemm_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm", - "plugin": "vaip-pass_dd_merge_qgemm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmul_actxact_1", - "plugin": "vaip-pass_dd_merge_qmul_actxact_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmul_actxact", - "plugin": 
"vaip-pass_dd_merge_qmul_actxact", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsoftmax_1", - "plugin": "vaip-pass_dd_merge_qsoftmax_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Reshape" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": true, - "disabled": false, - "args": [ - "add_flags" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastbiasadd", - "plugin": "vaip-pass_dd_merge_qbroadcastbiasadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_no_op", - "plugin": "vaip-pass_dd_merge_no_op", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_no_op_reshape", - "plugin": "vaip-pass_dd_merge_no_op_reshape", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat_m", - "plugin": "vaip-pass_dd_merge_qconcat_m", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_transpose", - "plugin": "vaip-pass_dd_merge_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch", - "plugin": "vaip-pass_dd_merge_combined_mhamask", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch_1", - "plugin": "vaip-pass_dd_merge_combined_mhamask_1", - "enable_gc": true, - "disabled": 
false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_qrope_const", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QMatMul_QRopeConst", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "const_1", - "const_2", - "const_3", - "const_4", - "const_5", - "const_8", - "const_7", - "const_6" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_mu1_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_rope_const", - "plugin": "vaip-pass_dd_merge_rope_const", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_const_1", - "plugin": "vaip-pass_dd_merge_rope_const_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act_1", - "plugin": "vaip-pass_dd_merge_rope_act_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act", - "plugin": "vaip-pass_dd_merge_rope_act", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmha_psmu", - "plugin": "vaip-pass_dd_merge_qmha_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm_gelu", - "plugin": "vaip-pass_dd_merge_qgemm_gelu", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qgemm_1", - "plugin": "vaip-pass_dd_merge_qgemm_1", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qgemm", - "plugin": 
"vaip-pass_dd_merge_qgemm", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Reshape", - "Slice" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": true, - "disabled": false, - "args": [ - "add_flags" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastbiasadd", - "plugin": "vaip-pass_dd_merge_qbroadcastbiasadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_no_op", - "plugin": "vaip-pass_dd_merge_no_op", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_no_op_reshape", - "plugin": "vaip-pass_dd_merge_no_op_reshape", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat_m", - "plugin": "vaip-pass_dd_merge_qconcat_m", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qslice", - "plugin": "vaip-pass_dd_merge_qslice_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - 
"plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch", - "plugin": "vaip-pass_dd_merge_combined_mhamask", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch_1", - "plugin": "vaip-pass_dd_merge_combined_mhamask_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_qrope_input", - "input_names": [ - "input", - "const_6", - "const_7" - ], - "output_names": "output" - }, - "op_name": "QMatMul_QRopeInput", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "const_1", - "const_2", - "const_3", - "const_4", - "const_5", - "const_8" - ] - } - } - }, - { - "name": "pass_dd_batch_matmul_v", - "plugin": "vaip-pass_dd_batch_matmul_v", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_combined_matmul_psmu", - "plugin": "vaip-pass_dd_merge_combined_matmul_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_combined_MatmulRope_psmu", - "plugin": "vaip-pass_dd_merge_combined_MatmulRope_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_ds0_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Transpose" - ] - }, - { - "name": 
"vaip_pass_dd_merge_qconv2matmul_5", - "plugin": "vaip-pass_dd_merge_qconv2matmul_5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_7", - "plugin": "vaip-pass_dd_merge_qconv2matmul_7", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_6", - "plugin": "vaip-pass_dd_merge_qconv2matmul_6", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": true, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_4x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - 
"disabled": true - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "8x4PSU" - ] - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_4x4_opt", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp", - 
"plugin": "vaip-pass_dd_merge_flatmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_8x4_opt", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp", - "plugin": "vaip-pass_dd_merge_flatmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - 
"plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "8x4PSU" - ] - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu0", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "gqa", - "plugin": "vaip-pass_gqa", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_silu", - "plugin": "vaip-pass_dd_merge_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_elwemul", - "plugin": "vaip-pass_dd_merge_elwemul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_nbits", - "plugin": "vaip-pass_dd_merge_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfadd", - "plugin": "vaip-pass_dd_merge_mladfadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_slrn", - "plugin": "vaip-pass_dd_merge_slrn", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flat_sslrn", - "plugin": "vaip-pass_dd_merge_flat_sslrn", - "enable_gc": true, - "disabled": true - }, - { - "name": "merge_mlp", - "plugin": "vaip-pass_dd_merge_mlp", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_silu", - "plugin": "vaip-pass_dd_merge_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_elwemul", - 
"plugin": "vaip-pass_dd_merge_elwemul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_nbits", - "plugin": "vaip-pass_dd_merge_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfadd", - "plugin": "vaip-pass_dd_merge_mladfadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_slrn", - "plugin": "vaip-pass_dd_merge_slrn", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pss_pst", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_mladfelwmul", - "plugin": "vaip-pass_dd_merge_mladfelwmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfmatmul", - "plugin": "vaip-pass_dd_merge_mladfmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsoftmax", - "plugin": "vaip-pass_dd_merge_qsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": 
"vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - 
"stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - }, - "minimum_num_of_conv": 2 - } - }, - { - "name": "fuse_DPU_VAIML-x2.0", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - 
"name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": { - "stringValue": "function" - }, - "enable_vaiml_flow_compile": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mt_fusion": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "enable_control_optimization": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_VAIML-x2.0_yolo", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": 
"convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": 
"vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": { - "stringValue": "function" - }, - "enable_vaiml_flow_compile": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_control_optimization": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": false - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "mladf_options": { - "stringValue": "print-timer=3" - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_dynamic_dispatch_vaiml_llm", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qrmsnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmulbias", - "plugin": "vaip-pass_dd_merge_qmatmul_add4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": 
"vaip-pass_dd_merge_vaimlqmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qadd", - "plugin": "vaip-pass_dd_merge_qeltwise_add_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmul", - "plugin": "vaip-pass_dd_merge_vaimlqelwemul", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_vaiml_llm_gen_txn", - "plugin": "vaip-pass_level1_dd_cxx_gen_txn", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qrmsnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmulbias", - "plugin": "vaip-pass_dd_merge_qmatmul_add4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_vaimlqmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qadd", - "plugin": "vaip-pass_dd_merge_qeltwise_add_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmul", - "plugin": "vaip-pass_dd_merge_vaimlqelwemul", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_DPU_MHA", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_MHA", - "plugin": "vaip-pass_convert_MHA", - "enableGc": true - }, - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": 
"vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": 
"vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin" - } - }, - { - "name": "vaiml_lt", - "plugin": "vaip-pass_vaiml_lt", - "vaiml_config": { - "vaiml_model_path": "vaiml_par_0", - "max_num_partitions": 200, - "device_name": "phx", - "debug": true - } - }, - { - "name": "vaip_pass_dd_merge_dqcastgather", - "plugin": "vaip-pass_dd_merge_dqcastgather", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop_onnx", - "plugin": "vaip-pass_dd_merge_qop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop_onnx", - "plugin": "vaip-pass_dd_merge_dqop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "device": "stx", - "optimize_level": 2, - "preferred_data_storage": "auto", - "fe_experiment": "enable-outer-garbage-propagation-through-reshape=1", - "aiecompiler_args": " -Xelfgen=-j16 " - } - } - ], - "mepTable": [ - { - "modelName": "PSA", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "ca42121518cca903f07262b8f2751a42", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_v2_66.0", - "md5sumOnDisk": "73c0f0683f9faaf9c2710272066ba0af", - "md5sumInMemory": "5cbfd77efbcb1f6ba299bc4ffdff8aac", - 
"md5sumInMemoryWithIo": "8030f6b91feeccf39855b104111a6143", - "target": "PSAV2" - }, - { - "modelName": "PSO0_A", - "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", - "md5sumInMemory": "74ded15705d2c958177604029a20a208", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B", - "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", - "md5sumInMemory": "3ee8e6a8f08912a9a92a260b68447bb2", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_0", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "452a80b01d29ebc42559d59b42de03ca", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "4853206b53d2cae2a40aad448d73370c", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_2", - "md5sumOnDisk": "0abe0b0bbc8314b482f0737da3d831ad", - "md5sumInMemory": "ee7f757248851d28061617f87043805f", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_3", - "md5sumOnDisk": "a87968c033291ab04069feaafb5fd7df", - "md5sumInMemory": "19ef4a7ffa9c9ab3871ea3142db4a5db", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_4", - "md5sumOnDisk": "ab9c6f1bb23d04765708622f5e48c0da", - "md5sumInMemory": "4c54574f384ddf99ed01b09bd249ca8b", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "352de20cfd0a050f6083eb661237c6cc", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_MSFT", - "md5sumOnDisk": "F42BC6AC686C72B8CAA38DE28DFAA553", - "md5sumInMemory": "F42BC6AC686C72B8CAA38DE28DFAA553", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_1", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "ee9a5fc4b79342b98049c4826983f18e", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": 
"27b186167f3a3957b32141846bcf81f8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_1.0.1_nchw", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", - "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA6.3", - "md5sumOnDisk": "b5e9f87a18d925e43f6e74ef34a299c3", - "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", - "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_1.0.1_nchw", - "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", - "md5sumInMemory": "6cefe9e4244aa0f3f8dbf21d43789e86", - "md5sumInMemoryWithIo": "e3497d38e57aa72df6aec42833b784a8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B_1.0.1_nchw", - "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", - "md5sumInMemory": "48f349c792e8e2a7562e092968750947", - "md5sumInMemoryWithIo": "dc6c1b8774c65f20fcbaaae86cd05f8c", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1.0.1_nchw", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "35527caf46c38e60ad74204a1d966847", - "md5sumInMemoryWithIo": "4b0f1adedf7f6e7f74b22dd8d1fb22fb", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_nhwc", - "md5sumOnDisk": "777bd69cc9e12b6e7868277e639f97e2", - "md5sumInMemory": "3a9abd050f70b09de3546f1d61e43c74", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_1.1.0_nhwc", - "md5sumOnDisk": "b9eb351ca7af65a2a43f99f41dda7dc2", - "md5sumInMemory": "e3f795258afe1d046f56fad0f8574864", - "md5sumInMemoryWithIo": "3af1b536b95d9eebe3190294e311f57a", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B_1.1.0_nhwc", - "md5sumOnDisk": "fa6d63ec0a7674cb9464020802e5f3ca", - "md5sumInMemory": "862aea9de93c2e711ec842f942d9b8b1", - "md5sumInMemoryWithIo": "b54766ff357e41c4ca538d017e126385", - "target": 
"RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1.1.0_nhwc", - "md5sumOnDisk": "3a56581e403def2548e50c77808c4174", - "md5sumInMemory": "377a5a02ffbba68bc1fdd25b54f0f18b", - "md5sumInMemoryWithIo": "ac4f50ea2c10c863db4bfbd6ca024f6e", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSF", - "md5sumOnDisk": "d38670a70c72561cb3f718125829b5fa", - "md5sumInMemory": "2fbcab29de9dd547562c46319a225a9c", - "md5sumInMemoryWithIo": "5c07dc0856549dabc4d092763e1ce5cf", - "xclbin": "4x2_psf_model_a8w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psf", - "perf_pref": "1", - "modelCategory": "PSF" - }, - { - "modelName": "PSFv1.1", - "md5sumOnDisk": "c711cb8798e562011383bae4c5c91ce1", - "md5sumInMemory": "a394ffc0e58a8e841f5ae415c15a63e8", - "md5sumInMemoryWithIo": "037fca5fd1b0c7b195ef410c3e9b88df", - "xclbin": "4x2_psf_v1.1_model_a8w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psf", - "modelCategory": "PSF", - "perf_pref": "1", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSHv1.2", - "md5sumOnDisk": "08678ce4a4eab9eaa2cd8b3dcbdf5697", - "md5sumInMemory": "62099384a6af2956912b2d31a99be483", - "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", - "xclbin": "4x2_psh_v1.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psh", - "perf_pref": "1", - "modelCategory": "PSH", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSJ", - "md5sumOnDisk": "15d1515c86b40874ac954869798d0e77", - "md5sumInMemory": "546b8eca24af15302c647edb3e575d54", - "md5sumInMemoryWithIo": "9f9d91808166382b325459dfb88494c0", - "xclbin": "4x2_psj_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psj", - "perf_pref": "1", - "modelCategory": "PSJ" - }, - { - "modelName": "PSJv3.0", - "md5sumOnDisk": "d58fbf8f0bc55fe43fbe917b8352e56d", - "md5sumInMemory": "86fdce04074bce4b540e1e71e2e74fbf", - "md5sumInMemoryWithIo": "9f9d91808166382b325459dfb88494c0", - "xclbin": 
"4x2_psj_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psj", - "perf_pref": "1", - "modelCategory": "PSJ" - }, - { - "modelName": "DS_R1_1.5_0_v2.0", - "md5sumOnDisk": "908d5e0ffb4a1fea7f9f65fcbd65a361", - "md5sumInMemory": "7062221644ff1e1e1cf87ba383f631ed", - "md5sumInMemoryWithIo": "398b12809feca99c9fd80e53ae9fd874", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_ds0_8x4", - "modelCategory": "PSU" - }, - { - "modelName": "DS_R1_1.5_1_v2.0", - "md5sumOnDisk": "589c6e9ba2ced271915d95a31dc8c7b9", - "md5sumInMemory": "22caad72379b1629ec67889af03f49a6", - "md5sumInMemoryWithIo": "b31dd87909f2bb50aaf99c52600ccbf1", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_ds1_8x4", - "modelCategory": "PSU" - }, - { - "modelName": "PSMU_ST0", - "md5sumOnDisk": "74b5d0dff744baecb9bf55660e166b77", - "md5sumInMemory": "13d5d50818ca009b00cbc73c86d2487a", - "md5sumInMemoryWithIo": "3e9e7b7cf4e7ac34f8fb116c2d958a2b", - "xclbin": "8x4_psmu_st0_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu0_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST1", - "md5sumOnDisk": "aaf6f0a2ed85d133dfa48e75b0af3700", - "md5sumInMemory": "382a9bf1181f24fcd3db77238e596ef1", - "md5sumInMemoryWithIo": "f67f5434de44edb87b6e98deaee5609b", - "xclbin": "8x4_psmu_st1_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu1_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST0_v3.8", - "md5sumOnDisk": "bbc426034771275f67935de0b8164995", - "md5sumInMemory": "ce3a4dd7df0e07bb9a90634d33a5b167", - "md5sumInMemoryWithIo": "3e9e7b7cf4e7ac34f8fb116c2d958a2b", - "xclbin": "8x4_psmu_st0_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu0_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST1_v3.8", - "md5sumOnDisk": "0ec19e24264ca46247ac3047c6abd389", - "md5sumInMemory": "6a5eed0ee0b8fba217fe16df5b713101", - 
"md5sumInMemoryWithIo": "f67f5434de44edb87b6e98deaee5609b", - "xclbin": "8x4_psmu_st1_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu1_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSU0-v1.2", - "md5sumOnDisk": "3ec68bd0d8423b1700046b29e05a688f", - "md5sumInMemory": "db82ec8222e7f515b55415e3e707befb", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "target": "RyzenAI_transformer_cxx_psu_8x4", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1-v1.2", - "md5sumOnDisk": "436ccc61a4cec07c1e449fb879e7e905", - "md5sumInMemory": "a557620781ac029b8ef198e9c7d95f92", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "target": "RyzenAI_transformer_cxx_psu_8x4", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU0-v1.2-Opt", - "md5sumOnDisk": "3ec68bd0d8423b1700046b29e05a688f", - "md5sumInMemory": "947860ce26ac9b0a445e19cea2cc86dc", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "target": "RyzenAI_transformer_cxx_psu_8x4_opt", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1-v1.2-Opt", - "md5sumOnDisk": "436ccc61a4cec07c1e449fb879e7e905", - "md5sumInMemory": "e8eebc04612ab31b904545cb45ea291c", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "target": "RyzenAI_transformer_cxx_psu_8x4_opt", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU0", - "md5sumOnDisk": "85346f221749b9e7ffd2e928acac3d09", - "md5sumInMemory": "929b05d96fc8a61b4d1131a7722732e9", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "xclbin": 
"llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "target": "RyzenAI_transformer_cxx_psu0", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1", - "md5sumOnDisk": "c39dfc4c621b2f2bfa7f9a8b233ce148", - "md5sumInMemory": "0ec91f16621a932d2016784c05f5b703", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "target": "RyzenAI_transformer_cxx_psu1", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSIv1.1", - "md5sumOnDisk": "72651ec6ae2fe552dd0604527d73c1e5", - "md5sumInMemory": "0e3ff9de7ff3d7eddc7712316ca7ab11", - "md5sumInMemoryWithIo": "5a4479883ad7e7724442977a88a257e0", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.2", - "md5sumOnDisk": "91aa788cdf570ebd5434bd7b8937feb2", - "md5sumInMemory": "6ed69ec59ba231b919877d12f81cabd3", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.3", - "md5sumOnDisk": "028f6808733628e822b86c12d38df4b6", - "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", - "md5sumInMemoryWithIo": "2dc156817e5201dc51c39e821b9d5ec7", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.4", - "md5sumOnDisk": "96fefa03d63137796293448db34d78e4", - "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", - "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": 
"PSI" - }, - { - "modelName": "PSIv3.0", - "md5sumOnDisk": "f39e8a9d6843c6852f1c3a1942845e90", - "md5sumInMemory": "2a2f237c2903138caca8c854dcff3021", - "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSQ1", - "md5sumOnDisk": "4e17c61308b9170dda55586dee6c0751", - "md5sumInMemory": "58eb51eccd44a084b185159b67e2c1b6", - "md5sumInMemoryWithIo": "50c2fb23e40a0617f58ebcedbbfac359", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq_04.xclbin", - "target": "RyzenAI_transformer_cxx_psq1", - "modelCategory": "PSQ1", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "2" - } - }, - { - "modelName": "PSQ2", - "md5sumOnDisk": "4e485de54588d95209560c0a29049b68", - "md5sumInMemory": "4c121cc7cd35dc04c30f46a94b2baf7a", - "md5sumInMemoryWithIo": "3223d1f84b9dd740f3a829ad9680469f", - "xclbin": "4x2_psq2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psq2", - "modelCategory": "PSQ2", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSO2-320", - "md5sumOnDisk": "4a6a777fc11158c1ca70ebdd6caae3fe", - "md5sumInMemory": "1bd58439b3a31d4e6edce0689e552ccf", - "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-640", - "md5sumOnDisk": "67927fb103d7fcdd3eeca44475eac6df", - "md5sumInMemory": "9313ce55730e051e4d32eb4f9986f1f2", - "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-1280", - "md5sumOnDisk": "dc4cfca432a4e09eb4275b55a485b126", - "md5sumInMemory": 
"1cf0b88f93d4d5d458e408abc0a4cf5d", - "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-2560", - "md5sumOnDisk": "de50b0722e512613188a39429e70ead7", - "md5sumInMemory": "5dc5872cd47b9da7f64b6855b1035595", - "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-5120", - "md5sumOnDisk": "af4c17dc488364157a8ffecfb7279d3d", - "md5sumInMemory": "e54a80eb8a942cd83f2c4d8149e27feb", - "md5sumInMemoryWithIo": "3db1ee89fe2143cedfcc533f5085a051", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-8000", - "md5sumOnDisk": "a65353afbfda0b221a7868ed88b53811", - "md5sumInMemory": "83313d4445a74bb303a16c824ea6874a", - "md5sumInMemoryWithIo": "f7aefeeb6299d80b979b378e59d940a0", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-320-v1.1", - "md5sumOnDisk": "e6b7d1656f92ca990c7ec94f8b17813d", - "md5sumInMemory": "d0d6cbd1d89f60347ab6c9453e35507e", - "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-640-v1.1", - "md5sumOnDisk": "7d789256f2d568367974b7b74fe9de71", - "md5sumInMemory": "be1539116a956da222b7b678a369fbf6", - "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-1280-v1.1", - "md5sumOnDisk": "b815ee6d465f51de249c4048aa2515cb", - "md5sumInMemory": "4333bedcd96799e4bc2fba9ec4746617", - 
"md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-2560-v1.1", - "md5sumOnDisk": "5d0c439244e9cf46d45d7ce249c0a3e7", - "md5sumInMemory": "577399f309357fc62e307c38945ce770", - "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-02_640-v1.1.2", - "md5sumOnDisk": "74f8fe8dd431826e20ddcfb383887b5a", - "md5sumInMemory": "cb1ace27775a8acde55d93ed9c831fb0", - "md5sumInMemoryWithIo": "567b9a0181a5147629e58d4e11b5414d", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_640", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-02_2560-v1.1.2", - "md5sumOnDisk": "43aa65d66d619191dfb6711164cb7ef2", - "md5sumInMemory": "b7873ef2c883221592f9a427681f7aa7", - "md5sumInMemoryWithIo": "33defb20f9050140c2a5dfd84e0f07d2", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_2560", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-02_5120-v1.1.2", - "md5sumOnDisk": "903da8e472bbdbf3a5181e318abd7de1", - "md5sumInMemory": "b68933a5037b7ddf440e7672d1121793", - "md5sumInMemoryWithIo": "ff1755a022b574525b1a78f748825149", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_5120", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-02_8000-v1.1.2", - "md5sumOnDisk": "9e8b510eb05c4c4b4666aa3eb1b258fa", - "md5sumInMemory": "61440810010d2fee2127b9ecb3418b20", - "md5sumInMemoryWithIo": "855a4d8521d118fd1e02c7d3ea24ecd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": 
"RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_8000", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-06_1280-v1.1.2", - "md5sumOnDisk": "5c56c9a5148418914e2a74c1fcc4e5dc", - "md5sumInMemory": "c5704b01ae136320462d84e5ef6f68e8", - "md5sumInMemoryWithIo": "8d4c665ddccc2b3eb71e160dd882b5cb", - "xclbin": "4x2_pso2_model_a16w16_qdq_1_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "06_1280", - "modelCategory": "PSO2_06" - }, - { - "modelName": "PSO2-06_8000-v1.1.2", - "md5sumOnDisk": "5d45bef760e086ae9dd13d90ed4bfd86", - "md5sumInMemory": "dc96e4f1d1fe4ed6631d0d13f49ebb8f", - "md5sumInMemoryWithIo": "de3ab6b289cf54f83c962aea6266f218", - "xclbin": "4x2_pso2_model_a16w16_qdq_1_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "06_8000", - "modelCategory": "PSO2_06" - }, - { - "modelName": "PSO2-08_80-v1.1.2", - "md5sumOnDisk": "f112b5e15b705d02c354cc6d0b75bf3f", - "md5sumInMemory": "4b470d88553037774d4305760e679bf2", - "md5sumInMemoryWithIo": "ab93d75b985f5084c35f736c4c02855d", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_80", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_160-v1.1.2", - "md5sumOnDisk": "4e3c4e6a306bd796b55711791f368b51", - "md5sumInMemory": "064bfd691e194cd0964cac6e3f8aaed6", - "md5sumInMemoryWithIo": "ef4e0fc4c7f93426b06a1d32a012235b", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_160", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_320-v1.1.2", - "md5sumOnDisk": "2ea69a0a0f7ea4de2c69ba5a2d8eb214", - "md5sumInMemory": "4305970852f28dd46b896a00cf31765e", - "md5sumInMemoryWithIo": 
"4dcef3e0c58630449318b77428055539", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_320", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_640-v1.1.2", - "md5sumOnDisk": "64f8b9d961c33a1e1782b8b6ab1d05fe", - "md5sumInMemory": "f24cbcd91d0aff2e527a362cffd30fa0", - "md5sumInMemoryWithIo": "2941fbd78bb79228139c313a4c15955f", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_640", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_1280-v1.1.2", - "md5sumOnDisk": "cab8f52fce66b1165ccaa7e275e0f461", - "md5sumInMemory": "85ef437ef0f943dcbc6dfff78daa8d59", - "md5sumInMemoryWithIo": "e0e03814c630da534e3ca62de6a4be96", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_1280", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_2560-v1.1.2", - "md5sumOnDisk": "5cb6ffc68e95e5a72777231a4fe45781", - "md5sumInMemory": "52c43902ae61abc9759d6f91dccf5fff", - "md5sumInMemoryWithIo": "4942bb5c99591622bdf693d503823439", - "xclbin": "4x2_pso2_model_2k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_2560", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_5120-v1.1.2", - "md5sumOnDisk": "66f978a1eb891e9183187c4f8b39513f", - "md5sumInMemory": "7000b77b04c5cb833ec0d23ce6811b67", - "md5sumInMemoryWithIo": "b408a863e4c9d5d5327a8d9b0b9e89e6", - "xclbin": "4x2_pso2_model_5k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_5120", - "modelCategory": 
"PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSO2-08_8000-v1.1.2", - "md5sumOnDisk": "c331b78262c2de4762605f77602a0982", - "md5sumInMemory": "c0b6569e081583e377ee67bd40a85fcf", - "md5sumInMemoryWithIo": "2ca9dde32adfb5322f93bf50bdb8503d", - "xclbin": "4x2_pso2_model_8k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_8000", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSO2-09_1280-v1.1.2", - "md5sumOnDisk": "06e0629102c7bc3ed241c92e880e49bf", - "md5sumInMemory": "a62e7818824b478c80ec6235e7b99f0d", - "md5sumInMemoryWithIo": "d8a93310a25cd06f569a1d409cea375b", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "09_1280", - "modelCategory": "PSO2_08_09" - }, - { - "modelName": "PSO2-09_8000-v1.1.2", - "md5sumOnDisk": "bff1723ea4a3346560c3269cf52002f4", - "md5sumInMemory": "cf17cad72bea8b9434ea0a47fae75fb6", - "md5sumInMemoryWithIo": "932243e512281ead62d23c9aeb4fd557", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "09_8000", - "modelCategory": "PSO2_08_09" - }, - { - "modelName": "PSR", - "md5sumOnDisk": "c62d3ac875af16b59180d662966266e8", - "md5sumInMemory": "8cef986e9f91bd8248d929cec012a7aa", - "md5sumInMemoryWithIo": "c43cb269c32cb943ec3544b3cca7db4d", - "xclbin": "4x4_psr_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psr", - "modelCategory": "PSR", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSR", - "md5sumOnDisk": "33d809d58b570fa23262e9b787578344", - "md5sumInMemory": "5e6ab13b18d575a176d04aa840b13ebe", - "md5sumInMemoryWithIo": "50fc5db86f2a22cc3c5ece4c28096dbd", - "xclbin": "4x4_psr_model_a16w8_qdq_00.xclbin", - "target": 
"RyzenAI_transformer_cxx_psr", - "modelCategory": "PSR", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSW", - "md5sumOnDisk": "b6a4965536786c4953e30bca0cd3b315", - "md5sumInMemory": "c63dc0c92c08dec61ca896d846a8ca99", - "md5sumInMemoryWithIo": "6920d4a804bff69422a3e2127ed78c51", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psw", - "modelCategory": "PSW", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "8" - } - }, - { - "modelName": "PSW_v1.1.2", - "md5sumOnDisk": "84dd8949edca8d596bce3aacfbdfa94d", - "md5sumInMemory": "2d0fc3d7b8ecbd0be7d66b407168d231", - "md5sumInMemoryWithIo": "bda3bc15da0aebd40783415a820d3d2d", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psw", - "modelCategory": "PSW", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1" - } - }, - { - "modelName": "PSS", - "md5sumOnDisk": "5ad2857510b5646376f3f9348591b83e", - "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", - "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PSS" - }, - { - "modelName": "PST", - "md5sumOnDisk": "c8c2ab668b56daf7d2228e53c9a4f0db", - "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", - "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PST", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_v1.1", - "md5sumOnDisk": "92358bd7e8a68ea9c6e9d327423069e3", - "md5sumInMemory": "67d78f48fd05ce03e3efb69212243d30", - "md5sumInMemoryWithIo": "a807ae1f05fc42e16d57d59186a414b4", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PST", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_v1.1", - "md5sumOnDisk": "63a4651d48b4281ddf6a6a33ebad5fc7", - "md5sumInMemory": 
"60142dfa473572b34fbf476c37ebfa1b", - "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PSS", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_v1.0", - "md5sumOnDisk": "fbf3fd6e6bab35efba46b7e9060f2d62", - "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", - "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_v1.0", - "md5sumOnDisk": "4045c2294abd7e8c4af6812779a524de", - "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", - "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_nhwc", - "md5sumOnDisk": "1d46fbe6a09e79b36e21d985d937df3b", - "md5sumInMemory": "ccf646813e6e91ff09f9d4216047a6ec", - "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_nhwc", - "md5sumOnDisk": "02c8e157824d0c75289f2333b307a5a9", - "md5sumInMemory": "087423cf961c2df293d363abd712d5d7", - "md5sumInMemoryWithIo": "6512311dac77f235e3ef637287389419", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "GT_v1.2", - "md5sumOnDisk": "4daa45a72a36d731279b7c01e4545637", - "md5sumInMemory": "0fd6cc09fe78a6a5e4fb697c0e8670e9", - "md5sumInMemoryWithIo": "ebe9d9f38f4762972cc71a9d3b906017", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_00.xclbin", - "nodeCount": 7060 - }, - { - "modelName": "GT_v1.3", - "md5sumOnDisk": "d799de8b1e1fa572daad06d7a49a7afe", - "md5sumInMemory": "97bcaa432a2c634a707dfc23bf222032", - "md5sumInMemoryWithIo": "ebe9d9f38f4762972cc71a9d3b906017", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "GTC_v1.0", - "md5sumOnDisk": 
"d917b3af3dd2a05734571635cd760871", - "md5sumInMemory": "426c5dd1cf2fc5d303d4fb1288dca3d7", - "md5sumInMemoryWithIo": "deb53433fefad8d16b5f42cbcd5c061c", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "HT_v1.2", - "md5sumOnDisk": "01fa81ebc4c70ea06c2f4d28c16af389", - "md5sumInMemory": "6809517bbd0a7b44acc31942d410ca3a", - "md5sumInMemoryWithIo": "9731a32b64df8abdead6b6f370a293fc", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "PSV", - "md5sumOnDisk": "7a5774108d65860923250379a5efc8bd", - "md5sumInMemory": "6bca3a6045c4c6fdad5c40af8169326f", - "md5sumInMemoryWithIo": "8a4108e1444ab985761adb46447bdb7b", - "target": "PSV" - }, - { - "modelName": "M_w8a8", - "md5sumOnDisk": "39576973a43e3543b0437249f968201c", - "md5sumInMemory": "030e7f309fc2512acfacf471a033bd63", - "md5sumInMemoryWithIo": "99e8be1eb63f87e58af2641872edfb9a", - "target": "m_model" - }, - { - "modelName": "M_w8a16", - "md5sumOnDisk": "d2005c2526f4093cf709b67cbd00742e", - "md5sumInMemory": "61a06fc8ab22616577903bed15e10fdc", - "md5sumInMemoryWithIo": "99e8be1eb63f87e58af2641872edfb9a", - "target": "m_model" - }, - { - "modelName": "M_a8w8", - "md5sumOnDisk": "0bbf933b3fc8d5a1165057b5ee44c306", - "md5sumInMemory": "2cb58d0f5e6c05da9d48ba5a537a7bd7", - "md5sumInMemoryWithIo": "56648de96fa265727fdd3458cfb97bdc", - "target": "m_model" - }, - { - "modelName": "M_1080_1920_s8s8", - "md5sumOnDisk": "fe90a5b4602392474e0da565de1b0183", - "md5sumInMemory": "5dffba4329ed8687ae22ab2b82d2fcb4", - "md5sumInMemoryWithIo": "58be535dc6f625cb020cdc89097ff411", - "target": "m_model" - }, - { - "modelName": "M_720_1080_s8s8", - "md5sumOnDisk": "c643b3f3e2ca0b5081eddb39f1999afa", - "md5sumInMemory": "e96860eb14f53974f7fd8a96a8d83496", - "md5sumInMemoryWithIo": "651696e4f97e5b52d99f98a5312cf3ba", - "target": "m_model" - }, - { - "modelName": "M_540_960_s8s8", - "md5sumOnDisk": "1e8bc505d655a26e53cacef52fee2f01", - "md5sumInMemory": 
"582fd1f36a0e1d930dcf3fd524adc044", - "md5sumInMemoryWithIo": "859678b393dd2c03cbf84b8700183bc4", - "target": "m_model" - }, - { - "modelName": "M_400_400_s8s8", - "md5sumOnDisk": "49a7e98718263eccae8954ab75aa9910", - "md5sumInMemory": "0a3b66d3754d1a3840eec2c06e3334b9", - "md5sumInMemoryWithIo": "28ad9f773b56894cf8ea5e4c5786aad1", - "target": "m_model" - }, - { - "modelName": "model_m_1x256x256x3_xint8_quantized", - "md5sumOnDisk": "656ec46893c7d38af45cb4721e091eaf", - "md5sumInMemory": "9c94b2c668bb3f4d8a12cea493d1003c", - "md5sumInMemoryWithIo": "9b5e5f072cd8c4aefced37d7f6b0e866", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x400x400x3_xint8_quantized", - "md5sumOnDisk": "9f837a55af61e2e5c74202907cc95bd1", - "md5sumInMemory": "e1b0cdbc9e7bbedbd3f3c6fe421acc8f", - "md5sumInMemoryWithIo": "56648de96fa265727fdd3458cfb97bdc", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x512x512x3_xint8_quantized", - "md5sumOnDisk": "02925f26f9571cd51b9c4efcf1f7a50c", - "md5sumInMemory": "545b9e36cd5959f978ca6704137e5d69", - "md5sumInMemoryWithIo": "0266c6d50634ec2397e95903704d079e", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x1080x1920x3_xint8_quantized", - "md5sumOnDisk": "9a2fd356dc309f08c8d3832e28541dd2", - "md5sumInMemory": "5005f9a54e4f57d2100aa5502a63f622", - "md5sumInMemoryWithIo": "0665ad30ba4cd44c95fab7eff31db5a2", - "target": "old_qdq_py3" - }, - { - "modelName": "A3", - "md5sumOnDisk": "5817e747dea65f2f9e60dbaea457f4ad", - "md5sumInMemory": "b721088fac1683349406d47ce00c7162", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "old_qdq_py3" - }, - { - "modelName": "C2", - "md5sumOnDisk": "5b04d9add2f4eccf0981567ecd6596e8", - "md5sumInMemory": "9e20c31e62951e398d86e557dc50c490", - "md5sumInMemoryWithIo": "ed9200d492602a3fddc52f13a81bf92f", - "target": "old_qdq_py3" - }, - { - "modelName": "C4-Mix", - "md5sumOnDisk": "0b9b31d11a710796ea71b2e5a373d805", - "md5sumInMemory": 
"4e755febbb17436edd95dc93cd6273c8", - "md5sumInMemoryWithIo": "aa958aa703bfa2e551c9d9c5d92ab47b", - "target": "py3" - }, - { - "modelName": "E", - "md5sumOnDisk": "601323cb390efbd40d202292eba01174", - "md5sumInMemory": "dcaf70bc4238127d8541a962ff418b75", - "md5sumInMemoryWithIo": "07bd53aeb901c363c59d6965d972af0e", - "target": "old_qdq_py3" - }, - { - "modelName": "L_v_1_0_A8W8", - "md5sumOnDisk": "ee0cbe0c248bf37801297dd29b2dabbf", - "md5sumInMemory": "742ce71c1a46509932ecb6af7f8e2c39", - "md5sumInMemoryWithIo": "e3ef4dc14b209de607c0b028066d4d05", - "target": "py3" - }, - { - "modelName": "a3", - "md5sumOnDisk": "73ecb2594935fb9bd02707930610f29e", - "md5sumInMemory": "f59151f8b67a7b8f1a8bcc7798558c33", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "VAIML-x2.0-a3" - }, - { - "modelName": "DeepLabV3", - "md5sumOnDisk": "10a644c6da6b1121f807794506b7e5cc", - "md5sumInMemory": "849608d568bbc54380833c9446299989", - "md5sumInMemoryWithIo": "99f39afbf868542fb575023123c98001", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "inceptionv4", - "md5sumOnDisk": "42c48a7086ba34889699862d98844e62", - "md5sumInMemory": "591d2dc2a27f04111e1b14fbf4222d51", - "md5sumInMemoryWithIo": "3d6d80d6c60811089ced3bf6abb42cdc", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "MobileNet_V3", - "md5sumOnDisk": "5bbc61c013f20b6563c62523100fa2ee", - "md5sumInMemory": "0bcec05d638535092032784dcca2cce3", - "md5sumInMemoryWithIo": "1506de83ac617b0903613ca420061e84", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "ResNet50", - "md5sumOnDisk": "c22b773b35f2ce62932578cc5eec867b", - "md5sumInMemory": "b307088b2bf693c51d8ea399247d8139", - "md5sumInMemoryWithIo": "1bea49f637b8f9f0ec80bc44c6d841bb", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "timm_mobilenetv3_small_100.lamb_in1k_a8w8_nchw", - "md5sumOnDisk": "fcb0bf5a6614042c0859937a24a24185", - "md5sumInMemory": "ac63b86137383c6b001f12030eeeff3e", - "target": "VAIML-x2.0-4x4" - }, - { - 
"modelName": "microsoft_resnet50_224x224_a8w8", - "md5sumOnDisk": "a6235fb37dd84a930c9b2951615570c6", - "md5sumInMemory": "da5192919ce2534ddaa2fb5ced220215", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "microsoft_resnet50_224x224_a8w8", - "md5sumOnDisk": "eeebfd0356baf9a8deed9c30d438d792", - "md5sumInMemory": "d795b64172132cecca0f5b80ff6e0861", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "microsoft_resnet50_224x224_nhwc_a8w8", - "md5sumOnDisk": "6f8d3850d21b6e85deafe180d682249b", - "md5sumInMemory": "1ce6d27e87140766164c39cd58fc426c", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "timm_mobilenetv3_small_100.lamb_in1k_nhwc_a8w8", - "md5sumOnDisk": "2e19885c87c5db681fa35da00b7c9579", - "md5sumInMemory": "7ae8dcf602d72549759ae5089b9119cf", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-4x4" - }, - { - "modelName": "resnet_50_ptq_qdq_nhwc_a16w8", - "md5sumOnDisk": "8f883624a97f00789c5d0e508202c224", - "md5sumInMemory": "94476250149cf1dfa219fe42b30026da", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "resnet_50_ptq_qdq_uint8a_uint8w_nhwc_a8w8", - "md5sumOnDisk": "5f86b73c21a98695c741664193d0b93e", - "md5sumInMemory": "94476250149cf1dfa219fe42b30026da", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "PSP1_v1.1", - "md5sumOnDisk": "8ac224e3547da42bde7ec182c94c88ea", - "md5sumInMemory": "efdf2702eda557558169377bd2301d9c", - "md5sumInMemoryWithIo": "962b676e17290cdb22869b2df9a19400", - "target": "VAIML-x2.0-4x4" - }, - { - "modelName": "PSA2", - "md5sumOnDisk": "e086205adc8a885a8440eb375068159f", - "md5sumInMemory": "0803bf9ef641e3e1043f56653552101f", - "md5sumInMemoryWithIo": "7205a1506ef140cd65f2eb0a5b2ce65e", - "target": "VAIML-x2.0-4x4" - } - ], - 
"target": "VAIML", - "targets": [ - { - "name": "xcompiler", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_2904.xclbin", - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 2 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - }, - "py3_round": false, - "provider_options": { - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "VAIML_config_0", - "pass": [ - "init", - "vaiml_lt", - "vaip_pass_dd_merge_dqcastgather", - "vaip_pass_dd_merge_qop", - "vaip_pass_dd_merge_dqop", - "vaip_pass_dd_merge_qop_onnx", - "vaip_pass_dd_merge_dqop_onnx" - ] - }, - { - "name": "py3", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "py3_round": true, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "old_qdq_py3", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "old_qdq": true, - "py3_round": true, - "provider_options": { - "xlnx_enable_old_qdq": "1", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "m_model", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - 
"uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "PSV", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "uint16_qconv_leaky_fusion": { - "boolValue": true - }, - "enable_convert_three_term_to_two_term": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_shell_config_1", - "xclbin": "AMD_AIE2P_2x4x1_Overlay_2926.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 3 - }, - "profile": { - "uintValue": 0 - }, - "enable_fast_pm": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mergesync": { - "boolValue": true - } - } - }, - "graph_engine_qos_priority": 640, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_2", - "xclbin": "1x4.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 16 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - } - }, - "provider_options": { - 
"enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_3", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - } - } - }, - "provider_options": { - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_3_mha", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_transformer_cxx_psf", - "xclbin": "4x2_psf_model_a8w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_llm_eager", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "fuse_MatMulNBits", - "fuse_SSMLP", - "fuse_MLP" - ] - }, - { - "name": "RyzenAI_transformers_base", - "xclbin": "8x4_wcr_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr" - ] - }, - { - "name": "RyzenAI_transformers_psp1", - "xclbin": "8x4_psp1_a16w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psp1" - ] - }, - { - "name": "RyzenAI_llm_phi3", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_llm_llama", - "xclbin": "8x4_llama_3_2_1b_model_a16w8_qdq_00.xclbin", - "pass": [ 
- "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_llm_qwen2", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psw", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psw" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psi", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psj", - "xclbin": "4x2_psj_model_a8w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psj" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu0", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu0" - ] - }, - { - "name": "RyzenAI_transformer_cxx_vaiml_llm", - "xclbin": "vaiml_2x4x4_gemm_silu_rms_mul_add_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_vaiml_llm" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu1", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu1" - ] - }, - { - "name": "RyzenAI_transformer_cxx_mu0_8x4", - "xclbin": "8x4_psmu_st_v3.2_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_mu0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_mu1_8x4", - "xclbin": "8x4_psmu_st_v3.2_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_mu1_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_ds0_8x4", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_ds0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_ds1_8x4", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_ds0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_4x4", - "xclbin": 
"4x4_psw_psu_integrated_model_a16w16_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_4x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_8x4", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_8x4_opt", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_8x4_opt" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_4x4_opt", - "xclbin": "4x4_psw_psu_integrated_model_a16w16_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_4x4_opt" - ] - }, - { - "name": "RyzenAI_transformer_config_2", - "pass": [ - "init", - "fuse_dynamic_dispatch_psr" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psh", - "xclbin": "4x2_psh_model_a16w8_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psh" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psq1", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psq1" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psq2", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pso2", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_pso2" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pso2_02", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_pso2_02" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psr", - "pass": [ - "init", - "fuse_dynamic_dispatch_psr" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pss_pst", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq_04.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch_pss_pst" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": 
"performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - }, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-a3", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-Procyon", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "enable_qdq_force_xint": { - "boolValue": true - }, - "opt_level": { - "uintValue": 3 - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3,disabel-rtp-pipeline=0" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-Procyon_resnet50", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "opt_level": { - "uintValue": 3 - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3,disabel-rtp-pipeline=0" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-4x4", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0_yolo" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_4x4_CMC_Overlay" - ] - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "PSAV2", - "pass": [ - "init", - 
"fuse_DPU_VAIML-x2.0_yolo" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_4x4_CMC_Overlay" - ] - }, - "enable_mul_matmul_fusion": { - "boolValue": true - }, - "enable_matmul_add_fusion": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ], - "provider_options": { - "enable_cache_file_io_in_mem": "0" - } - } - ], - "enable_cache_file_io_in_mem": true -} \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_2_3.json b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_2_3.json deleted file mode 100644 index 6257e9ee..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/vaip_config_npu_2_3.json +++ /dev/null @@ -1,6777 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_MatMulNBits", - "plugin": "vaip-pass_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_MLP", - "plugin": "vaip-pass_mladf_mlp", - "enable_gc": true, - "disabled": true - }, - { - "name": "fuse_SSMLP", - "plugin": "vaip-pass_ssmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "fuse_dynamic_dispatch_wcr", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_3", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "IConv", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - }, - { - "attribute_name": "is_bias", - 
"attribute_value": "true" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14", - "constant_8", - "constant_9", - "constant_10" - ], - "modifiers": [ - "add_stride_inp_zp_attr" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_3", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "QConv2MatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - }, - { - "attribute_name": "is_bias", - "attribute_value": "true" - }, - { - "attribute_name": "from_conv2matmul", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14", - "constant_8", - "constant_9", - "constant_10" - ], - "modifiers": [ - "check_kernel_conv2matmul" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_iconv_4", - "input_names": [ - "input_0", - "constant_4" - ], - "output_names": "ms_QuantizeLinear_15" - }, - "op_name": "IConv", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "input_format", - "attribute_value": "NCHW" - } - ], - "initializers": [ - "constant_5", - "constant_6", - "constant_1", - "constant_2", - "constant_13", - "constant_14" - ], - "modifiers": [ - "add_stride_inp_zp_attr" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - 
"passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_gelu_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_quickgelu", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_tanh", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QGemmTanh", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "dq5_s", - "dq5_z", - "q3_s", - "q3_z" - ], - "modifiers": [ - "transpose_weight_gemm" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", 
- "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_gelu_microsoft", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q1_s", - "q1_z", - "dq3_s", - "dq3_z", - "b_s", - "b_z", - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_2", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q1" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z" - ], - "modifiers": [ - "transpose_weight_gemm", - "wcr_prefix" - ] - } - } - }, - { - "name": 
"vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_transpose", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "output" - }, - "op_name": "QMatMulDynamic", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "modifiers": [ - "act_act_matmul_modifier" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_transpose", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "output" - }, - "op_name": "QMatmulDynamicTranspose", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5", - "constant_6", - "constant_7", - "constant_8", - "constant_9" - ], - "modifiers": [ - "QBatchMatMulDynamic_predicate", - "dq_a", - "dq_c", - "input_0", - "input_1" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, 
- "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_act_act_2", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "q1" - }, - "op_name": "QMatmulDynamicTranspose", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBatchMatMulDynamic_predicate", - "input_0", - "input_1", - "extract_bmkn" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_1", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_0", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - 
"attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_reshape_softmax", - "input_names": [ - "input" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QMulSoftmax", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_dqsoftmax_1", - "input_names": [ - "input_0" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QMulSoftmax", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate" 
- ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QActConstAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b_s", - "b_z", - "a_s", - "a_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd_1", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastBiasAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_2", - "constant_3", - "constant_0", - "constant_1", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBroadcastBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd", - "input_names": [ - "input_1", - "input_0" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastBiasAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - 
"initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_2", - "constant_3", - "constant_0", - "constant_1" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBroadcastBiasAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qbroadcastadd", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QBroadcastAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "QBroadcastAdd_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_transpose", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QReshapeTranspose", - "extractor": { - "input_q_params": [ - "in_scale", - "in_zp" - ], - "output_q_params": [ - "out_scale", - "out_zp" - ], - "accessor_attributes": [ - { - "node_binder_name": "transpose", - "attribute_name": "perm", - "dtype": "ints" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "modifiers": [ - "Transpose_predicate", - "add_dummy_inputs", - "int32[16]" 
- ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_lpnorm_4", - "input_names": [ - "a" - ], - "output_names": "q1" - }, - "op_name": "L2_Norm", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "q1_s", - "q1_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qreshape", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qsqueeze", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qunsqueeze", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": 
"m_qelwediv", - "input_names": [ - "input_0" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "qdiv_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qelwemul", - "input_names": [ - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "NoOp", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "qmul_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_transpose_noop", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QReshapeTranspose", - "extractor": { - "initializers": [ - "dummy" - ], - "accessor_attributes": [ - { - "node_binder_name": "", - "attribute_name": "perm_transpose", - "dtype": "ints" - }, - { - "node_binder_name": "", - "attribute_name": "input_q_params", - "dtype": "floats" - }, - { - "node_binder_name": "", - "attribute_name": "output_q_params", - "dtype": "floats" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "generic_fusion", - "attribute_value": "true" - } - ], - "modifiers": [ - "Transpose_predicate" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgather", - "input_names": [ - "input_0" - ], - "output_names": "q" - }, - "op_name": "NoOp", - "extractor": { - 
"explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "modifiers": [ - "infer_qgather_to_noop" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_dq", - "input_names": [ - "input" - ], - "output_names": "dq" - }, - "op_name": "DeQuantOP", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "in_s", - "in_z" - ], - "input_q_params": [ - "in_s", - "in_z" - ], - "output_q_params": [ - "in_s", - "in_z" - ], - "modifiers": [ - "out_dtype_modifier", - "bfloat16", - "input_only_act" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_q", - "input_names": [ - "input" - ], - "output_names": "dq" - }, - "op_name": "QuantOP", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "in_s", - "in_z" - ], - "input_q_params": [ - "in_s", - "in_z" - ], - "output_q_params": [ - "in_s", - "in_z" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psp1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_gelu_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - 
"attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_quickgelu", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_gelu_microsoft", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q3" - }, - "op_name": "QMatMulAddGelu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "gelu", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z", - "q2_s", - "q2_z", - "q3_s", - "q3_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_add_0", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - 
"explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q1_s", - "q1_z", - "dq3_s", - "dq3_z", - "b_s", - "b_z", - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qgemm_2", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q1" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "a_s", - "a_z", - "w_s", - "w_z", - "b_s", - "b_z", - "q1_s", - "q1_z" - ], - "modifiers": [ - "transpose_weight_gemm" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_1", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { 
- "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qlayernorm_0", - "input_names": [ - "a" - ], - "output_names": "q" - }, - "op_name": "QLayerNorm", - "extractor": { - "accessor_attributes": [ - { - "node_binder_name": "ln", - "attribute_name": "axis", - "dtype": "int" - }, - { - "node_binder_name": "ln", - "attribute_name": "epsilon", - "dtype": "float" - }, - { - "node_binder_name": "ln", - "attribute_name": "stash_type", - "dtype": "int" - } - ], - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "b", - "c", - "a_s", - "a_z", - "b_s", - "b_z", - "c_s", - "c_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_wcr_llm", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - 
{ - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_channelwise_silu", - "plugin": "vaip-pass_dd_merge_qmatmul_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_silu", - "input_names": [ - "a", - "w" - ], - "output_names": "com_microsoft_QuantizeLinear_3" - }, - "op_name": "QMatMulAddSilu", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z", - "constant_19", - "constant_20" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "constant_19", - "constant_20" - ], - "modifiers": [ - "out_dtype_modifier", - "bfloat16" - ] - } - } - }, - { - "name": "merge_matmul_channelwise_biasadd", - "plugin": "vaip-pass_dd_merge_qmatmul_int4_bias", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_int4_bias", - "input_names": [ - "a", - "w", - "b" - ], - "output_names": "q2" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "bias", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - 
"b_s", - "b_z", - "q2_s", - "q2_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q2_s", - "q2_z" - ] - } - } - }, - { - "name": "merge_matmul_channelwise", - "plugin": "vaip-pass_dd_merge_qmatmul2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_2", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMulAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "channelwise", - "attribute_value": "true" - }, - { - "attribute_name": "input_format", - "attribute_value": "NHWC" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q_s", - "q_z" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Transpose" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_lpnorm_3", - "input_names": [ - "a" - ], - "output_names": "q2" - }, - "op_name": "L2_Norm", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - }, - { - "attribute_name": "fuse_mul", - "attribute_value": "true" - } - ], - "initializers": [ - "a_s", - "a_z", - "q2_s", - "q2_z", - "dq3_s", - "b" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "q2_s", - "q2_z" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - 
"enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qeltwise_add_0", - "input_names": [ - "a", - "b" - ], - "output_names": "q" - }, - "op_name": "QEltWiseAdd", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "a_s", - "a_z", - "b_s", - "b_z", - "y_s", - "y_z" - ], - "input_q_params": [ - "a_s", - "a_z" - ], - "output_q_params": [ - "y_s", - "y_z" - ], - "modifiers": [ - "QEltWiseAdd_predicate", - "in_dtype_modifier", - "bfloat16", - "out_dtype_modifier", - "bfloat16", - "set_xclbin_name" - ] - } - } - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qelwemul", - "input_names": [ - "input_0", - "input_1" - ], - "output_names": "com_microsoft_QuantizeLinear_0" - }, - "op_name": "QELWEMUL_qdq", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "constant_0", - "constant_1", - "constant_2", - "constant_3", - "constant_4", - "constant_5" - ], - "input_q_params": [ - "constant_0", - "constant_1" - ], - "output_q_params": [ - "constant_4", - "constant_5" - ], - "modifiers": [ - "in_dtype_modifier", - "bfloat16", - "QELWEMUL_qdq_predicate", - "set_xclbin_name" - ] - } - } - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - 
"enable_gc": true, - "disabled": true, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_0", - "input_names": [ - "a", - "w" - ], - "output_names": "q" - }, - "op_name": "QMatMul", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "4x2" - } - ], - "initializers": [ - "a_s", - "a_z", - "w_s", - "w_z", - "q_s", - "q_z" - ], - "modifiers": [ - "QMatMul_predicate" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psf", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_qmhagrpb", - "plugin": "vaip-pass_dd_merge_qmhagrpb", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxpzi", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmha", - "plugin": "vaip-pass_dd_merge_qmha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhachannel", - "plugin": "vaip-pass_dd_merge_qmhachannel", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhawindow", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlstm", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_0", - "plugin": "vaip-pass_dd_merge_iconv_0", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": 
"vaip-pass_dd_merge_iconv_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_1", - "plugin": "vaip-pass_dd_merge_qlayernorm_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qreshape_transpose", - "plugin": 
"vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qskipadd", - "plugin": "vaip-pass_dd_merge_qskipadd", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_ql2norm", - "plugin": "vaip-pass_dd_merge_ql2norm", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_2", - "plugin": "vaip-pass_dd_merge_qlpnorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro", - "plugin": "vaip-pass_dd_merge_attentionprepro", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_identity", - "plugin": "vaip-pass_dd_merge_identity", - "enable_gc": true, - "disabled": true - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psw", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_demha", - "plugin": "vaip-pass_dd_merge_DeMHA", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_psw", - "plugin": "vaip-pass_dd_merge_attentionprepro_psw", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_QKVProj", - 
"plugin": "vaip-pass_dd_merge_qmatmul_add_rtr", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu4", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_batch_matmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "intadd" - ] - }, - { - "name": "vaip_pass_dd_merge_qgemmv", - "plugin": "vaip-pass_dd_merge_qgemmv", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_elementwise_mul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": false, - "args": [ - "intmul" - ] - }, - { - "name": "vaip_pass_dd_merge_qconcat", - "plugin": "vaip-pass_dd_merge_qconcat", - "enable_gc": true, - "disabled": false, - "args": [ - "custom_op" - ] - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "DDMergeShape_psw", - "plugin": "vaip-pass_dd_merge_DDMergeShape_psw", - "enable_gc": true, - "disabled": 
false - }, - { - "name": "vaip_pass_dd_merge_identity", - "plugin": "vaip-pass_dd_merge_identity", - "enable_gc": true, - "disabled": true - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psj", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_qmhagrpb", - "plugin": "vaip-pass_dd_merge_qmhagrpb", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxpzi", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmha", - "plugin": "vaip-pass_dd_merge_qmha", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhachannel", - "plugin": "vaip-pass_dd_merge_qmhachannel", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhawindow", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlstm", - "plugin": "vaip-pass_dd_merge_qmhawindow", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_0", - "plugin": "vaip-pass_dd_merge_iconv_0", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": "vaip-pass_dd_merge_iconv_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - 
"disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_1", - "plugin": "vaip-pass_dd_merge_qlayernorm_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qglobalavgpool", - "plugin": "vaip-pass_dd_merge_qglobalavgpool", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qreshape_transpose", - "plugin": "vaip-pass_dd_merge_qreshape_transpose", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qskipadd", - "plugin": "vaip-pass_dd_merge_qskipadd", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_ql2norm", - "plugin": "vaip-pass_dd_merge_ql2norm", - "enable_gc": true, - "disabled": false - 
}, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxgan", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxpzi", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxpzi", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psr", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_iconv_0", - "plugin": "vaip-pass_dd_merge_iconv_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_1", - "plugin": "vaip-pass_dd_merge_iconv_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_2", - "plugin": "vaip-pass_dd_merge_iconv_2", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_iconv_3", - "plugin": "vaip-pass_dd_merge_iconv_3", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_mzdk5mha", - "plugin": "vaip-pass_dd_merge_mzdk5mha", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgroupnorm_0", - "plugin": "vaip-pass_dd_merge_qgroupnorm_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgroupnorm_1", - "plugin": "vaip-pass_dd_merge_qgroupnorm_1", - "enable_gc": false, - "disabled": false 
- }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_0", - "plugin": "vaip-pass_dd_merge_qconv2matmul_0", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_1", - "plugin": "vaip-pass_dd_merge_qconv2matmul_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_2", - "plugin": "vaip-pass_dd_merge_qconv2matmul_2", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add3", - "plugin": "vaip-pass_dd_merge_qmatmul_add3", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qslice", - "plugin": "vaip-pass_dd_merge_qslice", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat", - "plugin": "vaip-pass_dd_merge_qconcat", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgemmv", - "plugin": "vaip-pass_dd_merge_qgemmv", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qgelu", - "plugin": "vaip-pass_dd_merge_qgelu", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": false, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": false, - "disabled": false - 
}, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qresize", - "plugin": "vaip-pass_dd_merge_qresize", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_quant", - "plugin": "vaip-pass_dd_merge_quant", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dequant", - "plugin": "vaip-pass_dd_merge_dequant", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_dtype_mzdk5", - "plugin": "vaip-pass_dd_merge_dtype_mzdk5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_shape_mzdk5", - "plugin": "vaip-pass_dd_merge_shape_mzdk5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psh", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "merge_tanh_lpnorm", - "plugin": "vaip-pass_dd_merge_tanh_lpnorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmhagrpb_3_mxgan", - "plugin": "vaip-pass_dd_merge_qmhagrpb_3_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu", - "enable_gc": true, - "disabled": false - }, - { - 
"name": "merge_qmatmul_add_gelu2", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add_gelu3", - "plugin": "vaip-pass_dd_merge_qmatmul_add_gelu3", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul_mxgan", - "plugin": "vaip-pass_dd_merge_qelwemul_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm_2", - "plugin": "vaip-pass_dd_merge_qlayernorm_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qlayernorm", - "plugin": "vaip-pass_dd_merge_qlayernorm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsigmoid", - "plugin": "vaip-pass_dd_merge_qsigmoid", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_mxgan", - "plugin": "vaip-pass_dd_merge_attentionprepro_mxgan", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_attentionprepro_psh", - "plugin": "vaip-pass_dd_merge_attentionprepro_psh", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_reduce_sum", - "plugin": "vaip-pass_dd_merge_reduce_sum", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_expand", - "plugin": "vaip-pass_dd_merge_expand", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_expand_psh", - "plugin": "vaip-pass_dd_merge_expand_psh", - "enable_gc": true, - "disabled": false - }, - { - 
"name": "vaip_pass_dd_merge_qeltwise_div", - "plugin": "vaip-pass_dd_merge_qeltwise_div", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dps", - "plugin": "vaip-pass_dd_merge_dps", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul1", - "plugin": "vaip-pass_dd_merge_qmatmul1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dtype", - "plugin": "vaip-pass_dd_merge_dtype", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psq1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_actconstadd", - "plugin": "vaip-pass_dd_merge_actconstadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_dqadd", - "plugin": "vaip-pass_dd_merge_dqadd", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_gather", - "plugin": "vaip-pass_dd_merge_gather", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pso2", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qconcateops_1_2_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops", - "plugin": "vaip-pass_dd_merge_qconcateops", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_1", - "plugin": "vaip-pass_dd_merge_qconcateops_1_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_2", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2", - "enable_gc": true, - "disabled": false - }, - { - "name": 
"vaip_pass_dd_merge_dqsoftmax", - "plugin": "vaip-pass_dd_merge_dqsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pso2_02", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qconcateops_1_2", - "plugin": "vaip-pass_dd_merge_qconcateops_1_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcateops_1_3", - "plugin": "vaip-pass_dd_merge_qconcateops_1_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqsoftmax", - "plugin": "vaip-pass_dd_merge_dqsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_mu0_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_rope_const", - "plugin": "vaip-pass_dd_merge_rope_const", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_const_1", - "plugin": "vaip-pass_dd_merge_rope_const_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act_1", - "plugin": "vaip-pass_dd_merge_rope_act_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act", - "plugin": "vaip-pass_dd_merge_rope_act", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmha_psmu", - "plugin": "vaip-pass_dd_merge_qmha_psmu", - "enable_gc": true, - "disabled": 
false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": true, - "args": [ - "bf16" - ] - }, - { - "name": "merge_qgemm_gelu", - "plugin": "vaip-pass_dd_merge_qgemm_gelu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm_1", - "plugin": "vaip-pass_dd_merge_qgemm_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm", - "plugin": "vaip-pass_dd_merge_qgemm", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmul_actxact_1", - "plugin": "vaip-pass_dd_merge_qmul_actxact_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmul_actxact", - "plugin": "vaip-pass_dd_merge_qmul_actxact", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsoftmax_1", - "plugin": "vaip-pass_dd_merge_qsoftmax_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Reshape" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": true, - "disabled": false, - "args": [ - "add_flags" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastbiasadd", - "plugin": "vaip-pass_dd_merge_qbroadcastbiasadd", - "enable_gc": true, - "disabled": false 
- }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_no_op", - "plugin": "vaip-pass_dd_merge_no_op", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_no_op_reshape", - "plugin": "vaip-pass_dd_merge_no_op_reshape", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat_m", - "plugin": "vaip-pass_dd_merge_qconcat_m", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_transpose", - "plugin": "vaip-pass_dd_merge_transpose", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch", - "plugin": "vaip-pass_dd_merge_combined_mhamask", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch_1", - "plugin": "vaip-pass_dd_merge_combined_mhamask_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_qrope_const", - "input_names": [ - "input" - ], - "output_names": "output" - }, - "op_name": "QMatMul_QRopeConst", - "extractor": { - "explicit_attributes": [ - { - "attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "const_1", - "const_2", - "const_3", - "const_4", - "const_5", - "const_8", - "const_7", - "const_6" - ] - } - } - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_mu1_8x4", - 
"plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_rope_const", - "plugin": "vaip-pass_dd_merge_rope_const", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_const_1", - "plugin": "vaip-pass_dd_merge_rope_const_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act_1", - "plugin": "vaip-pass_dd_merge_rope_act_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_rope_act", - "plugin": "vaip-pass_dd_merge_rope_act", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qmha_psmu", - "plugin": "vaip-pass_dd_merge_qmha_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qgemm_gelu", - "plugin": "vaip-pass_dd_merge_qgemm_gelu", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qgemm_1", - "plugin": "vaip-pass_dd_merge_qgemm_1", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qgemm", - "plugin": "vaip-pass_dd_merge_qgemm", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul_add", - "plugin": "vaip-pass_dd_merge_qmatmul_add", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul_add2", - "plugin": "vaip-pass_dd_merge_qmatmul_add2", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_qmatmul", - "enable_gc": true, - "disabled": false, - "args": [ - "per_subv" - ] - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Reshape", - "Slice" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - 
"args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastadd", - "plugin": "vaip-pass_dd_merge_qbroadcastadd", - "enable_gc": true, - "disabled": false, - "args": [ - "add_flags" - ] - }, - { - "name": "vaip_pass_dd_merge_qbroadcastbiasadd", - "plugin": "vaip-pass_dd_merge_qbroadcastbiasadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": true, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_no_op", - "plugin": "vaip-pass_dd_merge_no_op", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_no_op_reshape", - "plugin": "vaip-pass_dd_merge_no_op_reshape", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconcat_m", - "plugin": "vaip-pass_dd_merge_qconcat_m", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qslice", - "plugin": "vaip-pass_dd_merge_qslice_1", - "enable_gc": false, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch", - "plugin": "vaip-pass_dd_merge_combined_mhamask", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mha_batch_1", - "plugin": "vaip-pass_dd_merge_combined_mhamask_1", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_top_fuse", - "plugin": "vaip-pass_dd_merge_top_fuse", - "enable_gc": true, - "disabled": false, - "passFusionParam": { - "pattern": { - "pattern_name": "m_qmatmul_qrope_input", - "input_names": [ - "input", - "const_6", - "const_7" - ], - "output_names": "output" - }, - "op_name": "QMatMul_QRopeInput", - "extractor": { - "explicit_attributes": [ - { - 
"attribute_name": "design_param", - "attribute_value": "8x4" - } - ], - "initializers": [ - "const_1", - "const_2", - "const_3", - "const_4", - "const_5", - "const_8" - ] - } - } - }, - { - "name": "pass_dd_batch_matmul_v", - "plugin": "vaip-pass_dd_batch_matmul_v", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_combined_matmul_psmu", - "plugin": "vaip-pass_dd_merge_combined_matmul_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_combined_MatmulRope_psmu", - "plugin": "vaip-pass_dd_merge_combined_MatmulRope_psmu", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_ds0_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16", - "Transpose" - ] - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_5", - "plugin": "vaip-pass_dd_merge_qconv2matmul_5", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_7", - "plugin": "vaip-pass_dd_merge_qconv2matmul_7", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_6", - "plugin": "vaip-pass_dd_merge_qconv2matmul_6", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": true, - "args": [ - "bf16" - ] - }, - { - "name": 
"vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qelwemul", - "plugin": "vaip-pass_dd_merge_qelwemul", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_ds", - "plugin": "vaip-pass_dd_merge_EndPass_ds", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_4x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_8x4", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - 
"disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "8x4PSU" - ] - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_4x4_opt", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp", - "plugin": "vaip-pass_dd_merge_flatmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": 
"vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu_8x4_opt", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flatmlp", - "plugin": "vaip-pass_dd_merge_flatmlp", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_flatmlp2", - "plugin": "vaip-pass_dd_merge_flatmlp2", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_flatmlp3", - "plugin": "vaip-pass_dd_merge_flatmlp3", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_3", - "plugin": "vaip-pass_dd_merge_qconv2matmul_3", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qconv2matmul_4", - "plugin": "vaip-pass_dd_merge_qconv2matmul_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qlpnorm_3", - "plugin": "vaip-pass_dd_merge_qlpnorm_3", - "enable_gc": true, - "disabled": false, - "args": [ - "bf16" - ] - }, - { - "name": "vaip_pass_dd_merge_qeltwise_add", - "plugin": "vaip-pass_dd_merge_qeltwise_add", - "enable_gc": true, - "disabled": false, - "args": [ - "4x4PSU" - ] - }, - { - "name": "vaip_pass_dd_merge_qdq_unsqueeze", - "plugin": "vaip-pass_dd_merge_qdq_unsqueeze", - "enable_gc": true, - "disabled": false - }, - { - "name": "DD_8_4_psu", - "plugin": "vaip-pass_dd_merge_EndPass_psu", - "enable_gc": true, - "disabled": false, - "args": [ - "8x4PSU" - ] - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu0", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "gqa", - "plugin": "vaip-pass_gqa", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_silu", - "plugin": 
"vaip-pass_dd_merge_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_elwemul", - "plugin": "vaip-pass_dd_merge_elwemul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_nbits", - "plugin": "vaip-pass_dd_merge_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfadd", - "plugin": "vaip-pass_dd_merge_mladfadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_slrn", - "plugin": "vaip-pass_dd_merge_slrn", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_psu1", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_flat_sslrn", - "plugin": "vaip-pass_dd_merge_flat_sslrn", - "enable_gc": true, - "disabled": true - }, - { - "name": "merge_mlp", - "plugin": "vaip-pass_dd_merge_mlp", - "enable_gc": true, - "disabled": true - }, - { - "name": "vaip_pass_dd_merge_silu", - "plugin": "vaip-pass_dd_merge_silu", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_elwemul", - "plugin": "vaip-pass_dd_merge_elwemul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_matmul_nbits", - "plugin": "vaip-pass_dd_merge_matmul_nbits", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfadd", - "plugin": "vaip-pass_dd_merge_mladfadd", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_slrn", - "plugin": "vaip-pass_dd_merge_slrn", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_pss_pst", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_mladfelwmul", - "plugin": "vaip-pass_dd_merge_mladfelwmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_mladfmatmul", - "plugin": "vaip-pass_dd_merge_mladfmatmul", - "enable_gc": 
true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qsoftmax", - "plugin": "vaip-pass_dd_merge_qsoftmax", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": 
"vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - }, - "minimum_num_of_conv": 2 - } - }, - { - "name": "fuse_DPU_VAIML-x2.0", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": 
"vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - 
"_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": { - "stringValue": "function" - }, - "enable_vaiml_flow_compile": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mt_fusion": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "enable_control_optimization": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_DPU_VAIML-x2.0_yolo", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "npu_checker", - "plugin": "vaip-pass_npu_checker" - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "convert_topk_to_xir", - "plugin": "vaip-pass_convert_topk_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": 
"vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "dpu_subgraph_num": { - "uintValue": 32 - }, - "debug_mode": 
{ - "stringValue": "function" - }, - "enable_vaiml_flow_compile": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_control_optimization": { - "boolValue": true - }, - "enable_merge_requantize": { - "boolValue": false - }, - "remove_call_aiecompiler_cache": { - "boolValue": true - }, - "mladf_options": { - "stringValue": "print-timer=3" - } - }, - "minimum_num_of_conv": 2, - "debug": false - } - }, - { - "name": "fuse_dynamic_dispatch_vaiml_llm", - "plugin": "vaip-pass_level1_dd_cxx", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qrmsnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmulbias", - "plugin": "vaip-pass_dd_merge_qmatmul_add4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_vaimlqmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qadd", - "plugin": "vaip-pass_dd_merge_qeltwise_add_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmul", - "plugin": "vaip-pass_dd_merge_vaimlqelwemul", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_dynamic_dispatch_vaiml_llm_gen_txn", - "plugin": "vaip-pass_level1_dd_cxx_gen_txn", - "passDpuParam": { - "subPass": [ - { - "name": "vaip_pass_dd_merge_onnx_qdq", - "plugin": "vaip-pass_dd_merge_onnx_qdq", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qrmsnorm", - "plugin": "vaip-pass_dd_merge_qlpnorm_4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qsilu", - "plugin": "vaip-pass_dd_merge_qsilu_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmulbias", - 
"plugin": "vaip-pass_dd_merge_qmatmul_add4", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmatmul", - "plugin": "vaip-pass_dd_merge_vaimlqmatmul", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qadd", - "plugin": "vaip-pass_dd_merge_qeltwise_add_2", - "enable_gc": true, - "disabled": false - }, - { - "name": "merge_qmul", - "plugin": "vaip-pass_dd_merge_vaimlqelwemul", - "enable_gc": true, - "disabled": false - } - ] - } - }, - { - "name": "fuse_DPU_MHA", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "name": "convert_MHA", - "plugin": "vaip-pass_convert_MHA", - "enableGc": true - }, - { - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_split_to_xir", - "plugin": "vaip-pass_convert_split_to_xir_op" - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "convert_pad", - "plugin": "vaip-pass_convert_pad", - "enableGc": true - }, - { - "name": "convert_in_to_gn", - "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", - "enableGc": true - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_merge_fix", - "enableGc": true - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": 
"remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_const_fold_batchnorm_to_scale" - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "name": "merge_mul", - "plugin": "vaip-pass_merge_mul", - "enableGc": true - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_convert_softmax_to_hard_softmax", - "disabled": true - }, - { - "name": "merge_fix_fix_transpose", - "plugin": "vaip-pass_merge_fix_fix_transpose", - "enableGc": true, - "disabled": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin" - } - }, - { - "name": "vaiml_lt", - "plugin": "vaip-pass_vaiml_lt", - "vaiml_config": { - "vaiml_model_path": "vaiml_par_0", - "max_num_partitions": 200, - "device_name": "phx", - "debug": true - } - }, - { - "name": "vaip_pass_dd_merge_dqcastgather", - "plugin": "vaip-pass_dd_merge_dqcastgather", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop", - "plugin": "vaip-pass_dd_merge_qop", - "enable_gc": 
true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop", - "plugin": "vaip-pass_dd_merge_dqop", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_qop_onnx", - "plugin": "vaip-pass_dd_merge_qop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaip_pass_dd_merge_dqop_onnx", - "plugin": "vaip-pass_dd_merge_dqop_onnx", - "enable_gc": true, - "disabled": false - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "device": "stx", - "optimize_level": 2, - "preferred_data_storage": "auto", - "fe_experiment": "enable-outer-garbage-propagation-through-reshape=1", - "aiecompiler_args": " -Xelfgen=-j16 " - } - } - ], - "mepTable": [ - { - "modelName": "PSA", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "ca42121518cca903f07262b8f2751a42", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_v2_66.0", - "md5sumOnDisk": "73c0f0683f9faaf9c2710272066ba0af", - "md5sumInMemory": "5cbfd77efbcb1f6ba299bc4ffdff8aac", - "md5sumInMemoryWithIo": "8030f6b91feeccf39855b104111a6143", - "target": "PSAV2" - }, - { - "modelName": "PSO0_A", - "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", - "md5sumInMemory": "74ded15705d2c958177604029a20a208", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B", - "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", - "md5sumInMemory": "3ee8e6a8f08912a9a92a260b68447bb2", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_0", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "452a80b01d29ebc42559d59b42de03ca", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "4853206b53d2cae2a40aad448d73370c", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_2", - "md5sumOnDisk": "0abe0b0bbc8314b482f0737da3d831ad", - "md5sumInMemory": 
"ee7f757248851d28061617f87043805f", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_3", - "md5sumOnDisk": "a87968c033291ab04069feaafb5fd7df", - "md5sumInMemory": "19ef4a7ffa9c9ab3871ea3142db4a5db", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_4", - "md5sumOnDisk": "ab9c6f1bb23d04765708622f5e48c0da", - "md5sumInMemory": "4c54574f384ddf99ed01b09bd249ca8b", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "352de20cfd0a050f6083eb661237c6cc", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_MSFT", - "md5sumOnDisk": "F42BC6AC686C72B8CAA38DE28DFAA553", - "md5sumInMemory": "F42BC6AC686C72B8CAA38DE28DFAA553", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_1", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "ee9a5fc4b79342b98049c4826983f18e", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "27b186167f3a3957b32141846bcf81f8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA_1.0.1_nchw", - "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", - "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", - "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSA6.3", - "md5sumOnDisk": "b5e9f87a18d925e43f6e74ef34a299c3", - "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", - "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_1.0.1_nchw", - "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", - "md5sumInMemory": "6cefe9e4244aa0f3f8dbf21d43789e86", - "md5sumInMemoryWithIo": "e3497d38e57aa72df6aec42833b784a8", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B_1.0.1_nchw", - "md5sumOnDisk": 
"ee58a2f6e0bbd99e9921731be4b0a15c", - "md5sumInMemory": "48f349c792e8e2a7562e092968750947", - "md5sumInMemoryWithIo": "dc6c1b8774c65f20fcbaaae86cd05f8c", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1.0.1_nchw", - "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", - "md5sumInMemory": "35527caf46c38e60ad74204a1d966847", - "md5sumInMemoryWithIo": "4b0f1adedf7f6e7f74b22dd8d1fb22fb", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_nhwc", - "md5sumOnDisk": "777bd69cc9e12b6e7868277e639f97e2", - "md5sumInMemory": "3a9abd050f70b09de3546f1d61e43c74", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_A_1.1.0_nhwc", - "md5sumOnDisk": "b9eb351ca7af65a2a43f99f41dda7dc2", - "md5sumInMemory": "e3f795258afe1d046f56fad0f8574864", - "md5sumInMemoryWithIo": "3af1b536b95d9eebe3190294e311f57a", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO0_B_1.1.0_nhwc", - "md5sumOnDisk": "fa6d63ec0a7674cb9464020802e5f3ca", - "md5sumInMemory": "862aea9de93c2e711ec842f942d9b8b1", - "md5sumInMemoryWithIo": "b54766ff357e41c4ca538d017e126385", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSO1_1.1.0_nhwc", - "md5sumOnDisk": "3a56581e403def2548e50c77808c4174", - "md5sumInMemory": "377a5a02ffbba68bc1fdd25b54f0f18b", - "md5sumInMemoryWithIo": "ac4f50ea2c10c863db4bfbd6ca024f6e", - "target": "RyzenAI_shell_config_1" - }, - { - "modelName": "PSF", - "md5sumOnDisk": "d38670a70c72561cb3f718125829b5fa", - "md5sumInMemory": "2fbcab29de9dd547562c46319a225a9c", - "md5sumInMemoryWithIo": "5c07dc0856549dabc4d092763e1ce5cf", - "xclbin": "4x2_psf_model_a8w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psf", - "perf_pref": "1", - "modelCategory": "PSF" - }, - { - "modelName": "PSFv1.1", - "md5sumOnDisk": "c711cb8798e562011383bae4c5c91ce1", - "md5sumInMemory": "a394ffc0e58a8e841f5ae415c15a63e8", - "md5sumInMemoryWithIo": "037fca5fd1b0c7b195ef410c3e9b88df", - "xclbin": "4x2_psf_v1.1_model_a8w8_qdq_00.xclbin", - "target": 
"RyzenAI_transformer_cxx_psf", - "modelCategory": "PSF", - "perf_pref": "1", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSHv1.2", - "md5sumOnDisk": "08678ce4a4eab9eaa2cd8b3dcbdf5697", - "md5sumInMemory": "62099384a6af2956912b2d31a99be483", - "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", - "xclbin": "4x2_psh_v1.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psh", - "perf_pref": "1", - "modelCategory": "PSH", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSJ", - "md5sumOnDisk": "15d1515c86b40874ac954869798d0e77", - "md5sumInMemory": "546b8eca24af15302c647edb3e575d54", - "md5sumInMemoryWithIo": "9f9d91808166382b325459dfb88494c0", - "xclbin": "4x2_psj_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psj", - "perf_pref": "1", - "modelCategory": "PSJ" - }, - { - "modelName": "PSJv3.0", - "md5sumOnDisk": "d58fbf8f0bc55fe43fbe917b8352e56d", - "md5sumInMemory": "86fdce04074bce4b540e1e71e2e74fbf", - "md5sumInMemoryWithIo": "9f9d91808166382b325459dfb88494c0", - "xclbin": "4x2_psj_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psj", - "perf_pref": "1", - "modelCategory": "PSJ" - }, - { - "modelName": "DS_R1_1.5_0_v2.0", - "md5sumOnDisk": "908d5e0ffb4a1fea7f9f65fcbd65a361", - "md5sumInMemory": "7062221644ff1e1e1cf87ba383f631ed", - "md5sumInMemoryWithIo": "398b12809feca99c9fd80e53ae9fd874", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_ds0_8x4", - "modelCategory": "PSU" - }, - { - "modelName": "DS_R1_1.5_1_v2.0", - "md5sumOnDisk": "589c6e9ba2ced271915d95a31dc8c7b9", - "md5sumInMemory": "22caad72379b1629ec67889af03f49a6", - "md5sumInMemoryWithIo": "b31dd87909f2bb50aaf99c52600ccbf1", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_ds1_8x4", - "modelCategory": "PSU" - }, - { - "modelName": "PSMU_ST0", - "md5sumOnDisk": "74b5d0dff744baecb9bf55660e166b77", - 
"md5sumInMemory": "13d5d50818ca009b00cbc73c86d2487a", - "md5sumInMemoryWithIo": "3e9e7b7cf4e7ac34f8fb116c2d958a2b", - "xclbin": "8x4_psmu_st0_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu0_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST1", - "md5sumOnDisk": "aaf6f0a2ed85d133dfa48e75b0af3700", - "md5sumInMemory": "382a9bf1181f24fcd3db77238e596ef1", - "md5sumInMemoryWithIo": "f67f5434de44edb87b6e98deaee5609b", - "xclbin": "8x4_psmu_st1_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu1_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST0_v3.8", - "md5sumOnDisk": "bbc426034771275f67935de0b8164995", - "md5sumInMemory": "ce3a4dd7df0e07bb9a90634d33a5b167", - "md5sumInMemoryWithIo": "3e9e7b7cf4e7ac34f8fb116c2d958a2b", - "xclbin": "8x4_psmu_st0_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu0_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSMU_ST1_v3.8", - "md5sumOnDisk": "0ec19e24264ca46247ac3047c6abd389", - "md5sumInMemory": "6a5eed0ee0b8fba217fe16df5b713101", - "md5sumInMemoryWithIo": "f67f5434de44edb87b6e98deaee5609b", - "xclbin": "8x4_psmu_st1_v3.2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_mu1_8x4", - "modelCategory": "PSMUST" - }, - { - "modelName": "PSU0-v1.2", - "md5sumOnDisk": "3ec68bd0d8423b1700046b29e05a688f", - "md5sumInMemory": "db82ec8222e7f515b55415e3e707befb", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "target": "RyzenAI_transformer_cxx_psu_8x4", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1-v1.2", - "md5sumOnDisk": "436ccc61a4cec07c1e449fb879e7e905", - "md5sumInMemory": "a557620781ac029b8ef198e9c7d95f92", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "target": "RyzenAI_transformer_cxx_psu_8x4", - "modelCategory": "PSU", - "qosGenericParams": { - 
"frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU0-v1.2-Opt", - "md5sumOnDisk": "3ec68bd0d8423b1700046b29e05a688f", - "md5sumInMemory": "947860ce26ac9b0a445e19cea2cc86dc", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "target": "RyzenAI_transformer_cxx_psu_8x4_opt", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1-v1.2-Opt", - "md5sumOnDisk": "436ccc61a4cec07c1e449fb879e7e905", - "md5sumInMemory": "e8eebc04612ab31b904545cb45ea291c", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "target": "RyzenAI_transformer_cxx_psu_8x4_opt", - "modelCategory": "PSU", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU0", - "md5sumOnDisk": "85346f221749b9e7ffd2e928acac3d09", - "md5sumInMemory": "929b05d96fc8a61b4d1131a7722732e9", - "md5sumInMemoryWithIo": "351879f8282407b5f6524de2993f34ab", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "target": "RyzenAI_transformer_cxx_psu0", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSU1", - "md5sumOnDisk": "c39dfc4c621b2f2bfa7f9a8b233ce148", - "md5sumInMemory": "0ec91f16621a932d2016784c05f5b703", - "md5sumInMemoryWithIo": "cde769adfa0ee62a9cd128928f73b43b", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "target": "RyzenAI_transformer_cxx_psu1", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "125" - } - }, - { - "modelName": "PSIv1.1", - "md5sumOnDisk": "72651ec6ae2fe552dd0604527d73c1e5", - "md5sumInMemory": "0e3ff9de7ff3d7eddc7712316ca7ab11", - "md5sumInMemoryWithIo": "5a4479883ad7e7724442977a88a257e0", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - 
"target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.2", - "md5sumOnDisk": "91aa788cdf570ebd5434bd7b8937feb2", - "md5sumInMemory": "6ed69ec59ba231b919877d12f81cabd3", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.3", - "md5sumOnDisk": "028f6808733628e822b86c12d38df4b6", - "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", - "md5sumInMemoryWithIo": "2dc156817e5201dc51c39e821b9d5ec7", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv1.4", - "md5sumOnDisk": "96fefa03d63137796293448db34d78e4", - "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", - "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI" - }, - { - "modelName": "PSIv3.0", - "md5sumOnDisk": "f39e8a9d6843c6852f1c3a1942845e90", - "md5sumInMemory": "2a2f237c2903138caca8c854dcff3021", - "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psi", - "perf_pref": "1", - "modelCategory": "PSI", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSQ1", - "md5sumOnDisk": "4e17c61308b9170dda55586dee6c0751", - "md5sumInMemory": "58eb51eccd44a084b185159b67e2c1b6", - "md5sumInMemoryWithIo": "50c2fb23e40a0617f58ebcedbbfac359", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq_04.xclbin", - "target": "RyzenAI_transformer_cxx_psq1", - "modelCategory": "PSQ1", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "2" - } - }, - { - "modelName": "PSQ2", - "md5sumOnDisk": "4e485de54588d95209560c0a29049b68", - 
"md5sumInMemory": "4c121cc7cd35dc04c30f46a94b2baf7a", - "md5sumInMemoryWithIo": "3223d1f84b9dd740f3a829ad9680469f", - "xclbin": "4x2_psq2_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psq2", - "modelCategory": "PSQ2", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSO2-320", - "md5sumOnDisk": "4a6a777fc11158c1ca70ebdd6caae3fe", - "md5sumInMemory": "1bd58439b3a31d4e6edce0689e552ccf", - "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-640", - "md5sumOnDisk": "67927fb103d7fcdd3eeca44475eac6df", - "md5sumInMemory": "9313ce55730e051e4d32eb4f9986f1f2", - "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-1280", - "md5sumOnDisk": "dc4cfca432a4e09eb4275b55a485b126", - "md5sumInMemory": "1cf0b88f93d4d5d458e408abc0a4cf5d", - "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-2560", - "md5sumOnDisk": "de50b0722e512613188a39429e70ead7", - "md5sumInMemory": "5dc5872cd47b9da7f64b6855b1035595", - "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-5120", - "md5sumOnDisk": "af4c17dc488364157a8ffecfb7279d3d", - "md5sumInMemory": "e54a80eb8a942cd83f2c4d8149e27feb", - "md5sumInMemoryWithIo": "3db1ee89fe2143cedfcc533f5085a051", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-8000", - "md5sumOnDisk": 
"a65353afbfda0b221a7868ed88b53811", - "md5sumInMemory": "83313d4445a74bb303a16c824ea6874a", - "md5sumInMemoryWithIo": "f7aefeeb6299d80b979b378e59d940a0", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-320-v1.1", - "md5sumOnDisk": "e6b7d1656f92ca990c7ec94f8b17813d", - "md5sumInMemory": "d0d6cbd1d89f60347ab6c9453e35507e", - "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-640-v1.1", - "md5sumOnDisk": "7d789256f2d568367974b7b74fe9de71", - "md5sumInMemory": "be1539116a956da222b7b678a369fbf6", - "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-1280-v1.1", - "md5sumOnDisk": "b815ee6d465f51de249c4048aa2515cb", - "md5sumInMemory": "4333bedcd96799e4bc2fba9ec4746617", - "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-2560-v1.1", - "md5sumOnDisk": "5d0c439244e9cf46d45d7ce249c0a3e7", - "md5sumInMemory": "577399f309357fc62e307c38945ce770", - "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", - "xclbin": "4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1" - }, - { - "modelName": "PSO2-02_640-v1.1.2", - "md5sumOnDisk": "74f8fe8dd431826e20ddcfb383887b5a", - "md5sumInMemory": "cb1ace27775a8acde55d93ed9c831fb0", - "md5sumInMemoryWithIo": "567b9a0181a5147629e58d4e11b5414d", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_640", - "modelCategory": "PSO2_02" - }, - { - "modelName": 
"PSO2-02_2560-v1.1.2", - "md5sumOnDisk": "43aa65d66d619191dfb6711164cb7ef2", - "md5sumInMemory": "b7873ef2c883221592f9a427681f7aa7", - "md5sumInMemoryWithIo": "33defb20f9050140c2a5dfd84e0f07d2", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_2560", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-02_5120-v1.1.2", - "md5sumOnDisk": "903da8e472bbdbf3a5181e318abd7de1", - "md5sumInMemory": "b68933a5037b7ddf440e7672d1121793", - "md5sumInMemoryWithIo": "ff1755a022b574525b1a78f748825149", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_5120", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-02_8000-v1.1.2", - "md5sumOnDisk": "9e8b510eb05c4c4b4666aa3eb1b258fa", - "md5sumInMemory": "61440810010d2fee2127b9ecb3418b20", - "md5sumInMemoryWithIo": "855a4d8521d118fd1e02c7d3ea24ecd6", - "xclbin": "4x2_pso2_model_a16w16_qdq_2_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2_02", - "perf_pref": "1", - "model_variant": "02_8000", - "modelCategory": "PSO2_02" - }, - { - "modelName": "PSO2-06_1280-v1.1.2", - "md5sumOnDisk": "5c56c9a5148418914e2a74c1fcc4e5dc", - "md5sumInMemory": "c5704b01ae136320462d84e5ef6f68e8", - "md5sumInMemoryWithIo": "8d4c665ddccc2b3eb71e160dd882b5cb", - "xclbin": "4x2_pso2_model_a16w16_qdq_1_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "06_1280", - "modelCategory": "PSO2_06" - }, - { - "modelName": "PSO2-06_8000-v1.1.2", - "md5sumOnDisk": "5d45bef760e086ae9dd13d90ed4bfd86", - "md5sumInMemory": "dc96e4f1d1fe4ed6631d0d13f49ebb8f", - "md5sumInMemoryWithIo": "de3ab6b289cf54f83c962aea6266f218", - "xclbin": "4x2_pso2_model_a16w16_qdq_1_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "06_8000", - "modelCategory": "PSO2_06" - }, - { - "modelName": 
"PSO2-08_80-v1.1.2", - "md5sumOnDisk": "f112b5e15b705d02c354cc6d0b75bf3f", - "md5sumInMemory": "4b470d88553037774d4305760e679bf2", - "md5sumInMemoryWithIo": "ab93d75b985f5084c35f736c4c02855d", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_80", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_160-v1.1.2", - "md5sumOnDisk": "4e3c4e6a306bd796b55711791f368b51", - "md5sumInMemory": "064bfd691e194cd0964cac6e3f8aaed6", - "md5sumInMemoryWithIo": "ef4e0fc4c7f93426b06a1d32a012235b", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_160", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_320-v1.1.2", - "md5sumOnDisk": "2ea69a0a0f7ea4de2c69ba5a2d8eb214", - "md5sumInMemory": "4305970852f28dd46b896a00cf31765e", - "md5sumInMemoryWithIo": "4dcef3e0c58630449318b77428055539", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_320", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_640-v1.1.2", - "md5sumOnDisk": "64f8b9d961c33a1e1782b8b6ab1d05fe", - "md5sumInMemory": "f24cbcd91d0aff2e527a362cffd30fa0", - "md5sumInMemoryWithIo": "2941fbd78bb79228139c313a4c15955f", - "xclbin": "4x2_pso2_model_a16w16_qdq_3.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_640", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_1280-v1.1.2", - "md5sumOnDisk": "cab8f52fce66b1165ccaa7e275e0f461", - "md5sumInMemory": "85ef437ef0f943dcbc6dfff78daa8d59", - "md5sumInMemoryWithIo": "e0e03814c630da534e3ca62de6a4be96", - "xclbin": 
"4x2_pso2_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_1280", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_2560-v1.1.2", - "md5sumOnDisk": "5cb6ffc68e95e5a72777231a4fe45781", - "md5sumInMemory": "52c43902ae61abc9759d6f91dccf5fff", - "md5sumInMemoryWithIo": "4942bb5c99591622bdf693d503823439", - "xclbin": "4x2_pso2_model_2k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_2560", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "18" - } - }, - { - "modelName": "PSO2-08_5120-v1.1.2", - "md5sumOnDisk": "66f978a1eb891e9183187c4f8b39513f", - "md5sumInMemory": "7000b77b04c5cb833ec0d23ce6811b67", - "md5sumInMemoryWithIo": "b408a863e4c9d5d5327a8d9b0b9e89e6", - "xclbin": "4x2_pso2_model_5k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_5120", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSO2-08_8000-v1.1.2", - "md5sumOnDisk": "c331b78262c2de4762605f77602a0982", - "md5sumInMemory": "c0b6569e081583e377ee67bd40a85fcf", - "md5sumInMemoryWithIo": "2ca9dde32adfb5322f93bf50bdb8503d", - "xclbin": "4x2_pso2_model_8k_a16w16_qdq.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "08_8000", - "modelCategory": "PSO2_08_09", - "qosGenericParams": { - "batch_time_ms": "55" - } - }, - { - "modelName": "PSO2-09_1280-v1.1.2", - "md5sumOnDisk": "06e0629102c7bc3ed241c92e880e49bf", - "md5sumInMemory": "a62e7818824b478c80ec6235e7b99f0d", - "md5sumInMemoryWithIo": "d8a93310a25cd06f569a1d409cea375b", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "09_1280", - "modelCategory": "PSO2_08_09" - }, - { - "modelName": 
"PSO2-09_8000-v1.1.2", - "md5sumOnDisk": "bff1723ea4a3346560c3269cf52002f4", - "md5sumInMemory": "cf17cad72bea8b9434ea0a47fae75fb6", - "md5sumInMemoryWithIo": "932243e512281ead62d23c9aeb4fd557", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_pso2", - "perf_pref": "1", - "model_variant": "09_8000", - "modelCategory": "PSO2_08_09" - }, - { - "modelName": "PSR", - "md5sumOnDisk": "c62d3ac875af16b59180d662966266e8", - "md5sumInMemory": "8cef986e9f91bd8248d929cec012a7aa", - "md5sumInMemoryWithIo": "c43cb269c32cb943ec3544b3cca7db4d", - "xclbin": "4x4_psr_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psr", - "modelCategory": "PSR", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSR", - "md5sumOnDisk": "33d809d58b570fa23262e9b787578344", - "md5sumInMemory": "5e6ab13b18d575a176d04aa840b13ebe", - "md5sumInMemoryWithIo": "50fc5db86f2a22cc3c5ece4c28096dbd", - "xclbin": "4x4_psr_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psr", - "modelCategory": "PSR", - "qosGenericParams": { - "group_id": "2", - "batch_time_ms": "493" - } - }, - { - "modelName": "PSW", - "md5sumOnDisk": "b6a4965536786c4953e30bca0cd3b315", - "md5sumInMemory": "c63dc0c92c08dec61ca896d846a8ca99", - "md5sumInMemoryWithIo": "6920d4a804bff69422a3e2127ed78c51", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psw", - "modelCategory": "PSW", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": "1", - "batch_time_ms": "8" - } - }, - { - "modelName": "PSW_v1.1.2", - "md5sumOnDisk": "84dd8949edca8d596bce3aacfbdfa94d", - "md5sumInMemory": "2d0fc3d7b8ecbd0be7d66b407168d231", - "md5sumInMemoryWithIo": "bda3bc15da0aebd40783415a820d3d2d", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "target": "RyzenAI_transformer_cxx_psw", - "modelCategory": "PSW", - "qosGenericParams": { - "frame_execution_time": "0", - "group_id": 
"1" - } - }, - { - "modelName": "PSS", - "md5sumOnDisk": "5ad2857510b5646376f3f9348591b83e", - "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", - "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PSS" - }, - { - "modelName": "PST", - "md5sumOnDisk": "c8c2ab668b56daf7d2228e53c9a4f0db", - "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", - "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PST", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_v1.1", - "md5sumOnDisk": "92358bd7e8a68ea9c6e9d327423069e3", - "md5sumInMemory": "67d78f48fd05ce03e3efb69212243d30", - "md5sumInMemoryWithIo": "a807ae1f05fc42e16d57d59186a414b4", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PST", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_v1.1", - "md5sumOnDisk": "63a4651d48b4281ddf6a6a33ebad5fc7", - "md5sumInMemory": "60142dfa473572b34fbf476c37ebfa1b", - "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", - "target": "RyzenAI_transformer_cxx_pss_pst", - "modelCategory": "PSS", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_v1.0", - "md5sumOnDisk": "fbf3fd6e6bab35efba46b7e9060f2d62", - "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", - "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_v1.0", - "md5sumOnDisk": "4045c2294abd7e8c4af6812779a524de", - "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", - "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PSS_nhwc", - "md5sumOnDisk": "1d46fbe6a09e79b36e21d985d937df3b", - "md5sumInMemory": "ccf646813e6e91ff09f9d4216047a6ec", - "md5sumInMemoryWithIo": 
"3c875c1144bcfa0dfd788b05116d589b", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "PST_nhwc", - "md5sumOnDisk": "02c8e157824d0c75289f2333b307a5a9", - "md5sumInMemory": "087423cf961c2df293d363abd712d5d7", - "md5sumInMemoryWithIo": "6512311dac77f235e3ef637287389419", - "target": "RyzenAI_vision_config_3_mha", - "modelCloneThreshold": 17179869184 - }, - { - "modelName": "GT_v1.2", - "md5sumOnDisk": "4daa45a72a36d731279b7c01e4545637", - "md5sumInMemory": "0fd6cc09fe78a6a5e4fb697c0e8670e9", - "md5sumInMemoryWithIo": "ebe9d9f38f4762972cc71a9d3b906017", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_00.xclbin", - "nodeCount": 7060 - }, - { - "modelName": "GT_v1.3", - "md5sumOnDisk": "d799de8b1e1fa572daad06d7a49a7afe", - "md5sumInMemory": "97bcaa432a2c634a707dfc23bf222032", - "md5sumInMemoryWithIo": "ebe9d9f38f4762972cc71a9d3b906017", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "GTC_v1.0", - "md5sumOnDisk": "d917b3af3dd2a05734571635cd760871", - "md5sumInMemory": "426c5dd1cf2fc5d303d4fb1288dca3d7", - "md5sumInMemoryWithIo": "deb53433fefad8d16b5f42cbcd5c061c", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "HT_v1.2", - "md5sumOnDisk": "01fa81ebc4c70ea06c2f4d28c16af389", - "md5sumInMemory": "6809517bbd0a7b44acc31942d410ca3a", - "md5sumInMemoryWithIo": "9731a32b64df8abdead6b6f370a293fc", - "target": "VAIML_config_0", - "xclbin": "4x4_gt_ht_03.xclbin" - }, - { - "modelName": "PSV", - "md5sumOnDisk": "7a5774108d65860923250379a5efc8bd", - "md5sumInMemory": "6bca3a6045c4c6fdad5c40af8169326f", - "md5sumInMemoryWithIo": "8a4108e1444ab985761adb46447bdb7b", - "target": "PSV" - }, - { - "modelName": "M_w8a8", - "md5sumOnDisk": "39576973a43e3543b0437249f968201c", - "md5sumInMemory": "030e7f309fc2512acfacf471a033bd63", - "md5sumInMemoryWithIo": "99e8be1eb63f87e58af2641872edfb9a", - "target": "m_model" - }, - { - "modelName": "M_w8a16", - 
"md5sumOnDisk": "d2005c2526f4093cf709b67cbd00742e", - "md5sumInMemory": "61a06fc8ab22616577903bed15e10fdc", - "md5sumInMemoryWithIo": "99e8be1eb63f87e58af2641872edfb9a", - "target": "m_model" - }, - { - "modelName": "M_a8w8", - "md5sumOnDisk": "0bbf933b3fc8d5a1165057b5ee44c306", - "md5sumInMemory": "2cb58d0f5e6c05da9d48ba5a537a7bd7", - "md5sumInMemoryWithIo": "56648de96fa265727fdd3458cfb97bdc", - "target": "m_model" - }, - { - "modelName": "M_1080_1920_s8s8", - "md5sumOnDisk": "fe90a5b4602392474e0da565de1b0183", - "md5sumInMemory": "5dffba4329ed8687ae22ab2b82d2fcb4", - "md5sumInMemoryWithIo": "58be535dc6f625cb020cdc89097ff411", - "target": "m_model" - }, - { - "modelName": "M_720_1080_s8s8", - "md5sumOnDisk": "c643b3f3e2ca0b5081eddb39f1999afa", - "md5sumInMemory": "e96860eb14f53974f7fd8a96a8d83496", - "md5sumInMemoryWithIo": "651696e4f97e5b52d99f98a5312cf3ba", - "target": "m_model" - }, - { - "modelName": "M_540_960_s8s8", - "md5sumOnDisk": "1e8bc505d655a26e53cacef52fee2f01", - "md5sumInMemory": "582fd1f36a0e1d930dcf3fd524adc044", - "md5sumInMemoryWithIo": "859678b393dd2c03cbf84b8700183bc4", - "target": "m_model" - }, - { - "modelName": "M_400_400_s8s8", - "md5sumOnDisk": "49a7e98718263eccae8954ab75aa9910", - "md5sumInMemory": "0a3b66d3754d1a3840eec2c06e3334b9", - "md5sumInMemoryWithIo": "28ad9f773b56894cf8ea5e4c5786aad1", - "target": "m_model" - }, - { - "modelName": "model_m_1x256x256x3_xint8_quantized", - "md5sumOnDisk": "656ec46893c7d38af45cb4721e091eaf", - "md5sumInMemory": "9c94b2c668bb3f4d8a12cea493d1003c", - "md5sumInMemoryWithIo": "9b5e5f072cd8c4aefced37d7f6b0e866", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x400x400x3_xint8_quantized", - "md5sumOnDisk": "9f837a55af61e2e5c74202907cc95bd1", - "md5sumInMemory": "e1b0cdbc9e7bbedbd3f3c6fe421acc8f", - "md5sumInMemoryWithIo": "56648de96fa265727fdd3458cfb97bdc", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x512x512x3_xint8_quantized", - "md5sumOnDisk": 
"02925f26f9571cd51b9c4efcf1f7a50c", - "md5sumInMemory": "545b9e36cd5959f978ca6704137e5d69", - "md5sumInMemoryWithIo": "0266c6d50634ec2397e95903704d079e", - "target": "old_qdq_py3" - }, - { - "modelName": "model_m_1x1080x1920x3_xint8_quantized", - "md5sumOnDisk": "9a2fd356dc309f08c8d3832e28541dd2", - "md5sumInMemory": "5005f9a54e4f57d2100aa5502a63f622", - "md5sumInMemoryWithIo": "0665ad30ba4cd44c95fab7eff31db5a2", - "target": "old_qdq_py3" - }, - { - "modelName": "A3", - "md5sumOnDisk": "5817e747dea65f2f9e60dbaea457f4ad", - "md5sumInMemory": "b721088fac1683349406d47ce00c7162", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "old_qdq_py3" - }, - { - "modelName": "C2", - "md5sumOnDisk": "5b04d9add2f4eccf0981567ecd6596e8", - "md5sumInMemory": "9e20c31e62951e398d86e557dc50c490", - "md5sumInMemoryWithIo": "ed9200d492602a3fddc52f13a81bf92f", - "target": "old_qdq_py3" - }, - { - "modelName": "C4-Mix", - "md5sumOnDisk": "0b9b31d11a710796ea71b2e5a373d805", - "md5sumInMemory": "4e755febbb17436edd95dc93cd6273c8", - "md5sumInMemoryWithIo": "aa958aa703bfa2e551c9d9c5d92ab47b", - "target": "py3" - }, - { - "modelName": "E", - "md5sumOnDisk": "601323cb390efbd40d202292eba01174", - "md5sumInMemory": "dcaf70bc4238127d8541a962ff418b75", - "md5sumInMemoryWithIo": "07bd53aeb901c363c59d6965d972af0e", - "target": "old_qdq_py3" - }, - { - "modelName": "L_v_1_0_A8W8", - "md5sumOnDisk": "ee0cbe0c248bf37801297dd29b2dabbf", - "md5sumInMemory": "742ce71c1a46509932ecb6af7f8e2c39", - "md5sumInMemoryWithIo": "e3ef4dc14b209de607c0b028066d4d05", - "target": "py3" - }, - { - "modelName": "a3", - "md5sumOnDisk": "73ecb2594935fb9bd02707930610f29e", - "md5sumInMemory": "f59151f8b67a7b8f1a8bcc7798558c33", - "md5sumInMemoryWithIo": "39b422f40bcb60bbd20be835097256c2", - "target": "VAIML-x2.0-a3" - }, - { - "modelName": "DeepLabV3", - "md5sumOnDisk": "10a644c6da6b1121f807794506b7e5cc", - "md5sumInMemory": "849608d568bbc54380833c9446299989", - "md5sumInMemoryWithIo": 
"99f39afbf868542fb575023123c98001", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "inceptionv4", - "md5sumOnDisk": "42c48a7086ba34889699862d98844e62", - "md5sumInMemory": "591d2dc2a27f04111e1b14fbf4222d51", - "md5sumInMemoryWithIo": "3d6d80d6c60811089ced3bf6abb42cdc", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "MobileNet_V3", - "md5sumOnDisk": "5bbc61c013f20b6563c62523100fa2ee", - "md5sumInMemory": "0bcec05d638535092032784dcca2cce3", - "md5sumInMemoryWithIo": "1506de83ac617b0903613ca420061e84", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "ResNet50", - "md5sumOnDisk": "c22b773b35f2ce62932578cc5eec867b", - "md5sumInMemory": "b307088b2bf693c51d8ea399247d8139", - "md5sumInMemoryWithIo": "1bea49f637b8f9f0ec80bc44c6d841bb", - "target": "VAIML-x2.0-Procyon" - }, - { - "modelName": "timm_mobilenetv3_small_100.lamb_in1k_a8w8_nchw", - "md5sumOnDisk": "fcb0bf5a6614042c0859937a24a24185", - "md5sumInMemory": "ac63b86137383c6b001f12030eeeff3e", - "target": "VAIML-x2.0-4x4" - }, - { - "modelName": "microsoft_resnet50_224x224_a8w8", - "md5sumOnDisk": "a6235fb37dd84a930c9b2951615570c6", - "md5sumInMemory": "da5192919ce2534ddaa2fb5ced220215", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "microsoft_resnet50_224x224_a8w8", - "md5sumOnDisk": "eeebfd0356baf9a8deed9c30d438d792", - "md5sumInMemory": "d795b64172132cecca0f5b80ff6e0861", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "microsoft_resnet50_224x224_nhwc_a8w8", - "md5sumOnDisk": "6f8d3850d21b6e85deafe180d682249b", - "md5sumInMemory": "1ce6d27e87140766164c39cd58fc426c", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "timm_mobilenetv3_small_100.lamb_in1k_nhwc_a8w8", - "md5sumOnDisk": "2e19885c87c5db681fa35da00b7c9579", - "md5sumInMemory": "7ae8dcf602d72549759ae5089b9119cf", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-4x4" - }, - { - 
"modelName": "resnet_50_ptq_qdq_nhwc_a16w8", - "md5sumOnDisk": "8f883624a97f00789c5d0e508202c224", - "md5sumInMemory": "94476250149cf1dfa219fe42b30026da", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "resnet_50_ptq_qdq_uint8a_uint8w_nhwc_a8w8", - "md5sumOnDisk": "5f86b73c21a98695c741664193d0b93e", - "md5sumInMemory": "94476250149cf1dfa219fe42b30026da", - "md5sumInMemoryWithIo": "30dc70101541ff464da7f768abd5b9ea", - "target": "VAIML-x2.0-Procyon_resnet50" - }, - { - "modelName": "PSP1_v1.1", - "md5sumOnDisk": "8ac224e3547da42bde7ec182c94c88ea", - "md5sumInMemory": "efdf2702eda557558169377bd2301d9c", - "md5sumInMemoryWithIo": "962b676e17290cdb22869b2df9a19400", - "target": "VAIML-x2.0-4x4" - }, - { - "modelName": "PSA2", - "md5sumOnDisk": "e086205adc8a885a8440eb375068159f", - "md5sumInMemory": "0803bf9ef641e3e1043f56653552101f", - "md5sumInMemoryWithIo": "7205a1506ef140cd65f2eb0a5b2ce65e", - "target": "VAIML-x2.0-4x4" - } - ], - "target": "VAIML", - "targets": [ - { - "name": "xcompiler", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_2904.xclbin", - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 32 - }, - "opt_level": { - "uintValue": 2 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - }, - "enable_fast_pm": { - "boolValue": true - } - }, - "minimum_num_of_conv": 2, - "debug": false - }, - "py3_round": false, - "provider_options": { - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "VAIML_config_0", - "pass": [ - "init", - "vaiml_lt", - "vaip_pass_dd_merge_dqcastgather", - "vaip_pass_dd_merge_qop", - "vaip_pass_dd_merge_dqop", - "vaip_pass_dd_merge_qop_onnx", - "vaip_pass_dd_merge_dqop_onnx" - ] - }, - { - "name": "py3", - "pass": [ - 
"init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "py3_round": true, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "old_qdq_py3", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "old_qdq": true, - "py3_round": true, - "provider_options": { - "xlnx_enable_old_qdq": "1", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "m_model", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "PSV", - "pass": [ - "init", - "fuse_DPU" - ], - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG1_2924.xclbin", - "target_opts": { - "xcompilerAttrs": { - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "uint16_qconv_leaky_fusion": { - "boolValue": true - }, - "enable_convert_three_term_to_two_term": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_shell_config_1", - "xclbin": "AMD_AIE2P_2x4x1_Overlay_2926.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": 
{ - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 3 - }, - "profile": { - "uintValue": 0 - }, - "enable_fast_pm": { - "boolValue": true - }, - "enable_weights_prefetch": { - "boolValue": true - }, - "enable_cost_model_tiling": { - "boolValue": true - }, - "enable_mergesync": { - "boolValue": true - } - } - }, - "graph_engine_qos_priority": 640, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_2", - "xclbin": "1x4.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 16 - }, - "opt_level": { - "uintValue": 0 - }, - "dump_subgraph_ops": { - "boolValue": false - }, - "profile": { - "uintValue": 0 - }, - "disable_std_quant": { - "boolValue": false - } - } - }, - "provider_options": { - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_3", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin", - "pass": [ - "init", - "fuse_DPU" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - } - } - }, - "provider_options": { - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_vision_config_3_mha", - "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2_2925.xclbin", - "pass": [ - "init", - "fuse_DPU_MHA" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { 
- "xlnx_enable_old_qdq": "0", - "enable_preemption": "1", - "enable_txn_elf": "1", - "is_preemptible": "1" - } - }, - { - "name": "RyzenAI_transformer_cxx_psf", - "xclbin": "4x2_psf_model_a8w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_llm_eager", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "fuse_MatMulNBits", - "fuse_SSMLP", - "fuse_MLP" - ] - }, - { - "name": "RyzenAI_transformers_base", - "xclbin": "8x4_wcr_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr" - ] - }, - { - "name": "RyzenAI_transformers_psp1", - "xclbin": "8x4_psp1_a16w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psp1" - ] - }, - { - "name": "RyzenAI_llm_phi3", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_llm_llama", - "xclbin": "8x4_llama_3_2_1b_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_llm_qwen2", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_wcr_llm" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psw", - "xclbin": "4x4_psw_v1.0_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psw" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psi", - "xclbin": "4x2_psi_integrated_model_a16w8_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psj", - "xclbin": "4x2_psj_model_a8w8_qdq.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psj" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu0", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu0" - ] - }, - { - "name": "RyzenAI_transformer_cxx_vaiml_llm", - "xclbin": 
"vaiml_2x4x4_gemm_silu_rms_mul_add_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_vaiml_llm" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu1", - "xclbin": "llama2_mladf_2x4x4_v1_gemmbfp16_silu_mul_mha_rms_rope_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu1" - ] - }, - { - "name": "RyzenAI_transformer_cxx_mu0_8x4", - "xclbin": "8x4_psmu_st_v3.2_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_mu0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_mu1_8x4", - "xclbin": "8x4_psmu_st_v3.2_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_mu1_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_ds0_8x4", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_ds0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_ds1_8x4", - "xclbin": "8x4_hfds_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_ds0_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_4x4", - "xclbin": "4x4_psw_psu_integrated_model_a16w16_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_4x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_8x4", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_8x4" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_8x4_opt", - "xclbin": "8x4_psu_model_a16w8_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_8x4_opt" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psu_4x4_opt", - "xclbin": "4x4_psw_psu_integrated_model_a16w16_qdq_00.xclbin", - "pass": [ - "init", - "fuse_dynamic_dispatch_psu_4x4_opt" - ] - }, - { - "name": "RyzenAI_transformer_config_2", - "pass": [ - "init", - "fuse_dynamic_dispatch_psr" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psh", - "xclbin": "4x2_psh_model_a16w8_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psh" - ] - }, - { - "name": 
"RyzenAI_transformer_cxx_psq1", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psq1" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psq2", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_psf" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pso2", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_pso2" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pso2_02", - "xclbin": "4x2_pso2_integrated_model_a16w16_qdq_00.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_dynamic_dispatch_pso2_02" - ] - }, - { - "name": "RyzenAI_transformer_cxx_psr", - "pass": [ - "init", - "fuse_dynamic_dispatch_psr" - ] - }, - { - "name": "RyzenAI_transformer_cxx_pss_pst", - "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq_04.xclbin", - "share_hw_context": true, - "pass": [ - "init", - "fuse_DPU_MHA", - "fuse_dynamic_dispatch_pss_pst" - ], - "target_opts": { - "xcompilerAttrs": { - "debug_mode": { - "stringValue": "performance" - }, - "dpu_subgraph_num": { - "uintValue": 1000 - }, - "opt_level": { - "uintValue": 65536 - }, - "enable_fast_pm": { - "boolValue": true - }, - "advanced_opt": { - "boolValue": true - } - } - }, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-a3", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-Procyon", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "enable_qdq_force_xint": { - "boolValue": true - }, - "opt_level": { - "uintValue": 3 - }, - "target": { - "stringValues": [ - 
"AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3,disabel-rtp-pipeline=0" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-Procyon_resnet50", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "opt_level": { - "uintValue": 3 - }, - "target": { - "stringValues": [ - "AMD_AIE2P_2x4x4_CMC_Overlay" - ] - }, - "mladf_options": { - "stringValue": "print-timer=3,disabel-rtp-pipeline=0" - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML-x2.0-4x4", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0_yolo" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_4x4_CMC_Overlay" - ] - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "PSAV2", - "pass": [ - "init", - "fuse_DPU_VAIML-x2.0_yolo" - ], - "target_opts": { - "xcompilerAttrs": { - "enable_mergesync": { - "boolValue": true - }, - "target": { - "stringValues": [ - "AMD_AIE2P_4x4_CMC_Overlay" - ] - }, - "enable_mul_matmul_fusion": { - "boolValue": true - }, - "enable_matmul_add_fusion": { - "boolValue": true - } - } - }, - "old_qdq": false, - "provider_options": { - "xlnx_enable_old_qdq": "0" - } - }, - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ], - "provider_options": { - "enable_cache_file_io_in_mem": "0" - } - } - ], - "enable_cache_file_io_in_mem": true -} \ No newline at end of file diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/xaiengine.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/xaiengine.dll deleted file mode 100644 index d6c5a2ac..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/xaiengine.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 
-oid sha256:5d7e2a0c60d5207dccf707e0c765e7a5c981d8aa2f538ed05fa2d53d1e56f0ea -size 746376 diff --git a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/zlib.dll b/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/zlib.dll deleted file mode 100644 index b26e0728..00000000 --- a/Ryzen-AI-CVML-Library/windows/onnx/ryzen15/zlib.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12fa4b5bee407cb2ab4e7ef128f5952bdd07add4ac99c364e690fdfeb0eb4ec2 -size 568320 diff --git a/Transformer-examples/ASR/Whisper-AI/README.md b/Transformer-examples/ASR/Whisper-AI/README.md deleted file mode 100644 index a104aafb..00000000 --- a/Transformer-examples/ASR/Whisper-AI/README.md +++ /dev/null @@ -1,148 +0,0 @@ - - - - -

Ryzen™ AI ASR

-
- -# Running Whisper on Ryzen AI - -This Ryzen AI example lets you bring in OpenAI’s Whisper model and run fast, local automatic speech recognition (ASR) on your AMD NPU. Whisper is a versatile speech model trained on 680,000+ hours of diverse audio, capable of speech-to-text, translation, and language detection. -This example uses the [Whisper-base](https://huggingface.co/openai/whisper-base) variant and provides a simple demonstration of how to run it on the NPU. For real-time factor (RTF) evaluation of the model on the NPU, please refer to the [whisper-demo](https://github.com/amd/RyzenAI-SW/tree/main/demo/ASR/Whisper). - -Learn how you can: -- **Export Whisper models** from Hugging Face to ONNX format -- **Optimize** them for static shape inference -- **Run ASR** fully on-device using CPU or AMD NPU -- **Evaluate ASR** performance on sample data from public datasets like LibriSpeech. - -This example supports: -- **Audio file transcription** – load your own `.wav` files for instant speech-to-text - -## Prerequisites -**Step 1:** Install the latest Conda environment using [RyzenAI Documentation](https://ryzenai.docs.amd.com/en/latest/inst.html#). -Ensure the SDK and driver are installed. - -**Step 2:** Export Hugging face Whisper model to onnx and set static shape as mentioned below: -1. Activate conda environment: -```bash - conda create --name asr --clone ryzen-ai- - conda activate asr -``` -2. Navigate to the Whisper-AI directory: -```bash - cd \Transformer-examples\ASR\Whisper-AI -``` -3. Install the necessary libraries: -```bash - pip install -r requirements.txt - ``` -4. Export Whisper AI model to ONNX using [Hugging Face Optimum library](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model): -```bash - optimum-cli export onnx --model openai/whisper-base.en --opset 17 exported_model_directory - ``` -**Note:** -The above command creates a new directory `exported_model_directory` in the current path. 
In `exported_model_directory`, you should see `encoder_model.onnx` and `decoder_model.onnx` models available. - -5. Convert the dynamic ONNX model to static using the `dynamic_to_static.py` script. -```bash - #Convert the encoder - python dynamic_to_static.py --input_model ".\exported_model_directory\encoder_model.onnx" - - #Convert the decoder - python dynamic_to_static.py --input_model ".\exported_model_directory\decoder_model.onnx" - ``` -The `dynamic_to_static.py` script utilizes `onnxruntime.tools.make_dynamic_shape_fixed` to convert dynamic shapes in an ONNX model to static shapes. It takes as input a `params.json` file, which specifies the dynamic dimensions to be fixed and their target static values. After the conversion, the script verifies the correctness of the modified ONNX model using the ONNX Checker and performs a dummy inference to ensure the model runs as expected. - -The `params.json` file defines the static shapes used to convert a dynamic Whisper-base ONNX model to a fixed-shape version suitable for optimized inference on NPUs. - -```bash - { - "batch_size": "1", - "encoder_sequence_length / 2": "1500", - "decoder_sequence_length": "180" -} -``` -- `"batch_size": "1"` - Fixes the model to process one audio sample at a time. -- `"encoder_sequence_length / 2": "1500"` - Whisper converts audio to a log-Mel spectrogram with 3000 frames for 30s of audio. After 2× downsampling, the encoder input length becomes 1500. This is fixed in params.json for optimized static-shape inference. -- `"decoder_sequence_length": "180"` - Fixed to 180 to match 30s of audio input (3000 spectrogram frames). At ~5 tokens/sec, average output is 150 tokens; the 30-token buffer ensures completeness and handles variation. - -**Note:** The final static ONNX models are stored in `.\exported_model_directory\encoder_model.onnx` and `.\exported_model_directory\decoder_model.onnx`. 
- -## Whisper ONNX Inference and Evaluation - -The `run_whisper.py` script performs speech-to-text transcription using a Whisper-base model exported to ONNX format. It supports transcribing audio from WAV files or a live microphone stream and can evaluate model accuracy on a labeled dataset using WER and CER metrics. The script runs the encoder and decoder models via ONNX Runtime, with support for both CPU and NPU backends, and includes chunk-based processing for long audio inputs. - -The `load_provider_options` function returns ONNX Runtime execution providers and configuration options based on the selected device (cpu or npu). - -```bash - provider = "VitisAIExecutionProvider" - - encoder_options = { - "config_file": "vaiep_config.json", - "cache_dir": "./cache/", - "cache_key": "whisper_encoder" - } - - decoder_options = { - "config_file": "vaiep_config.json", - "cache_dir": "./cache/", - "cache_key": "whisper_decoder" - } -``` -When running on the NPU, the provider options for the encoder and decoder are identical except for the cache key (`whisper_encoder` vs. `whisper_decoder`); both share the same cache directory. Both utilize the official RAI `vaiep_config.json` [configuration file](https://ryzenai.docs.amd.com/en/latest/modelrun.html#config-file-options). - -When running inference on the NPU, 100% of the encoder operators and 93.4% of the decoder operators are executed on the NPU. - -```bash - #encoder operations - [Vitis AI EP] No. of Operators : VAIML 225 - [Vitis AI EP] No. of Subgraphs : VAIML 1 - - #decoder operations - [Vitis AI EP] No. of Operators : CPU 24 VAIML 341 - [Vitis AI EP] No. 
of Subgraphs : VAIML 2 -``` - -Command to run transcription on a `.wav` file or from the microphone: -```bash - python run_whisper.py \ - --encoder exported_model_directory\encoder_model.onnx \ - --decoder exported_model_directory\decoder_model.onnx \ - --device <cpu|npu> \ - --input <path_to_wav|mic> -``` - -### Expected Output - -Run the above command with the sample audio file and observe the expected model output below: - ---input audio_files\61-52s.wav - -```bash -Transcription: Also, there was a stripling page who turned into a maze with so sweet a lady, sir. -And in some manner, I do think she died. But then the picture was gone as quickly as it came. -Sister Nell, do you hear these models? Take your place and let us see what the crystal can show to you, like is not young, Master. -Though I am an old man. With all rant the opening of the tent to see what might be a miss. -But Master Will, who peeped out first, needed no more than one glance. -Mistress Fitzsooth to the rear of the Ted cries of "A knotting ham! A knotting ham!" before them fled the stroller and his three sons, capless and tear away. -"What is that tumult and rioting?" cried out the squire, authoritatively, and he blew twice on the silver whistle which hung at his belt. - -``` - -### Model Evaluation - -To evaluate model performance, we provide an `eval_dataset` directory containing sample audio from the LibriSpeech dataset. You can run the following command to generate a detailed report including WER and CER metrics: -```bash -python run_whisper.py \ - --encoder exported_model_directory\encoder_model.onnx \ - --decoder exported_model_directory\decoder_model.onnx \ - --device <cpu|npu> \ - --eval-dir eval_dataset\LibriSpeech-samples \ - --results-dir <results_directory> -``` - -### Notes - -- If the model has not been precompiled before, the first run will take approximately 15 minutes to compile. -- Ensure that the paths to the encoder, decoder, and configuration file are correctly set based on your environment. 
diff --git a/Transformer-examples/ASR/Whisper-AI/audio_files/1089-134686-0000.wav b/Transformer-examples/ASR/Whisper-AI/audio_files/1089-134686-0000.wav deleted file mode 100644 index 2cdc6df3..00000000 Binary files a/Transformer-examples/ASR/Whisper-AI/audio_files/1089-134686-0000.wav and /dev/null differ diff --git a/Transformer-examples/ASR/Whisper-AI/audio_files/61-52s.wav b/Transformer-examples/ASR/Whisper-AI/audio_files/61-52s.wav deleted file mode 100644 index b94fb03b..00000000 Binary files a/Transformer-examples/ASR/Whisper-AI/audio_files/61-52s.wav and /dev/null differ diff --git a/Transformer-examples/ASR/Whisper-AI/dynamic_to_static.py b/Transformer-examples/ASR/Whisper-AI/dynamic_to_static.py deleted file mode 100644 index bcc65f63..00000000 --- a/Transformer-examples/ASR/Whisper-AI/dynamic_to_static.py +++ /dev/null @@ -1,113 +0,0 @@ -import numpy as np -import subprocess -import onnx -import onnxruntime as ort -import json -import os -import argparse -import shutil - -from onnx import shape_inference - - -def parse_args(): - parser = argparse.ArgumentParser(description="Fix dynamic shapes in ONNX file using onnxruntime") - parser.add_argument("--input_model", required=True, help="Path to the input ONNX file (encoder.onnx or decoder.onnx)") - args = parser.parse_args() - with open('params.json', 'r') as f: - args.params_to_fix = json.load(f) - return args - - -def directorycreation(directory_name): - if os.path.exists(directory_name): - shutil.rmtree(directory_name) - print(f"Directory '{directory_name}' already exists. 
It has been deleted.") - os.mkdir(directory_name) - - -def generate_dummy_data(input_tensor, name): - input_shape = input_tensor.shape - input_type = input_tensor.type - print(f"Generating dummy data for: {name}") - if input_type == 'tensor(float)': - return np.random.randint(0, 64, size=input_shape).astype(np.float32) - elif input_type == 'tensor(float16)': - return np.random.rand(*input_shape).astype(np.float32) - elif input_type == 'tensor(int32)': - return np.random.randint(0, 100, size=input_shape).astype(np.int32) - elif input_type == 'tensor(int64)' and name == 'attention_mask': - return np.random.randint(0, 32, size=input_shape).astype(np.int64) - elif input_type == 'tensor(int64)' and name == 'token_type_ids': - return np.random.randint(0, 32, size=input_shape).astype(np.int64) - elif input_type == 'tensor(int64)': - return np.random.randint(0, 9, size=input_shape).astype(np.int64) - elif input_type == 'tensor(bool)': - return np.ones(input_shape).astype(np.bool_) - elif input_type == 'tensor(double)': - return np.random.rand(*input_shape).astype(np.float64) - else: - raise ValueError(f"Unsupported input type: {input_type}") - - -def validate_onnx_model(model): - try: - onnx.checker.check_model(model) - print("ONNX model is valid.") - return True - except onnx.onnx_cpp2py_export.checker.ValidationError as e: - print("ONNX model is not valid.") - print(e) - return False - - -if __name__ == "__main__": - args = parse_args() - exported_model_directory = "exported_model_directory" - tmp_dir = "tmp" - - # Prepare workspace - directorycreation(tmp_dir) - - # Derive filenames - input_model_path = args.input_model - base_filename = os.path.basename(input_model_path) - tmp_output_path = os.path.join(tmp_dir, base_filename) - final_output_path = os.path.join(exported_model_directory, base_filename) - - # Fix dynamic shapes - print("WARNING: You might have to comment out ONNX checker in //onnxruntime/tools/onnx_model_utils.py if model > 2GB") - command_base = 
["python", "-m", "onnxruntime.tools.make_dynamic_shape_fixed"] - for param, value in args.params_to_fix.items(): - command = command_base + [input_model_path, tmp_output_path, "--dim_param", str(param), "--dim_value", str(value)] - subprocess.run(command) - input_model_path = tmp_output_path # use modified model as next input - - print("Static conversion complete.") - - # Shape inference - print(f"Inferencing shapes for: {tmp_output_path}") - model = onnx.load(tmp_output_path) - if not validate_onnx_model(model): - exit(1) - - inferred_model = shape_inference.infer_shapes(model, data_prop=True) - onnx.save_model(inferred_model, final_output_path) - print(f"Shape inference complete. Overwritten: {final_output_path}") - - # Sanity check (forward pass) - print("---------- Running forward pass ----------------------") - ort_session = ort.InferenceSession(final_output_path) - input_data = { - tensor.name: generate_dummy_data(tensor, tensor.name) - for tensor in ort_session.get_inputs() - } - outputs = ort_session.run(None, input_data) - - # Cleanup - shutil.rmtree(tmp_dir) - print(f"Deleted temporary directory: {tmp_dir}") - - print("Model export successful.") - - diff --git a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/transcripts.txt b/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/transcripts.txt deleted file mode 100644 index a9b64e71..00000000 --- a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/transcripts.txt +++ /dev/null @@ -1,3 +0,0 @@ -61-70968-0000 HE BEGAN A CONFUSED COMPLAINT AGAINST THE WIZARD WHO HAD VANISHED BEHIND THE CURTAIN ON THE LEFT -1089-134686-0009 AT MOST BY AN ALMS GIVEN TO A BEGGAR WHOSE BLESSING HE FLED FROM HE MIGHT HOPE WEARILY TO WIN FOR HIMSELF SOME MEASURE OF ACTUAL GRACE -3570-5694-0000 BUT ALREADY AT A POINT IN ECONOMIC EVOLUTION FAR ANTEDATING THE EMERGENCE OF THE LADY SPECIALISED CONSUMPTION OF GOODS AS AN EVIDENCE OF PECUNIARY STRENGTH HAD BEGUN TO WORK OUT IN A 
MORE OR LESS ELABORATE SYSTEM \ No newline at end of file diff --git a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav b/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav deleted file mode 100644 index 9c711054..00000000 Binary files a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/1089-134686-0009.wav and /dev/null differ diff --git a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav b/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav deleted file mode 100644 index 31643ce5..00000000 Binary files a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/3570-5694-0000.wav and /dev/null differ diff --git a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav b/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav deleted file mode 100644 index ab3cab4f..00000000 Binary files a/Transformer-examples/ASR/Whisper-AI/eval_dataset/LibriSpeech-samples/wav/61-70968-0000.wav and /dev/null differ diff --git a/Transformer-examples/ASR/Whisper-AI/params.json b/Transformer-examples/ASR/Whisper-AI/params.json deleted file mode 100644 index 799089db..00000000 --- a/Transformer-examples/ASR/Whisper-AI/params.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "batch_size": "1", - "encoder_sequence_length / 2": "1500", - "decoder_sequence_length": "180" -} \ No newline at end of file diff --git a/Transformer-examples/ASR/Whisper-AI/requirements.txt b/Transformer-examples/ASR/Whisper-AI/requirements.txt deleted file mode 100644 index 35db0d29..00000000 --- a/Transformer-examples/ASR/Whisper-AI/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -torch==2.8.0 -torchaudio==2.8.0 -sounddevice==0.5.3 -transformers==4.52.4 -onnxsim==0.4.36 -optimum==1.27.0 -accelerate==1.11.0 -jiwer==4.0.0 -PySoundFile==0.9.0 \ No 
newline at end of file diff --git a/Transformer-examples/ASR/Whisper-AI/run_whisper.py b/Transformer-examples/ASR/Whisper-AI/run_whisper.py deleted file mode 100644 index 9e143eea..00000000 --- a/Transformer-examples/ASR/Whisper-AI/run_whisper.py +++ /dev/null @@ -1,281 +0,0 @@ -import argparse -import json -import numpy as np -import onnxruntime as ort -import torchaudio -import sounddevice as sd -import queue -import threading -import time -from transformers import WhisperFeatureExtractor, WhisperTokenizer -from pathlib import Path -from jiwer import wer, cer - -SAMPLE_RATE = 16000 -CHUNK_SIZE = 1600 # 0.1 sec chunks - - -class WhisperONNX: - def __init__(self, encoder_path, decoder_path, - tokenizer_dir=None,encoder_providers=None, decoder_providers=None): - - self.encoder = ort.InferenceSession(encoder_path, providers=encoder_providers) - self.decoder = ort.InferenceSession(decoder_path, providers=decoder_providers) - - if tokenizer_dir is None: - tokenizer_dir = Path(encoder_path).parent - print(f"\nLoading tokenizer and feature extractor from: {Path(tokenizer_dir).resolve()}") - self.feature_extractor = WhisperFeatureExtractor.from_pretrained(tokenizer_dir) - self.tokenizer = WhisperTokenizer.from_pretrained(tokenizer_dir) - - self.decoder_start_token = self.sot_token = self.tokenizer.convert_tokens_to_ids("<|startoftranscript|>") - self.eos_token = self.tokenizer.eos_token_id - self.max_length = min(448, self.decoder.get_inputs()[0].shape[1]) - if not isinstance(self.max_length, int): - raise ValueError("Invalid/Dynamic input shapes") - def preprocess(self, audio): - """ - Convert raw audio to Whisper log-mel spectrogram - """ - inputs = self.feature_extractor(audio, sampling_rate=SAMPLE_RATE, return_tensors="np") - return inputs["input_features"] - - def encode(self, input_features): - """ - Run encoder ONNX model - """ - return self.encoder.run(None, {"input_features": input_features})[0] - - def decode(self, encoder_out): - """ - Greedy decode with 
fixed-length input_ids - """ - tokens = [self.decoder_start_token] - for _ in range(self.max_length): - # Pad input_ids to (1, max_length) - decoder_input = np.full((1, self.max_length), self.eos_token, dtype=np.int64) - decoder_input[0, :len(tokens)] = tokens - - outputs = self.decoder.run(None, { - "input_ids": decoder_input, - "encoder_hidden_states": encoder_out - }) - logits = outputs[0] - next_token = int(np.argmax(logits[0, len(tokens)-1])) - - if next_token == self.eos_token: - break - tokens.append(next_token) - return tokens - - def transcribe(self, audio, chunk_length_s=30, is_mic=False): - """ - Full encode-decode pipeline with support for long-form transcription using chunking. - """ - chunk_size = SAMPLE_RATE * chunk_length_s - total_samples = len(audio) - transcription = [] - chunk_idx = 0 - - overlap = SAMPLE_RATE * 1 #Tune this - for start in range(0, total_samples, chunk_size - overlap): - end = min(start + chunk_size, total_samples) - audio_chunk = audio[start:end] - - # Process the chunk - input_features = self.preprocess(audio_chunk) - encoder_out = self.encode(input_features) - tokens = self.decode(encoder_out) - transcription.append(self.tokenizer.decode(tokens, skip_special_tokens=True).strip()) - chunk_idx+= 1 - - # Combine all transcriptions - return " ".join(transcription) - -def evaluate(model, dataset_dir, results_dir): - dataset_name = Path(dataset_dir).name - wav_dir = Path(dataset_dir) / "wav" - transcript_file = Path(dataset_dir) / "transcripts.txt" - - if not transcript_file.exists() or not wav_dir.exists(): - print(f"Missing transcripts.txt or wav folder in {dataset_dir}") - return - - with open(transcript_file, "r", encoding="utf-8") as f: - references = {line.split()[0]: " ".join(line.strip().split()[1:]) for line in f.readlines()} - - output_dir = Path(results_dir) / dataset_name - output_dir.mkdir(parents=True, exist_ok=True) - result_file = output_dir / "results.txt" - - total_wer, total_cer, count = 0, 0, 0 - - with 
result_file.open("w", encoding="utf-8") as out_f: - for wav_path in sorted(wav_dir.glob("*.wav")): - key = wav_path.stem - if key not in references: - print(f"Reference for {key} not found in transcripts.txt") - continue - reference = references[key].lower() - # FIX: Convert Path to str for torchaudio - waveform, sr = torchaudio.load(str(wav_path)) - if sr != SAMPLE_RATE: - waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)(waveform) - audio = waveform.squeeze(0).numpy() - predicted = model.transcribe(audio) - - sample_wer = wer(reference, predicted) - sample_cer = cer(reference, predicted) - total_wer += sample_wer - total_cer += sample_cer - count += 1 - - out_f.write(f"{key}\n") - out_f.write(f"Reference: {reference}\n") - out_f.write(f"Predicted: {predicted}\n") - out_f.write(f"WER: {sample_wer:.3f}, CER: {sample_cer:.3f} \n\n") - - if count: - avg_wer = total_wer / count - avg_cer = total_cer / count - print(f"Evaluation completed for {count} files.") - print(f"Average WER: {avg_wer:.3f}, Average CER: {avg_cer:.3f}") - out_f.write(f"Summary:\nAverage WER: {avg_wer:.3f}\nAverage CER: {avg_cer:.3f}\n") - else: - print("No valid audio-transcript pairs found.") - -def load_provider_options(device): - if device == "cpu": - provider = "CPUExecutionProvider" - return [provider], [provider] - - elif device == "npu": - provider = "VitisAIExecutionProvider" - - encoder_options = { - "config_file": "vaiep_config.json", - "cache_dir": "./cache/", - "cache_key": "whisper_encoder" - } - - decoder_options = { - "config_file": "vaiep_config.json", - "cache_dir": "./cache/", - "cache_key": "whisper_decoder" - } - - return [(provider, encoder_options)], [(provider, decoder_options)] - - -def mic_stream(model, duration=0, silence_threshold=0.01, silence_duration=5.0): - """ - Capture microphone audio and transcribe in real time. - Automatically stops on silence if duration=0. 
- """ - q_audio = queue.Queue() - stop_flag = threading.Event() - - def audio_callback(indata, frames, time, status): - if status: - print(status, flush=True) - q_audio.put(indata.copy()) - - def feeder(): - try: - with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, dtype='float32', - blocksize=CHUNK_SIZE, callback=audio_callback): - if duration > 0: - sd.sleep(int(duration * 1000)) - stop_flag.set() - else: - while not stop_flag.is_set(): - sd.sleep(100) - except sd.PortAudioError as e: - print(f"\n Microphone error: {e}") - print("Could not initialize microphone. Please check your audio device settings.") - stop_flag.set() - - threading.Thread(target=feeder, daemon=True).start() - - buffer = np.zeros((0,), dtype=np.float32) - silence_start = None - print("\n Real-time Transcription:") - while not stop_flag.is_set(): - try: - chunk = q_audio.get(timeout=0.1).squeeze() - buffer = np.concatenate((buffer, chunk)) - - # Check for silence - rms = np.sqrt(np.mean(chunk**2)) - if rms < silence_threshold: - if silence_start is None: - silence_start = time.time() - elif time.time() - silence_start >= silence_duration: - print("\n Silence detected. Stopping transcription.") - stop_flag.set() - break - else: - silence_start = None # Reset silence timer - - if len(buffer) >= SAMPLE_RATE * 2: - text, _ = model.transcribe(buffer, is_mic=True) - print(text) - buffer = np.zeros((0,), dtype=np.float32) - except queue.Empty: - continue - - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--input", help="WAV file path or 'mic'") - parser.add_argument("--encoder", required=True, help="Path to Whisper encoder ONNX model") - parser.add_argument("--decoder", required=True, help="Path to Whisper decoder ONNX model") - parser.add_argument("--tokenizer-dir", default=None, - help="Path to directory containing tokenizer and feature extractor files. 
" - "If not set, defaults to directory of encoder/decoder models.") - parser.add_argument("--eval-dir", help="Dataset directory with wavs/ and transcripts.txt") - parser.add_argument("--results-dir", default="results", help="Directory to store evaluation results") - parser.add_argument("--device", choices=['cpu', 'npu'], default='cpu') - parser.add_argument("--duration", type=int, default=0, help="Mic duration in seconds (0 = unlimited)") - args = parser.parse_args() - - - encoder_providers, decoder_providers = load_provider_options(args.device - ) - - model = WhisperONNX(args.encoder, - args.decoder, - tokenizer_dir=args.tokenizer_dir, - encoder_providers=encoder_providers, - decoder_providers=decoder_providers) - - if args.eval_dir: - evaluate(model, args.eval_dir, args.results_dir) - return - - if not args.input and not args.eval_dir: - print("Error: You must provide --input (wav or mic) or --eval-dir.") - return - - if args.input and args.input.lower() not in ['mic'] and not Path(args.input).suffix == '.wav': - print("Error: --input must be 'mic' or path to a .wav file.") - return - - if args.input.lower() == 'mic': - try: - mic_stream(model, args.duration) - except sd.PortAudioError as e: - print("Fix your device or try using a .wav file instead of mic. 
Exiting") - return - else: - waveform, sr = torchaudio.load(args.input) - if sr != SAMPLE_RATE: - waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)(waveform) - audio = waveform.squeeze(0).numpy() - text = model.transcribe(audio, chunk_length_s=30) - print("\n Transcription:", text) - - -if __name__ == "__main__": - main() diff --git a/Transformer-examples/ASR/Whisper-AI/vaiep_config.json b/Transformer-examples/ASR/Whisper-AI/vaiep_config.json deleted file mode 100644 index 95bebe53..00000000 --- a/Transformer-examples/ASR/Whisper-AI/vaiep_config.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "enable_f32_to_bf16_conversion": true - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} \ No newline at end of file diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/pt_to_onnx.py b/Transformer-examples/DistilBERT_text_classification_bf16/pt_to_onnx.py deleted file mode 100644 index 994e5136..00000000 --- a/Transformer-examples/DistilBERT_text_classification_bf16/pt_to_onnx.py +++ /dev/null @@ -1,40 +0,0 @@ -import torch -import argparse -import os -from transformers import AutoModelForSequenceClassification, AutoTokenizer - - -def main(args): - - model_id = "distilbert-base-uncased-finetuned-sst-2-english" - model = AutoModelForSequenceClassification.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) - - seq_length = 128 - dummy_input_ids = torch.randint(0, 30522, (1, seq_length), dtype=torch.int64) - dummy_attention_mask = torch.ones((1, seq_length), dtype=torch.int64) - - os.makedirs(args.output_dir,exist_ok=True) - onnx_model_path = os.path.join(args.output_dir,'distilbert-base-uncased-finetuned-sst-2-english.onnx') - - - torch.onnx.export( - model, - 
(dummy_input_ids, dummy_attention_mask), - onnx_model_path, - input_names=["input_ids", "attention_mask"], - output_names=["logits"], - dynamic_axes=None, - opset_version=17, - ) - - print("Exported ONNX model with fixed sequence length 128 (int64 inputs).") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Export from Huggingface to ONNX model') - parser.add_argument('--output_dir', type=str, required=True,default ='models',help='Output directory for the ONNX model') - - args = parser.parse_args() - - main(args) diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/quantize_to_bf16.py b/Transformer-examples/DistilBERT_text_classification_bf16/quantize_to_bf16.py deleted file mode 100644 index 3853af7d..00000000 --- a/Transformer-examples/DistilBERT_text_classification_bf16/quantize_to_bf16.py +++ /dev/null @@ -1,39 +0,0 @@ -from quark.onnx.quantization.config import Config, get_default_config -from quark.onnx.quantization.config.config import QuantizationConfig -from onnxruntime.quantization.calibrate import CalibrationMethod -from onnxruntime.quantization.quant_utils import QuantType, QuantFormat -import argparse -import os - -def main(args): - # Use default quantization configuration - quant_config = get_default_config("BF16") - quant_config.extra_options["BF16QDQToCast"] = True - - config = Config(global_quant_config=quant_config) - print("The configuration of the quantization is {}".format(config)) - - config.global_quant_config.extra_options["UseRandomData"] = True - - - from quark.onnx import ModelQuantizer - # Create an ONNX Quantizer - quantizer = ModelQuantizer(config) - - input_model_path = os.path.join(args.model_path) - output_model_path = os.path.join(args.output_dir,"distilbert-base-uncased-finetuned-sst-2-english_bf16.onnx") - - # Quantize the ONNX model - quant_model = quantizer.quantize_model(model_input = input_model_path, - model_output = output_model_path, - calibration_data_path = None) - - -if 
__name__ == "__main__": - parser = argparse.ArgumentParser(description='Export from Huggingface to ONNX model') - parser.add_argument('--model_path', type=str, required=True,default="models/model.onnx", help='Name or path of the Hugging Face model') - parser.add_argument('--output_dir', type=str, required=True,default ='models',help='Output directory for the ONNX model') - - args = parser.parse_args() - - main(args) diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/requirements.txt b/Transformer-examples/DistilBERT_text_classification_bf16/requirements.txt deleted file mode 100644 index 0884085d..00000000 --- a/Transformer-examples/DistilBERT_text_classification_bf16/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch==2.9.1 -onnxscript==0.5.7 \ No newline at end of file diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/run_inference.py b/Transformer-examples/DistilBERT_text_classification_bf16/run_inference.py deleted file mode 100644 index 68540e2c..00000000 --- a/Transformer-examples/DistilBERT_text_classification_bf16/run_inference.py +++ /dev/null @@ -1,46 +0,0 @@ -import onnxruntime as ort -import numpy as np -from transformers import AutoTokenizer -from pathlib import Path - - - -cache_dir = Path(__file__).parent.resolve() -# Create session options -session_options = ort.SessionOptions() -session_options.log_severity_level = 1 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal - -tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") -ort_session = ort.InferenceSession("model/distilbert-base-uncased-finetuned-sst-2-english.onnx", - sess_options = session_options, - providers=["VitisAIExecutionProvider"], - provider_options=[{'config_file': 'vitisai_config.json', - 'cache_dir': str(cache_dir), - 'cache_key': 'compiled_distilbert-base-uncased-finetuned-sst-2-english_bf16'}]) - -id2label = {0: "NEGATIVE", 1: "POSITIVE"} - -def preprocess(text, tokenizer, max_length=128): - encoded = 
tokenizer(text, return_tensors="np", padding="max_length", truncation=True, max_length=max_length) - return encoded["input_ids"].astype(np.int64), encoded["attention_mask"].astype(np.int64) - -prompts = [ - "Hello, my dog is cute", - "Stop talking to me", - "That painting is ugly", - "Life is beautiful" -] - -for text in prompts: - input_ids, attention_mask = preprocess(text, tokenizer) - - outputs = ort_session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask}) - - logits = outputs[0] - predicted_class_id = np.argmax(logits, axis=1)[0] - predicted_label = id2label[predicted_class_id] - - # Print result - print("*" * 10) - print("Prompt:", text) - print("Text classification:", predicted_label) diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/vitisai_config.json b/Transformer-examples/DistilBERT_text_classification_bf16/vitisai_config.json deleted file mode 100644 index 0437e56a..00000000 --- a/Transformer-examples/DistilBERT_text_classification_bf16/vitisai_config.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "vaiml_partition", - "plugin": "vaip-pass_vaiml_partition", - "vaiml_config": { - "preferred_data_storage": "unvectorized" - } - } - ], - "target": "VAIML", - "targets": [ - { - "name": "VAIML", - "pass": [ - "init", - "vaiml_partition" - ] - } - ] -} diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..1ea87f7f --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,5 @@ +node_modules/ +.mintlify/ +.next/ +.DS_Store +*.log diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS new file mode 100644 index 00000000..5eadd6b0 --- /dev/null +++ b/docs/CODEOWNERS @@ -0,0 +1,111 @@ +# CODEOWNERS - Ryzen AI Software documentation +# GENERATED by .github/scripts/generate_codeowners.py from each page's +# hidden `{/* owner: */}` header. Do NOT hand-edit: change the page +# header (or add a page) and regenerate. 
GitHub uses the LAST matching +# rule, so the per-page rules at the bottom override the folder defaults. + +# ----- Default owner (catch-all) ----- +* @dwithchenna + +# ----- CI / configuration (docs lead) ----- +/.github/ @bconsolvo +/docs/docs.json @bconsolvo + +# ----- Section defaults (dominant owner per top-level folder) ----- +/docs/audio/ @bconsolvo +/docs/gpu-radeon/ @dwithchenna +/docs/llms/ @dwithchenna +/docs/reference/ @bconsolvo +/docs/tools/ @dwithchenna +/docs/vision/ @dwithchenna +/docs/windows-ml/ @dwithchenna + +# ----- Top-level pages ----- +/docs/README.md @dwithchenna +/docs/index.mdx @uday610 +/docs/installation.mdx @uday610 + +# ----- audio/ ----- +/docs/audio/index.mdx @bconsolvo +/docs/audio/parakeet-tdt.mdx @lakshay048 +/docs/audio/whisper-asr.mdx @dwithchenna +/docs/audio/whisper_cpp.mdx @uday610 + +# ----- gpu-radeon/ ----- +/docs/gpu-radeon/README.md @dwithchenna +/docs/gpu-radeon/igpu-getting-started.mdx @dwithchenna +/docs/gpu-radeon/index.mdx @bconsolvo +/docs/gpu-radeon/radeon.mdx @dwithchenna +/docs/gpu-radeon/ryzenai_gpu.mdx @uday610 + +# ----- llms/ ----- +/docs/llms/distilbert-example.mdx @dwithchenna +/docs/llms/high_level_python.mdx @jeremyfowers +/docs/llms/hybrid_oga.mdx @uday610 +/docs/llms/index.mdx @bconsolvo +/docs/llms/llm-sft-deploy.mdx @dwithchenna +/docs/llms/llm_linux.mdx @lakshay048 +/docs/llms/oga-cpp-api.mdx @dwithchenna +/docs/llms/oga-inference.mdx @dwithchenna +/docs/llms/oga_model_prepare.mdx @uday610 +/docs/llms/oga_op_prepare.mdx @uday610 +/docs/llms/rag-oga.mdx @dwithchenna +/docs/llms/server_interface.mdx @jeremyfowers +/docs/llms/vlm.mdx @dwithchenna + +# ----- reference/ ----- +/docs/reference/README.md @dwithchenna +/docs/reference/app_development.mdx @ThomasXilinx +/docs/reference/applications.mdx @bconsolvo +/docs/reference/ci-dashboard.mdx @bconsolvo +/docs/reference/index.mdx @bconsolvo +/docs/reference/licenses.mdx @raholbharadwaj +/docs/reference/relnotes.mdx @uday610 
+/docs/reference/versions.mdx @bconsolvo + +# ----- tools/ ----- +/docs/tools/README.md @dwithchenna +/docs/tools/ai_analyzer.mdx @savitha-srinivasan +/docs/tools/index.mdx @bconsolvo +/docs/tools/model_quantization.mdx @uday610 +/docs/tools/modelrun.mdx @uday610 +/docs/tools/npu-check.mdx @dwithchenna +/docs/tools/onnx-benchmark.mdx @dwithchenna +/docs/tools/ops_support.mdx @dwithchenna +/docs/tools/quark-quantization.mdx @dwithchenna +/docs/tools/ryzen_ai_libraries.mdx @uday610 +/docs/tools/xrt_smi.mdx @uday610 + +# ----- vision/ ----- +/docs/vision/cvml-face-detection.mdx @dwithchenna +/docs/vision/cvml-face-mesh.mdx @dwithchenna +/docs/vision/cvml.mdx @dwithchenna +/docs/vision/getstartex.mdx @savitha-srinivasan +/docs/vision/getting-started-resnet-bf16.mdx @dwithchenna +/docs/vision/getting-started-resnet.mdx @dwithchenna +/docs/vision/hello-world.mdx @dwithchenna +/docs/vision/image-classification.mdx @dwithchenna +/docs/vision/index.mdx @bconsolvo +/docs/vision/nemotron-ocr-v2.mdx @lakshay048 +/docs/vision/npu-gpu-pipeline.mdx @dwithchenna +/docs/vision/sd_demo.mdx @ThomasXilinx +/docs/vision/super_resolution.mdx @bconsolvo +/docs/vision/torchvision.mdx @dwithchenna +/docs/vision/yolov8m.mdx @dwithchenna +/docs/vision/yolov8s-worldv2.mdx @dwithchenna + +# ----- windows-ml/ ----- +/docs/windows-ml/README.md @dwithchenna +/docs/windows-ml/clip.mdx @dwithchenna +/docs/windows-ml/faq.mdx @dwithchenna +/docs/windows-ml/googlebert.mdx @dwithchenna +/docs/windows-ml/index.mdx @dwithchenna +/docs/windows-ml/installation.mdx @dwithchenna +/docs/windows-ml/llm.mdx @dwithchenna +/docs/windows-ml/model_conversion.mdx @dwithchenna +/docs/windows-ml/model_deployment.mdx @dwithchenna +/docs/windows-ml/model_support.mdx @dwithchenna +/docs/windows-ml/resnet.mdx @dwithchenna +/docs/windows-ml/troubleshooting.mdx @dwithchenna +/docs/windows-ml/winml_ep.mdx @dwithchenna +/docs/windows-ml/winml_example.mdx @dwithchenna diff --git a/docs/README.md b/docs/README.md new file mode 
100644 index 00000000..6b8adc38 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +# Ryzen AI Software Documentation + +Documentation site for the AMD Ryzen AI Software Platform, living in the `docs/` folder of the RyzenAI-SW repository. + +Content was migrated from the previous Sphinx/RST site (https://ryzenai.docs.amd.com/en/latest/), reorganized into a simplified category structure, and extended with a GPU/Radeon section and the RyzenAI-SW examples. + +## Local preview + +```bash +npx mint dev +``` + +Then open http://localhost:3000. + +## Structure + +- `docs.json` - site configuration (navigation, theme, branding) +- `index.mdx` - landing page (Overview) +- `installation.mdx` - Installation Instructions (top-level page) +- Category folders (one level each): `vision/`, `llms/`, `audio/`, `gpu-radeon/`, `windows-ml/`, `tools/`, `reference/` +- `images/` - inline diagrams +- `assets/` - site assets (favicon) + +## Page ownership + +Every page carries a hidden owner header, for example: + +``` +{/* owner: dwithchenna */} +``` + +CI uses this header to route failures to the responsible owner via GitHub @mention. Default owner is `@dwithchenna`. See `.github/scripts/generate_codeowners.py`. diff --git a/docs/assets/favicon.ico b/docs/assets/favicon.ico new file mode 100644 index 00000000..03461310 Binary files /dev/null and b/docs/assets/favicon.ico differ diff --git a/docs/audio/index.mdx b/docs/audio/index.mdx new file mode 100644 index 00000000..d7afedcd --- /dev/null +++ b/docs/audio/index.mdx @@ -0,0 +1,40 @@ +--- +title: "Audio Overview" +description: "Speech-to-text (ASR) on AMD Ryzen AI with Whisper and Parakeet." +--- + +{/* owner: bconsolvo */} + +Audio models and tutorials for AMD Ryzen™ AI - on-device speech-to-text (ASR) with Whisper and Parakeet. + +## Audio Models + +Speech-to-text (ASR) models for the AMD NPU. The table is generated by `.github/scripts/fetch_models.py`; do not edit it by hand. 
+ +{/* MODELS_TABLE_START - generated by .github/scripts/fetch_models.py; do not edit by hand */} + +| Model | Parameters | Task | +| --- | --- | --- | +| [whisper-base](https://huggingface.co/amd/whisper-base-onnx-npu) | 74M | Speech-to-text (ASR) | +| [whisper-small](https://huggingface.co/amd/whisper-small-onnx-npu) | 244M | Speech-to-text (ASR) | +| [whisper-medium](https://huggingface.co/amd/whisper-medium-onnx-npu) | 769M | Speech-to-text (ASR) | +| [whisper-large-v3-turbo](https://huggingface.co/amd/whisper-large-v3-turbo-onnx-npu) | 809M | Speech-to-text (ASR) | +| [Parakeet-TDT-0.6B](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) | 0.6B | Speech-to-text (ASR) | + +{/* MODELS_TABLE_END */} + +## Tutorials + +{/* CARDS_START - generated by .github/scripts/gen_cards.py; do not edit by hand */} + + + Parakeet ASR on AMD Ryzen AI. + + + Whisper ASR on AMD Ryzen AI. + + + NPU acceleration for whisper.cpp on AMD Ryzen AI 300 Series systems through an AMD-maintained fork. + + +{/* CARDS_END */} diff --git a/docs/audio/parakeet-tdt.mdx b/docs/audio/parakeet-tdt.mdx new file mode 100644 index 00000000..26e6be35 --- /dev/null +++ b/docs/audio/parakeet-tdt.mdx @@ -0,0 +1,207 @@ +--- +title: "Parakeet ASR" +description: "Parakeet ASR on AMD Ryzen AI." +--- + +{/* owner: lakshay048 */} + +Source: [RyzenAI-SW / Demos/ASR/Parakeet-TDT](https://github.com/amd/RyzenAI-SW/blob/main/Demos/ASR/Parakeet-TDT). Code is maintained in the repository; this page mirrors its README. + + +Speech-to-text transcription using NVIDIA's **Parakeet TDT 0.6B** model, optimized for the **AMD Ryzen AI NPU**. Achieves **35-43x real-time** transcription by running the Conformer encoder on the NPU, LSTM decoder on the integrated Radeon GPU, and mel features on the CPU -- all three processors working in parallel. + +Includes an OpenAI Whisper-compatible REST API, a CLI benchmark tool, and a real-time microphone transcription demo. 
+ +## Performance + +| Configuration | Speed | Hardware | +|---|---|---| +| CPU INT8 | 17-18x real-time | Zen 5 CPU only | +| **NPU BF16 (default power)** | **35x real-time** | NPU + iGPU + CPU | +| **NPU BF16 (performance mode)** | **43x real-time** | NPU + iGPU + CPU | + +Tested on 16.5 minutes of audio (RTF=0.023-0.030). See [OPTIMIZATION.md](https://github.com/amd/RyzenAI-SW/blob/main/Demos/ASR/Parakeet-TDT/OPTIMIZATION.md) for the full optimization journey. + +To set NPU performance mode: `C:\Windows\System32\AMD\xrt-smi.exe configure --pmode performance` + +## Quick Start + +### 1. Download Models + +```powershell +python download_models.py --precision fp32 +``` + +Downloads FP32 models (~2.4GB) from HuggingFace. For INT8 (CPU-only, smaller): + +```powershell +python download_models.py +``` + +### 2. Prepare Models for NPU + +```powershell +conda activate ryzen-ai-1.7.1 + +# Static shapes + NPU compiler fixes (Pad->Conv fuse, attention mask patch) +python preprocess_for_npu.py --precision fp32 +``` + +### 3. Run + +**Benchmark (NPU + iGPU):** +```powershell +conda activate ryzen-ai-1.7.1 +python test_transcribe.py audio.wav --device npu --decoder-device gpu --runs 3 +``` + +**Live microphone transcription:** +```powershell +pip install sounddevice +python live_transcribe.py --device npu +``` + +**API server:** +```powershell +pip install -r requirements.txt +python server.py --device npu +``` + +**CPU-only (no Ryzen AI needed):** +```powershell +pip install onnxruntime +python test_transcribe.py audio.wav --device cpu +``` + +> **Note:** The first NPU run triggers VAIML compilation which is cached at `C:\temp\%USERNAME%\vaip\.cache\`. Subsequent runs load from cache in ~4-6 seconds. The cache is keyed by model signature, so it is shared across directories using the same model.
+ +## Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Audio (WAV) │ +│ ↓ │ +│ Mel Filterbank (CPU, vectorized numpy) ~25ms/chunk │ +│ ↓ │ +│ Conformer Encoder (NPU, BF16) ~300ms/chunk │ +│ ↓ │ +│ TDT LSTM Decoder (iGPU, DirectML) ~1.0ms/step ×188 │ +│ ↓ │ +│ Text output │ +└─────────────────────────────────────────────────────────┘ + +For multi-chunk audio, encoder and decoder run in parallel: + NPU encodes chunk N+1 while iGPU decodes chunk N +``` + +## Files + +``` +server.py FastAPI server (Whisper-compatible API) +test_transcribe.py Benchmark with per-stage timing breakdown +live_transcribe.py Real-time microphone transcription +benchmark_npu.py Multi-config VAIML parameter sweep + +inference/ + __init__.py + transcriber.py ONNX Runtime pipeline (NPU encoder + iGPU decoder) + mel.py Vectorized 128-bin mel filterbank + audio.py WAV parsing + +preprocess_for_npu.py Static shapes + NPU Pad/mask fixes (FP32 encoder -> .static.npu.onnx) +fuse_pads_direct.py Optional: fuse Pad->Conv on a legacy .static.onnx only +optimize_model.py Experimental ORT fold + fusion (needs unfused static.onnx) +fuse_attn_pads.py Analyze attention Pad ops + +models/ + vai_ep_config.json VitisAI EP config (optimize_level=3) + static_config.json Static shape config (15s chunks) + config.json Model parameters + vocab.txt SentencePiece vocabulary (8193 tokens) + encoder-model.fp32.static.npu.onnx Static encoder (Pad-fused, for NPU) + decoder_joint-model.fp32.static.onnx Static decoder +``` + +## API Reference + +### Transcribe Audio + +``` +POST /v1/audio/transcriptions +Content-Type: multipart/form-data +``` + +| Parameter | Type | Required | Description | +|---|---|---|---| +| file | file | Yes | Audio file (WAV, max 25MB) | +| model | string | No | Model name (accepted but ignored) | +| language | string | No | ISO-639-1 code (default: en) | +| response_format | string | No | json, text, srt, vtt, verbose_json | + +### Other Endpoints + +- `GET 
/v1/models` -- List models +- `GET /v1/info` -- Execution provider info +- `GET /health` -- Health check + +## CLI Options + +``` +python test_transcribe.py audio.wav [options] + --device {cpu,npu,gpu} Encoder device (default: cpu) + --decoder-device {auto,cpu,gpu} Decoder device (default: auto) + --models-dir DIR Models directory (default: ./models) + --runs N Benchmark runs (default: 1) + --debug Verbose logging + +python live_transcribe.py [options] + --device {cpu,npu,gpu} Execution device + --test-mic Test microphone levels + --list-devices Show audio devices + +python server.py [options] + --device {cpu,npu} Execution device + --port PORT Server port (default: 5092) + --host HOST Server host (default: 0.0.0.0) +``` + +## Requirements + +**CPU mode:** +- Python 3.10+ +- onnxruntime +- numpy, fastapi, uvicorn + +**NPU mode (Ryzen AI):** +- AMD Ryzen AI processor (Strix/XDNA2) +- Windows 11 +- Miniforge with `ryzen-ai-1.7.0` or `ryzen-ai-1.7.1` conda environment +- onnxruntime-vitisai, flexml-lite (included in Ryzen AI SDK) +- sounddevice (for live microphone mode) + +## Troubleshooting + +**VitisAI EP not available:** +```powershell +conda activate ryzen-ai-1.7.1 +python -c "import onnxruntime; print(onnxruntime.get_available_providers())" +# Should show: ['VitisAIExecutionProvider', 'DmlExecutionProvider', 'CPUExecutionProvider'] +``` + +**NPU startup takes ~4-6 seconds:** This is normal -- VAIML loads the compiled encoder from its cache at `C:\temp\%USERNAME%\vaip\.cache\`. If the cache is missing (first run or new model), compilation will take longer. + +**All ops falling back to CPU:** If you see `unknown type 9` errors or `CPU 1434` in the log, the VAIML compiler failed to partition the model. Re-run `python preprocess_for_npu.py --precision fp32` so the encoder includes Pad->Conv fusion and the VAIML 1.7.x attention-mask rewrite. This is currently tested on Strix NPUs; Strix Halo may have compatibility issues with the VAIML frontend.
+ +**vaiml.dll not found:** Ensure flexml-lite is installed and conda env is activated. The transcriber auto-discovers it via `sys.prefix`. + +**Audio format not supported:** Convert with ffmpeg: +```powershell +ffmpeg -i input.mp3 -ar 16000 -ac 1 output.wav +``` + +## Credits + +- **NVIDIA** -- Parakeet TDT 0.6B model +- **Ivan Stupakov** (@istupakov) -- ONNX conversion +- **achetronic** -- Original Go implementation +- **AMD** -- Ryzen AI NPU, VitisAI EP, DirectML EP diff --git a/docs/audio/whisper-asr.mdx b/docs/audio/whisper-asr.mdx new file mode 100644 index 00000000..abd097af --- /dev/null +++ b/docs/audio/whisper-asr.mdx @@ -0,0 +1,164 @@ +--- +title: "Whisper ASR" +description: "Whisper ASR on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / Demos/ASR/Whisper](https://github.com/amd/RyzenAI-SW/blob/main/Demos/ASR/Whisper). Code is maintained in the repository; this page mirrors its README. + + + +Run fast, on-device speech recognition with Ryzen AI and OpenAI Whisper. This guide covers running pre-built NPU-optimized models (quick start) and exporting your own models from Hugging Face (advanced). + +## Supported Models + +| Model | Parameters | NPU Support | Auto-Download | +|-------|-----------|-------------|---------------| +| whisper-base | 74M | Yes | [amd/whisper-base-onnx-npu](https://huggingface.co/amd/whisper-base-onnx-npu) | +| whisper-small | 244M | Yes | [amd/whisper-small-onnx-npu](https://huggingface.co/amd/whisper-small-onnx-npu) | +| whisper-medium | 769M | Yes | [amd/whisper-medium-onnx-npu](https://huggingface.co/amd/whisper-medium-onnx-npu) | +| whisper-large-v3-turbo | 809M | Yes | [amd/whisper-large-v3-turbo-onnx-npu](https://huggingface.co/amd/whisper-large-v3-turbo-onnx-npu) | + +## Prerequisites + +1. **Install Ryzen AI Software** — follow the Installation guide. + +2. **Activate environment** + +```powershell +conda activate ryzen-ai-1.4.0 +``` + +3. 
**Install dependencies** + +```powershell +cd docs/audio/whisper +pip install -r requirements.txt +``` + +## Quick Start: Transcribe an Audio File + +Models are auto-downloaded from AMD's Hugging Face repos on first run. + +```powershell +python run_whisper.py ` + --model-type whisper-base ` + --device npu ` + --input audio_files/1089-134686-0000.wav +``` + +Replace `whisper-base` with any supported model (`whisper-small`, `whisper-medium`, `whisper-large-v3-turbo`). + +## Live Microphone Transcription + +```powershell +python run_whisper.py ` + --model-type whisper-base ` + --device npu ` + --input mic ` + --duration 0 +``` + +`--duration 0` records continuously until Ctrl+C or silence is detected. + +## Dataset Evaluation (WER, CER, RTF) + +Evaluate on LibriSpeech samples to measure Word Error Rate, Character Error Rate, Real-Time Factor, and Time to First Token: + +```powershell +python run_whisper.py ` + --model-type whisper-base ` + --device npu ` + --eval-dir eval_dataset/LibriSpeech-samples ` + --results-dir results +``` + +## NPU Configuration + +### How NPU Acceleration Works + +When running on the NPU, Whisper's encoder and decoder are accelerated through the Vitis AI Execution Provider. For whisper-base: + +```text +# Encoder operations +[Vitis AI EP] No. of Operators : VAIML 225 +[Vitis AI EP] No. of Subgraphs : VAIML 1 + +# Decoder operations +[Vitis AI EP] No. of Operators : CPU 24 VAIML 341 +[Vitis AI EP] No. of Subgraphs : VAIML 2 +``` + +100% of encoder operators and 93.4% of decoder operators run on the NPU. + +### Execution Provider Configuration + +Edit `config/model_config.json` to configure execution providers per model. 
For NPU, set `cache_key`, `cache_dir`, and point to the appropriate VitisAI config: + +```json +{ + "config_file": "config/vitisai_config_whisper_decoder.json", + "cache_dir": "./cache", + "cache_key": "whisper_medium_decoder" +} +``` + +### Whisper-Medium Special Configuration + +Whisper-medium requires additional flags in `config/vitisai_config_whisper_encoder.json`: + +```json +"vaiml_config": { + "optimize_level": 3, + "aiecompiler_args": "--system-stack-size=512" +} +``` + +- `optimize_level=3`: aggressive optimizations for larger models +- `--system-stack-size=512`: increases AI Engine stack size for whisper-medium's resource demands + +## Advanced: Export Your Own Models + +If you need to export a custom Whisper model (e.g., a fine-tuned variant) from Hugging Face to ONNX with static shapes for NPU: + +### Step 1: Export to ONNX + +```powershell +optimum-cli export onnx ` + --model openai/whisper-base ` + --task automatic-speech-recognition ` + whisper-base-onnx/ +``` + +### Step 2: Convert Dynamic to Static Shapes + +The NPU requires static input shapes. Use the included conversion script: + +```powershell +python dynamic_to_static.py +``` + +This uses `params.json` to fix dynamic dimensions in the encoder and decoder ONNX models. + +### Step 3: Run with Explicit Paths + +```powershell +python run_whisper.py ` + --encoder whisper-base-onnx/encoder_model.onnx ` + --decoder whisper-base-onnx/decoder_model.onnx ` + --device npu ` + --input audio_files/1089-134686-0000.wav +``` + +## Whisper.cpp (C++ Alternative) + +Ryzen AI also provides NPU acceleration for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) through an AMD-maintained fork. On Ryzen AI 300 Series, the encoder fully offloads to the NPU for significant speedup versus CPU-only runs. NPU acceleration is currently Windows-only with Linux support planned. 
+ +For setup steps and NPU-optimized model guidance, see the [AMD whisper.cpp fork](https://github.com/amd/whisper.cpp?tab=readme-ov-file#amd-ryzen-ai-support-for-npu). + +## Notes + +- First run on NPU takes ~15 minutes for model compilation. Subsequent runs use the cached compiled model. +- Supports both CPU and NPU devices via the `--device` flag. +- Use `--language` to force a specific language for transcription. diff --git a/docs/audio/whisper_cpp.mdx b/docs/audio/whisper_cpp.mdx new file mode 100644 index 00000000..2ee1d24e --- /dev/null +++ b/docs/audio/whisper_cpp.mdx @@ -0,0 +1,10 @@ +--- +title: "Whisper.cpp (NPU)" +description: "NPU acceleration for whisper.cpp on AMD Ryzen AI 300 Series systems through an AMD-maintained fork." +--- + +{/* owner: uday610 */} + +Ryzen AI provides NPU acceleration for whisper.cpp through an AMD-maintained fork. On AMD Ryzen AI 300 Series systems, Whisper can fully offload the encoder to the NPU, which can deliver a significant speedup versus CPU-only runs. NPU acceleration is currently supported on Windows only, with Linux support planned. + +For setup steps, build flags, and NPU-optimized model guidance, refer to the GitHub README at [https://github.com/amd/whisper.cpp?tab=readme-ov-file#amd-ryzen-ai-support-for-npu](https://github.com/amd/whisper.cpp?tab=readme-ov-file#amd-ryzen-ai-support-for-npu). diff --git a/docs/docs.json b/docs/docs.json new file mode 100644 index 00000000..f49c114b --- /dev/null +++ b/docs/docs.json @@ -0,0 +1,219 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "mint", + "name": "Ryzen AI Software", + "metadata": { + "og:site_name": "Ryzen AI Software", + "og:title": "Ryzen AI Software Documentation", + "og:description": "Tools and runtime libraries for optimizing and deploying AI inference on AMD Ryzen AI powered PCs." 
+ }, + "colors": { + "primary": "#ED1C24", + "light": "#F4484D", + "dark": "#C8171E" + }, + "favicon": "/logo/favicon-light.svg", + "logo": { + "light": "/logo/amd-ryzenai-light.svg", + "dark": "/logo/amd-ryzenai-dark.svg", + "href": "/" + }, + "contextual": { + "options": [ + "copy", + "view", + "chatgpt", + "claude", + "cursor" + ] + }, + "navigation": { + "global": { + "anchors": [ + { + "anchor": "GitHub", + "href": "https://github.com/amd/RyzenAI-SW", + "icon": "github" + }, + { + "anchor": "Discord", + "href": "https://discord.gg/amd-dev", + "icon": "discord" + } + ], + "versions": [ + { + "version": "1.7.1", + "default": true, + "href": "/" + }, + { + "version": "1.7", + "href": "https://ryzenai.docs.amd.com/en/1.7/" + }, + { + "version": "1.6.1", + "href": "https://ryzenai.docs.amd.com/en/1.6.1/" + }, + { + "version": "1.6", + "href": "https://ryzenai.docs.amd.com/en/1.6/" + }, + { + "version": "1.5", + "href": "https://ryzenai.docs.amd.com/en/1.5/" + }, + { + "version": "1.4", + "href": "https://ryzenai.docs.amd.com/en/1.4/" + }, + { + "version": "1.3", + "href": "https://ryzenai.docs.amd.com/en/1.3/" + }, + { + "version": "1.2", + "href": "https://ryzenai.docs.amd.com/en/1.2/" + }, + { + "version": "1.1", + "href": "https://ryzenai.docs.amd.com/en/1.1/" + } + ] + }, + "groups": [ + { + "group": "Ryzen AI Software", + "pages": [ + "index", + "installation", + { + "group": "Vision", + "icon": "image", + "expanded": false, + "pages": [ + "vision/index", + "vision/cvml-face-detection", + "vision/cvml-face-mesh", + "vision/getting-started-resnet", + "vision/getting-started-resnet-bf16", + "vision/hello-world", + "vision/image-classification", + "vision/nemotron-ocr-v2", + "vision/npu-gpu-pipeline", + "vision/yolov8m", + "vision/yolov8s-worldv2", + "vision/getstartex", + "vision/cvml", + "vision/sd_demo", + "vision/super_resolution", + "vision/torchvision" + ] + }, + { + "group": "LLMs", + "icon": "comments", + "expanded": false, + "pages": [ + "llms/index", + 
"llms/oga_op_prepare", + "llms/distilbert-example", + "llms/llm-sft-deploy", + "llms/high_level_python", + "llms/oga-cpp-api", + "llms/oga-inference", + "llms/hybrid_oga", + "llms/oga_model_prepare", + "llms/rag-oga", + "llms/llm_linux", + "llms/server_interface", + "llms/vlm" + ] + }, + { + "group": "Audio", + "icon": "microphone", + "expanded": false, + "pages": [ + "audio/index", + "audio/parakeet-tdt", + "audio/whisper-asr", + "audio/whisper_cpp" + ] + }, + { + "group": "GPU & Radeon", + "icon": "microchip", + "expanded": false, + "pages": [ + "gpu-radeon/index", + "gpu-radeon/ryzenai_gpu", + "gpu-radeon/igpu-getting-started", + "gpu-radeon/radeon" + ] + }, + { + "group": "Windows ML", + "icon": "windows", + "expanded": false, + "pages": [ + "windows-ml/index", + "windows-ml/winml_ep", + "windows-ml/faq", + "windows-ml/installation", + "windows-ml/model_conversion", + "windows-ml/model_deployment", + "windows-ml/troubleshooting", + "windows-ml/model_support", + "windows-ml/winml_example", + "windows-ml/clip", + "windows-ml/googlebert", + "windows-ml/llm", + "windows-ml/resnet" + ] + }, + { + "group": "Tools", + "icon": "screwdriver-wrench", + "expanded": false, + "pages": [ + "tools/index", + "tools/ai_analyzer", + "tools/modelrun", + "tools/model_quantization", + "tools/npu-check", + "tools/xrt_smi", + "tools/onnx-benchmark", + "tools/quark-quantization", + "tools/ryzen_ai_libraries", + "tools/ops_support" + ] + }, + { + "group": "Reference", + "icon": "book", + "expanded": false, + "pages": [ + "reference/index", + "reference/app_development", + "reference/applications", + "reference/ci-dashboard", + "reference/licenses", + "reference/versions", + "reference/relnotes" + ] + } + ] + } + ] + }, + "footer": { + "socials": { + "github": "https://github.com/amd/RyzenAI-SW", + "discord": "https://discord.gg/amd-dev", + "x": "https://x.com/amd", + "linkedin": "https://www.linkedin.com/company/amd" + }, + "copyright": "Copyright (C) 2023-2026 Advanced Micro Devices, 
Inc." + } +} diff --git a/docs/gpu-radeon/README.md b/docs/gpu-radeon/README.md new file mode 100644 index 00000000..e9abf8c6 --- /dev/null +++ b/docs/gpu-radeon/README.md @@ -0,0 +1,5 @@ +# GPU & Radeon + +Run AI workloads on AMD Radeon integrated and discrete GPUs (DirectML and ROCm). + +Landing page: `gpu-radeon/index.mdx`. diff --git a/CNN-examples/iGPU/getting_started/README.md b/docs/gpu-radeon/igpu-getting-started.mdx similarity index 84% rename from CNN-examples/iGPU/getting_started/README.md rename to docs/gpu-radeon/igpu-getting-started.mdx index b9978df4..e7063057 100644 --- a/CNN-examples/iGPU/getting_started/README.md +++ b/docs/gpu-radeon/igpu-getting-started.mdx @@ -1,9 +1,13 @@ - - - - -

Ryzen™ AI iGPU Example

-
+--- +title: "Getting Started on iGPU" +description: "Getting Started on iGPU on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / CNN-examples/iGPU/getting_started](https://github.com/amd/RyzenAI-SW/blob/main/CNN-examples/iGPU/getting_started). Code is maintained in the repository; this page mirrors its README. + + # Getting started with iGPU @@ -13,7 +17,7 @@ This is an example showing how to run the ResNet50 model from PyTorch on AMD's i Create a clone of the Ryzen AI installation conda environment to add required python packages -```python +```bat set RYZEN_AI_CONDA_ENV_NAME=ryzen-ai- conda create --name igpu-example --clone %RYZEN_AI_CONDA_ENV_NAME% conda activate igpu-example @@ -21,27 +25,27 @@ conda activate igpu-example Set RyzenAI Environment variable -```bash -# Default location of RyzenAI software installation +```bat +:: Default location of RyzenAI software installation set RYZEN_AI_INSTALLATION_PATH= ``` ## Install Olive -```bash +```powershell cd \CNN-examples\iGPU\getting_started python -m pip install -r requirements.txt ``` ## Install additional dependencies for the example -```bash +```powershell python -m olive.workflows.run --config resnet50_config.json --setup ``` ## Optimize the model using Olive -```bash +```powershell python -m olive.workflows.run --config resnet50_config.json ``` @@ -52,7 +56,7 @@ The optimized models will be available in `./torch_to_onnx-float16_conversion-pe ### Deployment in Python -```bash +```powershell python predict.py ``` **_NOTE:_** In predict.py, line 15, the iGPU device ID is enumerated as 0. For PCs with multiple GPUs, you may adjust the device_id to target a specific iGPU. @@ -69,7 +73,7 @@ python predict.py It is recommended to build OpenCV from the source code and use static build. The following instruction installs OpenCV in the location "C:\\opencv" as an example, this can be changed by modifying `CMAKE_PREFIX_PATH` in the following cmake command. 
You may first change the directory to where you want to clone the OpenCV repository. -```bash +```powershell git clone https://github.com/opencv/opencv.git -b 4.6.0 cd opencv @@ -84,14 +88,14 @@ The build files will be written to ``build\``. Build the given ResNet50 C++ example: -```bash +```powershell cd cpp compile.bat "path/to/your/opencv/build" ``` Run inference: -```bash +```powershell run.bat ``` diff --git a/docs/gpu-radeon/index.mdx b/docs/gpu-radeon/index.mdx new file mode 100644 index 00000000..60b00c08 --- /dev/null +++ b/docs/gpu-radeon/index.mdx @@ -0,0 +1,22 @@ +--- +title: "GPU & Radeon Overview" +description: "Run AI workloads on AMD Radeon integrated and discrete GPUs." +--- + +{/* owner: bconsolvo */} + +Ryzen™ AI systems pair the NPU with a Radeon™ GPU. Use this section for GPU-accelerated inference paths. + +{/* CARDS_START - generated by .github/scripts/gen_cards.py; do not edit by hand */} + + + Run ONNX models on the Ryzen AI integrated GPU using the DirectML Execution Provider. + + + Getting Started on iGPU on AMD Ryzen AI. + + + Run AI workloads on AMD Radeon and Ryzen AI integrated/discrete GPUs using ROCm. + + +{/* CARDS_END */} diff --git a/docs/gpu-radeon/radeon.mdx b/docs/gpu-radeon/radeon.mdx new file mode 100644 index 00000000..477798f5 --- /dev/null +++ b/docs/gpu-radeon/radeon.mdx @@ -0,0 +1,35 @@ +--- +title: "Radeon GPU (ROCm)" +description: "Run AI workloads on AMD Radeon and Ryzen AI integrated/discrete GPUs using ROCm." +--- + +{/* owner: dwithchenna */} + +AMD Ryzen™ AI systems pair the NPU with a Radeon™ integrated GPU (iGPU), and many platforms also support a Radeon discrete GPU (dGPU). For GPU-accelerated AI on Windows and Linux, AMD provides **ROCm** for Radeon and Ryzen. + + +GPU documentation lives in the canonical ROCm docs to avoid drift. This page is the entry point from Ryzen AI Software; deep GPU content is maintained by the ROCm team. 
+ + +## ROCm on Radeon and Ryzen + + + + Official ROCm documentation for AMD Radeon and Ryzen AI GPUs. + + + Run models on the Ryzen AI integrated GPU via the DirectML execution provider. + + + +## When to use the GPU vs the NPU + +- **NPU** - best for sustained, power-efficient inference (CNNs, transformers, LLM hybrid/NPU flows). +- **GPU (Radeon)** - best for high-throughput workloads, larger models via GGUF/Vulkan, and ROCm-accelerated pipelines. +- **Hybrid** - LLM flows can split work across NPU and iGPU for the best time-to-first-token and tokens-per-second. + +## Next steps + +- Install: see the [Installation Instructions](/installation). +- LLMs: see the [LLM Deployment Overview](/llms). +- Full GPU setup and supported configurations: [ROCm for Radeon and Ryzen](https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/index.html). diff --git a/docs/gpu-radeon/ryzenai_gpu.mdx b/docs/gpu-radeon/ryzenai_gpu.mdx new file mode 100644 index 00000000..56cc10ed --- /dev/null +++ b/docs/gpu-radeon/ryzenai_gpu.mdx @@ -0,0 +1,43 @@ +--- +title: "DirectML Flow" +description: "Run ONNX models on the Ryzen AI integrated GPU using the DirectML Execution Provider." +--- + +{/* owner: uday610 */} + +## Prerequisites + +- DirectX12 capable Windows OS (Windows 11 recommended) +- Latest AMD [GPU device driver](https://www.amd.com/en/support) installed +- [Microsoft Olive](https://microsoft.github.io/Olive/how-to/installation.html) for model conversion and optimization +- Latest [ONNX Runtime DirectML EP](https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html) + +You can verify the GPU driver and DirectX versions in `Windows Task Manager` -> `Performance` -> `GPU`. + +## Running models on Ryzen AI GPU + +Running models on the Ryzen AI GPU is accomplished in two simple steps: + +**Model Conversion and Optimization**: After the model is trained, Microsoft Olive Optimizer can be used to convert the model to ONNX and optimize it for execution on the target device. 
+ +For additional information, refer to the [Microsoft Olive Documentation](https://microsoft.github.io/Olive/). + +**Deployment**: Once the model is in the ONNX format, the ONNX Runtime DirectML EP (`DmlExecutionProvider`) is used to run the model on the AMD Ryzen AI GPU. + +For additional information, refer to the [ONNX Runtime documentation for the DirectML Execution Provider](https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html). + +## Examples + +- Optimizing and running [ResNet on Ryzen AI GPU](https://github.com/amd/RyzenAI-SW/tree/main/CNN-examples/iGPU/getting_started) + +## Additional Resources + +- Article on how AMD and Blackmagic Design worked together to accelerate [DaVinci Resolve Studio](https://www.blackmagicdesign.com/products/davinciresolve/studio) workloads on AMD hardware: + + - [AI Accelerated Video Editing with DaVinci Resolve 18.6 & AMD Radeon Graphics](https://www.amd.com/en/blogs/2023/ai-accelerated-video-editing-with-davinci-resolve-.html) + +- Blog posts on using the Ryzen AI Software for various generative AI workloads on GPU: + + - [Automatic1111 Stable Diffusion WebUI with DirectML Extension on AMD GPUs](https://www.amd.com/en/blogs/2023/-how-to-automatic1111-stable-diffusion-webui-with.html) + - [Running Optimized Llama2 with Microsoft DirectML on AMD Radeon Graphics](https://www.amd.com/en/blogs/2023/-how-to-running-optimized-llama2-with-microsoft-d.html) + - [AI-Assisted Mobile Workstation Workflows Powered by AMD Ryzen™ AI](https://www.amd.com/en/blogs/2024/ai-assisted-mobile-workstation-workflows-powered-b.html) diff --git a/docs/images/rai-sw.png b/docs/images/rai-sw.png new file mode 100644 index 00000000..6a150394 Binary files /dev/null and b/docs/images/rai-sw.png differ diff --git a/docs/images/winml-sw.png b/docs/images/winml-sw.png new file mode 100644 index 00000000..e1cc1417 Binary files /dev/null and b/docs/images/winml-sw.png differ diff --git a/docs/images/winml-workflow.png 
b/docs/images/winml-workflow.png new file mode 100644 index 00000000..f9dd393e Binary files /dev/null and b/docs/images/winml-workflow.png differ diff --git a/docs/index.mdx b/docs/index.mdx new file mode 100644 index 00000000..2f321a7b --- /dev/null +++ b/docs/index.mdx @@ -0,0 +1,108 @@ +--- +title: "Overview" +description: "Tools and runtime libraries for optimizing and deploying AI inference on AMD Ryzen AI powered PCs." +icon: "house" +--- + +{/* owner: uday610 */} + +AMD Ryzen™ AI Software includes the tools and runtime libraries for optimizing and deploying AI inference on AMD Ryzen™ AI powered PCs. Ryzen AI software enables applications to run on the neural processing unit (NPU) built in the AMD XDNA™ architecture, as well as on the integrated GPU. This allows developers to build and deploy models trained in PyTorch or TensorFlow and run them directly on laptops powered by Ryzen AI using ONNX Runtime and the Vitis™ AI Execution Provider (EP). + +Ryzen AI Software stack + +## Explore + + + + Install the NPU driver and Ryzen AI Software on Windows or Linux. + + + Image generation, object detection, classification, OCR, and super-resolution on the NPU. + + + Run large language models on the NPU and iGPU via OGA and the Lemonade SDK. + + + On-device speech-to-text (ASR) with Whisper and Parakeet. + + + GPU-accelerated inference on AMD Radeon and the integrated GPU. + + + Run ONNX models through Windows ML and the VitisAI execution provider. + + + Quantization, compilation, profiling, and NPU management utilities. + + + Release notes, supported operators, model lists, and licenses. + + + +## Run local LLMs: Lemonade & GAIA + +AMD's open-source stack for running generative AI locally on Ryzen™ AI: + + + + AMD's open-source **LLM serving framework**. Serve local LLMs on the Ryzen AI NPU and iGPU behind an **OpenAI-compatible REST API**, so any OpenAI-compatible app works out of the box. 
See the [Server Interface](/llms/server_interface) or [github.com/lemonade-sdk/lemonade](https://github.com/lemonade-sdk/lemonade). + + + AMD's open-source **agent framework built on top of Lemonade** - chat, RAG, and tool-using agents that run entirely on your Ryzen AI PC. See [github.com/amd/gaia](https://github.com/amd/gaia). + + + +## Development Flow Overview + +The Ryzen AI development flow does not require any modifications to the existing model training processes and methods. The pre-trained model can be used as the starting point of the Ryzen AI flow. + +### Quantization + +Quantization involves converting the AI model's parameters from floating-point to lower-precision representations, such as 8-bit integer. Quantized models are more power-efficient, utilize less memory, and offer better performance. Ryzen AI Software also supports CNN and Transformer models in floating-point 32 format as input models without quantization. These models are internally converted to bfloat16 and compiled using the bfloat16 compilation flow. + +**AMD Quark** is a comprehensive cross-platform deep learning toolkit designed to simplify and enhance the quantization of deep learning models. Supporting both PyTorch and ONNX models, Quark empowers developers to optimize their models for deployment on a wide range of hardware backends, achieving significant performance gains without compromising accuracy. + +For more details, refer to the [Model Quantization](/tools/model_quantization) page. + +### CNN/Transformer Compilation and Deployment + +The AI model is deployed using the ONNX Runtime with either C++ or Python APIs. The Vitis AI Execution Provider included in the ONNX Runtime intelligently determines what portions of the AI model should run on the NPU, optimizing workloads to ensure optimal performance with lower power consumption. + +For more details, refer to the [Model Compilation and Deployment](/tools/modelrun) page. 
+ +## LLM Flow Overview + +The Ryzen AI LLM software stack is available through three development interfaces, each suited for specific use cases as outlined in the sections below. All three interfaces are built on top of native OnnxRuntime GenAI (OGA) libraries or llama.cpp libraries, as shown in the Ryzen AI Software Stack diagram below. + +The **high-level Python APIs**, as well as the **Server Interface**, also leverage the **Lemonade SDK**, which is multi-vendor open-source software that provides everything necessary for quickly getting started with LLMs on OGA or llama.cpp. + +At the bottom, **OnnxRuntime GenAI (OGA)** or llama.cpp (only supported for iGPU) API is the lowest-level API available for building LLM applications on a Ryzen AI PC. + + + +**Ryzen AI Software Stack** + + + + + + + + + + + + + + + + + + + + + + +
Your Python ApplicationYour LLM StackYour Native Application
Lemonade Python API*Lemonade Server Interface*OGA C++ Headers OR llama.cpp C++ Headers
Custom AMD OnnxRuntime GenAI (OGA) OR llama.cpp*
AMD Ryzen AI Driver and Hardware
+ +For more details, refer to the [LLM Deployment Overview](/llms) page. diff --git a/docs/installation.mdx b/docs/installation.mdx new file mode 100644 index 00000000..8db92929 --- /dev/null +++ b/docs/installation.mdx @@ -0,0 +1,356 @@ +--- +title: "Installation Instructions" +description: "Install the Ryzen AI NPU driver and Ryzen AI Software on Windows or Linux." +icon: "download" +--- + +{/* owner: uday610 */} + +Install the Ryzen AI NPU driver and Ryzen AI Software. Choose your operating system below. + + + + +This tab covers Ryzen AI installation on Windows. + +## Prerequisites + +The Ryzen AI Software supports AMD processors with a Neural Processing Unit (NPU). Refer to the release notes for the full list of [supported configurations](/reference/relnotes#supported-configurations). + +The following dependencies must be installed on the system before installing the Ryzen AI Software: + +| Dependencies | Version Requirement | +| ------------------------------------------- | --------------------- | +| Windows 11 | build >= 22621.3527 | +| Visual Studio | 2022 | +| cmake | version >= 3.26 | +| Python distribution (Miniforge preferred) | Latest version | + + +**IMPORTANT**: + +- Visual Studio 2022 Community (Optional for AMD Quark, to support custom op flow): ensure that `Desktop Development with C++` is installed +- Miniforge: ensure that the following path is set in the System PATH variable: `path\to\miniforge3\condabin` or `path\to\miniforge3\Scripts\` or `path\to\miniforge3\` (The System PATH variable should be set in the *System Variables* section of the *Environment Variables* window). + + +
+ +## Install NPU Drivers + +- Download and install NPU driver version 32.0.203.280 or newer using the following links: + + - [NPU Driver (Version 32.0.203.280)](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=NPU_RAI1.5.zip) + + - NPU driver 32.0.203.280 is the production driver for Phoenix, Hawk Point, Strix, Strix Halo, and Krackan Point. + - [NPU Driver (Version 32.0.203.314)](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=NPU_RAI1.5.0.314.zip) + +- Install the NPU drivers by following these steps: + + - Extract the downloaded ZIP file. + - Open a terminal in administrator mode and execute the `.\npu_sw_installer.exe` file. + +- Ensure that the NPU driver (Version:32.0.203.280, Date:5/16/2025) is correctly installed by opening Task Manager -> Performance -> NPU0. + + + +## Install Ryzen AI Software + +- Download the Ryzen AI Software installer [ryzen-ai-lt-1.7.1.exe](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=ryzen-ai-lt-1.7.1.exe). + +- Launch the EXE installer and follow the instructions on the installation wizard: + + - Accept the terms of the license agreement + - Provide the destination folder for Ryzen AI installation (default: `C:\Program Files\RyzenAI\1.7.1`) + - Specify the name for the conda environment (default: `ryzen-ai-1.7.1`) + +The Ryzen AI Software packages are now installed in the conda environment created by the installer. + + +A NuGet package is available to download at [ryzen-ai-1.7.1-nuget.zip](https://account.amd.com/en/forms/downloads/ryzen-ai-software-platform-xef.html?filename=ryzen-ai-1.7.1-nuget.zip). + + + + +## Test the Installation + +The Ryzen AI Software installation folder contains a test to verify that the software is correctly installed. This installation test can be found in the `quicktest` subfolder, which is expected to work for Strix (STX) or newer devices. 
+ +- Open a Conda command prompt (search for "Miniforge Prompt" in the Windows start menu) + +- Activate the Conda environment created by the Ryzen AI installer: + +``` +conda activate ryzen-ai- +``` + +- Run the test: + +``` +cd %RYZEN_AI_INSTALLATION_PATH%/quicktest +python quicktest.py +``` + +``` +[I:onnxruntime:, session_state_utils.cc:243 onnxruntime::session_state_utils::SaveInitializedTensors] Saving initialized tensors. +[I:onnxruntime:, session_state_utils.cc:438 onnxruntime::session_state_utils::SaveInitializedTensors] Done saving initialized tensors +[I:onnxruntime:, inference_session.cc:2532 onnxruntime::InferenceSession::Initialize] Session successfully initialized. +Test Finished +``` + +- Verify NPU activity by opening **Task Manager → Performance → NPU** while the test is running. You should see NPU utilization increase during model inference. + +### NPU Offloading with Session Options + +This section demonstrates how to enable NPU offloading logs using ONNX Runtime session options. The code also includes changes needed in `quicktest.py` to run on Phoenix/Hawk Point devices. 
+To view detailed logging information, update the session options in `quicktest.py` as shown below: + +```python +import os +import sys +import subprocess +import numpy as np +import onnxruntime as ort + +def get_npu_info(): + # Run pnputil as a subprocess to enumerate PCI devices + command = r'pnputil /enum-devices /bus PCI /deviceids ' + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + # Check for supported Hardware IDs + npu_type = '' + if 'PCI\\VEN_1022&DEV_1502&REV_00' in stdout.decode(): npu_type = 'PHX/HPT' + if 'PCI\\VEN_1022&DEV_17F0&REV_00' in stdout.decode(): npu_type = 'STX' + if 'PCI\\VEN_1022&DEV_17F0&REV_10' in stdout.decode(): npu_type = 'STX' + if 'PCI\\VEN_1022&DEV_17F0&REV_11' in stdout.decode(): npu_type = 'STX' + if 'PCI\\VEN_1022&DEV_17F0&REV_20' in stdout.decode(): npu_type = 'KRK' + return npu_type + +# Get APU type info: PHX/STX/HPT +npu_type = get_npu_info() +install_dir = os.environ['RYZEN_AI_INSTALLATION_PATH'] +model = os.path.join(install_dir, 'quicktest', 'test_model.onnx') +providers = ['VitisAIExecutionProvider'] +provider_options = [{}] # Default provider options for STX/KRK and newer devices + +if npu_type == 'PHX/HPT': + print("Setting environment for PHX/HPT") + xclbin_file = os.path.join(install_dir, 'voe-4.0-win_amd64', 'xclbins', 'phoenix', '4x4.xclbin') + provider_options = [{ + 'target': 'X1', + 'xlnx_enable_py3_round': 0, + 'xclbin': xclbin_file, + }] + +# Create session options +session_options = ort.SessionOptions() +session_options.log_severity_level = 0 # 0=Verbose, 1=Info, 2=Warning, 3=Error, 4=Fatal + +try: + session = ort.InferenceSession(model, + sess_options=session_options, + providers=providers, + provider_options=provider_options) +except Exception as e: + print(f"Failed to create an InferenceSession: {e}") + sys.exit(1) # Exit the program with a non-zero status to indicate an error + +def preprocess_random_image(): + 
image_array = np.random.rand(3, 32, 32).astype(np.float32) + return np.expand_dims(image_array, axis=0) + +# inference on random image data +input_data = preprocess_random_image() +try: + outputs = session.run(None, {'input': input_data}) +except Exception as e: + print(f"Failed to run the InferenceSession: {e}") + sys.exit(1) # Exit the program with a non-zero status to indicate an error +else: + print("Test finished") +``` + +- Run the test: + +``` +cd %RYZEN_AI_INSTALLATION_PATH%/quicktest +python quicktest.py 2>&1 | findstr /i "VerifyEachNodeIsAssignedToAnEp | Test" +``` + +- On a successful run, you will see an output similar to the one shown below. This indicates that the model is running on the NPU and that the installation of the Ryzen AI Software was successful: + +``` +[V:onnxruntime:, session_state.cc:1296 onnxruntime::VerifyEachNodeIsAssignedToAnEp] Node placements +[V:onnxruntime:, session_state.cc:1299 onnxruntime::VerifyEachNodeIsAssignedToAnEp] All nodes placed on [VitisAIExecutionProvider]. Number of nodes: 3 +Test Finished +``` + + +- The full path to the Ryzen AI Software installation folder is stored in the `RYZEN_AI_INSTALLATION_PATH` environment variable. +- For Phoenix/Hawk Point hardware, set the `target` to `X1` in the provider options. + + +
+ + +Ryzen AI for Linux supports running AI models on the AMD Neural Processing Unit (NPU). +The current release supports STX and KRK platforms. + +With this release, users can now compile and run AI models using the following formats: + +- CNN models in INT8 +- CNN models in BF16 +- NLP models (e.g., BERT, encoder-based) in BF16 +- LLMs (NPU-only flow) + +## Prerequisites + +| Dependencies | Version Requirement | +| --------------------- | ------------------- | +| Ubuntu Distribution | Ubuntu 24.04 LTS | +| Kernel Version | >= 6.10 | +| RAM | 64GB (Recommended) | +| Python | 3.12.x | + +Use the commands below to install Python 3.12.x along with certain dependencies: + +```bash +sudo apt update +sudo apt install python3.12 +sudo apt install python3.12-venv +sudo apt install libboost-filesystem1.74.0 +``` + +After installing the required Ubuntu distribution and Python version, proceed with the NPU driver installation. + +## Install NPU Drivers + +- Download the NPU driver package from the `Downloads` section of [Ryzen AI Software Drivers](https://account.amd.com/en/forms/downloads/xef.html?filename=RAI_1.7.1_Linux_NPU_XRT.zip). 
+ +- The RyzenAI Linux driver package contains: + - XRT Package + - xrt_202610.2.21.75_24.04-amd64-base.deb + - xrt_202610.2.21.75_24.04-amd64-base-dev.deb + - xrt_202610.2.21.75_24.04-amd64-npu.deb + - NPU driver package + - xrt_plugin.2.21.260102.53.release_24.04-amd64-amdxdna.deb + +- Install the NPU driver package on your machine: + +```bash +sudo apt install --fix-broken -y ./xrt_202610.2.21.75_24.04-amd64-base.deb +sudo apt install --fix-broken -y ./xrt_202610.2.21.75_24.04-amd64-base-dev.deb +sudo apt install --fix-broken -y ./xrt_202610.2.21.75_24.04-amd64-npu.deb +sudo apt install --fix-broken -y ./xrt_plugin.2.21.260102.53.release_24.04-amd64-amdxdna.deb +``` + +- Set the essential environment variables: + +```bash +export LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH +source /opt/xilinx/xrt/setup.sh +``` + +- Verify your driver installation: + +```bash +xrt-smi examine + +Device(s) Present +|BDF |Name | +|----------------|-----------| +|[0000:c5:00.1] |NPU Strix | + +# NPU name might differ based on your machine +``` + +## Install Ryzen AI Software + +- Download the RyzenAI for Linux package `ryzen_ai-1.7.1.tgz` from the `Downloads` section of [Ryzen AI Software Installer](https://account.amd.com/en/forms/downloads/xef.html?filename=ryzen_ai-1.7.1.tgz). +- Navigate to the download location and follow the steps below: + +```bash +mkdir ryzen_ai-1.7.1 +cp ryzen_ai-1.7.1.tgz ryzen_ai-1.7.1 + +cd ryzen_ai-1.7.1 +tar -xvzf ryzen_ai-1.7.1.tgz +``` + +- Install the RyzenAI package at your desired target path: + +```bash +./install_ryzen_ai.sh -a yes -p /venv +source /venv/bin/activate +``` + +- This installs RyzenAI and activates the virtual environment at your target location: + +```bash +# Validate your installation path +echo $RYZEN_AI_INSTALLATION_PATH +``` + +## Test the Installation + +The RyzenAI software package contains a test script that verifies that the NPU drivers are correctly installed. 
+ +- Navigate to your targeted Virtual Environment created in the previous step +- You will observe a subfolder named "quicktest": + +```bash +cd /venv/quicktest +python quicktest.py +``` + +- The `quicktest.py` script picks up a simple CNN model, compiles it and runs on AMD's Neural Processing Unit (NPU). +- On successful run, you can observe output as shown below: + +```bash +Setting environment for STX/KRK + +Test Finished +``` + +## Examples, Demos, Tutorials + +- The docs mirror various demos and examples for model compilation and deployment on NPUs. A few to start with: + - [ResNet on Ryzen AI (BF16)](/vision/getting-started-resnet-bf16) + - [ResNet on Ryzen AI (INT8)](/vision/getstartex) + - [YOLOv8m for Object Detection](/vision/yolov8m) +- Browse all mirrored examples under [Vision](/vision/index), [LLMs](/llms/index), and [Audio](/audio/index). + +### Note + +Before running the above examples: +- RyzenAI creates its own Python Virtual Environment to run the examples. You can skip conda environment instruction as they are Windows specific only +- Ensure to activate Linux based Python Virtual Environment + +```bash +source /venv/bin/activate +``` + +### Get NPU Info for your Machine + +The [ResNet on Ryzen AI (INT8)](/vision/getstartex) example contains a `get_npu_info` function to detect the correct NPU type on your machine. That lookup is for Windows; the Linux equivalent is shown below: + +```python +import subprocess + +def get_npu_info(): + # Run below command as subprocess to enumerate PCI devices + command = r'lspci -nn' + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + + # Check for supported Hardware IDs + npu_type = '' + if '1022:17f0' in stdout.decode(): npu_type = 'STX/KRK' + return npu_type +``` + +## Running LLM + +Follow this page to run LLM models on Linux: [LLM on Linux](/llms/llm_linux) + + +
diff --git a/Transformer-examples/DistilBERT_text_classification_bf16/README.md b/docs/llms/distilbert-example.mdx similarity index 86% rename from Transformer-examples/DistilBERT_text_classification_bf16/README.md rename to docs/llms/distilbert-example.mdx index f5f7cde8..74559ebe 100644 --- a/Transformer-examples/DistilBERT_text_classification_bf16/README.md +++ b/docs/llms/distilbert-example.mdx @@ -1,9 +1,13 @@ - - - - -

Ryzen™ AI Text Classification

-
+--- +title: "DistilBERT Text Classification" +description: "DistilBERT Text Classification on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / Transformer-examples/DistilBERT_text_classification_bf16](https://github.com/amd/RyzenAI-SW/blob/main/Transformer-examples/DistilBERT_text_classification_bf16). Code is maintained in the repository; this page mirrors its README. + + # Running DistilBERT on Ryzen AI @@ -15,7 +19,7 @@ For more details, refer to the Hugging Face Model Card: [distilbert-base-uncased Clone and Activate the conda environment created by the RyzenAI installer -```bash +```powershell conda create --name bert --clone ryzen-ai- conda activate bert cd \Transformer-examples\DistilBERT_text_classification_bf16 diff --git a/docs/llms/high_level_python.mdx b/docs/llms/high_level_python.mdx new file mode 100644 index 00000000..7449f827 --- /dev/null +++ b/docs/llms/high_level_python.mdx @@ -0,0 +1,61 @@ +--- +title: "High-Level Python SDK" +description: "Use the Lemonade SDK to experiment with LLMs and integrate them into Python applications on Ryzen AI." +--- + +{/* owner: jeremyfowers */} + +A Python environment offers flexibility for experimenting with LLMs, profiling them, and integrating them into Python applications. We use the [Lemonade SDK](https://github.com/lemonade-sdk/lemonade) to get up and running quickly. + +To get started, follow these instructions. + +## System-level pre-requisites + +You only need to do this once per computer: + +1. Make sure your system has the recommended Ryzen AI driver installed as described in [Install NPU Drivers](/installation#install-driver). +2. Download and install [Miniconda for Windows](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe) or [Miniforge for Windows](https://github.com/conda-forge/miniforge/releases/download/25.3.0-1/Miniforge3-25.3.0-1-Windows-x86_64.exe). +3. Launch a terminal and call `conda init`. 
+ +## Environment Setup + +To create and set up an environment, run these commands in your terminal: + +```powershell +conda create -n ryzenai-llm python=3.12 +conda activate ryzenai-llm +pip install lemonade-sdk[dev,oga-ryzenai] --extra-index-url=https://pypi.amd.com/simple +``` + +## Validation Tools + +Now that you have completed installation, you can try prompting an LLM like this (where `PROMPT` is any prompt you like). + +Run this command in a terminal that has your environment activated: + +```powershell +lemonade -i amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid oga-load --device hybrid --dtype int4 llm-prompt --max-new-tokens 64 -p PROMPT +``` + +For an end-to-end example demonstrating the Validation Tools, visit the [Lemonade Tools Tutorial](https://github.com/lemonade-sdk/lemonade/tree/main/examples). + +## Python API + +You can also run this code to try out the high-level Lemonade API in a Python script: + +```python +from lemonade.api import from_pretrained + +model, tokenizer = from_pretrained( + "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid", recipe="oga-hybrid" +) + +input_ids = tokenizer("This is my prompt", return_tensors="pt").input_ids +response = model.generate(input_ids, max_new_tokens=30) + +print(tokenizer.decode(response[0])) +``` + +## Next Steps + +From here, you can check out the Jupyter Notebook that provides an end-to-end validation of OGA hybrid and NPU-only execution. To run the notebook, visit the [Lemonade Tools Tutorial](https://github.com/lemonade-sdk/lemonade/tree/main/examples). diff --git a/docs/llms/hybrid_oga.mdx b/docs/llms/hybrid_oga.mdx new file mode 100644 index 00000000..e5adfc6f --- /dev/null +++ b/docs/llms/hybrid_oga.mdx @@ -0,0 +1,345 @@ +--- +title: "OnnxRuntime GenAI (OGA) Flow" +description: "Deploy LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API." 
+--- + +{/* owner: uday610 */} + +Ryzen AI Software supports deploying LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API. The OGA API is the lowest-level API available for building LLM applications on a Ryzen AI PC. It supports the following execution modes: + +- Hybrid execution mode: This mode uses both the NPU and iGPU to achieve the best TTFT and TPS during the prefill and decode phases. +- NPU-only execution mode: This mode uses the NPU exclusively for both the prefill and decode phases. Two types of NPU models are available: + + - **Token Fusion models**: Support long context up to 16K tokens with no additional configuration required. + - **Full Fusion models**: Optimized for best performance, supporting up to 4096 total tokens (input + output). + +## Supported Configurations + +The Ryzen AI OGA flow supports Strix and Krackan Point processors. Phoenix (PHX) and Hawk Point (HPT) processors are not supported. + +## Requirements + +- Install the NPU drivers and the Ryzen AI Software. See [Installation Instructions](/installation) for more details. +- Install the GPU device driver: ensure that the GPU device driver from [https://www.amd.com/en/support](https://www.amd.com/en/support) is installed. +- Install Git for Windows (needed to download models from HF): [https://git-scm.com/downloads](https://git-scm.com/downloads) + +
+ +## Pre-optimized Models + +AMD provides a set of pre-optimized LLMs ready to be deployed with Ryzen AI Software and the supporting runtime for hybrid and/or NPU-only execution. These include popular architectures such as Llama-2, Llama-3, Mistral, DeepSeek Distill models, Qwen-2, Qwen-2.5, Qwen-3, Gemma-2, Gemma-3, GPT-OSS, Phi-3, Phi-3.5, and Phi-4. + +Hugging Face collection of hybrid models: [https://huggingface.co/collections/amd/ryzen-ai-171-hybrid](https://huggingface.co/collections/amd/ryzen-ai-171-hybrid) + +Hugging Face collection of NPU Token Fusion models: [https://huggingface.co/collections/amd/ryzen-ai-171-npu-16k](https://huggingface.co/collections/amd/ryzen-ai-171-npu-16k) + +Hugging Face collection of NPU Full Fusion models: [https://huggingface.co/collections/amd/ryzen-ai-171-npu-4k](https://huggingface.co/collections/amd/ryzen-ai-171-npu-4k) + +Hugging Face collection of NPU Liquid Foundation Models: [https://huggingface.co/collections/amd/ryzen-ai-171-npu-lfm2-models](https://huggingface.co/collections/amd/ryzen-ai-171-npu-lfm2-models) + + +These Liquid Foundation Models are supported through ONNX Runtime. The OGA-based model instructions on this page do not apply to them. For run instructions, refer to the individual model cards in the collection. + + +### NPU Models: Token Fusion vs Full Fusion + +AMD provides two types of NPU models: + +- **Token Fusion models**: These models support long context up to 16K tokens. They are pre-built and uploaded to Hugging Face — no additional configuration is required to use long context. Simply download and run the model. +- **Full Fusion models**: These models are optimized for best inference performance but do not support long context. The total token count (input + output) must not exceed 4096. + +Choose the model type based on your use case: Token Fusion for long context workloads, or Full Fusion for maximum throughput on shorter sequences. + +Each OGA model folder contains a `genai_config.json` file. 
This file contains various configuration settings for the model. The `session_options` section is where information about specific runtime dependencies is specified. + +## Changes Compared to Previous Release + +- The OGA version is updated to v0.11.2 (Ryzen AI 1.7) from v0.9.2.2 (Ryzen AI 1.6.1). +- For the 1.7 release, a new set of hybrid and NPU models is published. Models from earlier releases are not compatible with this version. If you are using Ryzen AI 1.7, please download the updated models. +- Two types of NPU models are now available: **Token Fusion** models (long context up to 16K tokens) and **Full Fusion** models (best performance, up to 4096 tokens). +- Context length up to 4K tokens (combined input and output) is supported for Full Fusion NPU models. Extended context length up to 16K tokens is supported for Token Fusion NPU models and Hybrid models. + +## Compatible OGA APIs + +Pre-optimized hybrid or NPU LLMs can be executed using the official OGA C++ and Python APIs. The current release is compatible with OGA version 0.11.2. +For detailed documentation and examples, refer to the official OGA repository: [https://github.com/microsoft/onnxruntime-genai/tree/rel-0.11.2](https://github.com/microsoft/onnxruntime-genai/tree/rel-0.11.2) + +## LLMs Test Programs + +The Ryzen AI installation includes test programs (in C++ and Python) that can be used to run LLMs and understand how to integrate them in your application. + +The steps for deploying the pre-optimized models using the sample programs are described in the following sections. + +### Steps to run C++ program and sample python script. + +1. (Optional) Enable Performance Mode + +To run LLMs in best performance mode, follow these steps: + +- Go to `Windows` → `Settings` → `System` → `Power`, and set the power mode to **Best Performance**. +- Open a terminal and run: + + ```bat + cd C:\Windows\System32\AMD + xrt-smi configure --pmode performance + ``` + +2. 
Activate the Ryzen AI Conda Environment and install `torch` library. + +Run the following commands: + +```powershell +conda activate ryzen-ai- +pip install torch==2.7.1 +``` + +This step is required for running the python script. + + +For the C++ program, if you choose not to activate the Conda environment, open a Windows Command Prompt and manually set the environment variable before continuing: + +`set RYZEN_AI_INSTALLATION_PATH=C:\Program Files\RyzenAI\` + + +### C++ Program + +Use the `model_benchmark.exe` executable to test LLMs and identify DLL dependencies for C++ applications. + +1. Set Up a working directory and copy required Files + +```bat +mkdir llm_run +cd llm_run + +:: Copy the sample C++ executable +xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\model_benchmark.exe" . + +:: Copy the sample prompt file +xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\amd_genai_prompt.txt" . + +:: Copy required DLLs +xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\deployment\." . +``` + +2. Download model from Hugging Face + +```powershell +:: Install Git LFS if you haven't already: https://git-lfs.com +git lfs install + +:: Clone the model repository +git clone https://huggingface.co/amd/Llama-2-7b-chat-hf-onnx-ryzenai-hybrid +``` + +3. Run `model_benchmark.exe` + +```powershell +.\model_benchmark.exe -i -f -l + +:: Example: +.\model_benchmark.exe -i Llama-2-7b-chat-hf-onnx-ryzenai-hybrid -f amd_genai_prompt.txt -l "1024" +``` + +### Long Context Support + +Ryzen AI supports long context (beyond 4096 tokens) for **Hybrid models** and **Token Fusion NPU models**. + +#### Token Fusion NPU Models + +Token Fusion NPU models are pre-built with long context support up to 16K tokens. No additional configuration is required — simply download the model from Hugging Face and run it. 
+ +```powershell +:: Example: Clone a Token Fusion NPU model +git clone https://huggingface.co/amd/Phi-3.5-mini-instruct-onnx-ryzenai-npu + +:: Run with long context +.\model_benchmark.exe -i -f amd_genai_prompt_long.txt -l "16000" +``` + +#### Hybrid Models + +If the total number of tokens exceeds 4096 for a hybrid model, follow the steps below. + +**Steps to run long context:** + +1. Make the following changes in `genai_config.json` file. + + - Add `"hybrid_opt_chunk_context": "1"` under `model.decoder.session_options.provider_options.RyzenAI`. + + ```json + { + "model": { + "bos_token_id": 1, + "context_length": 16384, + "decoder": { + "session_options": { + "log_id": "onnxruntime-genai", + "provider_options": [ + { + "RyzenAI": { + "external_data_file": "model_jit.pb.bin", + "hybrid_opt_free_after_prefill": "1", + "hybrid_opt_max_seq_length": "4096", + "hybrid_opt_chunk_context": "1" + } + } + ] + }, + ``` + + - Add `"chunk_size":2048` under `search`. + + ```json + "search": { + "diversity_penalty": 0.0, + "do_sample": false, + "chunk_size": 2048, + ... + ``` + +2. Copy the `amd_genai_prompt_long.txt` into your working directory. + +```powershell +xcopy /Y "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\amd_genai_prompt_long.txt" . +``` + +3. Run the model using `model_benchmark.exe` using the `amd_genai_prompt_long.txt` prompt file. + +```powershell +.\model_benchmark.exe -i -f amd_genai_prompt_long.txt -l "16000" +``` + + +The sample test application `model_benchmark.exe` accepts `-l` for input token length and `-g` for output token length. + +- **Full Fusion NPU models** support up to 4096 tokens in total (input + output). By default, `-g` is set to 128. If the input length is close to 4096, you must adjust `-g` so the sum of input and output tokens does not exceed 4096. For example, `-l 4000 -g 96` is valid (4000 + 96 ≤ 4096), while `-l 4000 -g 128` will exceed the limit and result in an error. 
+- **Token Fusion NPU models** support long context up to 16K tokens (input + output) with no additional configuration. +- **Hybrid models**: The combined number of input and output tokens must not exceed the model's `context_length`. You can verify the `context_length` in the `genai_config.json` file. For example, if a model's `context_length` is 8,000, the total token count (input + output) must not exceed 8,000. + +The long context feature has been tested for Token Fusion NPU models and Hybrid models up to 16,000 tokens. + + +### Python Script + +1. Navigate to your working directory and download model. + +```powershell +:: Install Git LFS if you haven't already: https://git-lfs.com +git lfs install + +:: Clone the model repository +git clone https://huggingface.co/amd/Llama-2-7b-chat-hf-onnx-ryzenai-hybrid +``` + +2. Run sample python script + +``` +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\run_model.py" -m -l + +:: Example command +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\run_model.py" -m "Llama-2-7b-chat-hf-onnx-ryzenai-hybrid" -l 256 +``` + + +Some models may return non-printable characters in their output (for example, Qwen models), which can cause a crash while printing the output text. To avoid this, modify the provided script `%RYZEN_AI_INSTALLATION_PATH%\LLM\example\run_model.py` by adding a text sanitization function and updating the print statement as shown below. + +Add `sanitize_string` function: + +```python +def sanitize_string(input_string): + return input_string.encode("charmap", "ignore").decode("charmap") +``` + +Update line 80 to print sanitized output: + +```python +print("Output:", sanitize_string(output_text)) +``` + +This sanitization fix will be included in the `run_model.py` script in the next release. 
+ + +### Python Script (with Chat Template) + +For models that use chat templates, the `model_chat.py` script provides better output quality by automatically loading and applying the chat template from the model folder during inference. The script also supports single-prompt, multi-turn context cache testing, and interactive chat with timing output. + +The script is included in the Ryzen AI installation: + +```powershell +:: Single prompt with timing +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\model_chat.py" -m -pr amd_genai_prompt.txt --timings + +:: Long context support (increase context window to e.g. 16k) +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\model_chat.py" -m -pr amd_genai_prompt_long.txt -mpt 16000 + +:: Interactive chat +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\model_chat.py" -m +``` + +For the full list of options including multi-turn JSON testing, guided generation, and advanced flags, refer to the [RyzenAI-SW repository](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/oga_inference/README.md). + +It is highly recommended to use `model_chat.py` for the [GPT-OSS-20B NPU model](https://huggingface.co/amd/gpt-oss-20b-onnx-ryzenai-npu). + +## Vision Language Model (VLM) + +AMD provides a pre-optimized Gemma-3-4b-it multimodal model ready to be deployed with Ryzen AI Software. Support for this model is available starting with the Ryzen AI 1.7 release. + +Model: [Gemma-3-4b-it-mm-onnx-ryzenai-npu](https://huggingface.co/amd/Gemma-3-4b-it-mm-onnx-ryzenai-npu) + +VLM inference requires dedicated Python scripts, which are included in the Ryzen AI installation at `%RYZEN_AI_INSTALLATION_PATH%\LLM\example\vlm`. + +### Quick Inference + +Use `vlm_run.py` to quickly test a model and see output: + +```powershell +:: Basic inference +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\vlm\vlm_run.py" -m -i + +:: Custom prompt +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\vlm\vlm_run.py" -m -i -p "What's in this image?" 
+ +:: Resize image before running +python "%RYZEN_AI_INSTALLATION_PATH%\LLM\example\vlm\vlm_run.py" -m -i --image_size 1024 1024 +``` + +For benchmarking scripts (`vlm_benchmark.py`, `run_all_benchmarks.py`) and detailed options, refer to the README in the `vlm` directory or the [RyzenAI-SW repository](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/VLM/README.md). + +## Building C++ Applications + +A complete example including C++ source and build instructions is available in the [OGA C++ API](/llms/oga-cpp-api) tutorial. + +## Using Fine-Tuned Models + +It is also possible to run fine-tuned versions of the pre-optimized OGA models. + +To do this, the fine-tuned models must first be prepared for execution with the OGA flow. For instructions on how to do this, refer to the page about [Preparing OGA Models](/llms/oga_model_prepare). + +After a fine-tuned model has been prepared for execution, it can be deployed by following the steps described previously in this page. + +## Running LLM via pip install + +In addition to the full RyzenAI software stack, we also provide standalone wheel files for the users who prefer using their own environment. To prepare an environment for running the Hybrid and NPU-only LLM independently, perform the following steps: + +1. Create a new python environment and activate it. + +```powershell +conda create -n python=3.12 -y +conda activate +``` + +2. Install onnxruntime-genai wheel file. + +```powershell +pip install onnxruntime-genai-directml-ryzenai==0.11.2 --extra-index-url https://pypi.amd.com/ryzenai_llm/1.7.1/windows/simple/ +pip install model-generate==1.7.1 --extra-index-url https://pypi.amd.com/ryzenai_llm/1.7.1/windows/simple/ +``` + +3. Navigate to your working directory and download the desired Hybrid/NPU model + +```powershell +cd working_directory +git clone +``` + +4. Run the Hybrid or NPU model. 
diff --git a/docs/llms/index.mdx b/docs/llms/index.mdx new file mode 100644 index 00000000..7dcd6b5d --- /dev/null +++ b/docs/llms/index.mdx @@ -0,0 +1,161 @@ +--- +title: "LLMs Overview" +description: "Run large language models on AMD Ryzen AI: execution modes, hardware, development interfaces, and supported models." +--- + +{/* owner: bconsolvo */} + +Large Language Models (LLMs) can be deployed on AMD Ryzen™ AI PCs with NPU and GPU acceleration. NPU-only and Hybrid execution modes, which utilize both the NPU and integrated GPU (iGPU), are supported via ONNXRuntime GenAI (OGA). GPU-only acceleration is enabled through llama.cpp. + +## Supported LLMs + +Hybrid (NPU + GPU), NPU 4K, NPU 16K, and NPU-only LFM2 (ONNX) variants. The table is generated by `.github/scripts/fetch_models.py`; do not edit it by hand. + +{/* MODELS_TABLE_START - generated by .github/scripts/fetch_models.py; do not edit by hand */} + +| Model | Hybrid NPU / GPU | NPU 4K | NPU 16K | NPU (ONNX) | +| --- | --- | --- | --- | --- | +| [CodeLlama-7b-Instruct-hf](https://huggingface.co/amd/CodeLlama-7b-Instruct-hf_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/CodeLlama-7b-Instruct-hf_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/CodeLlama-7b-Instruct-hf_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/CodeLlama-7b-Instruct-hf_rai_1.7.1_npu_16K) | | +| [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/DeepSeek-R1-Distill-Llama-8B_rai_1.7.1_npu_16K) | | +| [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B_rai_1.7.1_npu_4K) | 
[16K](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-1.5B_rai_1.7.1_npu_16K) | | +| [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/DeepSeek-R1-Distill-Qwen-7B_rai_1.7.1_npu_16K) | | +| [gemma-3-4b-it](https://huggingface.co/amd/gemma-3-4b-it_rai_1.7.1_npu_4K) | | [4K](https://huggingface.co/amd/gemma-3-4b-it_rai_1.7.1_npu_4K) | | | +| [gpt-oss-20b](https://huggingface.co/amd/gpt-oss-20b_rai_1.7.1_npu_4K) | | [4K](https://huggingface.co/amd/gpt-oss-20b_rai_1.7.1_npu_4K) | | | +| [LFM2-1.2B-ONNX](https://huggingface.co/amd/LFM2-1.2B-ONNX_rai_1.7.1) | | | | [ONNX](https://huggingface.co/amd/LFM2-1.2B-ONNX_rai_1.7.1) | +| [LFM2-2.6B-ONNX](https://huggingface.co/amd/LFM2-2.6B-ONNX_rai_1.7.1) | | | | [ONNX](https://huggingface.co/amd/LFM2-2.6B-ONNX_rai_1.7.1) | +| [LFM2.5-1.2B-Thinking-ONNX](https://huggingface.co/amd/LFM2.5-1.2B-Thinking-ONNX_rai_1.7.1) | | | | [ONNX](https://huggingface.co/amd/LFM2.5-1.2B-Thinking-ONNX_rai_1.7.1) | +| [Llama-2-7b-chat-hf](https://huggingface.co/amd/Llama-2-7b-chat-hf_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-2-7b-chat-hf_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-2-7b-chat-hf_rai_1.7.1_npu_4K) | | | +| [Llama-2-7b-hf](https://huggingface.co/amd/Llama-2-7b-hf_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-2-7b-hf_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-2-7b-hf_rai_1.7.1_npu_4K) | | | +| [Llama-3.1-8B](https://huggingface.co/amd/Llama-3.1-8B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-3.1-8B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-3.1-8B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Llama-3.1-8B_rai_1.7.1_npu_16K) | | +| 
[Llama-3.2-1B](https://huggingface.co/amd/Llama-3.2-1B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-3.2-1B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-3.2-1B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Llama-3.2-1B_rai_1.7.1_npu_16K) | | +| [Llama-3.2-1B-Instruct](https://huggingface.co/amd/Llama-3.2-1B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-3.2-1B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-3.2-1B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Llama-3.2-1B-Instruct_rai_1.7.1_npu_16K) | | +| [Llama-3.2-3B](https://huggingface.co/amd/Llama-3.2-3B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-3.2-3B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-3.2-3B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Llama-3.2-3B_rai_1.7.1_npu_16K) | | +| [Llama-3.2-3B-Instruct](https://huggingface.co/amd/Llama-3.2-3B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Llama-3.2-3B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Llama-3.2-3B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Llama-3.2-3B-Instruct_rai_1.7.1_npu_16K) | | +| [Meta-Llama-3-8B](https://huggingface.co/amd/Meta-Llama-3-8B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Meta-Llama-3-8B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Meta-Llama-3-8B_rai_1.7.1_npu_4K) | | | +| [Meta-Llama-3.1-8B-Instruct](https://huggingface.co/amd/Meta-Llama-3.1-8B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Meta-Llama-3.1-8B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Meta-Llama-3.1-8B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Meta-Llama-3.1-8B-Instruct_rai_1.7.1_npu_16K) | | +| [Mistral-7B-Instruct-v0.1](https://huggingface.co/amd/Mistral-7B-Instruct-v0.1_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Mistral-7B-Instruct-v0.1_rai_1.7.1_hybrid) | 
[4K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.1_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.1_rai_1.7.1_npu_16K) | | +| [Mistral-7B-Instruct-v0.2](https://huggingface.co/amd/Mistral-7B-Instruct-v0.2_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Mistral-7B-Instruct-v0.2_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.2_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.2_rai_1.7.1_npu_16K) | | +| [Mistral-7B-Instruct-v0.3](https://huggingface.co/amd/Mistral-7B-Instruct-v0.3_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Mistral-7B-Instruct-v0.3_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.3_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Mistral-7B-Instruct-v0.3_rai_1.7.1_npu_16K) | | +| [Mistral-7B-v0.3](https://huggingface.co/amd/Mistral-7B-v0.3_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Mistral-7B-v0.3_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Mistral-7B-v0.3_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Mistral-7B-v0.3_rai_1.7.1_npu_16K) | | +| [Phi-3-mini-128k-instruct](https://huggingface.co/amd/Phi-3-mini-128k-instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Phi-3-mini-128k-instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Phi-3-mini-128k-instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Phi-3-mini-128k-instruct_rai_1.7.1_npu_16K) | | +| [Phi-3-mini-4k-instruct](https://huggingface.co/amd/Phi-3-mini-4k-instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Phi-3-mini-4k-instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Phi-3-mini-4k-instruct_rai_1.7.1_npu_4K) | | | +| [Phi-3.5-mini-instruct](https://huggingface.co/amd/Phi-3.5-mini-instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Phi-3.5-mini-instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Phi-3.5-mini-instruct_rai_1.7.1_npu_4K) | 
[16K](https://huggingface.co/amd/Phi-3.5-mini-instruct_rai_1.7.1_npu_16K) | | +| [Phi-4-mini-instruct](https://huggingface.co/amd/Phi-4-mini-instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Phi-4-mini-instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Phi-4-mini-instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Phi-4-mini-instruct_rai_1.7.1_npu_16K) | | +| [Phi-4-mini-reasoning](https://huggingface.co/amd/Phi-4-mini-reasoning_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Phi-4-mini-reasoning_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Phi-4-mini-reasoning_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Phi-4-mini-reasoning_rai_1.7.1_npu_16K) | | +| [Qwen-2.5_1.5B_Instruct](https://huggingface.co/amd/Qwen-2.5_1.5B_Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen-2.5_1.5B_Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen-2.5_1.5B_Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen-2.5_1.5B_Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2-1.5B](https://huggingface.co/amd/Qwen2-1.5B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2-1.5B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2-1.5B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2-1.5B_rai_1.7.1_npu_16K) | | +| [Qwen2-7B](https://huggingface.co/amd/Qwen2-7B_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2-7B_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2-7B_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2-7B_rai_1.7.1_npu_16K) | | +| [Qwen2.5-0.5B-Instruct](https://huggingface.co/amd/Qwen2.5-0.5B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2.5-0.5B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5-0.5B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5-0.5B-Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2.5-7B-Instruct](https://huggingface.co/amd/Qwen2.5-7B-Instruct_rai_1.7.1_hybrid) | 
[Hybrid](https://huggingface.co/amd/Qwen2.5-7B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5-7B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5-7B-Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2.5-Coder-0.5B-Instruct](https://huggingface.co/amd/Qwen2.5-Coder-0.5B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2.5-Coder-0.5B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5-Coder-0.5B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5-Coder-0.5B-Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/amd/Qwen2.5-Coder-1.5B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2.5-Coder-1.5B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5-Coder-1.5B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5-Coder-1.5B-Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2.5-Coder-7B-Instruct](https://huggingface.co/amd/Qwen2.5-Coder-7B-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2.5-Coder-7B-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5-Coder-7B-Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5-Coder-7B-Instruct_rai_1.7.1_npu_16K) | | +| [Qwen2.5_3B_Instruct](https://huggingface.co/amd/Qwen2.5_3B_Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/Qwen2.5_3B_Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/Qwen2.5_3B_Instruct_rai_1.7.1_npu_4K) | [16K](https://huggingface.co/amd/Qwen2.5_3B_Instruct_rai_1.7.1_npu_16K) | | +| [SmolLM-135M-Instruct](https://huggingface.co/amd/SmolLM-135M-Instruct_rai_1.7.1_hybrid) | [Hybrid](https://huggingface.co/amd/SmolLM-135M-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/SmolLM-135M-Instruct_rai_1.7.1_npu_4K) | | | +| [SmolLM2-135M-Instruct](https://huggingface.co/amd/SmolLM2-135M-Instruct_rai_1.7.1_hybrid) | 
[Hybrid](https://huggingface.co/amd/SmolLM2-135M-Instruct_rai_1.7.1_hybrid) | [4K](https://huggingface.co/amd/SmolLM2-135M-Instruct_rai_1.7.1_npu_4K) | | | + +{/* MODELS_TABLE_END */} + +## Tutorials + +{/* CARDS_START - generated by .github/scripts/gen_cards.py; do not edit by hand */} + + + Experimental recipe for compiling operators when a fine-tuned model introduces new operator shapes. + + + DistilBERT Text Classification on AMD Ryzen AI. + + + Fine-tuned LLM Deployment on AMD Ryzen AI. + + + Use the Lemonade SDK to experiment with LLMs and integrate them into Python applications on Ryzen AI. + + + OGA C++ API on AMD Ryzen AI. + + + OGA Inference (Chat) on AMD Ryzen AI. + + + Deploy LLMs on Ryzen AI PCs using the native ONNX Runtime Generate (OGA) C++ or Python API. + + + Quantize and post-process fine-tuned LLMs for deployment on Ryzen AI PCs in Hybrid or NPU-only mode. + + + RAG with OGA on AMD Ryzen AI. + + + Example of running a quantized OGA LLM on the AMD Ryzen AI NPU under Ubuntu Linux. + + + Use the Lemonade Server REST API to load LLMs on Ryzen AI hardware from any language. + + + Vision Language Models (VLM) on AMD Ryzen AI. 
+ + +{/* CARDS_END */} + +## Execution Modes + + + +**LLM Execution Mode Comparison** + +| Mode | Framework(s) | Compute Allocation | Primary Use Case | +| ------------ | ------------------------- | ------------------------------------- | ------------------------------------------------------------------------- | +| **NPU-Only** | OnnxRuntime GenAI (OGA) | Neural Processing Unit (NPU) exclusive| Maximum NPU utilization while preserving iGPU for parallel workloads | +| **Hybrid** | OnnxRuntime GenAI (OGA) | Dynamic NPU + iGPU partitioning | Interactive inference with optimal prefill/decode performance | +| **GPU** | llama.cpp | Dedicated GPU execution | High-throughput inference on discrete/integrated GPU | +| **CPU** | OGA or llama.cpp | Traditional CPU-based inference | Baseline compatibility across all processor generations | + +## Hardware Requirements + +**Supported Processor Configurations** + +| Processor Series | NPU-Only | Hybrid | GPU/CPU | +| ------------------------ | :------: | :----: | :-----: | +| Ryzen AI 300 (STX/KRK) | ✓ | ✓ | ✓ | +| Ryzen AI 7000/8000 | ✗ | ✗ | ✓ | + +## Development Interfaces + +The Ryzen AI LLM software stack is available through three development interfaces, each suited for specific use cases. All three interfaces are built on top of native OnnxRuntime GenAI (OGA) libraries or llama.cpp libraries, as shown in the Ryzen AI Software Stack diagram below. + +The high-level Python APIs, as well as the Server Interface, also leverage the Lemonade SDK, which is multi-vendor open-source software that provides everything necessary for quickly getting started with LLMs on OGA or llama.cpp. A key benefit of Lemonade is that software developed against their interfaces is portable to many other execution backends. + + + +**Ryzen AI Software Stack** + + + + + + + + + + + + + + + + + + + + + + +
Your Python ApplicationYour LLM StackYour Native Application
Lemonade Python API*Lemonade Server Interface*OGA C++ Headers OR llama.cpp C++ Headers
Custom AMD OnnxRuntime GenAI (OGA) OR llama.cpp*
AMD Ryzen AI Driver and Hardware
+ +\* indicates open-source software (OSS). + +- **High-Level Python SDK** — PyPI install in ~5 minutes; experiment, validate, and integrate with Python apps. See [High-Level Python SDK](/llms/high_level_python). +- **Server Interface (REST API)** — OpenAI-compatible server for any language; process isolation for the LLM backend. See [Server Interface](/llms/server_interface). +- **OGA APIs (C++ / Python)** — native libraries for full customizability in native applications. See [OnnxRuntime GenAI (OGA) Flow](/llms/hybrid_oga). diff --git a/LLM-examples/llm-sft-deploy/README.md b/docs/llms/llm-sft-deploy.mdx similarity index 60% rename from LLM-examples/llm-sft-deploy/README.md rename to docs/llms/llm-sft-deploy.mdx index cae40c27..bb75ac5b 100644 --- a/LLM-examples/llm-sft-deploy/README.md +++ b/docs/llms/llm-sft-deploy.mdx @@ -1,4 +1,12 @@ -# Accelerate Finetuned LLMs Locally on NPU + iGPU Ryzen AI Processor +--- +title: "Fine-tuned LLM Deployment" +description: "Fine-tuned LLM Deployment on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / LLM-examples/llm-sft-deploy](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/llm-sft-deploy). Code is maintained in the repository; this page mirrors its README. + This repo provides supplemental code to the AMD Blog [Accelerate Finetuned LLMs Locally on NPU + iGPU Ryzen AI processor](https://www.amd.com/en/developer/resources/technical-articles/accelerate-llms-locally-on-amd-ryzen-ai-npu-and-igpu.html). Code is provided for LoRA finetuning on MI300X and then running inference of finetuned model on Ryzen AI. @@ -6,36 +14,52 @@ This repo provides supplemental code to the AMD Blog [Accelerate Finetuned LLMs ## Getting Started 1. Install miniconda/anaconda and create a new conda environment for training/inference on GPUs -2. Install requirements.txt using ```pip install -r requirements.txt``` -3. Set Huggingface API Tokens by ```export={HUGGINGFACE_API_TOKEN}``` in terminal. 
Needed for accessing gated models and saving to Huggingface. +2. Install requirements.txt using `pip install -r requirements.txt` +3. Set Huggingface API Tokens by `export={HUGGINGFACE_API_TOKEN}` in terminal. Needed for accessing gated models and saving to Huggingface. ## Finetune -We provide ``train.py`` to do LoRA finetuning. Training can be saved locally or directly to huggingface and wandB can be utilized to track training
+We provide ``train.py`` to do LoRA finetuning. Training results can be saved locally or pushed directly to Hugging Face, and wandB can be used to track training. + Set ``--hf_dir local`` to save locally and bypass huggingface and wandB setup. The training script LoRA finetunes Llama3.2 1B on the [Volve Alpaca Dataset](https://huggingface.co/datasets/bengsoon/volve_alpaca), an application for the oil & rigging industry. ### Finetuning Adapter (Save Locally) -```python train.py --lora --lora_qv --hf_dir local``` + +```powershell +python train.py --lora --lora_qv --hf_dir local +``` ### Finetuning Adapter (Save to HF) -```python train.py --lora --lora_qv --hf_dir ``` + +```powershell +python train.py --lora --lora_qv --hf_dir +``` ### Merging Adapter After finetuning the adapter, merge adapter with base LLM through the following: -```python train.py --merge_model --model_name meta-llama/Llama-3.2-1B --adapter_model_dir ``` + +```powershell +python train.py --merge_model --model_name meta-llama/Llama-3.2-1B --adapter_model_dir +``` ## LLM Inference of Finetuned models on GPU -Use: ``inference.py`` to run inference on GPU .
+Use ``inference.py`` to run inference on the GPU. + Set ``--inference_filename`` to a ".json" filename in which model predictions will be stored. #### Inference on Finetuned (merged) model -```python inference.py --fp --model_dir amd/volve-llama3.2-1b --inference_filename "volve-llama3_1B.json"``` + +```powershell +python inference.py --fp --model_dir amd/volve-llama3.2-1b --inference_filename "volve-llama3_1B.json" +``` #### Inference on Quark Quantized model (safetensors) -- Install Quark from wheel file [here](https://quark.docs.amd.com/latest/install.html#install-quark-quark-examples-from-download).
-- Inside the zip folder are example scripts. Use the following for AWQ quantization:
+- Install Quark from wheel file [here](https://quark.docs.amd.com/latest/install.html#install-quark-quark-examples-from-download). + +- Inside the zip folder are example scripts. Use the following for AWQ quantization: + ``` cd examples/torch/language_modeling/llm_ptq/ @@ -51,19 +75,23 @@ python quantize_quark.py \ --custom_mode awq ``` -- Run the following for inference:
-```python inference.py --quark_safetensors --quant_model_dir --inference_filename "quantized_model.json"``` +- Run the following for inference: + +```powershell +python inference.py --quark_safetensors --quant_model_dir --inference_filename "quantized_model.json" +``` # Deploy on Ryzen AI -### Quantize the full-precision, finetuned model using the quantization strategy menentioned [here](https://ryzenai.docs.amd.com/en/latest/oga_model_prepare.html#generate-quantized-model) +### Quantize the full-precision, finetuned model using the quantization strategy mentioned [here](/llms/oga_model_prepare) + +### Install RyzenAI and prerequisites according to instructions [here](/installation). -### Install RyzenAI and prerequisites accoring to instructions [here](https://ryzenai.docs.amd.com/en/latest/inst.html). +### Transform the quantized model to run on Hybrid approach within Ryzen AI, utilizing both the iGPU and NPU by running the following. See reference [here](/llms/oga_model_prepare). -### Transform the quantized model to run on Hybrid approach within Ryzen AI, utilizing both the iGPU and NPU by running the following. See reference [here](https://ryzenai.docs.amd.com/en/latest/oga_model_prepare.html#postprocessing). +### Run inference on RyzenAI with the following: -### Run inference on RyzenAI with the following:
-``python inference_oga.py --model_dir "" --inference_filename hybrid_ft_model.json`` +``python inference_oga.py --model_dir "<hybrid-model-path>" --inference_filename hybrid_ft_model.json`` Please check the blog post for comprehensive instructions on additional packages needed within the ryzen-ai conda environment. diff --git a/docs/llms/llm_linux.mdx b/docs/llms/llm_linux.mdx new file mode 100644 index 00000000..98c9d296 --- /dev/null +++ b/docs/llms/llm_linux.mdx @@ -0,0 +1,132 @@ +--- +title: "Running LLM on Linux" +description: "Example of running a quantized OGA LLM on the AMD Ryzen AI NPU under Ubuntu Linux." +--- + +{/* owner: lakshay048 */} + +This page showcases an example of running LLM on RyzenAI NPU. + +- Open a Linux terminal and create a new folder: + +```bash +mkdir run_llm +cd run_llm +``` + +- Choose any prequantized and postprocessed ready-to-run Model from Hugging Face collection of NPU models: + + - [Models with 4K Context length](https://huggingface.co/collections/amd/ryzen-ai-171-npu-4k) + - [Models with 16K Context length](https://huggingface.co/collections/amd/ryzen-ai-171-npu-16k) + +- For this flow, "Phi-3.5-mini-instruct_rai_1.7.1_npu_4K" is chosen for reference: + +``` +# Make sure git-lfs is installed (https://git-lfs.com) +sudo apt install git-lfs +git lfs install +git clone https://huggingface.co/amd/Phi-3.5-mini-instruct_rai_1.7.1_npu_4K +``` + +- Search for `RYZEN_AI_INSTALLATION_PATH`: + +```bash +# Activate the virtual environment created in Linux Installation step +source /venv/bin/activate + +echo $RYZEN_AI_INSTALLATION_PATH +``` + +- Collecting the necessary files to get in current working directory: + +```bash +- Deployment folder - This has necessary libraries to run LLM Model + # Navigate to /venv and copy the "deployment" folder + cp -r /venv/deployment . + +- Model Benchmark Script + # Navigate to /venv/LLM/examples/ and copy "model_benchmark" file. + cp /venv/LLM/examples/model_benchmark . 
+ +- Prompt file - Input to your LLM Model + # Navigate to /venv/LLM/examples/ and copy "amd_genai_prompt.txt" file. + cp /venv/LLM/examples/amd_genai_prompt.txt . +``` + +- Current working directory should have below files: + +``` +amd_genai_prompt.txt deployment model_benchmark Phi-3.5-mini-instruct_rai_1.7.1_npu_4K +``` + +- Create a new file for XRT Drivers named `xrt.ini`: + +```bash +- vi xrt.ini (Creates a new file) + +- Add below lines to the file and save it + [Debug] + num_heap_pages = 8 + +- Set XRT_INI_PATH to point to this file + export XRT_INI_PATH=$PWD/xrt.ini +``` + +- Lastly, set required library path: + +```bash +export LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=deployment/lib:$LD_LIBRARY_PATH +export RYZENAI_EP_PATH=$PWD/deployment/lib/libonnxruntime_providers_ryzenai.so +``` + +- We can now run our Model with command below: + +```bash +./model_benchmark -i Phi-3.5-mini-instruct_rai_1.7.1_npu_4K/ -l 128 -f amd_genai_prompt.txt + +# Enable "-v" flag for verbose output +``` + +## Expected output + +```bash +----------------------------- +Prompt Number of Tokens: 128 + +Batch size: 1, prompt tokens: 128, tokens to generate: 128 +Prompt processing (time to first token): + avg (us): 148056 + avg (tokens/s): 864.536 + p50 (us): 148143 + stddev (us): 375.335 + n: 5 * 128 token(s) +Token generation: + avg (us): 56874.3 + avg (tokens/s): 17.5826 + p50 (us): 56250.6 + stddev (us): 6743.11 + n: 635 * 1 token(s) +Token sampling: + avg (us): 27.273 + avg (tokens/s): 36666.3 + p50 (us): 27.21 + stddev (us): 0.202461 + n: 5 * 1 token(s) +E2E generation (entire generation loop): + avg (ms): 7371.29 + p50 (ms): 7378.4 + stddev (ms): 14.3836 + n: 5 +Peak working set size (bytes): 12168941568 +``` + +## Preparing OGA Model + +Install `model_generate` package in current virtual environment: + +```bash +pip install model-generate==1.7.1 --force-reinstall --no-deps --extra-index-url 
https://pypi.amd.com/ryzenai_llm/1.7.1/linux/simple/ +``` + +Currently, Linux supports only the NPU flow. Read more on Model Generation by visiting [Preparing OGA Models](/llms/oga_model_prepare). diff --git a/LLM-examples/oga_api/README.md b/docs/llms/oga-cpp-api.mdx similarity index 94% rename from LLM-examples/oga_api/README.md rename to docs/llms/oga-cpp-api.mdx index 81d139dc..0b796f65 100644 --- a/LLM-examples/oga_api/README.md +++ b/docs/llms/oga-cpp-api.mdx @@ -1,4 +1,12 @@ -# Ryzen AI LLM - Onnxruntime GenAI +--- +title: "OGA C++ API" +description: "OGA C++ API on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / LLM-examples/oga_api](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/oga_api). Code is maintained in the repository; this page mirrors its README. + Ryzen AI Software includes support for deploying LLMs on Ryzen AI PCs using the ONNX Runtime generate() API (OGA). @@ -12,7 +20,7 @@ AMD provides a set of pre-optimized LLMs ready to be deployed with Ryzen AI Soft ## Ryzen AI Installation -- The steps for installing Ryzen AI along with it's requirement can be found in the Official Ryzen AI Software documantion page here - https://ryzenai.docs.amd.com/en/latest/inst.html +- The steps for installing Ryzen AI along with its requirements can be found in the official Ryzen AI Software documentation page here - [Installation Instructions](/installation) ## Steps to compile and run LLM example. - Activate Ryzen AI environment: diff --git a/LLM-examples/oga_inference/README.md b/docs/llms/oga-inference.mdx similarity index 67% rename from LLM-examples/oga_inference/README.md rename to docs/llms/oga-inference.mdx index f6579915..e47a4955 100644 --- a/LLM-examples/oga_inference/README.md +++ b/docs/llms/oga-inference.mdx @@ -1,4 +1,12 @@ -# OGA Inference with Chat Template +--- +title: "OGA Inference (Chat)" +description: "OGA Inference (Chat) on AMD Ryzen AI."
+--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / LLM-examples/oga_inference](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/oga_inference). Code is maintained in the repository; this page mirrors its README. + Inference script with chat template support for ONNX Runtime GenAI models. @@ -8,15 +16,15 @@ Use this for models that require chat templates (e.g., GPT-OSS-20B) for better o Based on Microsoft OGA [model-chat.py](https://github.com/microsoft/onnxruntime-genai/blob/rel-0.11.2/examples/python/model-chat.py), modified for Ryzen AI. ## Prerequisites -- Ryzen AI Software installed (see [Installation Instructions](https://ryzenai.docs.amd.com/en/latest/inst.html)) +- Ryzen AI Software installed (see [Installation Instructions](/installation)) - Activate the conda environment created by the MSI installer: -```bash +```powershell conda activate ryzen-ai- ``` -- For more details on running LLMs with OGA, see [OnnxRuntime GenAI (OGA) Flow](https://ryzenai.docs.amd.com/en/latest/hybrid_oga.html). +- For more details on running LLMs with OGA, see [OnnxRuntime GenAI (OGA) Flow](/llms/hybrid_oga). ## Usage -```bash +```powershell python model_chat.py -m -pr -ipl -tm ``` @@ -30,6 +38,6 @@ python model_chat.py -m -pr -ipl -tm | `-v` | Verbose output | ## Example -```bash +```powershell python model_chat.py -m ./gpt-oss-20b-onnx-ryzenai-npu -pr prompt.txt -ipl 256 -tm -``` \ No newline at end of file +``` diff --git a/docs/llms/oga_model_prepare.mdx b/docs/llms/oga_model_prepare.mdx new file mode 100644 index 00000000..958707c3 --- /dev/null +++ b/docs/llms/oga_model_prepare.mdx @@ -0,0 +1,192 @@ +--- +title: "Preparing OGA Models" +description: "Quantize and post-process fine-tuned LLMs for deployment on Ryzen AI PCs in Hybrid or NPU-only mode." +--- + +{/* owner: uday610 */} + +This section describes the process for preparing LLMs for deployment on a Ryzen AI PC using the hybrid or NPU-only execution mode. 
Currently, the flow supports only fine-tuned versions of the models already supported (as listed on the [Hybrid OGA](/llms/hybrid_oga) page). For example, fine-tuned versions of Llama2 or Llama3 can be used. However, different model families with architectures not supported by the hybrid flow cannot be used. + +For fine-tuned models that introduce architectural changes requiring new operator shapes not available in the Ryzen AI runtime, refer to the [OGA Op Prepare](/llms/oga_op_prepare) page. + +Preparing an LLM for deployment on a Ryzen AI PC involves two steps: + +1. **Quantization**: The pretrained model is quantized to reduce memory footprint and better map to compute resources in the hardware accelerators. +2. **Postprocessing**: During postprocessing, the model is exported to OGA and then postprocessed for the NPU-only or Hybrid execution mode to obtain the final deployable model. + +## Quantization + +### Prerequisites + +Linux machine with AMD (e.g., AMD Instinct MI Series) or Nvidia GPUs. + +### Setup + +1. Create and activate Conda Environment + +``` +conda create --name python=3.12 +conda activate +``` + +2. If using AMD GPUs, update PyTorch to use ROCm + +``` +pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1 +python -c "import torch; print(torch.cuda.is_available())" # Must return `True` +``` + +3. Download [AMD Quark 0.11](https://download.amd.com/opendownload/Quark/amd_quark-0.11.zip) and unzip the archive. + +4. Install Quark: + +``` +cd +pip install amd_quark-+<>.whl +``` + +5. Install other dependencies + +``` +pip install datasets +pip install transformers==4.57.6 +pip install accelerate +pip install evaluate +pip install nltk +``` + +### Generate Quantized Model + +Use the following command to run quantization. On a GPU-equipped Linux machine, quantization can take about 30-60 minutes.
+ +``` +cd examples/torch/language_modeling/llm_ptq/ + +python quantize_quark.py \ + --no_trust_remote_code \ + --model_dir "meta-llama/Llama-2-7b-chat-hf" \ + --output_dir \ + --quant_scheme uint4_wo_128 \ + --num_calib_data 128 \ + --seq_len 512 \ + --quant_algo awq \ + --dataset pileval_for_awq_benchmark \ + --model_export hf_format \ + --data_type \ + --exclude_layers [] +``` + +- Use `--data_type bfloat16` for a bf16 pretrained model. For an fp32/fp16 pretrained model, use `--data_type float16`. +- Quark natively supports AWQ quantization for popular architectures. If AWQ is not supported by default, you must create an AWQ configuration file and pass it to the `quantize_quark.py` script using the `--quant_algo_config_file awq ` option. For details on creating an AWQ config file, see the Quark documentation AWQ example: [https://quark.docs.amd.com/latest/tutorials/torch/example_awq.html](https://quark.docs.amd.com/latest/tutorials/torch/example_awq.html) +- Not using the `--exclude_layers` parameter may result in model-specific defaults which may exclude certain layers like output layers. +- To specify a group size other than 128, such as 32, use `--quant_scheme uint4_wo_32` instead of `--quant_scheme uint4_wo_128`. Available group sizes are 32, 64, and 128 (e.g., `uint4_wo_32`, `uint4_wo_64`, `uint4_wo_128`). +- Quark supports quantizing layers with different group sizes; for example, use `--layer_quant_scheme lm_head uint4_wo_32` to quantize `lm_head` with a group size of 32. + +The quantized model is generated in the `` folder.
+ + +For the Phi-4 model, the following quantization recipe is recommended for better accuracy: + +- Use `--quant_algo gptq` +- Add `--layer_quant_scheme lm_head uint4_wo_32` + + + +Currently the following files are not copied into the quantized model folder and must be copied manually: + +- For Phi-4 models: `configuration_phi3.py` +- For ChatGLM-6b models: `tokenizer.json` + + +## Postprocessing + +Copy the quantized model to the Windows PC with Ryzen AI installed, and activate the Ryzen AI Conda environment. + +``` +conda activate ryzen-ai- +``` + +Install the `model-generate` package: + +``` +pip install model-generate==1.7.1 --force-reinstall --no-deps --extra-index-url https://pypi.amd.com/ryzenai_llm/1.7.1/windows/simple/ +``` + +### Hybrid Execution Mode + +Generate the final model for Hybrid execution mode (NPU prefill phase + GPU token phase): + +``` +model_generate --hybrid --input --output +``` + +### NPU Execution Mode + +Several NPU optimization levels are available depending on model support and performance requirements. + +**Full Fusion** (Best performance, recommended for [supported models](https://huggingface.co/collections/amd/ryzen-ai-171-npu-4k)): + +``` +model_generate --npu --full_fusion --input --output +``` + +**Token Fusion** (better tokens-per-second): + +``` +model_generate --npu --token_fusion --input --output +``` + + +Token Fusion currently supports generating models with a 4K context length only. For longer context lengths (e.g., 16K), use the pre-built models available on [Hugging Face](https://huggingface.co/collections/amd/ryzen-ai-171-npu-16k). 
+ + +**Basic** (safe default for new or untested models): + +``` +model_generate --npu --basic --input --output +``` + +**Eager**: + +``` +model_generate --npu --eager --input --output +``` + +### OGA Export Only + +To export the quantized model to OGA format without performing any NPU or Hybrid postprocessing: + +``` +model_generate --oga_only --input --output +``` + +### Memory Optimization + +Add `--mem_optimize` to any recipe to optimize for 16 GB laptop configurations: + +``` +model_generate --hybrid --mem_optimize --input --output +``` + +``` +model_generate --npu --token_fusion --mem_optimize --input --output +``` + + +During the `model_generate` step, the quantized model is first converted to an OGA model using ONNX Runtime GenAI Model Builder (version 0.11.2). It is possible to use a standalone environment for exporting an OGA model, refer to the official [ONNX Runtime GenAI Model Builder documentation](https://github.com/microsoft/onnxruntime-genai/tree/main/src/python/py/models). Once you have an exported OGA model, you can pass it directly to the `model_generate` command with `--input`, which will skip the export step and perform only the post-processing. + + +Here are simple commands to export an OGA model from a quantized model using a standalone environment: + +``` +conda create --name oga_builder_env python=3.12 +conda activate oga_builder_env + + +pip install onnxruntime-genai==0.11.2 +# pip install other necessary packages +pip install .... + + +python3 -m onnxruntime_genai.models.builder -m -o -p int4 -e dml +``` diff --git a/docs/llms/oga_op_prepare.mdx b/docs/llms/oga_op_prepare.mdx new file mode 100644 index 00000000..bf3e8cfa --- /dev/null +++ b/docs/llms/oga_op_prepare.mdx @@ -0,0 +1,79 @@ +--- +title: "Compiling Operators for OGA Models" +description: "Experimental recipe for compiling operators when a fine-tuned model introduces new operator shapes." 
+--- + +{/* owner: uday610 */} + +Ryzen AI currently supports many popular LLMs in both hybrid and NPU-only flows. For these models, the required operators are already compiled and included in the Ryzen AI runtime. Such models can be run directly on Ryzen AI without any additional preparation. + +When users fine-tune these models, only the weights change and no new operator shapes are introduced. In that case, follow the steps from [Preparing OGA Models](/llms/oga_model_prepare) to prepare the model, which will run on the Ryzen AI runtime using the precompiled operators. + +However, in cases where architectural changes introduce new operator shapes not available in the Ryzen AI runtime, additional operator compilation is required. This page provides a recipe to compile operators that are not already present in the runtime. **This flow is experimental, and results may vary depending on the extent of the architectural changes**. + + +All OGA models are currently based on the [ONNX Runtime GenAI Model Builder](https://github.com/microsoft/onnxruntime-genai/tree/main/src/python/py/models#current-support) architecture. Therefore, this operator compilation flow requires that the models be supported by ONNX Runtime GenAI. + + +## Operator Compilation Flow (Hybrid Execution) + +Currently, this flow is primarily supported for hybrid execution. + +1. Ensure the model is quantized following the [quantization recipe](/llms/oga_model_prepare#quantization). + +2. Build the OGA DML model using the ONNX Runtime GenAI Model Builder included in the Ryzen AI software environment: + +``` +conda activate ryzen-ai-<version> +python -m onnxruntime_genai.models.builder \ + -i <quantized_model_dir> -o <oga_dml_model_dir> \ + -p int4 -e dml +``` + +3. Compile the operators extracted from the OGA DML model: + +``` +onnx_utils vaiml --model-dir <oga_dml_model_dir> --plugin_name <plugin_name> --compile --ops_type bfp16 +``` + +This generates a compiled operator package at: `transaction-plugin\<plugin_name>.zip`. + +4. 
Generate the hybrid model: + +Create a folder named `dd_plugins` in the current working directory and place `<plugin_name>.zip` inside it. By default, the flow looks for the operator zip in `dd_plugins`. To use a different location, see "Additional Details" below. + +Generate the hybrid model: + +``` +model_generate --hybrid +``` + +5. Run the hybrid model + +Follow the [official guide](/llms/hybrid_oga) to copy `model_benchmark.exe` and required DLL dependencies to the current working directory. Then run: + +``` +.\model_benchmark.exe -i <hybrid_model_folder> -f amd_genai_prompt.txt -l "128, 256, 512, 1024, 2048" --verbose +``` + +**Additional Details** + +1. Path to operator zip file + +If `<plugin_name>.zip` is not placed in the `dd_plugins` folder, set the `DD_PLUGINS_ROOT` environment variable to point to its location: + +``` +set DD_PLUGINS_ROOT=C:\<path to <plugin_name>.zip> +``` + +2. Enabling tracing + +To enable tracing for debug purposes, set the `DD_PLUGINS_TRACING` environment variable before generating the hybrid model: + +``` +# Optional: enable tracing +set DD_PLUGINS_TRACING=1 + +# Generate the model +model_generate --hybrid +``` diff --git a/docs/llms/rag-oga.mdx b/docs/llms/rag-oga.mdx new file mode 100644 index 00000000..35434f12 --- /dev/null +++ b/docs/llms/rag-oga.mdx @@ -0,0 +1,28 @@ +--- +title: "RAG with OGA" +description: "RAG with OGA on AMD Ryzen AI." +--- + +{/* owner: dwithchenna */} + +Source: [RyzenAI-SW / LLM-examples/RAG-OGA](https://github.com/amd/RyzenAI-SW/blob/main/LLM-examples/RAG-OGA). Code is maintained in the repository; this page mirrors its README. + + + +## Introduction +Welcome to this repository, a showcase of an **ONNX Runtime GenAI (OGA)‑based RAG LLM sample application** running on a **Ryzen AI processor**. +This repo provides supplemental code to the AMD Blog [RAG with Hybrid LLM on AMD Ryzen AI Processor](https://www.amd.com/en/developer/resources/technical-articles/2025/rag-with-hybrid-llm-on-amd-ryzen-ai-processors.html). 
+ +## What You’ll Find Here + +- **Retrieval-Augmented Generation (RAG) pipeline** powered by: + - A **hybrid LLM** that enables disaggregated inference in which the compute-heavy prefill phase runs on the NPU, while the decode phase executes on the GPU. + - An **embedding model** compiled with **Vitis AI Execution Provider** +- Built using the widely adopted **LangChain** orchestration framework + +## Quick Setup + +Follow these simple steps to get started: + +1. Execute the setup steps outlined below to provision your environment. +2. After setup, this README will guide you through how to run the sample application. diff --git a/docs/llms/server_interface.mdx b/docs/llms/server_interface.mdx new file mode 100644 index 00000000..e434e734 --- /dev/null +++ b/docs/llms/server_interface.mdx @@ -0,0 +1,72 @@ +--- +title: "Server Interface (REST API)" +description: "Use the Lemonade Server REST API to load LLMs on Ryzen AI hardware from any language." +--- + +{/* owner: jeremyfowers */} + +The Lemonade SDK offers a server interface that allows your application to load an LLM on Ryzen AI hardware in a process, and then communicate with this process using standard `REST` APIs. This allows applications written in any language (C#, JavaScript, Python, C++, etc.) to easily integrate with Ryzen AI LLMs. + +Server interfaces are used across the LLM ecosystem because they allow for no-code plug-and-play between the higher level of the application stack (GUIs, agents, RAG, etc.) with the LLM and hardware that have been abstracted by the server. For more information, see the [Understanding local LLM Servers Guide](https://lemonade-server.ai/docs/guide/concepts/). + +For example, open source projects such as Open WebUI have out-of-the-box support for connecting to a variety of server interfaces, which in turn allows users to quickly start working with LLMs in a GUI. 
+ +## Server Setup + +Lemonade Server can be installed via the Lemonade Server Installer executable by following these steps: + +1. Make sure your system has the recommended Ryzen AI driver installed as described in [Install NPU Drivers](/installation#install-driver). +2. Download and install `Lemonade_Server_Installer.exe` from the [latest Lemonade release](https://github.com/lemonade-sdk/lemonade/releases). +3. Launch the server by double-clicking the `lemonade_server` shortcut added to your desktop. + +For a visual walkthrough of this process, watch our Lemonade Introductory Video: + +
+