From f9628ca95508cf6d63f3d48f9939d7812369922d Mon Sep 17 00:00:00 2001
From: zeke <40004347+KAJdev@users.noreply.github.com>
Date: Wed, 27 May 2026 14:54:43 -0700
Subject: [PATCH] fix: replace flash run with flash dev, remove standalone
 python references

SLS-132
---
 .github/PULL_REQUEST_TEMPLATE.md              |  2 +-
 01_getting_started/01_hello_world/README.md   | 11 ++---
 .../01_hello_world/gpu_worker.py              |  3 +-
 01_getting_started/02_cpu_worker/README.md    | 11 ++---
 .../02_cpu_worker/cpu_worker.py               |  3 +-
 01_getting_started/03_mixed_workers/README.md |  4 +-
 .../03_mixed_workers/cpu_worker.py            |  3 +-
 .../03_mixed_workers/gpu_worker.py            |  3 +-
 .../03_mixed_workers/pipeline.py              |  2 +-
 01_getting_started/04_dependencies/README.md  |  9 ++--
 .../04_dependencies/cpu_worker.py             |  3 +-
 .../04_dependencies/gpu_worker.py             |  3 +-
 .../04_dependencies/mixed_worker.py           |  3 +-
 02_ml_inference/01_text_to_speech/README.md   |  6 +--
 .../01_text_to_speech/gpu_worker.py           |  3 +-
 .../05_load_balancer/README.md                | 12 ++---
 .../05_load_balancer/cpu_lb.py                |  3 +-
 .../05_load_balancer/gpu_lb.py                |  3 +-
 .../01_autoscaling/README.md                  |  2 +-
 .../01_autoscaling/cpu_worker.py              |  3 +-
 .../01_autoscaling/gpu_worker.py              |  3 +-
 .../02_datacenters/README.md                  |  2 +-
 .../02_datacenters/cpu_worker.py              |  2 +-
 .../02_datacenters/gpu_worker.py              |  2 +-
 .../01_network_volumes/README.md              |  2 +-
 .../01_network_volumes/cpu_worker.py          |  3 +-
 .../01_network_volumes/gpu_worker.py          |  3 +-
 06_real_world/README.md                       |  2 +-
 CLAUDE.md                                     | 17 ++++---
 CLI-REFERENCE.md                              | 20 ++++----
 CONTRIBUTING.md                               | 14 +++---
 DEVELOPMENT.md                                | 22 ++++-----
 README.md                                     |  6 +--
 docs/cli/commands.md                          | 46 +++++++++----------
 docs/cli/getting-started.md                   | 14 +++---
 docs/cli/troubleshooting.md                   | 40 ++++++++--------
 docs/cli/workflows.md                         | 28 +++++------
 37 files changed, 144 insertions(+), 174 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index d305842..ab7cdd3 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -25,7 +25,7 @@ If adding a new example, which category does it belong to?
 ## Checklist
 
 ### Functionality
-- [ ] Example runs successfully with `flash run`
+- [ ] Example runs successfully with `flash dev`
 - [ ] All endpoints return correct responses
 - [ ] Tested locally
 - [ ] Error handling implemented
diff --git a/01_getting_started/01_hello_world/README.md b/01_getting_started/01_hello_world/README.md
index beb291c..494dfb9 100644
--- a/01_getting_started/01_hello_world/README.md
+++ b/01_getting_started/01_hello_world/README.md
@@ -21,14 +21,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`.
 ### 3. Run Locally
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 Server starts at **http://localhost:8888**
 
 ### 4. Test the API
 
-Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions.
+Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions.
 
 ```bash
 curl -X POST http://localhost:8888/gpu_worker/runsync \
@@ -133,14 +133,9 @@ The worker uses PyTorch to detect and report GPU information:
 
 ## Development
 
-### Test Worker Locally
-```bash
-python gpu_worker.py
-```
-
 ### Run the Application
 ```bash
-flash run
+flash dev
 ```
 
 ## Next Steps
diff --git a/01_getting_started/01_hello_world/gpu_worker.py b/01_getting_started/01_hello_world/gpu_worker.py
index d7a330f..47c4b61 100644
--- a/01_getting_started/01_hello_world/gpu_worker.py
+++ b/01_getting_started/01_hello_world/gpu_worker.py
@@ -1,6 +1,5 @@
 # gpu serverless worker -- detects available GPU hardware.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuType
 
 
diff --git a/01_getting_started/02_cpu_worker/README.md b/01_getting_started/02_cpu_worker/README.md
index 4d5fb88..4de7f54 100644
--- a/01_getting_started/02_cpu_worker/README.md
+++ b/01_getting_started/02_cpu_worker/README.md
@@ -21,14 +21,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`.
 ### 3. Run Locally
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 Server starts at **http://localhost:8888**
 
 ### 4. Test the API
 
-Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions.
+Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions.
 
 ```bash
 curl -X POST http://localhost:8888/cpu_worker/runsync \
@@ -135,14 +135,9 @@ The CPU worker scales to zero when idle:
 
 ## Development
 
-### Test Worker Locally
-```bash
-python cpu_worker.py
-```
-
 ### Run the Application
 ```bash
-flash run
+flash dev
 ```
 
 ## When to Use CPU Workers
diff --git a/01_getting_started/02_cpu_worker/cpu_worker.py b/01_getting_started/02_cpu_worker/cpu_worker.py
index 0679296..a0c21a2 100644
--- a/01_getting_started/02_cpu_worker/cpu_worker.py
+++ b/01_getting_started/02_cpu_worker/cpu_worker.py
@@ -1,6 +1,5 @@
 # cpu serverless worker -- lightweight processing without GPU.
-# run with: flash run
-# test directly: python cpu_worker.py
+# run with: flash dev
 from runpod_flash import CpuInstanceType, Endpoint
 
 
diff --git a/01_getting_started/03_mixed_workers/README.md b/01_getting_started/03_mixed_workers/README.md
index e85fad4..768189d 100644
--- a/01_getting_started/03_mixed_workers/README.md
+++ b/01_getting_started/03_mixed_workers/README.md
@@ -44,7 +44,7 @@ Response
 
 ```bash
 cd 01_getting_started/03_mixed_workers
-flash run
+flash dev
 ```
 
 ### Alternative: Standalone Setup
@@ -60,7 +60,7 @@ uv run flash login
 # Or create .env file with RUNPOD_API_KEY=your_api_key_here
 
 # Run
-uv run flash run
+uv run flash dev
 ```
 
 Server starts at http://localhost:8888
diff --git a/01_getting_started/03_mixed_workers/cpu_worker.py b/01_getting_started/03_mixed_workers/cpu_worker.py
index f65fd6c..8e89264 100644
--- a/01_getting_started/03_mixed_workers/cpu_worker.py
+++ b/01_getting_started/03_mixed_workers/cpu_worker.py
@@ -1,7 +1,6 @@
 # cpu workers for text preprocessing and postprocessing.
 # part of the mixed CPU/GPU pipeline example.
-# run with: flash run
-# test directly: python cpu_worker.py
+# run with: flash dev
 from runpod_flash import CpuInstanceType, Endpoint
 
 
diff --git a/01_getting_started/03_mixed_workers/gpu_worker.py b/01_getting_started/03_mixed_workers/gpu_worker.py
index b6ae065..bba5b5d 100644
--- a/01_getting_started/03_mixed_workers/gpu_worker.py
+++ b/01_getting_started/03_mixed_workers/gpu_worker.py
@@ -1,7 +1,6 @@
 # gpu worker for ML inference (sentiment classification).
 # part of the mixed CPU/GPU pipeline example.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuGroup
 
 
diff --git a/01_getting_started/03_mixed_workers/pipeline.py b/01_getting_started/03_mixed_workers/pipeline.py
index 6a4615f..d27ae78 100644
--- a/01_getting_started/03_mixed_workers/pipeline.py
+++ b/01_getting_started/03_mixed_workers/pipeline.py
@@ -1,6 +1,6 @@
 # classification pipeline: CPU preprocess -> GPU inference -> CPU postprocess.
 # demonstrates cross-worker orchestration via a load-balanced endpoint.
-# run with: flash run
+# run with: flash dev
 from runpod_flash import Endpoint
 
 pipeline = Endpoint(name="01_03_classify_pipeline", cpu="cpu3c-1-2", workers=(1, 3))
diff --git a/01_getting_started/04_dependencies/README.md b/01_getting_started/04_dependencies/README.md
index cf9a4a8..73f0784 100644
--- a/01_getting_started/04_dependencies/README.md
+++ b/01_getting_started/04_dependencies/README.md
@@ -29,7 +29,7 @@ Learn how to manage Python packages and system dependencies in Flash workers.
 
 ```bash
 cd 01_getting_started/04_dependencies
-flash run
+flash dev
 ```
 
 Server starts at http://localhost:8888
@@ -47,7 +47,7 @@ uv run flash login
 # Or create .env file with RUNPOD_API_KEY=your_api_key_here
 
 # Run
-uv run flash run
+uv run flash dev
 ```
 
 ## GPU vs CPU Packaging
@@ -273,9 +273,8 @@ async def fetch_data(url: str):
 ### 3. Test Dependency Compatibility
 
 ```bash
-# Test locally first
-python gpu_worker.py
-python cpu_worker.py
+# Test locally
+flash dev
 ```
 
 ### 4. Document Dependencies
diff --git a/01_getting_started/04_dependencies/cpu_worker.py b/01_getting_started/04_dependencies/cpu_worker.py
index 64e2c96..f77efc1 100644
--- a/01_getting_started/04_dependencies/cpu_worker.py
+++ b/01_getting_started/04_dependencies/cpu_worker.py
@@ -1,6 +1,5 @@
 # cpu workers demonstrating data science and zero-dependency patterns.
-# run with: flash run
-# test directly: python cpu_worker.py
+# run with: flash dev
 from runpod_flash import CpuInstanceType, Endpoint
 
 
diff --git a/01_getting_started/04_dependencies/gpu_worker.py b/01_getting_started/04_dependencies/gpu_worker.py
index 07df859..3567979 100644
--- a/01_getting_started/04_dependencies/gpu_worker.py
+++ b/01_getting_started/04_dependencies/gpu_worker.py
@@ -1,6 +1,5 @@
 # gpu workers demonstrating Python and system dependency management.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuGroup
 
 
diff --git a/01_getting_started/04_dependencies/mixed_worker.py b/01_getting_started/04_dependencies/mixed_worker.py
index 4b15892..c6f8983 100644
--- a/01_getting_started/04_dependencies/mixed_worker.py
+++ b/01_getting_started/04_dependencies/mixed_worker.py
@@ -3,8 +3,7 @@
 #   - GPU images (runpod/pytorch:*) have numpy pre-installed
 #   - CPU images (python-slim) install numpy from the build artifact
 #
-# run with: flash run
-# test directly: python mixed_worker.py
+# run with: flash dev
 from runpod_flash import CpuInstanceType, Endpoint, GpuType
 
 
diff --git a/02_ml_inference/01_text_to_speech/README.md b/02_ml_inference/01_text_to_speech/README.md
index 4b89a47..bb5980a 100644
--- a/02_ml_inference/01_text_to_speech/README.md
+++ b/02_ml_inference/01_text_to_speech/README.md
@@ -33,14 +33,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`.
 ### Run
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 First run provisions the endpoint (~1 min). Server starts at http://localhost:8888
 
 ### Test the Endpoint
 
-Visit http://localhost:8888/docs for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions.
+Visit http://localhost:8888/docs for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions.
 
 **Generate speech (JSON with base64 audio):**
 ```bash
@@ -136,7 +136,7 @@ flash deploy send production
 
 ## Common Issues
 
-- **Cold start delay**: First request after idle takes 20-30s to load the model. Use `flash run --auto-provision` during development.
+- **Cold start delay**: First request after idle takes 20-30s to load the model. Use `flash dev --auto-provision` during development.
 - **Out of memory**: The model requires 24GB+ VRAM. Ensure `GpuGroup.ADA_24` or higher is configured.
 - **Invalid speaker/language**: Use `get_voices` to check valid options.
 
diff --git a/02_ml_inference/01_text_to_speech/gpu_worker.py b/02_ml_inference/01_text_to_speech/gpu_worker.py
index 6d60e01..42488e2 100644
--- a/02_ml_inference/01_text_to_speech/gpu_worker.py
+++ b/02_ml_inference/01_text_to_speech/gpu_worker.py
@@ -1,6 +1,5 @@
 # Qwen3-TTS text-to-speech GPU worker.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuGroup
 
 
diff --git a/03_advanced_workers/05_load_balancer/README.md b/03_advanced_workers/05_load_balancer/README.md
index 2c6eadc..45d8043 100644
--- a/03_advanced_workers/05_load_balancer/README.md
+++ b/03_advanced_workers/05_load_balancer/README.md
@@ -40,14 +40,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`.
 ### 3. Run Locally (from repository root)
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 Visit **http://localhost:8888/docs** for interactive API documentation (unified app with all examples).
 
 ### 4. Test Endpoints (via unified app)
 
-When using `flash run` from the repository root, routes are prefixed with the example name:
+When using `flash dev` from the repository root, routes are prefixed with the example name:
 
 **GPU Service (Compute)**:
 ```bash
@@ -256,14 +256,10 @@ Response:
 }
 ```
 
-## Testing Workers Locally
+## Testing Locally
 
 ```bash
-# Test GPU worker
-python gpu_lb.py
-
-# Test CPU worker
-python cpu_lb.py
+flash dev
 ```
 
 ## Deployment
diff --git a/03_advanced_workers/05_load_balancer/cpu_lb.py b/03_advanced_workers/05_load_balancer/cpu_lb.py
index 08a9105..4317dcb 100644
--- a/03_advanced_workers/05_load_balancer/cpu_lb.py
+++ b/03_advanced_workers/05_load_balancer/cpu_lb.py
@@ -1,6 +1,5 @@
 # cpu load-balanced endpoints with custom HTTP routes.
-# run with: flash run
-# test directly: python cpu_lb.py
+# run with: flash dev
 from runpod_flash import Endpoint
 
 api = Endpoint(
diff --git a/03_advanced_workers/05_load_balancer/gpu_lb.py b/03_advanced_workers/05_load_balancer/gpu_lb.py
index 2637bef..38ad679 100644
--- a/03_advanced_workers/05_load_balancer/gpu_lb.py
+++ b/03_advanced_workers/05_load_balancer/gpu_lb.py
@@ -1,6 +1,5 @@
 # gpu load-balanced endpoints with custom HTTP routes.
-# run with: flash run
-# test directly: python gpu_lb.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuType
 
 api = Endpoint(
diff --git a/04_scaling_performance/01_autoscaling/README.md b/04_scaling_performance/01_autoscaling/README.md
index 0e02e67..983a18c 100644
--- a/04_scaling_performance/01_autoscaling/README.md
+++ b/04_scaling_performance/01_autoscaling/README.md
@@ -8,7 +8,7 @@ Configure Flash worker autoscaling for different workload patterns. This example
 
 ```bash
 cd 04_scaling_performance/01_autoscaling
-flash run
+flash dev
 ```
 
 Server starts at http://localhost:8888 -- visit http://localhost:8888/docs for interactive API docs.
diff --git a/04_scaling_performance/01_autoscaling/cpu_worker.py b/04_scaling_performance/01_autoscaling/cpu_worker.py
index 6660ea3..05d23c3 100644
--- a/04_scaling_performance/01_autoscaling/cpu_worker.py
+++ b/04_scaling_performance/01_autoscaling/cpu_worker.py
@@ -1,6 +1,5 @@
 # cpu autoscaling strategies -- scale-to-zero and burst-ready.
-# run with: flash run
-# test directly: python cpu_worker.py
+# run with: flash dev
 from runpod_flash import CpuInstanceType, Endpoint
 
 
diff --git a/04_scaling_performance/01_autoscaling/gpu_worker.py b/04_scaling_performance/01_autoscaling/gpu_worker.py
index 2d12fb0..8df93a7 100644
--- a/04_scaling_performance/01_autoscaling/gpu_worker.py
+++ b/04_scaling_performance/01_autoscaling/gpu_worker.py
@@ -1,6 +1,5 @@
 # gpu autoscaling strategies -- scale-to-zero, always-on, high-throughput.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, GpuType, ServerlessScalerType
 
 
diff --git a/04_scaling_performance/02_datacenters/README.md b/04_scaling_performance/02_datacenters/README.md
index 2e3573b..8029c02 100644
--- a/04_scaling_performance/02_datacenters/README.md
+++ b/04_scaling_performance/02_datacenters/README.md
@@ -10,7 +10,7 @@ By default, endpoints deploy across all available data centers. The `datacenter`
 
 ```bash
 pip install -r requirements.txt
-flash run
+flash dev
 ```
 
 ## What You'll Learn
diff --git a/04_scaling_performance/02_datacenters/cpu_worker.py b/04_scaling_performance/02_datacenters/cpu_worker.py
index 142a3de..6637e45 100644
--- a/04_scaling_performance/02_datacenters/cpu_worker.py
+++ b/04_scaling_performance/02_datacenters/cpu_worker.py
@@ -1,7 +1,7 @@
 # cpu worker pinned to a cpu-supported datacenter.
 # cpu endpoints are only available in a subset of datacenters
 # (see CPU_DATACENTERS). selecting an unsupported DC raises an error.
-# run with: flash run
+# run with: flash dev
 from runpod_flash import Endpoint, DataCenter
 
 api = Endpoint(
diff --git a/04_scaling_performance/02_datacenters/gpu_worker.py b/04_scaling_performance/02_datacenters/gpu_worker.py
index cdacfd4..bfa70a5 100644
--- a/04_scaling_performance/02_datacenters/gpu_worker.py
+++ b/04_scaling_performance/02_datacenters/gpu_worker.py
@@ -1,5 +1,5 @@
 # gpu workers pinned to specific datacenters.
-# run with: flash run
+# run with: flash dev
 from runpod_flash import Endpoint, GpuGroup, DataCenter
 
 
diff --git a/05_data_workflows/01_network_volumes/README.md b/05_data_workflows/01_network_volumes/README.md
index bd9cf24..6c0314f 100644
--- a/05_data_workflows/01_network_volumes/README.md
+++ b/05_data_workflows/01_network_volumes/README.md
@@ -25,7 +25,7 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`.
 ### 3. Run Locally
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 Server starts at `http://localhost:8888`
diff --git a/05_data_workflows/01_network_volumes/cpu_worker.py b/05_data_workflows/01_network_volumes/cpu_worker.py
index d9e0408..0c9b0b8 100644
--- a/05_data_workflows/01_network_volumes/cpu_worker.py
+++ b/05_data_workflows/01_network_volumes/cpu_worker.py
@@ -1,6 +1,5 @@
 # cpu worker with network volume for listing and serving generated images.
-# run with: flash run
-# test directly: python cpu_worker.py
+# run with: flash dev
 from runpod_flash import Endpoint, DataCenter, NetworkVolume
 
 # same volume as gpu_worker.py -- must match name and datacenter
diff --git a/05_data_workflows/01_network_volumes/gpu_worker.py b/05_data_workflows/01_network_volumes/gpu_worker.py
index 34fb339..e4a8ce3 100644
--- a/05_data_workflows/01_network_volumes/gpu_worker.py
+++ b/05_data_workflows/01_network_volumes/gpu_worker.py
@@ -1,6 +1,5 @@
 # gpu worker with network volume for Stable Diffusion image generation.
-# run with: flash run
-# test directly: python gpu_worker.py
+# run with: flash dev
 import logging
 
 from runpod_flash import Endpoint, GpuType, DataCenter, NetworkVolume
diff --git a/06_real_world/README.md b/06_real_world/README.md
index e640184..ee3739c 100644
--- a/06_real_world/README.md
+++ b/06_real_world/README.md
@@ -118,7 +118,7 @@ All real-world examples include:
 ### Development
 ```bash
 cd example_name
-flash run
+flash dev
 ```
 
 ### Production
diff --git a/CLAUDE.md b/CLAUDE.md
index b3e33be..f87f72f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,7 @@
 
 ## Project Overview
 
-Production-ready examples demonstrating Flash framework capabilities. Flat-file pattern: each worker is a standalone `.py` file with `@Endpoint` decorator, auto-discovered by `flash run`. 6 categories, 18 worker files. Root `pyproject.toml` declares only `runpod-flash` dependency; runtime deps declared inline via `Endpoint(dependencies=[...])`.
+Production-ready examples demonstrating Flash framework capabilities. Flat-file pattern: each worker is a standalone `.py` file with `@Endpoint` decorator, auto-discovered by `flash dev`. 6 categories, 18 worker files. Root `pyproject.toml` declares only `runpod-flash` dependency; runtime deps declared inline via `Endpoint(dependencies=[...])`.
 
 ## Architecture
 
@@ -14,12 +14,12 @@ Production-ready examples demonstrating Flash framework capabilities. Flat-file
 2. **Endpoint routes (LB)** -- Load-balanced pattern. `api = Endpoint(...)` with `@api.get()/@api.post()` route decorators for HTTP endpoints.
 3. **@Endpoint decorator (class)** -- Used on `SimpleSD` class (`05_data_workflows`). Class-based pattern for stateful workers.
 4. **Cross-worker orchestration** -- Pipeline files import from QB workers, chain with `await`. LB endpoint orchestrates QB workers.
-5. **Flat-file discovery** -- No FastAPI boilerplate, no routers, no `main.py`. `flash run` auto-generates routes from decorated functions.
+5. **Flat-file discovery** -- No FastAPI boilerplate, no routers, no `main.py`. `flash dev` auto-generates routes from decorated functions.
 6. **In-function imports** -- Heavy libs (torch, transformers, etc.) imported inside `@Endpoint` body, only `runpod_flash` at module level.
 
 ### Entry Points
 
-All worker files across 6 categories. Each file is an independent entry point discovered by `flash run`.
+All worker files across 6 categories. Each file is an independent entry point discovered by `flash dev`.
 
 ### Module Structure
 
@@ -136,7 +136,7 @@ flash-examples --> flash (runpod_flash) --> runpod-python (runpod)
 
 ### Known Drift
 
-- No automated tests -- changes caught only at import time or `flash run`
+- No automated tests -- changes caught only at import time or `flash dev`
 - No CI that validates examples against current flash version
 - Python version: inherits from flash (3.10+)
 
@@ -152,9 +152,8 @@ uv sync --all-groups
 ### Testing
 
 ```bash
-flash run                     # Start local dev server (localhost:8888)
+flash dev                     # Start local dev server (localhost:8888)
 # Visit http://localhost:8888/docs for interactive API docs
-python gpu_worker.py          # Test a single worker directly (if __name__ == "__main__" block)
 ```
 
 ### Quality
@@ -202,14 +201,14 @@ No formal test infrastructure exists. Each worker has an optional `if __name__ =
 
 - **100% uncovered** -- no test framework, no conftest, no pytest config
 - No smoke tests that verify examples import successfully
-- No integration tests that run `flash run` against examples
+- No integration tests that run `flash dev` against examples
 
 ### Patterns
 
 To test manually:
 ```bash
 cd 01_getting_started/01_hello_world
-flash run                    # Starts dev server, auto-discovers workers
+flash dev                    # Starts dev server, auto-discovers workers
 # Use http://localhost:8888/docs to invoke endpoints
 ```
 
@@ -217,7 +216,7 @@ flash run                    # Starts dev server, auto-discovers workers
 
 1. Add `tests/test_imports.py` that imports every worker file (catches `Endpoint` signature drift)
 2. Add `tests/test_configs.py` that validates all resource configs construct without error
-3. Add CI job that runs `flash run --check` (dry-run mode) against each example category
+3. Add CI job that runs `flash dev --check` (dry-run mode) against each example category
 
 ## Common Mistakes
 
diff --git a/CLI-REFERENCE.md b/CLI-REFERENCE.md
index 5653416..5b87462 100644
--- a/CLI-REFERENCE.md
+++ b/CLI-REFERENCE.md
@@ -23,7 +23,7 @@ flash <command> --help    # Show help for specific command
 |---------|---------|
 | [`flash login`](#flash-login) | Authenticate with Runpod |
 | [`flash init`](#flash-init) | Create new Flash project |
-| [`flash run`](#flash-run) | Run development server |
+| [`flash dev`](#flash-dev) | Run development server |
 | [`flash build`](#flash-build) | Build application package |
 | [`flash deploy`](#flash-deploy) | Build and deploy application |
 | [`flash undeploy`](#flash-undeploy) | Delete deployed endpoints |
@@ -101,7 +101,7 @@ Get your API key from [Runpod Settings](https://www.runpod.io/console/user/setti
 
 ### Related Commands
 
-- [`flash run`](#flash-run) - Run development server (requires authentication)
+- [`flash dev`](#flash-dev) - Run development server (requires authentication)
 - [`flash deploy`](#flash-deploy) - Deploy to Runpod (requires authentication)
 
 ---
@@ -156,19 +156,19 @@ flash init my-api --force
 
 ### Related Commands
 
-- [`flash run`](#flash-run) - Run the initialized project locally
+- [`flash dev`](#flash-dev) - Run the initialized project locally
 - [Getting Started Guide](docs/cli/getting-started.md) - Full tutorial
 
 ---
 
-## flash run
+## flash dev
 
 Run the Flash development server locally with hot reloading.
 
 ### Syntax
 
 ```bash
-flash run [OPTIONS]
+flash dev [OPTIONS]
 ```
 
 ### Options
@@ -189,26 +189,26 @@ flash run [OPTIONS]
 
 **Basic local development:**
 ```bash
-flash run
+flash dev
 # Server runs at http://localhost:8888
 # Visit http://localhost:8888/docs for Swagger UI
 ```
 
 **Custom host and port:**
 ```bash
-flash run --host 0.0.0.0 --port 3000
+flash dev --host 0.0.0.0 --port 3000
 # Accessible from network at http://<your-ip>:3000
 ```
 
 **Disable auto-reload:**
 ```bash
-flash run --no-reload
+flash dev --no-reload
 # Useful for debugging or production-like testing
 ```
 
 **Auto-provision resources:**
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 # Automatically creates Runpod endpoints on startup
 ```
 
@@ -216,7 +216,7 @@ flash run --auto-provision
 ```bash
 export FLASH_HOST=0.0.0.0
 export FLASH_PORT=9000
-flash run
+flash dev
 ```
 
 ### What It Does
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 647529a..1af25bb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -40,13 +40,13 @@ Add examples that demonstrate:
 All examples must meet these standards:
 
 ### 1. Functional Requirements
-- [ ] Runs successfully with `flash run`
+- [ ] Runs successfully with `flash dev`
 - [ ] All endpoints return correct responses
 - [ ] Error handling is implemented
 - [ ] Environment variables are documented
 - [ ] Dependencies are declared in pyproject.toml
 - [ ] Runtime deps declared in `Endpoint(dependencies=[...])`
-- [ ] Example discovered by `flash run` from project root
+- [ ] Example discovered by `flash dev` from project root
 
 ### 2. Code Quality
 - [ ] Clear, readable code
@@ -126,17 +126,17 @@ Follow the [standard example structure](#example-structure).
 
 ```bash
 cd your_category/your_example
-flash run
+flash dev
 # Test all endpoints
 ```
 
 ### 5. Verify Discovery
 
-`flash run` auto-discovers all `.py` files containing `@Endpoint` functions. Verify your example loads:
+`flash dev` auto-discovers all `.py` files containing `@Endpoint` functions. Verify your example loads:
 
 ```bash
 # From the repository root
-flash run
+flash dev
 # Check http://localhost:8888/docs for your new endpoints
 ```
 
@@ -177,7 +177,7 @@ your_example/
     └── architecture.png
 ```
 
-`flash run` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories, no router wiring.
+`flash dev` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories, no router wiring.
 
 ### Minimal Worker (`gpu_worker.py`)
 
@@ -286,7 +286,7 @@ Test your example thoroughly:
 
 ```bash
 # Run the application
-flash run
+flash dev
 
 # Test health endpoint
 curl http://localhost:8888/health
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 984e29a..0963730 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -165,16 +165,16 @@ Run commands with the package manager prefix without activation:
 
 ```bash
 # With uv
-uv run flash run
+uv run flash dev
 
 # With poetry
-poetry run flash run
+poetry run flash dev
 
 # With pipenv
-pipenv run flash run
+pipenv run flash dev
 
 # With conda
-conda run -p ./.venv flash run
+conda run -p ./.venv flash dev
 ```
 
 **Option B: Activate Virtual Environment (works with all managers)**
@@ -189,7 +189,7 @@ source .venv/bin/activate
 .venv\Scripts\activate
 
 # Then run normally
-flash run
+flash dev
 ```
 
 Once activated, you can run Flash and other commands directly without a prefix.
@@ -268,9 +268,9 @@ Or set `RUNPOD_API_KEY` in your `.env` file.
 
 ```bash
 # Run development server
-flash run                    # Default: localhost:8888
-flash run --port 9000        # Custom port
-flash run --host 0.0.0.0     # Network accessible
+flash dev                    # Default: localhost:8888
+flash dev --port 9000        # Custom port
+flash dev --host 0.0.0.0     # Network accessible
 
 # Build deployment package
 flash build                  # Standard build
@@ -511,7 +511,7 @@ cd 01_getting_started/05_new_example
 touch README.md gpu_worker.py pyproject.toml
 ```
 
-Each worker file (named `*_worker.py` by convention) is self-contained with `@Endpoint` decorated functions. `flash run` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories needed.
+Each worker file (named `*_worker.py` by convention) is self-contained with `@Endpoint` decorated functions. `flash dev` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories needed.
 
 **3. Declare dependencies:**
 
@@ -521,7 +521,7 @@ Add a `pyproject.toml` with `runpod-flash` as the only local dependency. Runtime
 
 ```bash
 cd ../../  # Back to root
-flash run  # Discovers all .py files with @Endpoint functions
+flash dev  # Discovers all .py files with @Endpoint functions
 ```
 
 ### Cleaning Up
@@ -544,7 +544,7 @@ make setup
 
 ## Unified App Architecture
 
-The root directory provides a programmatic discovery system that automatically finds and loads all examples when you run `flash run` from the project root.
+The root directory provides a programmatic discovery system that automatically finds and loads all examples when you run `flash dev` from the project root.
 
 ### Discovery Process
 
diff --git a/README.md b/README.md
index c73bcdf..487ce16 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ async def generate_image(prompt: str) -> bytes:
 **Key features:**
 - **`@Endpoint` decorator**: Mark any async function to run on serverless infrastructure
 - **Auto-scaling**: Scale to zero when idle, scale up under load
-- **Local development**: `flash run` starts a local server with hot reload
+- **Local development**: `flash dev` starts a local server with hot reload
 - **One-command deploy**: `flash deploy` packages and ships your code
 
 ## Prerequisites
@@ -46,7 +46,7 @@ uv sync && uv pip install -e .
 uv run flash login
 
 # Run all examples locally
-uv run flash run
+uv run flash dev
 ```
 
 Open **http://localhost:8888/docs** to explore all endpoints.
@@ -72,7 +72,7 @@ More examples coming soon in each category.
 
 ```bash
 flash login              # Authenticate with Runpod (opens browser)
-flash run                # Run development server (localhost:8888)
+flash dev                # Run development server (localhost:8888)
 flash build              # Build deployment package
 flash deploy --env <name># Build and deploy to environment
 flash undeploy <name>    # Delete deployed endpoint
diff --git a/docs/cli/commands.md b/docs/cli/commands.md
index 62ee5c7..ab86903 100644
--- a/docs/cli/commands.md
+++ b/docs/cli/commands.md
@@ -6,7 +6,7 @@ Exhaustive documentation for all Flash CLI commands. This guide covers every opt
 
 - [flash login](#flash-login) - Authenticate with Runpod
 - [flash init](#flash-init) - Create new Flash project
-- [flash run](#flash-run) - Run development server
+- [flash dev](#flash-dev) - Run development server
 - [flash build](#flash-build) - Build deployment package
 - [flash deploy](#flash-deploy) - Build and deploy application
 - [flash undeploy](#flash-undeploy) - Delete deployed endpoints
@@ -225,7 +225,7 @@ The generated `.gitignore` already includes necessary patterns.
 
 3. **Run Locally**
    ```bash
-   uv run flash run
+   uv run flash dev
    ```
 
 4. **View Documentation**
@@ -267,7 +267,7 @@ flash init my-api
 
 ### Related Commands
 
-- [`flash run`](#flash-run) - Run the initialized project
+- [`flash dev`](#flash-dev) - Run the initialized project
 - [`flash build`](#flash-build) - Build the project
 - [`flash deploy`](#flash-deploy) - Deploy the project
 
@@ -278,14 +278,14 @@ flash init my-api
 
 ---
 
-## flash run
+## flash dev
 
 Run the Flash development server locally with hot reloading for rapid development and testing.
 
 ### Synopsis
 
 ```bash
-flash run [OPTIONS]
+flash dev [OPTIONS]
 ```
 
 ### Description
@@ -294,7 +294,7 @@ Starts a local uvicorn development server that runs your Flash application. The
 
 ### Architecture: Hybrid Local + Cloud
 
-With `flash run`, your system operates in a **hybrid architecture**:
+With `flash dev`, your system operates in a **hybrid architecture**:
 
 - **Your FastAPI app runs locally** on your machine (localhost:8888)
 - **`@Endpoint` functions run on Runpod** as serverless endpoints
@@ -350,7 +350,7 @@ RUNPOD_API_KEY=your-key-here
 #### Basic Development Server
 
 ```bash
-flash run
+flash dev
 ```
 
 Output:
@@ -367,7 +367,7 @@ Visit `http://localhost:8888/docs` for interactive API documentation.
 #### Custom Host and Port
 
 ```bash
-flash run --host 0.0.0.0 --port 3000
+flash dev --host 0.0.0.0 --port 3000
 ```
 
 Makes server accessible from network at `http://<your-ip>:3000`. Useful for:
@@ -378,7 +378,7 @@ Makes server accessible from network at `http://<your-ip>:3000`. Useful for:
 #### Disable Auto-Reload
 
 ```bash
-flash run --no-reload
+flash dev --no-reload
 ```
 
 Useful for:
@@ -391,7 +391,7 @@ Useful for:
 ```bash
 export FLASH_HOST=0.0.0.0
 export FLASH_PORT=9000
-flash run
+flash dev
 ```
 
 Or with `.env` file:
@@ -402,14 +402,14 @@ FLASH_PORT=9000
 ```
 
 ```bash
-flash run
+flash dev
 # Automatically loads .env
 ```
 
 #### Auto-Provision Resources
 
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 ```
 
 This will:
@@ -423,7 +423,7 @@ This will:
 #### Combine Options
 
 ```bash
-flash run --host 0.0.0.0 --port 9000 --no-reload
+flash dev --host 0.0.0.0 --port 9000 --no-reload
 ```
 
 Runs on all network interfaces, port 9000, without auto-reload.
@@ -431,7 +431,7 @@ Runs on all network interfaces, port 9000, without auto-reload.
 #### Development with External Access
 
 ```bash
-flash run --host 0.0.0.0
+flash dev --host 0.0.0.0
 # Server accessible at http://192.168.1.100:8888 (your local IP)
 ```
 
@@ -501,7 +501,7 @@ When `--reload` is enabled (default):
 **Example workflow:**
 ```bash
 # Terminal 1: Run server
-flash run
+flash dev
 
 # Terminal 2: Edit code
 echo 'print("Updated!")' >> gpu_worker.py
@@ -520,7 +520,7 @@ echo 'print("Updated!")' >> gpu_worker.py
 
 **For production-like testing:**
 ```bash
-flash run --no-reload
+flash dev --no-reload
 # More representative of deployed performance
 ```
 
@@ -536,7 +536,7 @@ ERROR: [Errno 48] Address already in use
 Solutions:
 ```bash
 # Option 1: Use different port
-flash run --port 9000
+flash dev --port 9000
 
 # Option 2: Kill process using port
 lsof -ti:8888 | xargs kill -9
@@ -576,7 +576,7 @@ Solutions:
 echo "large_files/" >> .gitignore
 
 # Or disable reload temporarily
-flash run --no-reload
+flash dev --no-reload
 ```
 
 **Cannot Access from Network**
@@ -586,7 +586,7 @@ Problem: Cannot reach server from other devices.
 Solutions:
 ```bash
 # Use 0.0.0.0 instead of localhost
-flash run --host 0.0.0.0
+flash dev --host 0.0.0.0
 
 # Check firewall settings
 # macOS: System Preferences → Security & Privacy → Firewall
@@ -603,7 +603,7 @@ Error: RUNPOD_API_KEY environment variable not set
 Solution:
 ```bash
 export RUNPOD_API_KEY=your-key-here
-flash run --auto-provision
+flash dev --auto-provision
 ```
 
 ### Related Commands
@@ -1012,7 +1012,7 @@ flash build --exclude problematic-package
 ### Related Commands
 
 - [`flash deploy`](#flash-deploy) - Build and deploy in one step
-- [`flash run`](#flash-run) - Test before building
+- [`flash dev`](#flash-dev) - Test before building
 
 ### Related Workflows
 
@@ -1047,7 +1047,7 @@ With `flash deploy`, your **entire application** runs on Runpod Serverless:
 - **No `live-` prefix** on endpoint names—these are production endpoints
 - **No hot reload**—code changes require a new deployment
 
-This is different from `flash run`, where your FastAPI app runs locally on your machine. With `flash deploy`, everything is in the cloud for production use.
+This is different from `flash dev`, where your FastAPI app runs locally on your machine. With `flash deploy`, everything is in the cloud for production use.
 
 ### Options
 
@@ -1415,7 +1415,7 @@ flash deploy --env production
 - [`flash build`](#flash-build) - Build without deploying
 - [`flash env`](#flash-env) - Manage environments
 - [`flash undeploy`](#flash-undeploy) - Delete deployments
-- [`flash run`](#flash-run) - Test locally before deploying
+- [`flash dev`](#flash-dev) - Test locally before deploying
 
 ### Related Workflows
 
diff --git a/docs/cli/getting-started.md b/docs/cli/getting-started.md
index e32766d..29f2b8f 100644
--- a/docs/cli/getting-started.md
+++ b/docs/cli/getting-started.md
@@ -92,7 +92,7 @@ async def process_request(payload: dict) -> dict:
 Start the development server:
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 **Expected output:**
@@ -104,7 +104,7 @@ INFO: Application startup complete
 
 **Checkpoint:** Server is running at http://localhost:8888
 
-**What's happening:** Your FastAPI app runs locally on your machine, but when you call an `@Endpoint` function, it executes on Runpod Serverless. This hybrid architecture gives you hot-reload for rapid development while testing real GPU/CPU workloads in the cloud. Endpoints created during `flash run` are prefixed with `live-` to keep them separate from production.
+**What's happening:** Your FastAPI app runs locally on your machine, but when you call an `@Endpoint` function, it executes on Runpod Serverless. This hybrid architecture gives you hot-reload for rapid development while testing real GPU/CPU workloads in the cloud. Endpoints created during `flash dev` are prefixed with `live-` to keep them separate from production.
 
 ---
 
@@ -275,7 +275,7 @@ curl -X POST https://abcd1234-hello-flash-gpu.runpod.io/run \
 In the past 10 minutes, you've:
 
 1. ✅ Created a Flash project with `flash init`
-2. ✅ Run a development server with `flash run`
+2. ✅ Run a development server with `flash dev`
 3. ✅ Tested locally via Swagger UI
 4. ✅ Created a deployment environment
 5. ✅ Built a deployment package with `flash build`
@@ -294,7 +294,7 @@ The flash-examples repository contains production-ready examples:
 git clone https://github.com/runpod/flash-examples.git
 cd flash-examples
 uv sync && uv pip install -e .
-uv run flash run
+uv run flash dev
 # Visit http://localhost:8888/docs to explore all examples
 ```
 
@@ -309,7 +309,7 @@ uv run flash run
 ### Learn More Commands
 
 **Development:**
-- [flash run options](../CLI-REFERENCE.md#flash-run) - Custom host, port, auto-reload
+- [flash dev options](../CLI-REFERENCE.md#flash-dev) - Custom host, port, auto-reload
 - [flash build options](../CLI-REFERENCE.md#flash-build) - Size optimization, custom names
 
 **Deployment:**
@@ -350,7 +350,7 @@ uv run flash --version
 **Solution:**
 ```bash
 # Use different port
-uv run flash run --port 9000
+uv run flash dev --port 9000
 
 # Or find and kill process using port 8888
 lsof -ti:8888 | xargs kill -9
@@ -386,7 +386,7 @@ See [Troubleshooting Guide](troubleshooting.md) for more solutions.
 | Command | Purpose |
 |---------|---------|
 | `uv run flash init <name>` | Create new project |
-| `uv run flash run` | Run development server |
+| `uv run flash dev` | Run development server |
 | `uv run flash build` | Build deployment package |
 | `uv run flash deploy --env <name>` | Deploy to environment |
 | `uv run flash env create <name>` | Create environment |
diff --git a/docs/cli/troubleshooting.md b/docs/cli/troubleshooting.md
index 839b97d..0c2c430 100644
--- a/docs/cli/troubleshooting.md
+++ b/docs/cli/troubleshooting.md
@@ -6,7 +6,7 @@ Solutions to common Flash CLI problems organized by command and error type.
 
 - [Installation Issues](#installation-issues)
 - [flash init Problems](#flash-init-problems)
-- [flash run Issues](#flash-run-issues)
+- [flash dev Issues](#flash-dev-issues)
 - [flash build Failures](#flash-build-failures)
 - [flash deploy Errors](#flash-deploy-errors)
 - [Environment Management](#environment-management)
@@ -90,7 +90,7 @@ python --version  # Should be 3.10+
 
 **Symptoms:**
 ```bash
-$ flash run
+$ flash dev
 Traceback (most recent call last):
   ImportError: cannot import name 'remote' from 'runpod_flash'
 ```
@@ -220,7 +220,7 @@ flash init my-api
 
 ---
 
-## flash run Issues
+## flash dev Issues
 
 ### Port Already in Use
 
@@ -228,7 +228,7 @@ flash init my-api
 
 **Symptoms:**
 ```bash
-$ flash run
+$ flash dev
 ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8888): address already in use
 ```
 
@@ -236,7 +236,7 @@ ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8888):
 
 **1. Use different port:**
 ```bash
-flash run --port 9000
+flash dev --port 9000
 ```
 
 **2. Find and kill process using port:**
@@ -255,11 +255,11 @@ kill <pid>
 **3. Use environment variable:**
 ```bash
 export FLASH_PORT=9000
-flash run
+flash dev
 ```
 
 **References:**
-- [flash run command](commands.md#flash-run)
+- [flash dev command](commands.md#flash-dev)
 - [Workflows: Local Development](workflows.md#local-development-workflow)
 
 ### Module Not Found Error
@@ -268,7 +268,7 @@ flash run
 
 **Symptoms:**
 ```bash
-$ flash run
+$ flash dev
 ModuleNotFoundError: No module named 'fastapi'
 ```
 
@@ -320,14 +320,14 @@ dependencies = [
 **1. Check reload is enabled:**
 ```bash
 # Reload is default, but verify:
-flash run  # Should show "StatReload" in output
+flash dev  # Should show "StatReload" in output
 ```
 
 **2. Manually restart:**
 ```bash
 # Press Ctrl+C to stop
 # Run again
-flash run
+flash dev
 ```
 
 **3. Check file watching:**
@@ -342,14 +342,14 @@ mv ignored_dir/worker.py workers/worker.py
 **4. Disable and re-enable reload:**
 ```bash
 # Try without reload
-flash run --no-reload
+flash dev --no-reload
 
 # Then with reload
-flash run
+flash dev
 ```
 
 **References:**
-- [flash run command](commands.md#flash-run)
+- [flash dev command](commands.md#flash-dev)
 
 ### Cannot Access from Network
 
@@ -363,7 +363,7 @@ flash run
 
 **1. Bind to 0.0.0.0:**
 ```bash
-flash run --host 0.0.0.0
+flash dev --host 0.0.0.0
 ```
 
 **2. Check firewall:**
@@ -391,7 +391,7 @@ ip addr show
 ```
 
 **References:**
-- [flash run command](commands.md#flash-run)
+- [flash dev command](commands.md#flash-dev)
 
 ---
 
@@ -1271,7 +1271,7 @@ When troubleshooting any issue:
 - [ ] No permission issues (`ls -la`)
 - [ ] Recent Flash version (`pip install --upgrade runpod-flash`)
 - [ ] Checked logs and error messages
-- [ ] Tested locally first (`flash run`)
+- [ ] Tested locally first (`flash dev`)
 - [ ] Reviewed documentation
 
 ---
@@ -1301,7 +1301,7 @@ git checkout main
 
 # 6. Fix issue properly
 # ... make changes ...
-flash run  # Test locally
+flash dev  # Test locally
 flash deploy --env staging  # Test in staging
 flash deploy --env production  # Redeploy to production
 ```
@@ -1342,7 +1342,7 @@ pip install runpod-flash
 pip install -e .
 
 # 6. Test locally
-flash run
+flash dev
 
 # 7. Redeploy
 flash env create production
@@ -1357,7 +1357,7 @@ flash deploy --env production
 
 1. **Always test locally first:**
    ```bash
-   flash run
+   flash dev
    # Test all endpoints
    ```
 
@@ -1417,7 +1417,7 @@ flash deploy --env production
 | Issue | Quick Fix |
 |-------|-----------|
 | Command not found | `pip install runpod-flash` |
-| Port in use | `flash run --port 9000` |
+| Port in use | `flash dev --port 9000` |
 | Build too large | `flash build --exclude torch,torchvision` |
 | Missing API key | `export RUNPOD_API_KEY=your-key` |
 | Environment not found | `flash env create <name>` |
diff --git a/docs/cli/workflows.md b/docs/cli/workflows.md
index b008ce5..74b216e 100644
--- a/docs/cli/workflows.md
+++ b/docs/cli/workflows.md
@@ -85,7 +85,7 @@ FLASH_PORT=8888
 #### 4. Start Development Server
 
 ```bash
-flash run
+flash dev
 ```
 
 **Expected output:**
@@ -151,7 +151,7 @@ Repeat steps 5-7:
 **Use Multiple Terminals:**
 ```bash
 # Terminal 1: Run server
-flash run
+flash dev
 
 # Terminal 2: Test with curl
 curl http://localhost:8888/process ...
@@ -162,18 +162,18 @@ tail -f logs/application.log
 
 **Disable Hot Reload for Debugging:**
 ```bash
-flash run --no-reload
+flash dev --no-reload
 # Easier to attach debugger
 ```
 
 **Change Port if Conflict:**
 ```bash
-flash run --port 9000
+flash dev --port 9000
 ```
 
 **Access from Network:**
 ```bash
-flash run --host 0.0.0.0
+flash dev --host 0.0.0.0
 # Access from mobile devices or other machines
 ```
 
@@ -182,7 +182,7 @@ flash run --host 0.0.0.0
 **Port in use:**
 ```bash
 # Solution: Use different port
-flash run --port 9000
+flash dev --port 9000
 ```
 
 **Import errors:**
@@ -194,11 +194,11 @@ pip install -e .
 **Changes not reflecting:**
 ```bash
 # Solution: Check hot reload is enabled
-flash run  # --reload is default
+flash dev  # --reload is default
 
 # Or manually restart
 # Ctrl+C to stop
-flash run
+flash dev
 ```
 
 ### Next Step
@@ -217,7 +217,7 @@ Deploy a tested application to Runpod with proper environment isolation and vali
 
 ### Prerequisites
 
-- Application tested locally (`flash run`)
+- Application tested locally (`flash dev`)
 - Runpod API key configured (`RUNPOD_API_KEY`)
 - Environment created (or will create in workflow)
 
@@ -228,7 +228,7 @@ Deploy a tested application to Runpod with proper environment isolation and vali
 Before deploying, ensure everything works locally:
 
 ```bash
-flash run
+flash dev
 # Test all endpoints via http://localhost:8888/docs
 # Ctrl+C when done
 ```
@@ -401,7 +401,7 @@ To deploy code changes:
 ```bash
 # Make changes to code
 # Test locally
-flash run
+flash dev
 
 # Redeploy (same environment)
 flash deploy --env production
@@ -956,7 +956,7 @@ jobs:
 Before production deployment:
 
 **Local Testing:**
-- [ ] `flash run` works without errors
+- [ ] `flash dev` works without errors
 - [ ] All endpoints tested via Swagger UI
 - [ ] Edge cases handled correctly
 - [ ] Error handling tested
@@ -1606,7 +1606,7 @@ async def infer(payload: dict) -> dict:
 **3. Test incrementally:**
 ```bash
 # Local
-flash run  # Does it work?
+flash dev  # Does it work?
 
 # Preview
 flash deploy --preview  # Does build work?
@@ -1655,7 +1655,7 @@ flash env get production
 
 Avoid deployment issues:
 
-- [ ] Test locally with `flash run`
+- [ ] Test locally with `flash dev`
 - [ ] Test build with `flash build`
 - [ ] Test preview with `flash deploy --preview`
 - [ ] Deploy to staging first