From f9628ca95508cf6d63f3d48f9939d7812369922d Mon Sep 17 00:00:00 2001 From: zeke <40004347+KAJdev@users.noreply.github.com> Date: Wed, 27 May 2026 14:54:43 -0700 Subject: [PATCH] fix: replace flash run with flash dev, remove standalone python references SLS-132 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 01_getting_started/01_hello_world/README.md | 11 ++--- .../01_hello_world/gpu_worker.py | 3 +- 01_getting_started/02_cpu_worker/README.md | 11 ++--- .../02_cpu_worker/cpu_worker.py | 3 +- 01_getting_started/03_mixed_workers/README.md | 4 +- .../03_mixed_workers/cpu_worker.py | 3 +- .../03_mixed_workers/gpu_worker.py | 3 +- .../03_mixed_workers/pipeline.py | 2 +- 01_getting_started/04_dependencies/README.md | 9 ++-- .../04_dependencies/cpu_worker.py | 3 +- .../04_dependencies/gpu_worker.py | 3 +- .../04_dependencies/mixed_worker.py | 3 +- 02_ml_inference/01_text_to_speech/README.md | 6 +-- .../01_text_to_speech/gpu_worker.py | 3 +- .../05_load_balancer/README.md | 12 ++--- .../05_load_balancer/cpu_lb.py | 3 +- .../05_load_balancer/gpu_lb.py | 3 +- .../01_autoscaling/README.md | 2 +- .../01_autoscaling/cpu_worker.py | 3 +- .../01_autoscaling/gpu_worker.py | 3 +- .../02_datacenters/README.md | 2 +- .../02_datacenters/cpu_worker.py | 2 +- .../02_datacenters/gpu_worker.py | 2 +- .../01_network_volumes/README.md | 2 +- .../01_network_volumes/cpu_worker.py | 3 +- .../01_network_volumes/gpu_worker.py | 3 +- 06_real_world/README.md | 2 +- CLAUDE.md | 17 ++++--- CLI-REFERENCE.md | 20 ++++---- CONTRIBUTING.md | 14 +++--- DEVELOPMENT.md | 22 ++++----- README.md | 6 +-- docs/cli/commands.md | 46 +++++++++---------- docs/cli/getting-started.md | 14 +++--- docs/cli/troubleshooting.md | 40 ++++++++-------- docs/cli/workflows.md | 28 +++++------ 37 files changed, 144 insertions(+), 174 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d305842..ab7cdd3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ 
b/.github/PULL_REQUEST_TEMPLATE.md @@ -25,7 +25,7 @@ If adding a new example, which category does it belong to? ## Checklist ### Functionality -- [ ] Example runs successfully with `flash run` +- [ ] Example runs successfully with `flash dev` - [ ] All endpoints return correct responses - [ ] Tested locally - [ ] Error handling implemented diff --git a/01_getting_started/01_hello_world/README.md b/01_getting_started/01_hello_world/README.md index beb291c..494dfb9 100644 --- a/01_getting_started/01_hello_world/README.md +++ b/01_getting_started/01_hello_world/README.md @@ -21,14 +21,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`. ### 3. Run Locally ```bash -uv run flash run +uv run flash dev ``` Server starts at **http://localhost:8888** ### 4. Test the API -Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions. +Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions. ```bash curl -X POST http://localhost:8888/gpu_worker/runsync \ @@ -133,14 +133,9 @@ The worker uses PyTorch to detect and report GPU information: ## Development -### Test Worker Locally -```bash -python gpu_worker.py -``` - ### Run the Application ```bash -flash run +flash dev ``` ## Next Steps diff --git a/01_getting_started/01_hello_world/gpu_worker.py b/01_getting_started/01_hello_world/gpu_worker.py index d7a330f..47c4b61 100644 --- a/01_getting_started/01_hello_world/gpu_worker.py +++ b/01_getting_started/01_hello_world/gpu_worker.py @@ -1,6 +1,5 @@ # gpu serverless worker -- detects available GPU hardware. 
-# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, GpuType diff --git a/01_getting_started/02_cpu_worker/README.md b/01_getting_started/02_cpu_worker/README.md index 4d5fb88..4de7f54 100644 --- a/01_getting_started/02_cpu_worker/README.md +++ b/01_getting_started/02_cpu_worker/README.md @@ -21,14 +21,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`. ### 3. Run Locally ```bash -uv run flash run +uv run flash dev ``` Server starts at **http://localhost:8888** ### 4. Test the API -Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions. +Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions. ```bash curl -X POST http://localhost:8888/cpu_worker/runsync \ @@ -135,14 +135,9 @@ The CPU worker scales to zero when idle: ## Development -### Test Worker Locally -```bash -python cpu_worker.py -``` - ### Run the Application ```bash -flash run +flash dev ``` ## When to Use CPU Workers diff --git a/01_getting_started/02_cpu_worker/cpu_worker.py b/01_getting_started/02_cpu_worker/cpu_worker.py index 0679296..a0c21a2 100644 --- a/01_getting_started/02_cpu_worker/cpu_worker.py +++ b/01_getting_started/02_cpu_worker/cpu_worker.py @@ -1,6 +1,5 @@ # cpu serverless worker -- lightweight processing without GPU. 
-# run with: flash run -# test directly: python cpu_worker.py +# run with: flash dev from runpod_flash import CpuInstanceType, Endpoint diff --git a/01_getting_started/03_mixed_workers/README.md b/01_getting_started/03_mixed_workers/README.md index e85fad4..768189d 100644 --- a/01_getting_started/03_mixed_workers/README.md +++ b/01_getting_started/03_mixed_workers/README.md @@ -44,7 +44,7 @@ Response ```bash cd 01_getting_started/03_mixed_workers -flash run +flash dev ``` ### Alternative: Standalone Setup @@ -60,7 +60,7 @@ uv run flash login # Or create .env file with RUNPOD_API_KEY=your_api_key_here # Run -uv run flash run +uv run flash dev ``` Server starts at http://localhost:8888 diff --git a/01_getting_started/03_mixed_workers/cpu_worker.py b/01_getting_started/03_mixed_workers/cpu_worker.py index f65fd6c..8e89264 100644 --- a/01_getting_started/03_mixed_workers/cpu_worker.py +++ b/01_getting_started/03_mixed_workers/cpu_worker.py @@ -1,7 +1,6 @@ # cpu workers for text preprocessing and postprocessing. # part of the mixed CPU/GPU pipeline example. -# run with: flash run -# test directly: python cpu_worker.py +# run with: flash dev from runpod_flash import CpuInstanceType, Endpoint diff --git a/01_getting_started/03_mixed_workers/gpu_worker.py b/01_getting_started/03_mixed_workers/gpu_worker.py index b6ae065..bba5b5d 100644 --- a/01_getting_started/03_mixed_workers/gpu_worker.py +++ b/01_getting_started/03_mixed_workers/gpu_worker.py @@ -1,7 +1,6 @@ # gpu worker for ML inference (sentiment classification). # part of the mixed CPU/GPU pipeline example. 
-# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, GpuGroup diff --git a/01_getting_started/03_mixed_workers/pipeline.py b/01_getting_started/03_mixed_workers/pipeline.py index 6a4615f..d27ae78 100644 --- a/01_getting_started/03_mixed_workers/pipeline.py +++ b/01_getting_started/03_mixed_workers/pipeline.py @@ -1,6 +1,6 @@ # classification pipeline: CPU preprocess -> GPU inference -> CPU postprocess. # demonstrates cross-worker orchestration via a load-balanced endpoint. -# run with: flash run +# run with: flash dev from runpod_flash import Endpoint pipeline = Endpoint(name="01_03_classify_pipeline", cpu="cpu3c-1-2", workers=(1, 3)) diff --git a/01_getting_started/04_dependencies/README.md b/01_getting_started/04_dependencies/README.md index cf9a4a8..73f0784 100644 --- a/01_getting_started/04_dependencies/README.md +++ b/01_getting_started/04_dependencies/README.md @@ -29,7 +29,7 @@ Learn how to manage Python packages and system dependencies in Flash workers. ```bash cd 01_getting_started/04_dependencies -flash run +flash dev ``` Server starts at http://localhost:8888 @@ -47,7 +47,7 @@ uv run flash login # Or create .env file with RUNPOD_API_KEY=your_api_key_here # Run -uv run flash run +uv run flash dev ``` ## GPU vs CPU Packaging @@ -273,9 +273,8 @@ async def fetch_data(url: str): ### 3. Test Dependency Compatibility ```bash -# Test locally first -python gpu_worker.py -python cpu_worker.py +# test locally +flash dev ``` ### 4. Document Dependencies diff --git a/01_getting_started/04_dependencies/cpu_worker.py b/01_getting_started/04_dependencies/cpu_worker.py index 64e2c96..f77efc1 100644 --- a/01_getting_started/04_dependencies/cpu_worker.py +++ b/01_getting_started/04_dependencies/cpu_worker.py @@ -1,6 +1,5 @@ # cpu workers demonstrating data science and zero-dependency patterns. 
-# run with: flash run -# test directly: python cpu_worker.py +# run with: flash dev from runpod_flash import CpuInstanceType, Endpoint diff --git a/01_getting_started/04_dependencies/gpu_worker.py b/01_getting_started/04_dependencies/gpu_worker.py index 07df859..3567979 100644 --- a/01_getting_started/04_dependencies/gpu_worker.py +++ b/01_getting_started/04_dependencies/gpu_worker.py @@ -1,6 +1,5 @@ # gpu workers demonstrating Python and system dependency management. -# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, GpuGroup diff --git a/01_getting_started/04_dependencies/mixed_worker.py b/01_getting_started/04_dependencies/mixed_worker.py index 4b15892..c6f8983 100644 --- a/01_getting_started/04_dependencies/mixed_worker.py +++ b/01_getting_started/04_dependencies/mixed_worker.py @@ -3,8 +3,7 @@ # - GPU images (runpod/pytorch:*) have numpy pre-installed # - CPU images (python-slim) install numpy from the build artifact # -# run with: flash run -# test directly: python mixed_worker.py +# run with: flash dev from runpod_flash import CpuInstanceType, Endpoint, GpuType diff --git a/02_ml_inference/01_text_to_speech/README.md b/02_ml_inference/01_text_to_speech/README.md index 4b89a47..bb5980a 100644 --- a/02_ml_inference/01_text_to_speech/README.md +++ b/02_ml_inference/01_text_to_speech/README.md @@ -33,14 +33,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`. ### Run ```bash -uv run flash run +uv run flash dev ``` First run provisions the endpoint (~1 min). Server starts at http://localhost:8888 ### Test the Endpoint -Visit http://localhost:8888/docs for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@Endpoint` functions. +Visit http://localhost:8888/docs for interactive API documentation. QB endpoints are auto-generated by `flash dev` based on your `@Endpoint` functions. 
**Generate speech (JSON with base64 audio):** ```bash @@ -136,7 +136,7 @@ flash deploy send production ## Common Issues -- **Cold start delay**: First request after idle takes 20-30s to load the model. Use `flash run --auto-provision` during development. +- **Cold start delay**: First request after idle takes 20-30s to load the model. Use `flash dev --auto-provision` during development. - **Out of memory**: The model requires 24GB+ VRAM. Ensure `GpuGroup.ADA_24` or higher is configured. - **Invalid speaker/language**: Use `get_voices` to check valid options. diff --git a/02_ml_inference/01_text_to_speech/gpu_worker.py b/02_ml_inference/01_text_to_speech/gpu_worker.py index 6d60e01..42488e2 100644 --- a/02_ml_inference/01_text_to_speech/gpu_worker.py +++ b/02_ml_inference/01_text_to_speech/gpu_worker.py @@ -1,6 +1,5 @@ # Qwen3-TTS text-to-speech GPU worker. -# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, GpuGroup diff --git a/03_advanced_workers/05_load_balancer/README.md b/03_advanced_workers/05_load_balancer/README.md index 2c6eadc..45d8043 100644 --- a/03_advanced_workers/05_load_balancer/README.md +++ b/03_advanced_workers/05_load_balancer/README.md @@ -40,14 +40,14 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`. ### 3. Run Locally (from repository root) ```bash -uv run flash run +uv run flash dev ``` Visit **http://localhost:8888/docs** for interactive API documentation (unified app with all examples). ### 4. 
Test Endpoints (via unified app) -When using `flash run` from the repository root, routes are prefixed with the example name: +When using `flash dev` from the repository root, routes are prefixed with the example name: **GPU Service (Compute)**: ```bash @@ -256,14 +256,10 @@ Response: } ``` -## Testing Workers Locally +## Testing Locally ```bash -# Test GPU worker -python gpu_lb.py - -# Test CPU worker -python cpu_lb.py +flash dev ``` ## Deployment diff --git a/03_advanced_workers/05_load_balancer/cpu_lb.py b/03_advanced_workers/05_load_balancer/cpu_lb.py index 08a9105..4317dcb 100644 --- a/03_advanced_workers/05_load_balancer/cpu_lb.py +++ b/03_advanced_workers/05_load_balancer/cpu_lb.py @@ -1,6 +1,5 @@ # cpu load-balanced endpoints with custom HTTP routes. -# run with: flash run -# test directly: python cpu_lb.py +# run with: flash dev from runpod_flash import Endpoint api = Endpoint( diff --git a/03_advanced_workers/05_load_balancer/gpu_lb.py b/03_advanced_workers/05_load_balancer/gpu_lb.py index 2637bef..38ad679 100644 --- a/03_advanced_workers/05_load_balancer/gpu_lb.py +++ b/03_advanced_workers/05_load_balancer/gpu_lb.py @@ -1,6 +1,5 @@ # gpu load-balanced endpoints with custom HTTP routes. -# run with: flash run -# test directly: python gpu_lb.py +# run with: flash dev from runpod_flash import Endpoint, GpuType api = Endpoint( diff --git a/04_scaling_performance/01_autoscaling/README.md b/04_scaling_performance/01_autoscaling/README.md index 0e02e67..983a18c 100644 --- a/04_scaling_performance/01_autoscaling/README.md +++ b/04_scaling_performance/01_autoscaling/README.md @@ -8,7 +8,7 @@ Configure Flash worker autoscaling for different workload patterns. This example ```bash cd 04_scaling_performance/01_autoscaling -flash run +flash dev ``` Server starts at http://localhost:8888 -- visit http://localhost:8888/docs for interactive API docs. 
diff --git a/04_scaling_performance/01_autoscaling/cpu_worker.py b/04_scaling_performance/01_autoscaling/cpu_worker.py index 6660ea3..05d23c3 100644 --- a/04_scaling_performance/01_autoscaling/cpu_worker.py +++ b/04_scaling_performance/01_autoscaling/cpu_worker.py @@ -1,6 +1,5 @@ # cpu autoscaling strategies -- scale-to-zero and burst-ready. -# run with: flash run -# test directly: python cpu_worker.py +# run with: flash dev from runpod_flash import CpuInstanceType, Endpoint diff --git a/04_scaling_performance/01_autoscaling/gpu_worker.py b/04_scaling_performance/01_autoscaling/gpu_worker.py index 2d12fb0..8df93a7 100644 --- a/04_scaling_performance/01_autoscaling/gpu_worker.py +++ b/04_scaling_performance/01_autoscaling/gpu_worker.py @@ -1,6 +1,5 @@ # gpu autoscaling strategies -- scale-to-zero, always-on, high-throughput. -# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, GpuType, ServerlessScalerType diff --git a/04_scaling_performance/02_datacenters/README.md b/04_scaling_performance/02_datacenters/README.md index 2e3573b..8029c02 100644 --- a/04_scaling_performance/02_datacenters/README.md +++ b/04_scaling_performance/02_datacenters/README.md @@ -10,7 +10,7 @@ By default, endpoints deploy across all available data centers. The `datacenter` ```bash pip install -r requirements.txt -flash run +flash dev ``` ## What You'll Learn diff --git a/04_scaling_performance/02_datacenters/cpu_worker.py b/04_scaling_performance/02_datacenters/cpu_worker.py index 142a3de..6637e45 100644 --- a/04_scaling_performance/02_datacenters/cpu_worker.py +++ b/04_scaling_performance/02_datacenters/cpu_worker.py @@ -1,7 +1,7 @@ # cpu worker pinned to a cpu-supported datacenter. # cpu endpoints are only available in a subset of datacenters # (see CPU_DATACENTERS). selecting an unsupported DC raises an error. 
-# run with: flash run +# run with: flash dev from runpod_flash import Endpoint, DataCenter api = Endpoint( diff --git a/04_scaling_performance/02_datacenters/gpu_worker.py b/04_scaling_performance/02_datacenters/gpu_worker.py index cdacfd4..bfa70a5 100644 --- a/04_scaling_performance/02_datacenters/gpu_worker.py +++ b/04_scaling_performance/02_datacenters/gpu_worker.py @@ -1,5 +1,5 @@ # gpu workers pinned to specific datacenters. -# run with: flash run +# run with: flash dev from runpod_flash import Endpoint, GpuGroup, DataCenter diff --git a/05_data_workflows/01_network_volumes/README.md b/05_data_workflows/01_network_volumes/README.md index bd9cf24..6c0314f 100644 --- a/05_data_workflows/01_network_volumes/README.md +++ b/05_data_workflows/01_network_volumes/README.md @@ -25,7 +25,7 @@ Or create a `.env` file with `RUNPOD_API_KEY=your_api_key_here`. ### 3. Run Locally ```bash -uv run flash run +uv run flash dev ``` Server starts at `http://localhost:8888` diff --git a/05_data_workflows/01_network_volumes/cpu_worker.py b/05_data_workflows/01_network_volumes/cpu_worker.py index d9e0408..0c9b0b8 100644 --- a/05_data_workflows/01_network_volumes/cpu_worker.py +++ b/05_data_workflows/01_network_volumes/cpu_worker.py @@ -1,6 +1,5 @@ # cpu worker with network volume for listing and serving generated images. -# run with: flash run -# test directly: python cpu_worker.py +# run with: flash dev from runpod_flash import Endpoint, DataCenter, NetworkVolume # same volume as gpu_worker.py -- must match name and datacenter diff --git a/05_data_workflows/01_network_volumes/gpu_worker.py b/05_data_workflows/01_network_volumes/gpu_worker.py index 34fb339..e4a8ce3 100644 --- a/05_data_workflows/01_network_volumes/gpu_worker.py +++ b/05_data_workflows/01_network_volumes/gpu_worker.py @@ -1,6 +1,5 @@ # gpu worker with network volume for Stable Diffusion image generation. 
-# run with: flash run -# test directly: python gpu_worker.py +# run with: flash dev import logging from runpod_flash import Endpoint, GpuType, DataCenter, NetworkVolume diff --git a/06_real_world/README.md b/06_real_world/README.md index e640184..ee3739c 100644 --- a/06_real_world/README.md +++ b/06_real_world/README.md @@ -118,7 +118,7 @@ All real-world examples include: ### Development ```bash cd example_name -flash run +flash dev ``` ### Production diff --git a/CLAUDE.md b/CLAUDE.md index b3e33be..f87f72f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ ## Project Overview -Production-ready examples demonstrating Flash framework capabilities. Flat-file pattern: each worker is a standalone `.py` file with `@Endpoint` decorator, auto-discovered by `flash run`. 6 categories, 18 worker files. Root `pyproject.toml` declares only `runpod-flash` dependency; runtime deps declared inline via `Endpoint(dependencies=[...])`. +Production-ready examples demonstrating Flash framework capabilities. Flat-file pattern: each worker is a standalone `.py` file with `@Endpoint` decorator, auto-discovered by `flash dev`. 6 categories, 18 worker files. Root `pyproject.toml` declares only `runpod-flash` dependency; runtime deps declared inline via `Endpoint(dependencies=[...])`. ## Architecture @@ -14,12 +14,12 @@ Production-ready examples demonstrating Flash framework capabilities. Flat-file 2. **Endpoint routes (LB)** -- Load-balanced pattern. `api = Endpoint(...)` with `@api.get()/@api.post()` route decorators for HTTP endpoints. 3. **@Endpoint decorator (class)** -- Used on `SimpleSD` class (`05_data_workflows`). Class-based pattern for stateful workers. 4. **Cross-worker orchestration** -- Pipeline files import from QB workers, chain with `await`. LB endpoint orchestrates QB workers. -5. **Flat-file discovery** -- No FastAPI boilerplate, no routers, no `main.py`. `flash run` auto-generates routes from decorated functions. +5. 
**Flat-file discovery** -- No FastAPI boilerplate, no routers, no `main.py`. `flash dev` auto-generates routes from decorated functions. 6. **In-function imports** -- Heavy libs (torch, transformers, etc.) imported inside `@Endpoint` body, only `runpod_flash` at module level. ### Entry Points -All worker files across 6 categories. Each file is an independent entry point discovered by `flash run`. +All worker files across 6 categories. Each file is an independent entry point discovered by `flash dev`. ### Module Structure @@ -136,7 +136,7 @@ flash-examples --> flash (runpod_flash) --> runpod-python (runpod) ### Known Drift -- No automated tests -- changes caught only at import time or `flash run` +- No automated tests -- changes caught only at import time or `flash dev` - No CI that validates examples against current flash version - Python version: inherits from flash (3.10+) @@ -152,9 +152,8 @@ uv sync --all-groups ### Testing ```bash -flash run # Start local dev server (localhost:8888) +flash dev # Start local dev server (localhost:8888) # Visit http://localhost:8888/docs for interactive API docs -python gpu_worker.py # Test a single worker directly (if __name__ == "__main__" block) ``` ### Quality @@ -202,14 +201,14 @@ No formal test infrastructure exists. Each worker has an optional `if __name__ = - **100% uncovered** -- no test framework, no conftest, no pytest config - No smoke tests that verify examples import successfully -- No integration tests that run `flash run` against examples +- No integration tests that run `flash dev` against examples ### Patterns To test manually: ```bash cd 01_getting_started/01_hello_world -flash run # Starts dev server, auto-discovers workers +flash dev # Starts dev server, auto-discovers workers # Use http://localhost:8888/docs to invoke endpoints ``` @@ -217,7 +216,7 @@ flash run # Starts dev server, auto-discovers workers 1. Add `tests/test_imports.py` that imports every worker file (catches `Endpoint` signature drift) 2. 
Add `tests/test_configs.py` that validates all resource configs construct without error -3. Add CI job that runs `flash run --check` (dry-run mode) against each example category +3. Add CI job that runs `flash dev --check` (dry-run mode) against each example category ## Common Mistakes diff --git a/CLI-REFERENCE.md b/CLI-REFERENCE.md index 5653416..5b87462 100644 --- a/CLI-REFERENCE.md +++ b/CLI-REFERENCE.md @@ -23,7 +23,7 @@ flash --help # Show help for specific command |---------|---------| | [`flash login`](#flash-login) | Authenticate with Runpod | | [`flash init`](#flash-init) | Create new Flash project | -| [`flash run`](#flash-run) | Run development server | +| [`flash dev`](#flash-dev) | Run development server | | [`flash build`](#flash-build) | Build application package | | [`flash deploy`](#flash-deploy) | Build and deploy application | | [`flash undeploy`](#flash-undeploy) | Delete deployed endpoints | @@ -101,7 +101,7 @@ Get your API key from [Runpod Settings](https://www.runpod.io/console/user/setti ### Related Commands -- [`flash run`](#flash-run) - Run development server (requires authentication) +- [`flash dev`](#flash-dev) - Run development server (requires authentication) - [`flash deploy`](#flash-deploy) - Deploy to Runpod (requires authentication) --- @@ -156,19 +156,19 @@ flash init my-api --force ### Related Commands -- [`flash run`](#flash-run) - Run the initialized project locally +- [`flash dev`](#flash-dev) - Run the initialized project locally - [Getting Started Guide](docs/cli/getting-started.md) - Full tutorial --- -## flash run +## flash dev Run the Flash development server locally with hot reloading. 
### Syntax ```bash -flash run [OPTIONS] +flash dev [OPTIONS] ``` ### Options @@ -189,26 +189,26 @@ flash run [OPTIONS] **Basic local development:** ```bash -flash run +flash dev # Server runs at http://localhost:8888 # Visit http://localhost:8888/docs for Swagger UI ``` **Custom host and port:** ```bash -flash run --host 0.0.0.0 --port 3000 +flash dev --host 0.0.0.0 --port 3000 # Accessible from network at http://:3000 ``` **Disable auto-reload:** ```bash -flash run --no-reload +flash dev --no-reload # Useful for debugging or production-like testing ``` **Auto-provision resources:** ```bash -flash run --auto-provision +flash dev --auto-provision # Automatically creates Runpod endpoints on startup ``` @@ -216,7 +216,7 @@ flash run --auto-provision ```bash export FLASH_HOST=0.0.0.0 export FLASH_PORT=9000 -flash run +flash dev ``` ### What It Does diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 647529a..1af25bb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,13 +40,13 @@ Add examples that demonstrate: All examples must meet these standards: ### 1. Functional Requirements -- [ ] Runs successfully with `flash run` +- [ ] Runs successfully with `flash dev` - [ ] All endpoints return correct responses - [ ] Error handling is implemented - [ ] Environment variables are documented - [ ] Dependencies are declared in pyproject.toml - [ ] Runtime deps declared in `Endpoint(dependencies=[...])` -- [ ] Example discovered by `flash run` from project root +- [ ] Example discovered by `flash dev` from project root ### 2. Code Quality - [ ] Clear, readable code @@ -126,17 +126,17 @@ Follow the [standard example structure](#example-structure). ```bash cd your_category/your_example -flash run +flash dev # Test all endpoints ``` ### 5. Verify Discovery -`flash run` auto-discovers all `.py` files containing `@Endpoint` functions. Verify your example loads: +`flash dev` auto-discovers all `.py` files containing `@Endpoint` functions. 
Verify your example loads: ```bash # From the repository root -flash run +flash dev # Check http://localhost:8888/docs for your new endpoints ``` @@ -177,7 +177,7 @@ your_example/ └── architecture.png ``` -`flash run` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories, no router wiring. +`flash dev` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories, no router wiring. ### Minimal Worker (`gpu_worker.py`) @@ -286,7 +286,7 @@ Test your example thoroughly: ```bash # Run the application -flash run +flash dev # Test health endpoint curl http://localhost:8888/health diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 984e29a..0963730 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -165,16 +165,16 @@ Run commands with the package manager prefix without activation: ```bash # With uv -uv run flash run +uv run flash dev # With poetry -poetry run flash run +poetry run flash dev # With pipenv -pipenv run flash run +pipenv run flash dev # With conda -conda run -p ./.venv flash run +conda run -p ./.venv flash dev ``` **Option B: Activate Virtual Environment (works with all managers)** @@ -189,7 +189,7 @@ source .venv/bin/activate .venv\Scripts\activate # Then run normally -flash run +flash dev ``` Once activated, you can run Flash and other commands directly without a prefix. @@ -268,9 +268,9 @@ Or set `RUNPOD_API_KEY` in your `.env` file. 
```bash # Run development server -flash run # Default: localhost:8888 -flash run --port 9000 # Custom port -flash run --host 0.0.0.0 # Network accessible +flash dev # Default: localhost:8888 +flash dev --port 9000 # Custom port +flash dev --host 0.0.0.0 # Network accessible # Build deployment package flash build # Standard build @@ -511,7 +511,7 @@ cd 01_getting_started/05_new_example touch README.md gpu_worker.py pyproject.toml ``` -Each worker file (named `*_worker.py` by convention) is self-contained with `@Endpoint` decorated functions. `flash run` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories needed. +Each worker file (named `*_worker.py` by convention) is self-contained with `@Endpoint` decorated functions. `flash dev` discovers all `.py` files with `@Endpoint` functions automatically -- no `main.py`, no `workers/` directories needed. **3. Declare dependencies:** @@ -521,7 +521,7 @@ Add a `pyproject.toml` with `runpod-flash` as the only local dependency. Runtime ```bash cd ../../ # Back to root -flash run # Discovers all .py files with @Endpoint functions +flash dev # Discovers all .py files with @Endpoint functions ``` ### Cleaning Up @@ -544,7 +544,7 @@ make setup ## Unified App Architecture -The root directory provides a programmatic discovery system that automatically finds and loads all examples when you run `flash run` from the project root. +The root directory provides a programmatic discovery system that automatically finds and loads all examples when you run `flash dev` from the project root. 
### Discovery Process diff --git a/README.md b/README.md index c73bcdf..487ce16 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ async def generate_image(prompt: str) -> bytes: **Key features:** - **`@Endpoint` decorator**: Mark any async function to run on serverless infrastructure - **Auto-scaling**: Scale to zero when idle, scale up under load -- **Local development**: `flash run` starts a local server with hot reload +- **Local development**: `flash dev` starts a local server with hot reload - **One-command deploy**: `flash deploy` packages and ships your code ## Prerequisites @@ -46,7 +46,7 @@ uv sync && uv pip install -e . uv run flash login # Run all examples locally -uv run flash run +uv run flash dev ``` Open **http://localhost:8888/docs** to explore all endpoints. @@ -72,7 +72,7 @@ More examples coming soon in each category. ```bash flash login # Authenticate with Runpod (opens browser) -flash run # Run development server (localhost:8888) +flash dev # Run development server (localhost:8888) flash build # Build deployment package flash deploy --env # Build and deploy to environment flash undeploy # Delete deployed endpoint diff --git a/docs/cli/commands.md b/docs/cli/commands.md index 62ee5c7..ab86903 100644 --- a/docs/cli/commands.md +++ b/docs/cli/commands.md @@ -6,7 +6,7 @@ Exhaustive documentation for all Flash CLI commands. This guide covers every opt - [flash login](#flash-login) - Authenticate with Runpod - [flash init](#flash-init) - Create new Flash project -- [flash run](#flash-run) - Run development server +- [flash dev](#flash-dev) - Run development server - [flash build](#flash-build) - Build deployment package - [flash deploy](#flash-deploy) - Build and deploy application - [flash undeploy](#flash-undeploy) - Delete deployed endpoints @@ -225,7 +225,7 @@ The generated `.gitignore` already includes necessary patterns. 3. **Run Locally** ```bash - uv run flash run + uv run flash dev ``` 4. 
**View Documentation** @@ -267,7 +267,7 @@ flash init my-api ### Related Commands -- [`flash run`](#flash-run) - Run the initialized project +- [`flash dev`](#flash-dev) - Run the initialized project - [`flash build`](#flash-build) - Build the project - [`flash deploy`](#flash-deploy) - Deploy the project @@ -278,14 +278,14 @@ flash init my-api --- -## flash run +## flash dev Run the Flash development server locally with hot reloading for rapid development and testing. ### Synopsis ```bash -flash run [OPTIONS] +flash dev [OPTIONS] ``` ### Description @@ -294,7 +294,7 @@ Starts a local uvicorn development server that runs your Flash application. The ### Architecture: Hybrid Local + Cloud -With `flash run`, your system operates in a **hybrid architecture**: +With `flash dev`, your system operates in a **hybrid architecture**: - **Your FastAPI app runs locally** on your machine (localhost:8888) - **`@Endpoint` functions run on Runpod** as serverless endpoints @@ -350,7 +350,7 @@ RUNPOD_API_KEY=your-key-here #### Basic Development Server ```bash -flash run +flash dev ``` Output: @@ -367,7 +367,7 @@ Visit `http://localhost:8888/docs` for interactive API documentation. #### Custom Host and Port ```bash -flash run --host 0.0.0.0 --port 3000 +flash dev --host 0.0.0.0 --port 3000 ``` Makes server accessible from network at `http://:3000`. Useful for: @@ -378,7 +378,7 @@ Makes server accessible from network at `http://:3000`. 
Useful for: #### Disable Auto-Reload ```bash -flash run --no-reload +flash dev --no-reload ``` Useful for: @@ -391,7 +391,7 @@ Useful for: ```bash export FLASH_HOST=0.0.0.0 export FLASH_PORT=9000 -flash run +flash dev ``` Or with `.env` file: @@ -402,14 +402,14 @@ FLASH_PORT=9000 ``` ```bash -flash run +flash dev # Automatically loads .env ``` #### Auto-Provision Resources ```bash -flash run --auto-provision +flash dev --auto-provision ``` This will: @@ -423,7 +423,7 @@ This will: #### Combine Options ```bash -flash run --host 0.0.0.0 --port 9000 --no-reload +flash dev --host 0.0.0.0 --port 9000 --no-reload ``` Runs on all network interfaces, port 9000, without auto-reload. @@ -431,7 +431,7 @@ Runs on all network interfaces, port 9000, without auto-reload. #### Development with External Access ```bash -flash run --host 0.0.0.0 +flash dev --host 0.0.0.0 # Server accessible at http://192.168.1.100:8888 (your local IP) ``` @@ -501,7 +501,7 @@ When `--reload` is enabled (default): **Example workflow:** ```bash # Terminal 1: Run server -flash run +flash dev # Terminal 2: Edit code echo 'print("Updated!")' >> gpu_worker.py @@ -520,7 +520,7 @@ echo 'print("Updated!")' >> gpu_worker.py **For production-like testing:** ```bash -flash run --no-reload +flash dev --no-reload # More representative of deployed performance ``` @@ -536,7 +536,7 @@ ERROR: [Errno 48] Address already in use Solutions: ```bash # Option 1: Use different port -flash run --port 9000 +flash dev --port 9000 # Option 2: Kill process using port lsof -ti:8888 | xargs kill -9 @@ -576,7 +576,7 @@ Solutions: echo "large_files/" >> .gitignore # Or disable reload temporarily -flash run --no-reload +flash dev --no-reload ``` **Cannot Access from Network** @@ -586,7 +586,7 @@ Problem: Cannot reach server from other devices. 
Solutions: ```bash # Use 0.0.0.0 instead of localhost -flash run --host 0.0.0.0 +flash dev --host 0.0.0.0 # Check firewall settings # macOS: System Preferences → Security & Privacy → Firewall @@ -603,7 +603,7 @@ Error: RUNPOD_API_KEY environment variable not set Solution: ```bash export RUNPOD_API_KEY=your-key-here -flash run --auto-provision +flash dev --auto-provision ``` ### Related Commands @@ -1012,7 +1012,7 @@ flash build --exclude problematic-package ### Related Commands - [`flash deploy`](#flash-deploy) - Build and deploy in one step -- [`flash run`](#flash-run) - Test before building +- [`flash dev`](#flash-dev) - Test before building ### Related Workflows @@ -1047,7 +1047,7 @@ With `flash deploy`, your **entire application** runs on Runpod Serverless: - **No `live-` prefix** on endpoint names—these are production endpoints - **No hot reload**—code changes require a new deployment -This is different from `flash run`, where your FastAPI app runs locally on your machine. With `flash deploy`, everything is in the cloud for production use. +This is different from `flash dev`, where your FastAPI app runs locally on your machine. With `flash deploy`, everything is in the cloud for production use. 
### Options @@ -1415,7 +1415,7 @@ flash deploy --env production - [`flash build`](#flash-build) - Build without deploying - [`flash env`](#flash-env) - Manage environments - [`flash undeploy`](#flash-undeploy) - Delete deployments -- [`flash run`](#flash-run) - Test locally before deploying +- [`flash dev`](#flash-dev) - Test locally before deploying ### Related Workflows diff --git a/docs/cli/getting-started.md b/docs/cli/getting-started.md index e32766d..29f2b8f 100644 --- a/docs/cli/getting-started.md +++ b/docs/cli/getting-started.md @@ -92,7 +92,7 @@ async def process_request(payload: dict) -> dict: Start the development server: ```bash -uv run flash run +uv run flash dev ``` **Expected output:** @@ -104,7 +104,7 @@ INFO: Application startup complete **Checkpoint:** Server is running at http://localhost:8888 -**What's happening:** Your FastAPI app runs locally on your machine, but when you call an `@Endpoint` function, it executes on Runpod Serverless. This hybrid architecture gives you hot-reload for rapid development while testing real GPU/CPU workloads in the cloud. Endpoints created during `flash run` are prefixed with `live-` to keep them separate from production. +**What's happening:** Your FastAPI app runs locally on your machine, but when you call an `@Endpoint` function, it executes on Runpod Serverless. This hybrid architecture gives you hot-reload for rapid development while testing real GPU/CPU workloads in the cloud. Endpoints created during `flash dev` are prefixed with `live-` to keep them separate from production. --- @@ -275,7 +275,7 @@ curl -X POST https://abcd1234-hello-flash-gpu.runpod.io/run \ In the past 10 minutes, you've: 1. ✅ Created a Flash project with `flash init` -2. ✅ Run a development server with `flash run` +2. ✅ Run a development server with `flash dev` 3. ✅ Tested locally via Swagger UI 4. ✅ Created a deployment environment 5. 
✅ Built a deployment package with `flash build` @@ -294,7 +294,7 @@ The flash-examples repository contains production-ready examples: git clone https://github.com/runpod/flash-examples.git cd flash-examples uv sync && uv pip install -e . -uv run flash run +uv run flash dev # Visit http://localhost:8888/docs to explore all examples ``` @@ -309,7 +309,7 @@ uv run flash run ### Learn More Commands **Development:** -- [flash run options](../CLI-REFERENCE.md#flash-run) - Custom host, port, auto-reload +- [flash dev options](../CLI-REFERENCE.md#flash-dev) - Custom host, port, auto-reload - [flash build options](../CLI-REFERENCE.md#flash-build) - Size optimization, custom names **Deployment:** @@ -350,7 +350,7 @@ uv run flash --version **Solution:** ```bash # Use different port -uv run flash run --port 9000 +uv run flash dev --port 9000 # Or find and kill process using port 8888 lsof -ti:8888 | xargs kill -9 @@ -386,7 +386,7 @@ See [Troubleshooting Guide](troubleshooting.md) for more solutions. | Command | Purpose | |---------|---------| | `uv run flash init ` | Create new project | -| `uv run flash run` | Run development server | +| `uv run flash dev` | Run development server | | `uv run flash build` | Build deployment package | | `uv run flash deploy --env ` | Deploy to environment | | `uv run flash env create ` | Create environment | diff --git a/docs/cli/troubleshooting.md b/docs/cli/troubleshooting.md index 839b97d..0c2c430 100644 --- a/docs/cli/troubleshooting.md +++ b/docs/cli/troubleshooting.md @@ -6,7 +6,7 @@ Solutions to common Flash CLI problems organized by command and error type.
- [Installation Issues](#installation-issues) - [flash init Problems](#flash-init-problems) -- [flash run Issues](#flash-run-issues) +- [flash dev Issues](#flash-dev-issues) - [flash build Failures](#flash-build-failures) - [flash deploy Errors](#flash-deploy-errors) - [Environment Management](#environment-management) @@ -90,7 +90,7 @@ python --version # Should be 3.10+ **Symptoms:** ```bash -$ flash run +$ flash dev Traceback (most recent call last): ImportError: cannot import name 'remote' from 'runpod_flash' ``` @@ -220,7 +220,7 @@ flash init my-api --- -## flash run Issues +## flash dev Issues ### Port Already in Use @@ -228,7 +228,7 @@ flash init my-api **Symptoms:** ```bash -$ flash run +$ flash dev ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8888): address already in use ``` @@ -236,7 +236,7 @@ ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8888): **1. Use different port:** ```bash -flash run --port 9000 +flash dev --port 9000 ``` **2. Find and kill process using port:** @@ -255,11 +255,11 @@ kill **3. Use environment variable:** ```bash export FLASH_PORT=9000 -flash run +flash dev ``` **References:** -- [flash run command](commands.md#flash-run) +- [flash dev command](commands.md#flash-dev) - [Workflows: Local Development](workflows.md#local-development-workflow) ### Module Not Found Error @@ -268,7 +268,7 @@ flash run **Symptoms:** ```bash -$ flash run +$ flash dev ModuleNotFoundError: No module named 'fastapi' ``` @@ -320,14 +320,14 @@ dependencies = [ **1. Check reload is enabled:** ```bash # Reload is default, but verify: -flash run # Should show "StatReload" in output +flash dev # Should show "StatReload" in output ``` **2. Manually restart:** ```bash # Press Ctrl+C to stop # Run again -flash run +flash dev ``` **3. Check file watching:** @@ -342,14 +342,14 @@ mv ignored_dir/worker.py workers/worker.py **4. 
Disable and re-enable reload:** ```bash # Try without reload -flash run --no-reload +flash dev --no-reload # Then with reload -flash run +flash dev ``` **References:** -- [flash run command](commands.md#flash-run) +- [flash dev command](commands.md#flash-dev) ### Cannot Access from Network @@ -363,7 +363,7 @@ flash run **1. Bind to 0.0.0.0:** ```bash -flash run --host 0.0.0.0 +flash dev --host 0.0.0.0 ``` **2. Check firewall:** @@ -391,7 +391,7 @@ ip addr show ``` **References:** -- [flash run command](commands.md#flash-run) +- [flash dev command](commands.md#flash-dev) --- @@ -1271,7 +1271,7 @@ When troubleshooting any issue: - [ ] No permission issues (`ls -la`) - [ ] Recent Flash version (`pip install --upgrade runpod-flash`) - [ ] Checked logs and error messages -- [ ] Tested locally first (`flash run`) +- [ ] Tested locally first (`flash dev`) - [ ] Reviewed documentation --- @@ -1301,7 +1301,7 @@ git checkout main # 6. Fix issue properly # ... make changes ... -flash run # Test locally +flash dev # Test locally flash deploy --env staging # Test in staging flash deploy --env production # Redeploy to production ``` @@ -1342,7 +1342,7 @@ pip install runpod-flash pip install -e . # 6. Test locally -flash run +flash dev # 7. Redeploy flash env create production @@ -1357,7 +1357,7 @@ flash deploy --env production 1. 
**Always test locally first:** ```bash - flash run + flash dev # Test all endpoints ``` @@ -1417,7 +1417,7 @@ flash deploy --env production | Issue | Quick Fix | |-------|-----------| | Command not found | `pip install runpod-flash` | -| Port in use | `flash run --port 9000` | +| Port in use | `flash dev --port 9000` | | Build too large | `flash build --exclude torch,torchvision` | | Missing API key | `export RUNPOD_API_KEY=your-key` | | Environment not found | `flash env create ` | diff --git a/docs/cli/workflows.md b/docs/cli/workflows.md index b008ce5..74b216e 100644 --- a/docs/cli/workflows.md +++ b/docs/cli/workflows.md @@ -85,7 +85,7 @@ FLASH_PORT=8888 #### 4. Start Development Server ```bash -flash run +flash dev ``` **Expected output:** @@ -151,7 +151,7 @@ Repeat steps 5-7: **Use Multiple Terminals:** ```bash # Terminal 1: Run server -flash run +flash dev # Terminal 2: Test with curl curl http://localhost:8888/process ... @@ -162,18 +162,18 @@ tail -f logs/application.log **Disable Hot Reload for Debugging:** ```bash -flash run --no-reload +flash dev --no-reload # Easier to attach debugger ``` **Change Port if Conflict:** ```bash -flash run --port 9000 +flash dev --port 9000 ``` **Access from Network:** ```bash -flash run --host 0.0.0.0 +flash dev --host 0.0.0.0 # Access from mobile devices or other machines ``` @@ -182,7 +182,7 @@ flash run --host 0.0.0.0 **Port in use:** ```bash # Solution: Use different port -flash run --port 9000 +flash dev --port 9000 ``` **Import errors:** @@ -194,11 +194,11 @@ pip install -e . 
**Changes not reflecting:** ```bash # Solution: Check hot reload is enabled -flash run # --reload is default +flash dev # --reload is default # Or manually restart # Ctrl+C to stop -flash run +flash dev ``` ### Next Step @@ -217,7 +217,7 @@ Deploy a tested application to Runpod with proper environment isolation and vali ### Prerequisites -- Application tested locally (`flash run`) +- Application tested locally (`flash dev`) - Runpod API key configured (`RUNPOD_API_KEY`) - Environment created (or will create in workflow) @@ -228,7 +228,7 @@ Deploy a tested application to Runpod with proper environment isolation and vali Before deploying, ensure everything works locally: ```bash -flash run +flash dev # Test all endpoints via http://localhost:8888/docs # Ctrl+C when done ``` @@ -401,7 +401,7 @@ To deploy code changes: ```bash # Make changes to code # Test locally -flash run +flash dev # Redeploy (same environment) flash deploy --env production @@ -956,7 +956,7 @@ jobs: Before production deployment: **Local Testing:** -- [ ] `flash run` works without errors +- [ ] `flash dev` works without errors - [ ] All endpoints tested via Swagger UI - [ ] Edge cases handled correctly - [ ] Error handling tested @@ -1606,7 +1606,7 @@ async def infer(payload: dict) -> dict: **3. Test incrementally:** ```bash # Local -flash run # Does it work? +flash dev # Does it work? # Preview flash deploy --preview # Does build work? @@ -1655,7 +1655,7 @@ flash env get production Avoid deployment issues: -- [ ] Test locally with `flash run` +- [ ] Test locally with `flash dev` - [ ] Test build with `flash build` - [ ] Test preview with `flash deploy --preview` - [ ] Deploy to staging first