diff --git a/apps/computer-vision/app/pose_estimation/index.tsx b/apps/computer-vision/app/pose_estimation/index.tsx index 4546b628fa..8e16693f6a 100644 --- a/apps/computer-vision/app/pose_estimation/index.tsx +++ b/apps/computer-vision/app/pose_estimation/index.tsx @@ -1,10 +1,12 @@ import Spinner from '../../components/Spinner'; import { BottomBar } from '../../components/BottomBar'; +import { ModelPicker, ModelOption } from '../../components/ModelPicker'; import { getImage } from '../../utils'; import { models, usePoseEstimation, PoseDetections, + PoseEstimationModelSources, RnExecutorchError, RnExecutorchErrorCode, } from 'react-native-executorch'; @@ -17,6 +19,16 @@ import Svg, { Circle, Line } from 'react-native-svg'; import ErrorBanner from '../../components/ErrorBanner'; import { COCO_SKELETON_CONNECTIONS } from '../../components/utils/cocoSkeleton'; +const poseEstimation = models.pose_estimation; + +const MODELS: ModelOption[] = [ + { label: 'YOLO26N Pose', value: poseEstimation.yolo26n() }, + { + label: 'RF-DETR Keypoint (beta)', + value: poseEstimation.rfdetr_keypoint_preview(), + }, +]; + // Colors for different people const PERSON_COLORS = ['lime', 'cyan', 'magenta', 'yellow', 'orange', 'pink']; @@ -30,8 +42,10 @@ export default function PoseEstimationScreen() { }>(); const [inferenceTime, setInferenceTime] = useState(null); const [layout, setLayout] = useState({ width: 0, height: 0 }); + const [selectedModel, setSelectedModel] = + useState(poseEstimation.yolo26n()); - const model = usePoseEstimation({ model: models.pose_estimation.yolo26n() }); + const model = usePoseEstimation({ model: selectedModel }); const { setGlobalGenerating } = useContext(GeneratingContext); useEffect(() => { @@ -60,7 +74,7 @@ export default function PoseEstimationScreen() { if (imageUri) { try { const start = Date.now(); - const output = await model.forward(imageUri, { inputSize: 384 }); + const output = await model.forward(imageUri); setInferenceTime(Date.now() - start); 
setResults(output); } catch (e) { @@ -206,6 +220,16 @@ export default function PoseEstimationScreen() { )} + { + setSelectedModel(m); + setResults([]); + setInferenceTime(null); + }} + /> 0 ? results.length : null} diff --git a/docs/docs/03-hooks/02-computer-vision/usePoseEstimation.md b/docs/docs/03-hooks/02-computer-vision/usePoseEstimation.md index 465a0ab6a6..eb3d7a5368 100644 --- a/docs/docs/03-hooks/02-computer-vision/usePoseEstimation.md +++ b/docs/docs/03-hooks/02-computer-vision/usePoseEstimation.md @@ -129,10 +129,15 @@ See the full guide: [VisionCamera Integration](./visioncamera-integration.md). ## Supported models -| Model | Number of keypoints | Keypoint list | Multi-size Support | -| ------------------------------------------------------------------------------------------- | ------------------- | ----------------------------------------------------------- | ------------------ | -| [YOLO26N-Pose](https://huggingface.co/software-mansion/react-native-executorch-yolo26-pose) | 17 | [COCO](../../06-api-reference/enumerations/CocoKeypoint.md) | Yes (384/512/640) | +| Model | Number of keypoints | Keypoint list | Multi-size Support | +| ------------------------------------------------------------------------------------------------------------- | ------------------- | ----------------------------------------------------------- | ------------------ | +| [YOLO26N-Pose](https://huggingface.co/software-mansion/react-native-executorch-yolo26-pose) | 17 | [COCO](../../06-api-reference/enumerations/CocoKeypoint.md) | Yes (384/512/640) | +| [RF-DETR Keypoint (preview)](https://huggingface.co/software-mansion/react-native-executorch-rfdetr-keypoint) | 17 | [COCO](../../06-api-reference/enumerations/CocoKeypoint.md) | No | :::tip YOLO models support multiple input sizes (384px, 512px, 640px). Smaller sizes are faster but less accurate, while larger sizes are more accurate but slower. Choose based on your speed/accuracy requirements. 
::: + +:::warning +`rfdetr_keypoint_preview` is a **preview weights** export and may be re-exported under a different constant once a stable version ships. It is a single-input-size model (no `inputSize` option) and ships `xnnpack`, `coreml`, and `mlx` backends — pass `{ backend }` to override the platform default, e.g. `models.pose_estimation.rfdetr_keypoint_preview({ backend: 'mlx' })`. +::: diff --git a/docs/docs/05-utilities/model-registry.md b/docs/docs/05-utilities/model-registry.md index 3611731235..1f135ea43c 100644 --- a/docs/docs/05-utilities/model-registry.md +++ b/docs/docs/05-utilities/model-registry.md @@ -26,7 +26,7 @@ Each leaf is a **function**. Call it (optionally with `{ quant, backend }`) to g | `classification` | `efficientnet_v2_s` | | `privacy_filter` | `openai`, `nemotron` | | `object_detection` | `ssdlite_320_mobilenet_v3_large`, `yolo26n` … `yolo26x`, `rf_detr_nano` | -| `pose_estimation` | `yolo26n` | +| `pose_estimation` | `yolo26n`, `rfdetr_keypoint_preview` _(beta)_ | | `semantic_segmentation` | `deeplab_v3_resnet50`, `lraspp_mobilenet_v3_large`, `fcn_resnet101`, `selfie_segmentation`, … | | `instance_segmentation` | `yolo26n` … `yolo26x`, `rf_detr_nano`, `fastsam_s`, `fastsam_x` | | `style_transfer` | `candy`, `mosaic`, `rain_princess`, `udnie` | diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index ed44fa3a68..eb0c98dae7 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -328,6 +328,31 @@ const RF_DETR_NANO_SEG_VARIANTS = { }, }; +// RF-DETR Keypoint (pose estimation) — BETA preview. +// All three backends ship fp32 +// (non-quantized); this entry may be re-exported under a different constant +// once more RF-DETR keypoint weights are released. 
+const RF_DETR_KEYPOINT_PREVIEW_VARIANTS = { + xnnpack: { + base: { + modelName: 'rfdetr-keypoint-preview' as const, + modelSource: M.RF_DETR_KEYPOINT_PREVIEW_XNNPACK_FP32_MODEL, + }, + }, + coreml: { + base: { + modelName: 'rfdetr-keypoint-preview' as const, + modelSource: M.RF_DETR_KEYPOINT_PREVIEW_COREML_FP32_MODEL, + }, + }, + mlx: { + base: { + modelName: 'rfdetr-keypoint-preview' as const, + modelSource: M.RF_DETR_KEYPOINT_PREVIEW_MLX_FP32_MODEL, + }, + }, +}; + const FASTSAM_S_VARIANTS = { xnnpack: { base: { @@ -608,6 +633,9 @@ export const models = { }, pose_estimation: { yolo26n: base(M.YOLO26N_POSE), + // BETA preview — may be re-exported under a different constant once a + // stable RF-DETR keypoint model ships. + rfdetr_keypoint_preview: variant(RF_DETR_KEYPOINT_PREVIEW_VARIANTS), }, semantic_segmentation: { deeplab_v3_resnet50: pair( diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 2520d84520..0e36f812ff 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -702,6 +702,28 @@ export const YOLO26N_POSE = { modelSource: YOLO26N_POSE_MODEL, } as const; +// RF-DETR Keypoint (pose estimation) — BETA preview. +// NOTE: served from the `preview/` path under PREVIOUS_VERSION_TAG (shipping as +// part of a patch release). This export is a preview and may be re-exported +// under a different constant once a stable version ships. 
+export const RF_DETR_KEYPOINT_PREVIEW_XNNPACK_FP32_MODEL = `${URL_PREFIX}-rfdetr-keypoint/${PREVIOUS_VERSION_TAG}/preview/xnnpack/rfdetr_keypoint_preview_xnnpack_fp32.pte`; +export const RF_DETR_KEYPOINT_PREVIEW_COREML_FP32_MODEL = `${URL_PREFIX}-rfdetr-keypoint/${PREVIOUS_VERSION_TAG}/preview/coreml/rfdetr_keypoint_preview_coreml_fp32.pte`; +export const RF_DETR_KEYPOINT_PREVIEW_MLX_FP32_MODEL = `${URL_PREFIX}-rfdetr-keypoint/${PREVIOUS_VERSION_TAG}/preview/mlx/rfdetr_keypoint_preview_mlx_fp32.pte`; +const RF_DETR_KEYPOINT_PREVIEW_MODEL = + Platform.OS === 'ios' + ? RF_DETR_KEYPOINT_PREVIEW_COREML_FP32_MODEL + : RF_DETR_KEYPOINT_PREVIEW_XNNPACK_FP32_MODEL; + +/** + * @category Models - Pose Estimation + * @beta Preview export — may be re-exported under a different constant once a + * stable RF-DETR keypoint model ships. + */ +export const RF_DETR_KEYPOINT_PREVIEW = { + modelName: 'rfdetr-keypoint-preview', + modelSource: RF_DETR_KEYPOINT_PREVIEW_MODEL, +} as const; + // Style transfer /** * Builds the four `(backend, precision)` URLs for a single style-transfer style. diff --git a/packages/react-native-executorch/src/modules/computer_vision/PoseEstimationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/PoseEstimationModule.ts index ff2b68b1fd..34ddf45952 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/PoseEstimationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/PoseEstimationModule.ts @@ -29,8 +29,20 @@ const YOLO_POSE_CONFIG = { defaultKeypointThreshold: 0.5, } satisfies PoseEstimationConfig; +// RF-DETR keypoint preview (BETA). Unlike yolo26n-pose's multi-method +// `forward_<size>` export, this ships a single `forward` method — omitting +// availableInputSizes/defaultInputSize makes forward() dispatch to plain +// `forward`. May be renamed once a stable model ships. 
+const RFDETR_KEYPOINT_CONFIG = { + keypointMap: CocoKeypoint, + preprocessorConfig: undefined, + defaultDetectionThreshold: 0.5, + defaultKeypointThreshold: 0.5, +} satisfies PoseEstimationConfig; + const ModelConfigs = { 'yolo26n-pose': YOLO_POSE_CONFIG, + 'rfdetr-keypoint-preview': RFDETR_KEYPOINT_CONFIG, } as const satisfies Record< PoseEstimationModelName, PoseEstimationConfig diff --git a/packages/react-native-executorch/src/types/poseEstimation.ts b/packages/react-native-executorch/src/types/poseEstimation.ts index 03afc592c3..c7ae352925 100644 --- a/packages/react-native-executorch/src/types/poseEstimation.ts +++ b/packages/react-native-executorch/src/types/poseEstimation.ts @@ -62,10 +62,16 @@ export type PoseEstimationConfig = { * Each model name maps to its required fields. * @category Types */ -export type PoseEstimationModelSources = { - modelName: 'yolo26n-pose'; - modelSource: ResourceSource; -}; +export type PoseEstimationModelSources = + | { + modelName: 'yolo26n-pose'; + modelSource: ResourceSource; + } + // RF-DETR keypoint preview (BETA) — may be renamed once a stable model ships. + | { + modelName: 'rfdetr-keypoint-preview'; + modelSource: ResourceSource; + }; /** * Union of all built-in pose estimation model names.