From f7e9ff31d5113bd009621fd232acf2f5cf1729d9 Mon Sep 17 00:00:00 2001 From: Diode-exe Date: Sat, 25 Apr 2026 23:58:58 -0500 Subject: [PATCH 1/3] Create .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b331dac0 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pth \ No newline at end of file From d222ebb47bf3bd655cf1c68404cb5c46b613fdb5 Mon Sep 17 00:00:00 2001 From: Diode-exe Date: Sun, 26 Apr 2026 00:36:12 -0500 Subject: [PATCH 2/3] Added webcam version --- .gitignore | 3 +- run.py | 55 +++++++++++++++++++ run_video.py | 8 +++ run_video_webcam.py | 81 +++++++++++++++++++++++++++ run_webcam.py | 131 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 run_video_webcam.py create mode 100644 run_webcam.py diff --git a/.gitignore b/.gitignore index b331dac0..71ba845f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -*.pth \ No newline at end of file +*.pth +__pycache__/ \ No newline at end of file diff --git a/run.py b/run.py index 14810ff3..b9836eb3 100644 --- a/run.py +++ b/run.py @@ -1,3 +1,7 @@ +<<<<<<< Updated upstream +======= +@ -1,73 +0,0 @@ +>>>>>>> Stashed changes import argparse import cv2 import glob @@ -11,6 +15,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Depth Anything V2') +<<<<<<< Updated upstream parser.add_argument('--img-path', type=str) parser.add_argument('--input-size', type=int, default=518) @@ -25,17 +30,41 @@ DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' +======= + + parser.add_argument('--img-path', type=str) + parser.add_argument('--input-size', type=int, default=518) + parser.add_argument('--outdir', type=str, default='./vis_depth') + + parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) + + parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') + parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') + + args = parser.parse_args() + + DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + +>>>>>>> Stashed changes model_configs = { 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} } +<<<<<<< Updated upstream depth_anything = DepthAnythingV2(**model_configs[args.encoder]) depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) depth_anything = depth_anything.to(DEVICE).eval() +======= + + depth_anything = DepthAnythingV2(**model_configs[args.encoder]) + depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) + depth_anything = depth_anything.to(DEVICE).eval() + +>>>>>>> Stashed changes if os.path.isfile(args.img_path): if args.img_path.endswith('txt'): with open(args.img_path, 'r') as f: @@ -44,6 +73,7 @@ filenames = [args.img_path] else: filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) +<<<<<<< Updated upstream os.makedirs(args.outdir, exist_ok=True) @@ -59,15 +89,40 @@ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 depth = depth.astype(np.uint8) +======= + + os.makedirs(args.outdir, exist_ok=True) + + cmap = matplotlib.colormaps.get_cmap('Spectral_r') + + for k, filename in enumerate(filenames): + print(f'Progress {k+1}/{len(filenames)}: {filename}') + + raw_image = cv2.imread(filename) + + depth = depth_anything.infer_image(raw_image, args.input_size) + + depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 + depth = depth.astype(np.uint8) + +>>>>>>> Stashed changes if args.grayscale: depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) else: depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) +<<<<<<< Updated upstream +======= + +>>>>>>> Stashed changes if args.pred_only: cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), depth) else: split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255 combined_result = cv2.hconcat([raw_image, split_region, depth]) +<<<<<<< Updated upstream +======= + +>>>>>>> Stashed changes cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), combined_result) \ No newline at end of file diff --git a/run_video.py b/run_video.py index cc3c5b6a..3a2063ba 100644 --- a/run_video.py +++ b/run_video.py @@ -1,3 +1,7 @@ +<<<<<<< Updated upstream +======= +@ -1,92 +0,0 @@ +>>>>>>> Stashed changes import argparse import cv2 import glob @@ -89,4 +93,8 @@ out.write(combined_frame) raw_video.release() +<<<<<<< Updated upstream out.release() +======= + out.release() +>>>>>>> Stashed changes diff --git a/run_video_webcam.py b/run_video_webcam.py new file mode 100644 index 00000000..fe3e96a9 --- /dev/null +++ b/run_video_webcam.py @@ -0,0 +1,81 @@ +import argparse +import cv2 +import matplotlib +import numpy as np +import torch +from depth_anything_v2.dpt import DepthAnythingV2 + + +def get_device(): + return 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Depth Anything V2 - Live Feed') + + parser.add_argument('--input-size', type=int, default=518) + parser.add_argument('--encoder', type=str, default='vits', choices=['vits', 'vitb', 'vitl', 'vitg']) + parser.add_argument('--pred-only', dest='pred_only', action='store_true') + parser.add_argument('--grayscale', dest='grayscale', action='store_true') + parser.add_argument('--camera-index', type=int, default=0) + + args = parser.parse_args() + + DEVICE = get_device() + + # Reverted to your original configuration + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + + print(f"Initializing model on {DEVICE}...") + depth_anything = DepthAnythingV2(**model_configs[args.encoder]) + depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) + depth_anything = depth_anything.to(DEVICE).eval() + + # Initialize webcam + raw_video = cv2.VideoCapture(args.camera_index) + if not raw_video.isOpened(): + raise RuntimeError(f'Unable to open webcam at index {args.camera_index}.') + + margin_width = 50 + cmap = matplotlib.colormaps.get_cmap('Spectral_r') + + print("Starting live feed. Press 'q' to exit.") + + while raw_video.isOpened(): + ret, raw_frame = raw_video.read() + if not ret: + break + + # Inference + depth = depth_anything.infer_image(raw_frame, args.input_size) + + # Normalize and colorize based on your original logic + depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 + depth = depth.astype(np.uint8) + + if args.grayscale: + depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) + else: + depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) + + # Combine or isolate frames + if args.pred_only: + combined_frame = depth + else: + frame_height = raw_frame.shape[0] + split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255 + combined_frame = cv2.hconcat([raw_frame, split_region, depth]) + + # Display window + cv2.imshow('Depth Anything V2 - Live', combined_frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + raw_video.release() + cv2.destroyAllWindows() \ No newline at end of file diff --git a/run_webcam.py b/run_webcam.py new file mode 100644 index 00000000..fe6f5306 --- /dev/null +++ b/run_webcam.py @@ -0,0 +1,131 @@ +import argparse +import cv2 +import glob +import matplotlib +import numpy as np +import os +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + + +def get_device(): + return 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + + +def load_model(encoder, target_device): + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + + model = DepthAnythingV2(**model_configs[encoder]) + model.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location='cpu')) + return model.to(target_device).eval() + + +def render_depth(depth, colormap, grayscale): + depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 + depth = depth.astype(np.uint8) + + if grayscale: + return np.repeat(depth[..., np.newaxis], 3, axis=-1) + + return (colormap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) + + +def combine_frames(raw_frame, depth_frame, pred_only): + if pred_only: + return depth_frame + + split_region = np.ones((raw_frame.shape[0], 50, 3), dtype=np.uint8) * 255 + return cv2.hconcat([raw_frame, split_region, depth_frame]) + + +def get_filenames(img_path): + if os.path.isfile(img_path): + if img_path.endswith('txt'): + with open(img_path, 'r', encoding='utf-8') as handle: + return handle.read().splitlines() + + return [img_path] + + return glob.glob(os.path.join(img_path, '**/*'), recursive=True) + + +def run_on_webcam(model, input_size, colormap, grayscale, pred_only, camera_index): + raw_video = cv2.VideoCapture(camera_index) + if not raw_video.isOpened(): + raise RuntimeError(f'Unable to open webcam at index {camera_index}.') + + print("Starting live feed. Press 'q' to exit.") + + try: + while raw_video.isOpened(): + ret, raw_frame = raw_video.read() + if not ret: + break + + depth = model.infer_image(raw_frame, input_size) + depth_frame = render_depth(depth, colormap, grayscale) + combined_frame = combine_frames(raw_frame, depth_frame, pred_only) + + cv2.imshow('Depth Anything V2 - Live', combined_frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + finally: + raw_video.release() + cv2.destroyAllWindows() + + +def run_on_files(model, input_filenames, input_size, outdir, colormap, grayscale, pred_only): + os.makedirs(outdir, exist_ok=True) + + for k, filename in enumerate(input_filenames): + print(f'Progress {k+1}/{len(input_filenames)}: {filename}') + + raw_image = cv2.imread(filename) + if raw_image is None: + print(f'Skipping unreadable file: {filename}') + continue + + depth = model.infer_image(raw_image, input_size) + depth_frame = render_depth(depth, colormap, grayscale) + output_frame = combine_frames(raw_image, depth_frame, pred_only) + + cv2.imwrite(os.path.join(outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), output_frame) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Depth Anything V2') + + parser.add_argument('--img-path', type=str) + parser.add_argument('--input-size', type=int, default=518) + parser.add_argument('--outdir', type=str, default='./vis_depth') + parser.add_argument('--webcam', action='store_true', help='run on a live webcam feed instead of image files') + parser.add_argument('--camera-index', type=int, default=0, help='OpenCV camera index to use with --webcam') + + parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) + + parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') + parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') + + args = parser.parse_args() + + if not args.webcam and not args.img_path: + parser.error('--img-path is required unless --webcam is set') + + device = get_device() + print(f'Initializing model on {device}...') + depth_anything = load_model(args.encoder, device) + + cmap = matplotlib.colormaps.get_cmap('Spectral_r') + + if args.webcam: + run_on_webcam(depth_anything, args.input_size, cmap, args.grayscale, args.pred_only, args.camera_index) + else: + filenames = get_filenames(args.img_path) + run_on_files(depth_anything, filenames, args.input_size, args.outdir, cmap, args.grayscale, args.pred_only) \ No newline at end of file From 70d8de544f25d4d087b603576cef3973fdac4ba3 Mon Sep 17 00:00:00 2001 From: Diode-exe Date: Sun, 26 Apr 2026 00:39:19 -0500 Subject: [PATCH 3/3] Accidentally committed that text --- run.py | 55 ---------------------------------------------------- run_video.py | 8 -------- 2 files changed, 63 deletions(-) diff --git a/run.py b/run.py index b9836eb3..14810ff3 100644 --- a/run.py +++ b/run.py @@ -1,7 +1,3 @@ -<<<<<<< Updated upstream -======= -@ -1,73 +0,0 @@ ->>>>>>> Stashed changes import argparse import cv2 import glob @@ -15,7 +11,6 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Depth Anything V2') -<<<<<<< Updated upstream parser.add_argument('--img-path', type=str) parser.add_argument('--input-size', type=int, default=518) @@ -30,41 +25,17 @@ DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' -======= - - parser.add_argument('--img-path', type=str) - parser.add_argument('--input-size', type=int, default=518) - parser.add_argument('--outdir', type=str, default='./vis_depth') - - parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) - - parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') - parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') - - args = parser.parse_args() - - DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' - ->>>>>>> Stashed changes model_configs = { 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} } -<<<<<<< Updated upstream depth_anything = DepthAnythingV2(**model_configs[args.encoder]) depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) depth_anything = depth_anything.to(DEVICE).eval() -======= - - depth_anything = DepthAnythingV2(**model_configs[args.encoder]) - depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) - depth_anything = depth_anything.to(DEVICE).eval() - ->>>>>>> Stashed changes if os.path.isfile(args.img_path): if args.img_path.endswith('txt'): with open(args.img_path, 'r') as f: @@ -73,7 +44,6 @@ filenames = [args.img_path] else: filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) -<<<<<<< Updated upstream os.makedirs(args.outdir, exist_ok=True) @@ -89,40 +59,15 @@ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 depth = depth.astype(np.uint8) -======= - - os.makedirs(args.outdir, exist_ok=True) - - cmap = matplotlib.colormaps.get_cmap('Spectral_r') - - for k, filename in enumerate(filenames): - print(f'Progress {k+1}/{len(filenames)}: {filename}') - - raw_image = cv2.imread(filename) - - depth = depth_anything.infer_image(raw_image, args.input_size) - - depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 - depth = depth.astype(np.uint8) - ->>>>>>> Stashed changes if args.grayscale: depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) else: depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) -<<<<<<< Updated upstream -======= - ->>>>>>> Stashed changes if args.pred_only: cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), depth) else: split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255 combined_result = cv2.hconcat([raw_image, split_region, depth]) -<<<<<<< Updated upstream -======= - ->>>>>>> Stashed changes cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), combined_result) \ No newline at end of file diff --git a/run_video.py b/run_video.py index 3a2063ba..cc3c5b6a 100644 --- a/run_video.py +++ b/run_video.py @@ -1,7 +1,3 @@ -<<<<<<< Updated upstream -======= -@ -1,92 +0,0 @@ ->>>>>>> Stashed changes import argparse import cv2 import glob @@ -93,8 +89,4 @@ out.write(combined_frame) raw_video.release() -<<<<<<< Updated upstream out.release() -======= - out.release() ->>>>>>> Stashed changes