From f7e9ff31d5113bd009621fd232acf2f5cf1729d9 Mon Sep 17 00:00:00 2001
From: Diode-exe <DiodesCAER@protonmail.com>
Date: Sat, 25 Apr 2026 23:58:58 -0500
Subject: [PATCH 1/3] Create .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..b331dac0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pth
\ No newline at end of file

From d222ebb47bf3bd655cf1c68404cb5c46b613fdb5 Mon Sep 17 00:00:00 2001
From: Diode-exe <DiodesCAER@protonmail.com>
Date: Sun, 26 Apr 2026 00:36:12 -0500
Subject: [PATCH 2/3] Added webcam version

---
 .gitignore          |   3 +-
 run.py              |  55 +++++++++++++++++++
 run_video.py        |   8 +++
 run_video_webcam.py |  81 +++++++++++++++++++++++++++
 run_webcam.py       | 131 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 run_video_webcam.py
 create mode 100644 run_webcam.py

diff --git a/.gitignore b/.gitignore
index b331dac0..71ba845f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-*.pth
\ No newline at end of file
+*.pth
+__pycache__/
\ No newline at end of file
diff --git a/run.py b/run.py
index 14810ff3..b9836eb3 100644
--- a/run.py
+++ b/run.py
@@ -1,3 +1,7 @@
+<<<<<<< Updated upstream
+=======
+@ -1,73 +0,0 @@
+>>>>>>> Stashed changes
 import argparse
 import cv2
 import glob
@@ -11,6 +15,7 @@
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Depth Anything V2')
+<<<<<<< Updated upstream
     
     parser.add_argument('--img-path', type=str)
     parser.add_argument('--input-size', type=int, default=518)
@@ -25,17 +30,41 @@
     
     DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
     
+=======
+
+    parser.add_argument('--img-path', type=str)
+    parser.add_argument('--input-size', type=int, default=518)
+    parser.add_argument('--outdir', type=str, default='./vis_depth')
+
+    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'])
+
+    parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction')
+    parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette')
+
+    args = parser.parse_args()
+
+    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+
+>>>>>>> Stashed changes
     model_configs = {
         'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
         'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
         'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
         'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
     }
+<<<<<<< Updated upstream
     
     depth_anything = DepthAnythingV2(**model_configs[args.encoder])
     depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
     depth_anything = depth_anything.to(DEVICE).eval()
     
+=======
+
+    depth_anything = DepthAnythingV2(**model_configs[args.encoder])
+    depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
+    depth_anything = depth_anything.to(DEVICE).eval()
+
+>>>>>>> Stashed changes
     if os.path.isfile(args.img_path):
         if args.img_path.endswith('txt'):
             with open(args.img_path, 'r') as f:
@@ -44,6 +73,7 @@
             filenames = [args.img_path]
     else:
         filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True)
+<<<<<<< Updated upstream
     
     os.makedirs(args.outdir, exist_ok=True)
     
@@ -59,15 +89,40 @@
         depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
         depth = depth.astype(np.uint8)
         
+=======
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+    cmap = matplotlib.colormaps.get_cmap('Spectral_r')
+
+    for k, filename in enumerate(filenames):
+        print(f'Progress {k+1}/{len(filenames)}: {filename}')
+
+        raw_image = cv2.imread(filename)
+
+        depth = depth_anything.infer_image(raw_image, args.input_size)
+
+        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+        depth = depth.astype(np.uint8)
+
+>>>>>>> Stashed changes
         if args.grayscale:
             depth = np.repeat(depth[..., np.newaxis], 3, axis=-1)
         else:
             depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)
+<<<<<<< Updated upstream
         
+=======
+
+>>>>>>> Stashed changes
         if args.pred_only:
             cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), depth)
         else:
             split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255
             combined_result = cv2.hconcat([raw_image, split_region, depth])
+<<<<<<< Updated upstream
             
+=======
+
+>>>>>>> Stashed changes
             cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), combined_result)
\ No newline at end of file
diff --git a/run_video.py b/run_video.py
index cc3c5b6a..3a2063ba 100644
--- a/run_video.py
+++ b/run_video.py
@@ -1,3 +1,7 @@
+<<<<<<< Updated upstream
+=======
+@ -1,92 +0,0 @@
+>>>>>>> Stashed changes
 import argparse
 import cv2
 import glob
@@ -89,4 +93,8 @@
                 out.write(combined_frame)
         
         raw_video.release()
+<<<<<<< Updated upstream
         out.release()
+=======
+        out.release()
+>>>>>>> Stashed changes
diff --git a/run_video_webcam.py b/run_video_webcam.py
new file mode 100644
index 00000000..fe3e96a9
--- /dev/null
+++ b/run_video_webcam.py
@@ -0,0 +1,81 @@
+import argparse
+import cv2
+import matplotlib
+import numpy as np
+import torch
+from depth_anything_v2.dpt import DepthAnythingV2
+
+
+def get_device():  # first available backend name: CUDA, then MPS, otherwise CPU
+    return 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+
+
+if __name__ == '__main__':
+    # Live preview: run Depth Anything V2 on each webcam frame and show it until 'q' or a failed read.
+    parser = argparse.ArgumentParser(description='Depth Anything V2 - Live Feed')
+    parser.add_argument('--input-size', type=int, default=518)
+    parser.add_argument('--encoder', type=str, default='vits', choices=['vits', 'vitb', 'vitl', 'vitg'])
+    parser.add_argument('--pred-only', dest='pred_only', action='store_true')
+    parser.add_argument('--grayscale', dest='grayscale', action='store_true')
+    parser.add_argument('--camera-index', type=int, default=0)
+
+    args = parser.parse_args()
+
+    DEVICE = get_device()
+
+    # Constructor arguments for each selectable encoder.
+    model_configs = {
+        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+        'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+        'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
+    }
+
+    print(f"Initializing model on {DEVICE}...")
+    depth_anything = DepthAnythingV2(**model_configs[args.encoder])
+    depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
+    depth_anything = depth_anything.to(DEVICE).eval()
+
+    # Open the camera up front so a bad index fails before the display loop starts.
+    raw_video = cv2.VideoCapture(args.camera_index)
+    if not raw_video.isOpened():
+        raise RuntimeError(f'Unable to open webcam at index {args.camera_index}.')
+
+    margin_width = 50
+    cmap = matplotlib.colormaps.get_cmap('Spectral_r')
+
+    print("Starting live feed. Press 'q' to exit.")
+
+    # try/finally: release the camera and close the window even if the loop raises.
+    try:
+        while raw_video.isOpened():
+            ret, raw_frame = raw_video.read()
+            if not ret:
+                break
+
+            depth = depth_anything.infer_image(raw_frame, args.input_size)
+
+            # Min-max scale to 0-255. A constant depth map has zero span, so fall back
+            # to 1.0 and let it map to all zeros instead of dividing by zero.
+            depth_span = float(depth.max() - depth.min()) or 1.0
+            depth = ((depth - depth.min()) / depth_span * 255.0).astype(np.uint8)
+
+            if args.grayscale:
+                depth = np.repeat(depth[..., np.newaxis], 3, axis=-1)
+            else:
+                depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)
+
+            if args.pred_only:
+                combined_frame = depth
+            else:
+                # White spacer column between the camera frame and the depth image.
+                split_region = np.ones((raw_frame.shape[0], margin_width, 3), dtype=np.uint8) * 255
+                combined_frame = cv2.hconcat([raw_frame, split_region, depth])
+
+            cv2.imshow('Depth Anything V2 - Live', combined_frame)
+
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+    finally:
+        raw_video.release()
+        cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/run_webcam.py b/run_webcam.py
new file mode 100644
index 00000000..fe6f5306
--- /dev/null
+++ b/run_webcam.py
@@ -0,0 +1,131 @@
+import argparse
+import cv2
+import glob
+import matplotlib
+import numpy as np
+import os
+import torch
+
+from depth_anything_v2.dpt import DepthAnythingV2
+
+
+def get_device():  # first available backend name: CUDA, then MPS, otherwise CPU
+    return 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+
+
+def load_model(encoder, target_device):
+    """Build the ``encoder`` variant, load its checkpoint mapped to CPU, return it on ``target_device`` in eval mode."""
+    specs = {  # encoder name -> remaining DepthAnythingV2 keyword arguments
+        'vits': {'features': 64, 'out_channels': [48, 96, 192, 384]},
+        'vitb': {'features': 128, 'out_channels': [96, 192, 384, 768]},
+        'vitl': {'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+        'vitg': {'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
+    }
+    net = DepthAnythingV2(encoder=encoder, **specs[encoder])
+    net.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location='cpu'))
+    return net.to(target_device).eval()
+
+
+def render_depth(depth, colormap, grayscale):
+    """Min-max scale ``depth`` to uint8; return it as 3 gray channels, or colormapped with channel order reversed."""
+    # A constant depth map has zero span: fall back to 1.0 so it maps to all zeros instead of dividing by zero.
+    span = float(depth.max() - depth.min()) or 1.0
+    depth = ((depth - depth.min()) / span * 255.0).astype(np.uint8)
+    if grayscale:
+        return np.repeat(depth[..., np.newaxis], 3, axis=-1)
+    return (colormap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)
+
+
+def combine_frames(raw_frame, depth_frame, pred_only):
+    """Return ``depth_frame`` alone when ``pred_only``; else raw frame, a 50-px white strip and depth, side by side."""
+    if not pred_only:
+        spacer = np.full((raw_frame.shape[0], 50, 3), 255, dtype=np.uint8)
+        return cv2.hconcat([raw_frame, spacer, depth_frame])
+    return depth_frame
+
+
+def get_filenames(img_path):
+    """List inputs: the lines of a file whose name ends in ``txt``, the single file itself, or a recursive glob."""
+    if not os.path.isfile(img_path):
+        # Not a regular file: treat it as a directory root and match everything beneath it.
+        return glob.glob(os.path.join(img_path, '**/*'), recursive=True)
+    if not img_path.endswith('txt'):
+        return [img_path]
+    with open(img_path, 'r', encoding='utf-8') as listing:
+        return listing.read().splitlines()
+
+
+def run_on_webcam(model, input_size, colormap, grayscale, pred_only, camera_index):
+    """Show live depth predictions for camera ``camera_index`` until a read fails or 'q' is pressed.
+
+    Raises RuntimeError when the capture device cannot be opened.
+    """
+    capture = cv2.VideoCapture(camera_index)
+    if not capture.isOpened():
+        raise RuntimeError(f'Unable to open webcam at index {camera_index}.')
+
+    print("Starting live feed. Press 'q' to exit.")
+    quit_key = ord('q')
+    try:
+        while capture.isOpened():
+            grabbed, frame = capture.read()
+            if not grabbed:
+                break
+            rendered = render_depth(model.infer_image(frame, input_size), colormap, grayscale)
+            cv2.imshow('Depth Anything V2 - Live', combine_frames(frame, rendered, pred_only))
+            # Only the low byte of the key code is compared against 'q'.
+            if cv2.waitKey(1) & 0xFF == quit_key:
+                break
+    finally:
+        capture.release()
+        cv2.destroyAllWindows()
+
+
+def run_on_files(model, input_filenames, input_size, outdir, colormap, grayscale, pred_only):
+    """Write one ``<stem>.png`` into ``outdir`` per readable input; unreadable inputs are reported and skipped."""
+    os.makedirs(outdir, exist_ok=True)
+
+    for k, filename in enumerate(input_filenames):
+        print(f'Progress {k+1}/{len(input_filenames)}: {filename}')
+
+        image = cv2.imread(filename)
+        if image is None:
+            print(f'Skipping unreadable file: {filename}')
+            continue
+
+        # The output name keeps only the base name without extension, so same-stem inputs share one output path.
+        stem, _ = os.path.splitext(os.path.basename(filename))
+        rendered = render_depth(model.infer_image(image, input_size), colormap, grayscale)
+        cv2.imwrite(os.path.join(outdir, stem + '.png'), combine_frames(image, rendered, pred_only))
+
+
+if __name__ == '__main__':
+    # CLI entry point: depth estimation over image files, or over a live webcam feed with --webcam.
+    parser = argparse.ArgumentParser(description='Depth Anything V2')
+    parser.add_argument('--img-path', type=str)
+    parser.add_argument('--input-size', type=int, default=518)
+    parser.add_argument('--outdir', type=str, default='./vis_depth')
+    parser.add_argument('--webcam', action='store_true', help='run on a live webcam feed instead of image files')
+    parser.add_argument('--camera-index', type=int, default=0, help='OpenCV camera index to use with --webcam')
+    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'])
+    parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction')
+    parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette')
+
+    args = parser.parse_args()
+
+    # File mode needs an input path; reject the invocation before the model is loaded.
+    if not (args.webcam or args.img_path):
+        parser.error('--img-path is required unless --webcam is set')
+
+    device = get_device()
+    print(f'Initializing model on {device}...')
+    depth_anything = load_model(args.encoder, device)
+    cmap = matplotlib.colormaps.get_cmap('Spectral_r')
+
+    # Rendering options that both runners take in this order.
+    render_opts = (cmap, args.grayscale, args.pred_only)
+    if not args.webcam:
+        filenames = get_filenames(args.img_path)
+        run_on_files(depth_anything, filenames, args.input_size, args.outdir, *render_opts)
+    else:
+        run_on_webcam(depth_anything, args.input_size, *render_opts, args.camera_index)
\ No newline at end of file

From 70d8de544f25d4d087b603576cef3973fdac4ba3 Mon Sep 17 00:00:00 2001
From: Diode-exe <DiodesCAER@protonmail.com>
Date: Sun, 26 Apr 2026 00:39:19 -0500
Subject: [PATCH 3/3] Remove accidentally committed merge-conflict markers

---
 run.py       | 55 ----------------------------------------------------
 run_video.py |  8 --------
 2 files changed, 63 deletions(-)

diff --git a/run.py b/run.py
index b9836eb3..14810ff3 100644
--- a/run.py
+++ b/run.py
@@ -1,7 +1,3 @@
-<<<<<<< Updated upstream
-=======
-@ -1,73 +0,0 @@
->>>>>>> Stashed changes
 import argparse
 import cv2
 import glob
@@ -15,7 +11,6 @@
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Depth Anything V2')
-<<<<<<< Updated upstream
     
     parser.add_argument('--img-path', type=str)
     parser.add_argument('--input-size', type=int, default=518)
@@ -30,41 +25,17 @@
     
     DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
     
-=======
-
-    parser.add_argument('--img-path', type=str)
-    parser.add_argument('--input-size', type=int, default=518)
-    parser.add_argument('--outdir', type=str, default='./vis_depth')
-
-    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'])
-
-    parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction')
-    parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette')
-
-    args = parser.parse_args()
-
-    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
-
->>>>>>> Stashed changes
     model_configs = {
         'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
         'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
         'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
         'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
     }
-<<<<<<< Updated upstream
     
     depth_anything = DepthAnythingV2(**model_configs[args.encoder])
     depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
     depth_anything = depth_anything.to(DEVICE).eval()
     
-=======
-
-    depth_anything = DepthAnythingV2(**model_configs[args.encoder])
-    depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
-    depth_anything = depth_anything.to(DEVICE).eval()
-
->>>>>>> Stashed changes
     if os.path.isfile(args.img_path):
         if args.img_path.endswith('txt'):
             with open(args.img_path, 'r') as f:
@@ -73,7 +44,6 @@
             filenames = [args.img_path]
     else:
         filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True)
-<<<<<<< Updated upstream
     
     os.makedirs(args.outdir, exist_ok=True)
     
@@ -89,40 +59,15 @@
         depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
         depth = depth.astype(np.uint8)
         
-=======
-
-    os.makedirs(args.outdir, exist_ok=True)
-
-    cmap = matplotlib.colormaps.get_cmap('Spectral_r')
-
-    for k, filename in enumerate(filenames):
-        print(f'Progress {k+1}/{len(filenames)}: {filename}')
-
-        raw_image = cv2.imread(filename)
-
-        depth = depth_anything.infer_image(raw_image, args.input_size)
-
-        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-        depth = depth.astype(np.uint8)
-
->>>>>>> Stashed changes
         if args.grayscale:
             depth = np.repeat(depth[..., np.newaxis], 3, axis=-1)
         else:
             depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)
-<<<<<<< Updated upstream
         
-=======
-
->>>>>>> Stashed changes
         if args.pred_only:
             cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), depth)
         else:
             split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255
             combined_result = cv2.hconcat([raw_image, split_region, depth])
-<<<<<<< Updated upstream
             
-=======
-
->>>>>>> Stashed changes
             cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), combined_result)
\ No newline at end of file
diff --git a/run_video.py b/run_video.py
index 3a2063ba..cc3c5b6a 100644
--- a/run_video.py
+++ b/run_video.py
@@ -1,7 +1,3 @@
-<<<<<<< Updated upstream
-=======
-@ -1,92 +0,0 @@
->>>>>>> Stashed changes
 import argparse
 import cv2
 import glob
@@ -93,8 +89,4 @@
                 out.write(combined_frame)
         
         raw_video.release()
-<<<<<<< Updated upstream
         out.release()
-=======
-        out.release()
->>>>>>> Stashed changes