
Commit 956aed9

Merge pull request #12 from Y-B-Class-Projects/refactor-fall-function
Refactor fall function
2 parents 5fb2c8f + 215341f commit 956aed9

6 files changed

Lines changed: 169 additions & 41 deletions


README.md

Lines changed: 17 additions & 0 deletions
@@ -60,6 +60,23 @@ These videos demonstrate how the model successfully and accurately recognizes hu

- **Short Term**: Optimize the if-else-based model using a time-sliding window
- **Long Term**: Integrate a time-series model (e.g., LSTM) for more accurate detection

## Fall Detection Logic

The fall detection logic uses the following parameters:

| Parameter | Description |
|------------------------|-------------|
| `FPS` | Frames per second; used to convert frame counts into elapsed time and velocity. |
| `WINDOW_SIZE` | Number of frames in each analysis window, i.e., how many frames are used to evaluate pose changes. |
| `V_THRESH` | Threshold for center-of-mass velocity (px/s); movements faster than this are considered potentially abnormal. |
| `DY_THRESH` | Threshold for vertical (Y-axis) displacement of the center of mass, in pixels. |
| `ASPECT_RATIO_THRESH` | Threshold for the change in the body's bounding-box aspect ratio (width/height); indicates whether the body has become horizontal. |

These values can be configured via a `.env` file; if not provided, the defaults defined in the code are used (an illustrative example follows the note below).
> **Note:** This logic is still under development and subject to change as part of our ongoing implementation and validation process.
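For illustration, a `.env` overriding the defaults might look like the following (the threshold values below are hypothetical placeholders, not tuned recommendations):

```env
FPS=30
WINDOW_SIZE=30
V_THRESH=120
DY_THRESH=40
ASPECT_RATIO_THRESH=0.5
```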
## Possible Future Improvements

To detect human falls, raise alerts, and save lives, the real-time system could be deployed in nursing homes, hospitals, and senior living facilities.

config.py

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@

```python
# config.py
import os
from dotenv import load_dotenv

load_dotenv()


def get_env_int(var_name, default):
    """Read an int from the environment, falling back to `default` on bad input."""
    try:
        return int(os.getenv(var_name, default))
    except ValueError:
        print(f"Warning: {var_name} is not a valid int, using default {default}")
        return default


def get_env_float(var_name, default):
    """Read a float from the environment, falling back to `default` on bad input."""
    try:
        return float(os.getenv(var_name, default))
    except ValueError:
        print(f"Warning: {var_name} is not a valid float, using default {default}")
        return default


FPS = get_env_int("FPS", 30)
WINDOW_SIZE = get_env_int("WINDOW_SIZE", 30)
V_THRESH = get_env_int("V_THRESH", 0)
DY_THRESH = get_env_int("DY_THRESH", 0)
ASPECT_RATIO_THRESH = get_env_float("ASPECT_RATIO_THRESH", 0)
```
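A quick, hypothetical sanity check of the fallback behavior (setting variables in-process rather than via `.env`):

```python
import os

# Simulate one valid override and one malformed value before importing config.
os.environ["FPS"] = "60"         # valid int: parsed as 60
os.environ["V_THRESH"] = "fast"  # invalid: triggers the warning and the default 0

import config
print(config.FPS, config.V_THRESH)  # -> 60 0 (plus a warning line for V_THRESH)
```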

fall_core.py

Lines changed: 82 additions & 33 deletions
```diff
@@ -9,6 +9,54 @@
 from utils.plots import output_to_keypoint
 
 
+def compute_center_of_mass(pose):
+    left_shoulder = (pose[10], pose[11])
+    right_shoulder = (pose[13], pose[14])
+    left_hip = (pose[22], pose[23])
+    right_hip = (pose[25], pose[26])
+    cx = (left_shoulder[0] + right_shoulder[0] + left_hip[0] + right_hip[0]) / 4
+    cy = (left_shoulder[1] + right_shoulder[1] + left_hip[1] + right_hip[1]) / 4
+    return cx, cy
+
+
+def compute_center_velocity(pose_start, pose_end, fps, window_size):
+    com_start = compute_center_of_mass(pose_start)
+    com_end = compute_center_of_mass(pose_end)
+    dx = com_end[0] - com_start[0]
+    dy = com_end[1] - com_start[1]
+    distance = math.sqrt(dx ** 2 + dy ** 2)
+    time_elapsed = (window_size - 1) / fps
+    velocity = distance / time_elapsed
+    return min(velocity, 300.0), dy
+
+
+def compute_bbox_aspect_ratio(pose):
+    x_vals = [pose[i] for i in range(0, len(pose), 3)]
+    y_vals = [pose[i] for i in range(1, len(pose), 3)]
+    width = max(x_vals) - min(x_vals)
+    height = max(y_vals) - min(y_vals)
+    return width / height if height != 0 else 0
+
+
+def compute_aspect_ratio_delta(pose_start, pose_end):
+    ar_start = compute_bbox_aspect_ratio(pose_start)
+    ar_end = compute_bbox_aspect_ratio(pose_end)
+    return ar_end - ar_start, ar_start, ar_end
+
+
+def find_most_similar_pose(reference_pose, candidate_poses):
+    ref_cx, ref_cy = compute_center_of_mass(reference_pose)
+    min_dist = float("inf")
+    best_pose = None
+    for pose in candidate_poses:
+        cx, cy = compute_center_of_mass(pose)
+        dist = (ref_cx - cx) ** 2 + (ref_cy - cy) ** 2
+        if dist < min_dist:
+            min_dist = dist
+            best_pose = pose
+    return best_pose
+
+
 def get_pose_model():
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     print("device: ", device)
```
```diff
@@ -49,7 +97,6 @@ def prepare_vid_out(video_path, vid_cap, output_dir):
     if not success:
         raise RuntimeError(f"Failed to read first frame for output setup: {video_path}")
 
-    # Reset video capture position to beginning
     vid_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
 
     vid_write_image = letterbox(first_frame, 960, stride=64, auto=True)[0]
@@ -67,37 +114,40 @@ def prepare_vid_out(video_path, vid_cap, output_dir):
     return out
 
 
-def fall_detection(poses):
-    for pose in poses:
-        xmin, ymin = (pose[2] - pose[4] / 2), (pose[3] - pose[5] / 2)
-        xmax, ymax = (pose[2] + pose[4] / 2), (pose[3] + pose[5] / 2)
-        left_shoulder_y = pose[23]
-        left_shoulder_x = pose[22]
-        right_shoulder_y = pose[26]
-        left_body_y = pose[41]
-        left_body_x = pose[40]
-        right_body_y = pose[44]
-        len_factor = math.sqrt(
-            (left_shoulder_y - left_body_y) ** 2 + (left_shoulder_x - left_body_x) ** 2
-        )
-        left_foot_y = pose[53]
-        right_foot_y = pose[56]
-        dx = int(xmax) - int(xmin)
-        dy = int(ymax) - int(ymin)
-        difference = dy - dx
-        if (
-            left_shoulder_y > left_foot_y - len_factor
-            and left_body_y > left_foot_y - (len_factor / 2)
-            and left_shoulder_y > left_body_y - (len_factor / 2)
-            or (
-                right_shoulder_y > right_foot_y - len_factor
-                and right_body_y > right_foot_y - (len_factor / 2)
-                and right_shoulder_y > right_body_y - (len_factor / 2)
-            )
-            or difference < 0
-        ):
-            return True, (xmin, ymin, xmax, ymax)
-    return False, None
+def fall_detection(pose_window, window_size, fps, v_thresh, aspect_ratio_thresh, dy_thresh):
+    if len(pose_window) < window_size:
+        return False, None, None, None
+
+    pose_start = pose_window[0][0]
+    pose_end_all = pose_window[-1]
+    pose_end = find_most_similar_pose(pose_start, pose_end_all)
+
+    v, dy = compute_center_velocity(pose_start, pose_end, fps, window_size)
+    ar_delta, ar_start, ar_end = compute_aspect_ratio_delta(pose_start, pose_end)
+
+    debug_text = (
+        f"v={v:.2f}/{v_thresh:.2f}px/s, "
+        f"y={dy:.1f}/{dy_thresh:.1f}, "
+        f"ar={ar_delta:.2f}/{aspect_ratio_thresh:.2f}"
+    )
+    print(f"[TRACE] {debug_text}")
+
+    cond_speed_drop = v > v_thresh and dy > dy_thresh
+    cond_down_flat = dy > dy_thresh and ar_delta > aspect_ratio_thresh
+
+    if cond_speed_drop or cond_down_flat:
+        tag = (
+            ("SpeedDrop " if cond_speed_drop else "") +
+            ("DownFlat " if cond_down_flat else "")
+        ).strip()
+
+        xmin = pose_end[2] - pose_end[4] / 2
+        ymin = pose_end[3] - pose_end[5] / 2
+        xmax = pose_end[2] + pose_end[4] / 2
+        ymax = pose_end[3] + pose_end[5] / 2
+        return True, (xmin, ymin, xmax, ymax), debug_text, tag
+
+    return False, None, debug_text, ""
 
 
 def falling_alarm(image, bbox):
@@ -121,7 +171,6 @@ def falling_alarm(image, bbox):
         lineType=cv2.LINE_AA,
     )
 
-
 def draw_fps(frame, prev_time):
     import time
     curr_time = time.time()
```
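End to end, a toy driver for the new `fall_detection` might look like this (hypothetical thresholds; same synthetic-pose assumptions as the sketch above):

```python
import numpy as np
from fall_core import fall_detection

def make_pose(x, y, w=50.0, h=150.0):
    pose = np.zeros(7 + 17 * 3)
    pose[2:6] = (x, y, w, h)  # box center x/y, width, height (used for the returned bbox)
    pose[7::3], pose[8::3], pose[9::3] = x, y, 1.0
    return pose

# A 30-frame window with one detected person whose center drops 150 px.
window = [[make_pose(100.0, 100.0 + 150.0 * i / 29)] for i in range(30)]

is_fall, bbox, debug_text, tag = fall_detection(
    window, 30, 30, 100.0, 0.5, 50.0  # window_size, fps, v_thresh, ar_thresh, dy_thresh
)
print(is_fall, tag)  # expect True; the tag names whichever condition(s) fired
```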

realtime.py

Lines changed: 21 additions & 4 deletions
```diff
@@ -5,12 +5,15 @@
 import cv2
 import time
 import torch
+from collections import deque
+from config import WINDOW_SIZE, FPS, V_THRESH, ASPECT_RATIO_THRESH, DY_THRESH
 
 
 def process_realtime_camera():
     model, device = get_pose_model()
     cap = cv2.VideoCapture(0)
     prev_time = time.time()
+    pose_window = deque(maxlen=WINDOW_SIZE)
 
     while cap.isOpened():
         ret, frame = cap.read()
@@ -20,10 +23,24 @@ def process_realtime_camera():
         frame, prev_time = draw_fps(frame, prev_time)
         image_tensor, output = get_pose(frame, model, device)
         _image = prepare_image(image_tensor)
-        is_fall, bbox = fall_detection(output)
-
-        if is_fall:
-            falling_alarm(_image, bbox)
+        if len(output) > 0:
+            pose_window.append(output)
+            if len(pose_window) == WINDOW_SIZE:
+                is_fall, bbox, debug_text, tag = fall_detection(
+                    pose_window,
+                    WINDOW_SIZE,
+                    FPS,
+                    V_THRESH,
+                    ASPECT_RATIO_THRESH,
+                    DY_THRESH
+                )
+                if debug_text:
+                    _image = cv2.putText(
+                        _image, f"{tag}: {debug_text}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
+                        0.7, (0, 255, 0), 2
+                    )
+                if is_fall:
+                    falling_alarm(_image, bbox)
 
         cv2.imshow("Real-Time Fall Detection", _image[:,:,::-1])
         if cv2.waitKey(1) & 0xFF == ord("q"):
```
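A hypothetical entry point for the webcam path (assuming a camera at index 0 and the repo dependencies installed):

```python
from realtime import process_realtime_camera

process_realtime_camera()  # opens camera 0; press "q" in the preview window to stop
```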

requirements.txt

Lines changed: 2 additions & 0 deletions
```diff
@@ -12,6 +12,8 @@ torch>=1.7.0,!=1.12.0
 torchvision>=0.8.1,!=0.13.0
 tqdm>=4.41.0
 protobuf<4.21.3
+python-dotenv>=1.0.0
+
 
 # Logging -------------------------------------
 tensorboard>=2.4.1
```
video.py

Lines changed: 21 additions & 4 deletions
```diff
@@ -5,6 +5,8 @@
 import cv2
 import os
 from tqdm import tqdm
+from collections import deque
+from config import WINDOW_SIZE, FPS, V_THRESH, ASPECT_RATIO_THRESH, DY_THRESH
 
 
 def process_video_file(video_path, output_dir):
@@ -15,6 +17,7 @@ def process_video_file(video_path, output_dir):
 
     model, device = get_pose_model()
     vid_out = prepare_vid_out(video_path, vid_cap, output_dir)
+    pose_window = deque(maxlen=WINDOW_SIZE)
 
     success, frame = vid_cap.read()
     frames = []
@@ -25,10 +28,24 @@
     for image in tqdm(frames, desc=f"Processing {os.path.basename(video_path)}"):
         image, output = get_pose(image, model, device)
         _image = prepare_image(image)
-        is_fall, bbox = fall_detection(output)
-
-        if is_fall:
-            falling_alarm(_image, bbox)
+        if len(output) > 0:
+            pose_window.append(output)
+            if len(pose_window) == WINDOW_SIZE:
+                is_fall, bbox, debug_text, tag = fall_detection(
+                    pose_window,
+                    WINDOW_SIZE,
+                    FPS,
+                    V_THRESH,
+                    ASPECT_RATIO_THRESH,
+                    DY_THRESH
+                )
+                # debug
+                _image = cv2.putText(
+                    _image, f"{tag}: {debug_text}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7, (0, 255, 0), 2
+                )
+                if is_fall:
+                    falling_alarm(_image, bbox)
         vid_out.write(_image[:,:,::-1])
 
     vid_out.release()
```
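And a matching hypothetical smoke test for the batch path (paths are placeholders):

```python
from video import process_video_file

# Writes the annotated result into the output directory.
process_video_file("samples/fall.mp4", "output/")
```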
