From 9bb04cee8803d5f8479ae58197c21931c75b5613 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 14 Jan 2026 16:08:31 +0900 Subject: [PATCH 001/162] Update docker installation commit for perception_evaluation --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c5240c498..22d18f5ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ RUN python3 -m pip --no-cache-dir install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@develop +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@dd37a546352f953565033f1d4b8cb443df1232c59 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ From 1a7ebee39f66d37cd45b20e23575d352a6cdc493 Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Thu, 19 Mar 2026 20:57:55 +0900 Subject: [PATCH 002/162] Update config --- ...evfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...fline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...usion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 8 ++++---- ..._cosine.py => default_20e_8xb8_adamw_linear_cosine.py} | 6 +++--- ...8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} | 4 ++-- ..._cosine.py => default_30e_8xb8_adamw_linear_cosine.py} | 4 ++-- ...8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} | 4 ++-- 13 files changed, 29 insertions(+), 29 deletions(-) rename 
projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (94%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_20e_4xb8_adamw_linear_cosine.py => 
default_20e_8xb8_adamw_linear_cosine.py} (97%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_linear_cosine.py => default_30e_8xb8_adamw_linear_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_4xb8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py index b781e2c71..e65c52ece 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py index a1ab10f57..4f81af760 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera_lidar_intensity/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m" +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py similarity index 98% rename from 
projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py index a9887a15c..6556cf818 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 
8b0aef32e..5d743e184 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_offline_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_offline/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py index 59f91c8a4..da461a567 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ 
"../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_offline_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_offline/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index df500705c..89bb7cd7b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -4,7 +4,7 @@ "../default/pipelines/default_offline_lidar_120m.py", "../models/default_lidar_second_secfpn_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_4xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -17,7 +17,7 @@ info_directory_path = "info/user_name/" 
experiment_group_name = "bevfusion_lidar_offline/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 178f5ff3d..69be0f0d9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py index e2b2d1678..e6addac7d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py similarity index 94% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index d65c470d9..a5c72aef0 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_4xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m" +experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index 8dc5e7bf1..d491eaa4b 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,11 +1,11 @@ # learning rate -# lr = 0.0001 -lr = 1e-4 +# lr = 0.0002 +lr = 2e-4 t_max = 6 max_epochs = 20 val_interval = 1 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 07a9110c7..c5053d943 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 0.0001 +lr = 2e-4 t_max = 8 max_epochs = 30 val_interval = 5 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 9d4ee8e61..c05aeafcb 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 1e-4 +lr = 2e-4 t_max = 8 max_epochs = 30 val_interval = 1 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py rename to 
projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 29fc38b4c..fc1914dde 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 0.0001 +lr = 2e-4 t_max = 15 max_epochs = 50 val_interval = 5 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 From 84024516eafe70aac0252f8b1cafbf40ba7fd05f Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Tue, 24 Mar 2026 16:48:43 +0900 Subject: [PATCH 003/162] Update config --- ..._voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} | 6 +++--- ...ar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 6 ++++-- ..._voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} | 10 ++++++---- 3 files changed, 13 insertions(+), 9 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} (92%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py index 6556cf818..20c85b1d8 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py @@ -1,6 +1,6 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m" +experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 69be0f0d9..e43f9c485 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = 
"bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py similarity index 92% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index e6addac7d..8538784f8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -1,6 +1,6 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", "../default/schedulers/default_30e_8xb8_adamw_cosine.py", @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" 
+info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file From cf6e13dea087c2983e045df78840b86c166cd79a Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Wed, 25 Mar 2026 19:07:44 +0900 Subject: [PATCH 004/162] Update config --- ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ 3 files changed, 234 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py new file mode 100644 index 000000000..f048ab1a8 --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + 
frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py new file mode 100644 index 000000000..b4d8ddfbf --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" +work_dir = "work_dirs/" + 
experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + 
testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py new file mode 100644 index 000000000..7a0215139 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + 
_base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) From e1ebc993b9c8f17090a64b5a11476a3733e89084 Mon Sep 
17 00:00:00 2001 From: KokSeang Tan Date: Thu, 26 Mar 2026 20:20:08 +0900 Subject: [PATCH 005/162] Update config --- .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 1 - .../pipelines/default_camera_lidar_intensity_120m.py | 4 ++-- .../t4dataset/default/pipelines/default_lidar_120m.py | 8 ++++++-- .../default/pipelines/default_lidar_intensity_120m.py | 8 ++++++-- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 2 +- 8 files changed, 18 insertions(+), 11 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index 28ba4ab33..a50cf8852 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -18,7 +18,6 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False), "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", True), } diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index acac440dc..9d1910dab 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -84,8 +84,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 
130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 7ffedc232..7ee393ea6 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -64,8 +64,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", @@ -89,6 +89,8 @@ "img_aug_matrix", "lidar_aug_matrix", "timestamp", + "vehicle_type", + "city", ], ), ] @@ -130,6 +132,8 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index c7fa1b2cb..8b154901e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -64,8 +64,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", @@ -89,6 +89,8 @@ "img_aug_matrix", "lidar_aug_matrix", 
"timestamp", + "vehicle_type", + "city", ], ), ] @@ -130,6 +132,8 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index d491eaa4b..c3f82e76d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate # lr = 0.0002 -lr = 2e-4 +lr = 1.5e-4 t_max = 6 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index c5053d943..94c2a4160 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 8 max_epochs = 30 val_interval = 5 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index c05aeafcb..f5c747e62 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 8 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index fc1914dde..f0bd87ca4 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 15 max_epochs = 50 val_interval = 5 From 2dac4333e0dce899701b0602778626e0df191a8a Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Fri, 27 Mar 2026 11:01:29 +0900 Subject: [PATCH 006/162] Update config --- .../t4dataset/default/pipelines/default_lidar_120m.py | 10 +++++----- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 6 +++++- .../schedulers/default_30e_8xb8_adamw_cosine.py | 6 +++++- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 6 +++++- .../schedulers/default_50e_8xb8_adamw_cosine.py | 6 +++++- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 7ee393ea6..347ba6452 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -54,14 +54,14 @@ classes=[ "car", "truck", - "construction_vehicle", + # "construction_vehicle", "bus", - "trailer", - "barrier", - "motorcycle", + # "trailer", + # "barrier", + # "motorcycle", "bicycle", "pedestrian", - "traffic_cone", + # "traffic_cone", ], ), # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index c3f82e76d..a0be6f4ab 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 
+1,6 @@ # learning rate # lr = 0.0002 -lr = 1.5e-4 +lr = 1.0e-4 t_max = 6 max_epochs = 20 val_interval = 1 @@ -57,3 +57,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 94c2a4160..edcbd74bf 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 8 max_epochs = 30 val_interval = 5 @@ -69,3 +69,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index f5c747e62..32e8d59fa 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 8 max_epochs = 30 val_interval = 1 @@ -56,3 +56,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index f0bd87ca4..58192c2de 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 15 max_epochs = 50 val_interval = 5 @@ -69,3 +69,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file From 9046b7bb1b3603345dc886903794ea118e9737f4 Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Fri, 27 Mar 2026 11:29:07 +0900 Subject: [PATCH 007/162] Update config --- tools/detection3d/train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py index c379025d1..254783b92 100644 --- a/tools/detection3d/train.py +++ b/tools/detection3d/train.py @@ -123,6 +123,10 @@ def main(): # build customized runner from the registry # if 'runner_type' is set in the cfg runner = RUNNERS.build(cfg) + + # Output all model + print_log(f"Runner model: ", logger="current") + print_log(f"{runner.model}", logger="current") # start training runner.train() From d4d93fa3bab4fbe63c528c2a029bfb8e3a5346db Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Sat, 28 Mar 2026 00:39:14 +0900 Subject: [PATCH 008/162] Added --- projects/BEVFusion/bevfusion/__init__.py | 3 ++- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- projects/BEVFusion/bevfusion/utils.py | 1 + .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index e849db227..947ebab23 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ 
b/projects/BEVFusion/bevfusion/__init__.py @@ -6,7 +6,7 @@ from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D -from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost +from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder __all__ = [ "BEVFusion", @@ -26,4 +26,5 @@ "TransformerDecoderLayer", "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", + "TransFusionBBoxCoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 143c35a14..853523c4f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -554,7 +554,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): vel = None boxes_dict = self.bbox_coder.decode( - score, rot, dim, center, height, vel + score, rot, dim, center, height, vel, filter=False ) # decode the prediction to real world metric bbox bboxes_tensor = boxes_dict[0]["bboxes"] gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device) diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..5b7c94877 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,6 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dicts.append(predictions_dict) if filter is False: + print("filter is False") return predictions_dicts # use score threshold diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..a7fac4b37 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py 
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -98,7 +98,7 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, + score_threshold=0.1, out_size_factor=8, code_size=10, ), From b2714a87052e441b94bf8cc15793ef12fb39698f Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Tue, 31 Mar 2026 10:58:41 +0900 Subject: [PATCH 009/162] Added --- projects/BEVFusion/bevfusion/utils.py | 1 - .../default/models/default_lidar_second_secfpn_120m.py | 2 +- .../t4dataset/default/pipelines/default_lidar_120m.py | 7 +------ .../default/pipelines/default_lidar_intensity_120m.py | 5 ----- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 4 ++-- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 3 ++- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 3 ++- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 3 ++- 8 files changed, 10 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 5b7c94877..c47604dbd 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,7 +93,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dicts.append(predictions_dict) if filter is False: - print("filter is False") return predictions_dicts # use score threshold diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index a7fac4b37..b5d9a8fdc 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -98,7 +98,7 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - 
score_threshold=0.1, + score_threshold=0.0, out_size_factor=8, code_size=10, ), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 347ba6452..e79c30710 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -54,17 +54,12 @@ classes=[ "car", "truck", - # "construction_vehicle", "bus", - # "trailer", - # "barrier", - # "motorcycle", "bicycle", "pedestrian", - # "traffic_cone", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 8b154901e..ce7985fd8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -54,14 +54,9 @@ classes=[ "car", "truck", - "construction_vehicle", "bus", - "trailer", - "barrier", - "motorcycle", "bicycle", "pedestrian", - "traffic_cone", ], ), # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index a0be6f4ab..d1d11e7c9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate -# lr = 0.0002 -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 6 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index edcbd74bf..f4f102170 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 8 max_epochs = 30 val_interval = 5 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 32e8d59fa..44870ccf4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 8 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 58192c2de..542ccdd8e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 15 max_epochs = 50 val_interval = 5 From 43adb38bf15ea01b0ff81b06a5e4e435a6e490f9 Mon Sep 17 00:00:00 2001 
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:01:59 +0000 Subject: [PATCH 010/162] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/__init__.py | 2 +- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 2 +- tools/detection3d/train.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 947ebab23..60a64b532 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -26,5 +26,5 @@ "TransformerDecoderLayer", "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", - "TransFusionBBoxCoder", + "TransFusionBBoxCoder", ] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index e43f9c485..d984b5585 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 8538784f8..0878cef29 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index d1d11e7c9..15ba38878 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -60,4 +60,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index f4f102170..a2cd2d2e9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -73,4 +73,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end 
of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 44870ccf4..264eda921 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -60,4 +60,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 542ccdd8e..87571d0b3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -73,4 +73,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py index 254783b92..f7e6309fb 100644 --- a/tools/detection3d/train.py +++ b/tools/detection3d/train.py @@ -123,8 +123,8 @@ def main(): # build customized runner from the registry # if 'runner_type' is set in the cfg runner = RUNNERS.build(cfg) - - # Output all model + + # Output all model print_log(f"Runner model: ", logger="current") print_log(f"{runner.model}", logger="current") From 73dd1c40c1100879b3a700fe5e82536019045cb0 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 8 Apr 2026 19:05:59 +0900 Subject: [PATCH 011/162] Added --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +++- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 2 +- projects/BEVFusion/setup.py 
| 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index a5c72aef0..64bf2208a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index e79c30710..a74ad2ea0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py index 837d1f53e..38f588b20 100644 --- a/projects/BEVFusion/setup.py +++ b/projects/BEVFusion/setup.py @@ -43,7 +43,7 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i name="bev_pool", install_requires=[ "onnx_graphsurgeon==0.5.8", - "spconv-cu120==2.3.6", + "spconv-cu126==2.3.8", ], ext_modules=[ 
make_cuda_ext( From 89e26700d614ab6abc92212ad69083c84019083f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Apr 2026 10:06:34 +0000 Subject: [PATCH 012/162] ci(pre-commit): autofix --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 64bf2208a..0a2a178c6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -resume = True \ No newline at end of file +resume = True From 380d7aa0d4e85934bc2bdd3bd8e88f7746acd4d4 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 10 Apr 2026 23:47:24 +0900 Subject: [PATCH 013/162] Added --- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index d984b5585..605e3cf7c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = 
"bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" From b60e45ae0ca338c30fdb16a7efcc9fb5332bae0e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 12 Apr 2026 21:42:53 +0900 Subject: [PATCH 014/162] Update configs --- ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++-- ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...evfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 6 +++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0878cef29..563f71cf9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = 
"work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b4d8ddfbf..238054ab5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 0a2a178c6..8bf21b1b2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_6_2/" -experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + 
num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 9b38a42898e724c57532b5ad6bf94546c870e809 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 13 Apr 2026 18:34:04 +0900 Subject: [PATCH 015/162] Added --- autoware_ml/configs/detection3d/dataset/t4dataset/base.py | 1 - .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 2 +- ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index c92d58431..d0744a131 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -48,7 +48,6 @@ "j6gen2_base": ("t4dataset_j6gen2_base_infos_test.pkl", False), "j6gen2": ("t4dataset_j6gen2_infos_test.pkl", False), "largebus": ("t4dataset_largebus_infos_test.pkl", False), - "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", False), "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False), "base": ("t4dataset_base_infos_test.pkl", True), } diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index a50cf8852..b7ddb799a 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -18,7 +18,7 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_base": 
("t4dataset_jpntaxi_base_infos_test.pkl", True), + "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", True), } dataset_version_list = [ diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0878cef29..563f71cf9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" From f39b5841db399a9ebd60bf4d40396c83cadd5089 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:08:05 +0900 Subject: [PATCH 016/162] Updated --- ...idar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 563f71cf9..b6677ff05 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_6_2/" -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices @@ -144,4 +148,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" From 24a88adf1e782379104d6d7ff64d408b3c263d8e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:08:56 +0900 Subject: [PATCH 017/162] Added --- ...on_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...el_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...n_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 5 +++-- ...l_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...dar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 5 files changed, 7 insertions(+), 
6 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 605e3cf7c..41a2152cf 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index f048ab1a8..7dfc7e0f8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py 
index 563f71cf9..998e5a22e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -48,6 +48,7 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + dense_heatmap_pooling_classes=["car", "truck", "bus"], # Use class indices for pooling ), ) @@ -144,4 +145,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b4d8ddfbf..238054ab5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = 
"bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 7a0215139..4f9fb7b65 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" From c637420d5e6e9f2fa898c581184a45098ce6469b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:10:58 +0900 Subject: [PATCH 018/162] Added --- ..._lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 7 +++---- ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...vfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +--- ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 41a2152cf..8c02ca112 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file +load_from "" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 7dfc7e0f8..afb150284 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 998e5a22e..3dda36c3a 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -48,7 +48,6 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), - dense_heatmap_pooling_classes=["car", "truck", "bus"], # Use class indices for pooling ), ) @@ -145,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file +load_from "" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 238054ab5..3320d2b08 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type 
experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 0a2a178c6..38f3e369a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" @@ -143,5 +143,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = True diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 4f9fb7b65..c9a0050c0 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" From 48879b8cb6540d0e07124a81bf13aa4b67ecb51e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 03:11:22 +0000 Subject: [PATCH 019/162] ci(pre-commit): autofix --- ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...sion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 8c02ca112..264f0da77 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" \ No newline at end of file +load_from "" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 3dda36c3a..f505ac5dc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" \ No newline at end of file +load_from "" From 82457ab52003421646cf2c15070c60314eaabe6e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:12:18 +0900 Subject: [PATCH 020/162] Added --- .../default/pipelines/default_camera_lidar_intensity_120m.py | 2 -- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 2 -- .../t4dataset/default/pipelines/default_lidar_intensity_120m.py | 2 -- 3 files changed, 6 
deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 9d1910dab..963a218e1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -84,8 +84,6 @@ "traffic_cone", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index a74ad2ea0..06d95be16 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -59,8 +59,6 @@ "pedestrian", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index ce7985fd8..4e74d3616 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -59,8 +59,6 @@ "pedestrian", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), 
dict(type="PointShuffle"), dict( type="Pack3DDetInputs", From 827bbb24c66e04e2d6c38f53423305de0010c075 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 15:50:30 +0900 Subject: [PATCH 021/162] Updated --- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 264f0da77..be535c560 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" From 5f9a4a55156ee29a689a2e08b12064d208d8118d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 16:01:35 +0900 Subject: [PATCH 022/162] Updated --- ...n_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py 
index be535c560..3e615b504 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 1d0ac8db9b9ec1a738a4f7294edcd6764adaef19 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 11:44:49 +0900 Subject: [PATCH 023/162] Added --- projects/BEVFusion/bevfusion/__init__.py | 3 + projects/BEVFusion/bevfusion/bevfusion.py | 30 +- .../bevfusion/bevfusion_voxel_encoder.py | 295 ++++++++++++++++++ ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 30 +- ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 ++++++++++ 5 files changed, 501 insertions(+), 18 deletions(-) create mode 100644 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py diff --git a/projects/BEVFusion/bevfusion/__init__.py 
b/projects/BEVFusion/bevfusion/__init__.py index 60a64b532..3db358b55 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,6 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder +from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder __all__ = [ "BEVFusion", @@ -27,4 +28,6 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", + "BEVFusionVoxelEncoder", + "BEVFusionVoxelSinCosEncoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 243b3beb5..bc3f1b094 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -207,18 +207,29 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 + + if self.pts_voxel_encoder is not None: + assert not self.voxelize_reduce + feats = self.pts_voxel_encoder(feats, sizes, coords) else: # NOTE(knzo25): onnx inference. 
Voxelization happens outside the graph with torch.cuda.amp.autocast(enabled=False): # with torch.autocast('cuda', enabled=False): + # NOTE(knzo25): onnx demmands this + # batch_size = coords[-1, 0] + 1 + # with torch.autocast('cuda', enabled=False): + # NOTE(knzo25): onnx demmands this # batch_size = coords[-1, 0] + 1 batch_size = 1 print("Run onnx point_eSpConvst") - assert self.voxelize_reduce - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + if self.pts_voxel_encoder is not None: + feats = self.pts_voxel_encoder(feats, sizes, coords) + else: + assert self.voxelize_reduce + if self.voxelize_reduce: + feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) x = self.pts_middle_encoder(feats, coords, batch_size) return x @@ -241,12 +252,13 @@ def voxelize(self, points): feats = torch.cat(feats, dim=0) coords = torch.cat(coords, dim=0) - if len(sizes) > 0: - sizes = torch.cat(sizes, dim=0) - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) - feats = feats.contiguous() - + assert len(sizes) > 0, "No points in the voxel" + sizes = torch.cat(sizes, dim=0) + + if self.voxelize_reduce: + feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + feats = feats.contiguous() + return feats, coords, sizes def predict( diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py new file mode 100644 index 000000000..efbc995e8 --- /dev/null +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -0,0 +1,295 @@ +from typing import Optional, Tuple + +import torch +import numpy as np +from mmcv.cnn import build_norm_layer +from mmcv.ops import DynamicScatter +from torch import Tensor, nn + +from mmdet3d.registry import MODELS +from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer + + +@MODELS.register_module() +class 
@MODELS.register_module()
class BEVFusionVoxelEncoder(nn.Module):
    """Pillar-style voxel feature encoder with optional min-max normalisation.

    Every point is decorated with extra channels (offset to the mean of the
    points in its voxel, offset to the geometric voxel centre, optionally the
    Euclidean norm of xyz) and the decorated points are passed through a stack
    of ``PFNLayer`` modules.

    When both ``min_norm_values`` and ``max_norm_values`` are given, the raw
    input channels are min-max scaled, the cluster offsets are divided by the
    voxel size and the voxel-centre offsets by half the voxel size. When
    either is ``None`` no scaling is applied.

    Args:
        min_norm_values (tuple[float], optional): Per-channel minimum of the
            raw input features. Defaults to None.
        max_norm_values (tuple[float], optional): Per-channel maximum of the
            raw input features. Defaults to None.
        in_channels (int, optional): Number of raw input channels per point.
            Defaults to 4.
        feat_channels (tuple, optional): Output channels of each of the
            N PFNLayers. Defaults to (64, ).
        with_distance (bool, optional): Whether to append the Euclidean norm
            of xyz. Defaults to False.
        with_cluster_center (bool, optional): Whether to append the xyz offset
            to the mean of the points in the voxel. Defaults to True.
        with_voxel_center (bool, optional): Whether to append the xyz offset
            to the geometric centre of the voxel. Defaults to True.
        voxel_size (tuple[float], optional): Voxel size along x, y and z.
            Defaults to (0.2, 0.2, 4).
        point_cloud_range (tuple[float], optional): Point cloud range; the
            x, y and z minima are used to locate voxel centres.
            Defaults to (0, -40, -3, 70.4, 40, 1).
        norm_cfg (dict, optional): Normalisation config forwarded to every
            PFNLayer. Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
        mode (str, optional): Gather mode forwarded to every PFNLayer.
            Defaults to 'max'.
        legacy (bool, optional): If True, voxel-centre offsets are written in
            place into a view of the input tensor (the input xyz channels are
            overwritten); if False they are written into a fresh tensor.
            Defaults to True.
    """

    def __init__(
        self,
        min_norm_values: Optional[Tuple[float]] = None,
        max_norm_values: Optional[Tuple[float]] = None,
        in_channels: Optional[int] = 4,
        feat_channels: Optional[tuple] = (64,),
        with_distance: Optional[bool] = False,
        with_cluster_center: Optional[bool] = True,
        with_voxel_center: Optional[bool] = True,
        voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
        point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1),
        norm_cfg: Optional[dict] = dict(type="BN1d", eps=1e-3, momentum=0.01),
        mode: Optional[str] = "max",
        legacy: Optional[bool] = True,
    ):
        super().__init__()
        assert len(feat_channels) > 0
        self.legacy = legacy

        # Each enabled decoration widens the per-point feature vector.
        if with_cluster_center:
            in_channels += 3
        if with_voxel_center:
            in_channels += 3
        if with_distance:
            in_channels += 1
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center

        # Create PillarFeatureNet layers; only the final one is a "last layer".
        self.in_channels = in_channels
        feat_channels = [in_channels] + list(feat_channels)
        num_layers = len(feat_channels) - 1
        pfn_layers = [
            PFNLayer(
                feat_channels[i],
                feat_channels[i + 1],
                norm_cfg=norm_cfg,
                last_layer=(i == num_layers - 1),
                mode=mode,
            )
            for i in range(num_layers)
        ]
        self.pfn_layers = nn.ModuleList(pfn_layers)

        # Voxel size and the coordinate of the centre of voxel (0, 0, 0).
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]
        self.point_cloud_range = point_cloud_range

        # ``torch.tensor(None)`` raises, so the documented ``None`` default has
        # to be registered as an empty buffer; ``forward`` checks for it.
        self.register_buffer("min_norm_values", self._to_optional_tensor(min_norm_values))
        self.register_buffer("max_norm_values", self._to_optional_tensor(max_norm_values))
        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))

    @staticmethod
    def _to_optional_tensor(values: Optional[Tuple[float]]) -> Optional[Tensor]:
        """Convert normalisation bounds to a float tensor, passing ``None`` through."""
        if values is None:
            return None
        return torch.tensor(values, dtype=torch.float32)

    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor:
        """Decorate the points of every voxel and run them through the PFN layers.

        Args:
            features (torch.Tensor): Points grouped by voxel, indexed as
                (voxel, point slot, channel); the first three channels are
                x, y, z. Modified in place when ``legacy`` is True.
            num_points (torch.Tensor): Number of valid points per voxel, one
                entry per voxel.
            coors (torch.Tensor): Voxel coordinates, one row per voxel, laid
                out as (batch_idx, z_idx, y_idx, x_idx).

        Returns:
            torch.Tensor: Output of the last PFN layer with dimension 1
            squeezed.
        """
        normalize = self.min_norm_values is not None and self.max_norm_values is not None
        if normalize:
            features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
            # Resolved once here so that every decoration branch can use it,
            # independently of which other decorations are enabled.
            voxel_size = self.voxel_size.to(features)
        else:
            features_norm = features

        features_ls = [features_norm]

        # Offset of x, y, z from the mean of the points in the voxel.
        # NOTE(review): the mean sums over all point slots, so it presumably
        # relies on padded slots being zero-filled - confirm against voxelizer.
        if self._with_cluster_center:
            points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1)
            f_cluster = features[:, :, :3] - points_mean
            if normalize:
                # Express the offset in units of the voxel size.
                f_cluster = f_cluster / voxel_size
            features_ls.append(f_cluster)

        # Offset of x, y, z from the geometric centre of the voxel.
        if self._with_voxel_center:
            if self.legacy:
                # Legacy behaviour: a view, so the writes below also overwrite
                # the xyz channels of ``features`` itself.
                f_center = features[:, :, :3]
            else:
                f_center = torch.zeros_like(features[:, :, :3])
            # (column of ``coors``, voxel size, centre of voxel 0) per xyz axis.
            axis_specs = (
                (3, self.vx, self.x_offset),
                (2, self.vy, self.y_offset),
                (1, self.vz, self.z_offset),
            )
            for axis, (coor_col, size, offset) in enumerate(axis_specs):
                center = coors[:, coor_col].type_as(features).unsqueeze(1) * size + offset
                f_center[:, :, axis] = features[:, :, axis] - center

            if normalize:
                # Express the offset in units of half the voxel size.
                f_center = f_center / (voxel_size * 0.5)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine together feature decorations
        features = torch.cat(features_ls, dim=-1)

        # The decorations were computed for every point slot, including the
        # padded ones; zero those slots out again.
        max_points_per_voxel = features.shape[1]
        mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        features *= mask

        for pfn in self.pfn_layers:
            features = pfn(features, num_points)

        return features.squeeze(1)


@MODELS.register_module()
class BEVFusionVoxelSinCosEncoder(nn.Module):
    """Parameter-free voxel encoder using a sine/cosine frequency encoding.

    Every point is min-max normalised and decorated with the offset to the
    mean of the points in its voxel (divided by the voxel size), the offset
    to the geometric voxel centre (divided by half the voxel size) and,
    optionally, the Euclidean norm of xyz. Each of the resulting ``C``
    channels is multiplied by ``pi * 2**k`` for ``k = 0 .. C - 1`` and passed
    through ``cos`` and ``sin``. Padded point slots are zeroed and the result
    is averaged over the points of each voxel.

    The output therefore has ``2 * C * C`` channels per voxel, exposed as
    ``out_channels`` (``C = 10`` and 200 channels with ``in_channels=4`` and
    the default decorations); the module consuming the output has to be
    configured for that width.

    Args:
        min_norm_values (tuple[float]): Per-channel minimum of the raw input
            features.
        max_norm_values (tuple[float]): Per-channel maximum of the raw input
            features.
        in_channels (int, optional): Number of raw input channels per point.
            Defaults to 4.
        with_distance (bool, optional): Whether to append the Euclidean norm
            of xyz. Defaults to False.
        with_cluster_center (bool, optional): Whether to append the xyz offset
            to the mean of the points in the voxel. Defaults to True.
        with_voxel_center (bool, optional): Whether to append the xyz offset
            to the geometric centre of the voxel. Defaults to True.
        voxel_size (tuple[float], optional): Voxel size along x, y and z.
            Defaults to (0.2, 0.2, 4).
        point_cloud_range (tuple[float], optional): Point cloud range; the
            x, y and z minima are used to locate voxel centres.
            Defaults to (0, -40, -3, 70.4, 40, 1).
    """

    def __init__(
        self,
        min_norm_values: Tuple[float],
        max_norm_values: Tuple[float],
        in_channels: Optional[int] = 4,
        with_distance: Optional[bool] = False,
        with_cluster_center: Optional[bool] = True,
        with_voxel_center: Optional[bool] = True,
        voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
        point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1),
    ):
        super().__init__()

        # Each enabled decoration widens the per-point feature vector.
        if with_cluster_center:
            in_channels += 3
        if with_voxel_center:
            in_channels += 3
        if with_distance:
            in_channels += 1
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
        # Number of channels after decoration and the resulting output width
        # (every channel times every frequency, for both cos and sin).
        self.in_channels = in_channels
        self.out_channels = 2 * in_channels**2

        # Voxel size and the coordinate of the centre of voxel (0, 0, 0).
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]
        self.point_cloud_range = point_cloud_range

        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
        # Frequencies 2**0 .. 2**(C - 1), one per decorated channel.
        self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float()))

    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor:
        """Encode the points of every voxel into one sin/cos feature vector.

        Args:
            features (torch.Tensor): Points grouped by voxel, indexed as
                (voxel, point slot, channel); the first three channels are
                x, y, z.
            num_points (torch.Tensor): Number of valid points per voxel, one
                entry per voxel.
            coors (torch.Tensor): Voxel coordinates, one row per voxel, laid
                out as (batch_idx, z_idx, y_idx, x_idx).

        Returns:
            torch.Tensor: One row per voxel with ``2 * C * C`` channels, where
            ``C`` is the decorated channel count ``self.in_channels``.
        """
        features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
        features_ls = [features_norm]

        # Offset of x, y, z from the mean of the points in the voxel, in
        # units of the voxel size.
        if self._with_cluster_center:
            points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1)
            f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size
            features_ls.append(f_cluster)

        # Offset of x, y, z from the geometric centre of the voxel, in units
        # of half the voxel size.
        if self._with_voxel_center:
            dtype = features.dtype
            f_center = torch.zeros_like(features[:, :, :3])
            # (column of ``coors``, voxel size, centre of voxel 0) per xyz axis.
            axis_specs = (
                (3, self.vx, self.x_offset),
                (2, self.vy, self.y_offset),
                (1, self.vz, self.z_offset),
            )
            for axis, (coor_col, size, offset) in enumerate(axis_specs):
                center = coors[:, coor_col].to(dtype).unsqueeze(1) * size + offset
                f_center[:, :, axis] = features[:, :, axis] - center
            f_center = f_center / (self.voxel_size * 0.5)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine together feature decorations
        features = torch.cat(features_ls, dim=-1)
        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]

        # SinCos encoding: pair every channel with every frequency, then
        # flatten the (channel, frequency) grid into one axis of size C * C.
        y = features.unsqueeze(-1) * np.pi * self.exponents.view(1, 1, 1, -1)
        y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels**2)
        # cos first, then sin, giving 2 * C * C channels per point.
        features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1)

        # Padded point slots were encoded as well (cos(0) is not zero), so
        # they have to be zeroed before averaging.
        mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        features *= mask

        # Mean over the valid points of each voxel.
        features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
        features = features.contiguous()

        return features
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,16 +25,28 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=100, sparse_shape=_base_.grid_size, - num_aug_features=4, + # num_aug_features=4, # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct 
class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py new file mode 100644 index 000000000..531a07673 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py @@ -0,0 +1,161 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here 
+ min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=100, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + 
type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From a26782abfb7c9b088845d9d778ab0babd520ed74 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:24:44 +0900 
Subject: [PATCH 024/162] Updated --- ...l_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++-- ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 6 +----- ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index afb150284..62ea479fb 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -42,7 +42,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +64,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0eb440472..73c1e4671 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -148,8 +148,4 @@ ) log_processor = dict(window_size=50) -<<<<<<< HEAD -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" -======= -load_from "" ->>>>>>> feat/releave_bevfusion_2_6 +# load_from = "" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 3320d2b08..0109e96d9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -42,7 +42,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="jpntaxi_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +64,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="jpntaxi_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, From f4c01a542618b3c592ddf1cd6e2b9d1657abccbf Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:51:58 +0900 Subject: [PATCH 025/162] Updated --- 
autoware_ml/detection3d/datasets/t4dataset.py | 2 +- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 7 ++----- ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 3 --- ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 3 --- 5 files changed, 4 insertions(+), 13 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..74d274b87 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]: break if entry["images"][camera_order]["img_path"] is None or not osp.exists( - entry["images"][camera_order]["img_path"] + self.data_root + entry["images"][camera_order]["img_path"] ): filtered = True break diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 62ea479fb..3476011ff 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -42,7 +39,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - 
dataset_name="j6gen2_base", + dataset_name="base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +61,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="j6gen2_base", + dataset_name="base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 73c1e4671..4eea4c2aa 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 0109e96d9..49d91e05d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user 
setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index c9a0050c0..3d976d970 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 15371af9539e84c46d844efa5007d565acb80878 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:52:51 +0900 Subject: [PATCH 026/162] Update dataset name --- ...xel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 3476011ff..0748008ba 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -39,7 +39,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -61,7 +61,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, From 4977b332437c647cd617c66a88f1f109129ec9a2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 20 Apr 2026 06:46:48 +0900 Subject: [PATCH 027/162] Update dataset name --- ...voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 2 +- ..._lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 0748008ba..d1950d39a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting 
-experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 49d91e05d..6bd285ce1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index d33b33c56..78d287af6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.7.0/base/" + 
_base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 3d976d970..fbcfe2dce 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From f85a8e906b887d56d67a4e7f88673c1032432a0c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 20 Apr 2026 13:04:20 +0900 Subject: [PATCH 028/162] Update dataset name --- ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 3e615b504..4f220cbcb 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -148,4 +148,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" +# load_from = "" From 
0f5b5888148efcd2aac5af2315befd9301907745 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 15:06:00 +0900 Subject: [PATCH 029/162] Update configs --- autoware_ml/detection3d/datasets/t4dataset.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index 74d274b87..ce1c78f31 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]: break if entry["images"][camera_order]["img_path"] is None or not osp.exists( - self.data_root + entry["images"][camera_order]["img_path"] + entry["images"][camera_order]["img_path"] ): filtered = True break diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 264eda921..23d29acc1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,5 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 +lr = 1e-4 t_max = 8 max_epochs = 30 val_interval = 1 From caecca60228a1468c1f139d331b096884da19a4b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 17:38:51 +0900 Subject: [PATCH 030/162] Update base docstring --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 288 +++++++++++++++++- 1 file changed, 272 insertions(+), 16 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 9de8a2e34..72d47c4b3 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -70,18 +70,20 @@
Eval Range: 0.0 - 50.0m - | Model version | mAP | car
(107,309) | truck
(24,206) | bus
(5,712) | bicycle
(4,060) | pedestrian
(77,369) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 | + | Model version | mAP | mAPH | car
(107,309) | truck
(24,206) | bus
(5,712) | bicycle
(4,060) | pedestrian
(77,369) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.8817 | 0.8496 | 0.9131 | 0.8552 | 0.9081 | 0.8357 | 0.8966 | + | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.8443 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 | -
+
Eval Range: 50.0 - 90.0m | Model version | mAP | mAPH | car
(94,080) | truck
(27,651) | bus
(4,761) | bicycle
(2,365) | pedestrian
(37,523) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 | + | BEVFusion-LiDAR base/2.7.0 | 0.7002 | 0.6621 | 0.8174 | 0.6660 | 0.6414 | 0.6430 | 0.7331 | + | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 |
@@ -90,7 +92,8 @@ | Model version | mAP | mAPH | car
(36,895) | truck
(17,759) | bus
(2,852) | bicycle
(519) | pedestrian
(17,091) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 | + | BEVFusion-LiDAR base/2.7.0 | 0.5600 | 0.5254 | 0.6578 | 0.5131 | 0.5178 | 0.4296 | 0.6815 | + | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 | @@ -99,7 +102,8 @@ | Model version | mAP | mAPH | car
(238,284) | truck
(69,616) | bus
(13,325) | bicycle
(6,944) | pedestrian
(131,983) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 | + | BEVFusion-LiDAR base/2.7.0 | 0.7777 | 0.7420 | 0.8504 | 0.7065 | 0.7443 | 0.7538 | 0.8332 | + | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 | @@ -119,7 +123,8 @@ | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | + | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 | + | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | @@ -128,7 +133,8 @@ | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | + | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 | + | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | @@ -137,7 +143,8 @@ | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | + | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 | + | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | @@ -146,6 +153,7 @@ | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 | | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 | @@ -167,7 +175,8 @@ | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 | + | BEVFusion-LiDAR base/2.7.0 | 0.8876 | 0.8447 | 0.9176 | 0.8727 | 0.9443 | 0.8396 | 0.8639 | + | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 | @@ -176,7 +185,8 @@ | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 | + | BEVFusion-LiDAR base/2.7.0 | 0.7392 | 0.6842 | 0.8425 | 0.7288 | 0.8580 | 0.5826 | 0.6839 | + | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 | @@ -185,7 +195,8 @@ | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 | + | BEVFusion-LiDAR base/2.7.0 | 0.5572 | 0.5118 | 0.7091 | 0.6393 | 0.6121 | 0.3386 | 0.4870 | + | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 | @@ -194,6 +205,7 @@ | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.8086 | 0.7594 | 0.8789 | 0.7783 | 0.8898 | 0.7288 | 0.7670 | | BEVFusion-LiDAR base/2.6.0 | 0.7995 | 0.7514 | 0.8640 | 0.7788 | 0.8608 | 0.7272 | 0.7669 | @@ -221,7 +233,8 @@ | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 | + | BEVFusion-LiDAR base/2.7.0 | 0.8776 | 0.8370 | 0.8907 | 0.8438 | 0.9473 | 0.8665 | 0.8397 | + | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 | @@ -230,7 +243,8 @@ | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 | + | BEVFusion-LiDAR base/2.7.0 | 0.6805 | 0.6279 | 0.7957 | 0.6451 | 0.7955 | 0.5394 | 0.6266 | + | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 | @@ -239,6 +253,7 @@ | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.4902 | 0.4491 | 0.6483 | 0.4871 | 0.5172 | 0.4406 | 0.3578 | | BEVFusion-LiDAR base/2.6.0 | 0.4462 | 0.4042 | 0.6346 | 0.4758 | 0.3215 | 0.4303 | 0.3688 | @@ -248,7 +263,8 @@ | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 | + | BEVFusion-LiDAR base/2.7.0 | 0.7822 | 0.7349 | 0.8292 | 0.7169 | 0.8590 | 0.7505 | 0.7556 | + | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 | @@ -256,6 +272,246 @@ ## Release +### BEVFusion-LiDAR base/2.7.0 + +
+ Changes + +- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping them to Fourier features [[1]](https://arxiv.org/pdf/2006.10739). +
+ +
+ Artifacts + +- Deployed ONNX and ROS parameter files (internal use only) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/51628f64-9c15-4029-b3c5-5bf501d879e2?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1zopj68qxLmI244qi3NgxB0ELT997V4W3/view?usp=drive_link) +- Logs (internal use only) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/logs.zip) + - [Google drive](https://drive.google.com/file/d/1-OIvsmsB69a5L_4sqjOSJ9IOltRWFDIv/view?usp=drive_link) +- Best PyTorch checkpoint: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/best_epoch_48.pth) + - [Google drive](https://drive.google.com/file/d/1b8iwwLBLAmn0NwqRaTJOWHMINfS9p_fc/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/0f5b5888148efcd2aac5af2315befd9301907745/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 50 epochs ~= 4 days +- Batch size: 8*8 = 64 +- Training Dataset (frames: 142,196): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - j6: db_gsm8_v1 + db_j6_v1 + db_j6_v2 + db_j6_v3 + db_j6_v5 (29,336 frames) + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (43,968 frames) + - largebus: db_largebus_v1 + db_largebus_v2 (12,605 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames) + +
+ +
+ Evaluation + +**Base Datasets (15,154 frames)**: + + - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8817** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 107,309 | 0.9131 | 0.862 / 0.914 / 0.933 / 0.943 | 0.905 / 0.935 / 0.942 / 0.945 | 0.233 / 0.192 / 0.159 / 0.142 | +| truck | 24,206 | 0.8552 | 0.711 / 0.843 / 0.919 / 0.948 | 0.795 / 0.877 / 0.918 / 0.934 | 0.297 / 0.225 / 0.192 / 0.180 | +| bus | 5,712 | 0.9081 | 0.829 / 0.912 / 0.945 / 0.947 | 0.876 / 0.916 / 0.931 / 0.932 | 0.312 / 0.146 / 0.146 / 0.146 | +| bicycle | 4,060 | 0.8357 | 0.813 / 0.840 / 0.844 / 0.846 | 0.857 / 0.868 / 0.869 / 0.870 | 0.210 / 0.194 / 0.194 / 0.194 | +| pedestrian | 77,369 | 0.8966 | 0.877 / 0.895 / 0.903 / 0.911 | 0.857 / 0.867 / 0.874 / 0.878 | 0.148 / 0.148 / 0.148 / 0.147 | +| **ALL** | 218,656 | 0.8817 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7002** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,080 | 0.8174 | 0.708 / 0.817 / 0.864 / 0.881 | 0.782 / 0.844 / 0.867 / 0.872 | 0.212 / 0.166 / 0.164 / 0.161 | +| truck | 27,651 | 0.6660 | 0.463 / 0.626 / 0.759 / 0.815 | 0.612 / 0.714 / 0.787 / 0.812 | 0.229 / 0.190 / 0.154 / 0.130 | +| bus | 4,761 | 0.6414 | 0.393 / 0.602 / 0.775 / 0.795 | 0.554 / 0.691 / 0.798 / 0.807 | 0.324 / 0.219 / 0.181 / 0.138 | +| bicycle | 2,365 | 0.6430 | 0.586 / 0.658 / 0.663 / 0.666 | 0.683 / 0.715 / 0.716 / 0.717 | 0.141 / 0.141 / 0.141 / 0.141 | +| pedestrian 
| 37,523 | 0.7331 | 0.711 / 0.730 / 0.741 / 0.750 | 0.732 / 0.742 / 0.748 / 0.753 | 0.145 / 0.145 / 0.145 / 0.144 | +| **ALL** | 166,380 | 0.7002 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5600** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 36,895 | 0.6578 | 0.498 / 0.656 / 0.726 / 0.751 | 0.626 / 0.714 / 0.750 / 0.760 | 0.168 / 0.143 / 0.137 / 0.132 | +| truck | 17,759 | 0.5131 | 0.206 / 0.450 / 0.648 / 0.749 | 0.439 / 0.611 / 0.720 / 0.775 | 0.240 / 0.193 / 0.134 / 0.124 | +| bus | 2,852 | 0.5178 | 0.313 / 0.520 / 0.608 / 0.630 | 0.534 / 0.659 / 0.704 / 0.714 | 0.244 / 0.166 / 0.140 / 0.140 | +| bicycle | 519 | 0.4296 | 0.315 / 0.421 / 0.491 / 0.491 | 0.503 / 0.563 / 0.592 / 0.592 | 0.180 / 0.180 / 0.180 / 0.180 | +| pedestrian | 17,091 | 0.6815 | 0.660 / 0.678 / 0.687 / 0.700 | 0.698 / 0.708 / 0.712 / 0.719 | 0.126 / 0.126 / 0.126 / 0.126 | +| **ALL** | 75,116 | 0.5600 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7777** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 238,284 | 0.8504 | 0.760 / 0.851 / 0.888 / 0.903 | 0.818 / 0.868 / 0.886 / 0.890 | 0.219 / 0.184 / 0.161 / 0.158 | +| truck | 69,616 | 0.7065 | 0.492 / 0.671 / 0.802 / 0.861 | 0.641 / 0.752 / 0.822 / 0.851 | 0.251 / 0.216 / 0.173 / 0.136 | +| bus | 13,325 | 0.7443 | 0.575 / 0.735 / 0.827 / 0.840 | 0.703 / 0.791 / 0.843 / 0.849 | 0.345 / 0.181 / 0.181 / 0.146 | +| bicycle | 6,944 | 0.7538 | 0.714 / 0.761 / 0.769 / 0.771 | 0.776 / 0.797 / 0.800 / 0.801 | 0.186 / 0.176 / 0.176 / 0.176 | +| pedestrian | 131,983 | 0.8332 | 0.813 / 0.831 / 0.840 / 0.849 | 0.802 / 0.812 / 0.818 / 0.824 | 0.144 / 0.145 / 0.145 / 0.145 | +| **ALL** | 460,152 | 0.7777 | — | — | — | + +--- + +**LargeBus**: db_largebus_v1 + 
db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8876** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 14,883 | 0.9176 | 0.876 / 0.916 / 0.934 / 0.944 | 0.917 / 0.943 / 0.947 / 0.949 | 0.245 / 0.154 / 0.154 / 0.154 | +| truck | 1,193 | 0.8727 | 0.747 / 0.873 / 0.926 / 0.944 | 0.829 / 0.900 / 0.924 / 0.928 | 0.269 / 0.206 / 0.157 / 0.157 | +| bus | 336 | 0.9443 | 0.824 / 0.975 / 0.989 / 0.989 | 0.878 / 0.974 / 0.984 / 0.984 | 0.439 / 0.338 / 0.269 / 0.269 | +| bicycle | 740 | 0.8396 | 0.764 / 0.848 / 0.869 / 0.877 | 0.833 / 0.862 / 0.866 / 0.871 | 0.194 / 0.194 / 0.182 / 0.182 | +| pedestrian | 5,059 | 0.8639 | 0.848 / 0.863 / 0.869 / 0.876 | 0.837 / 0.845 / 0.850 / 0.853 | 0.167 / 0.167 / 0.167 / 0.154 | +| **ALL** | 22,211 | 0.8876 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7392** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 10,994 | 0.8425 | 0.745 / 0.846 / 0.883 / 0.896 | 0.810 / 0.869 / 0.886 / 0.891 | 0.210 / 0.170 / 0.153 / 0.153 | +| truck | 1,011 | 0.7288 | 0.537 / 0.722 / 0.818 / 0.838 | 0.670 / 0.784 / 0.834 / 0.840 | 0.184 / 0.158 / 0.113 / 0.113 | +| bus | 143 | 0.8580 | 0.589 / 0.944 / 0.944 / 0.956 | 0.730 / 0.929 / 0.929 / 0.929 | 0.510 / 0.463 / 0.463 / 0.463 | +| bicycle | 463 | 0.5826 | 0.477 / 0.607 / 0.622 / 0.625 | 0.606 / 0.667 / 0.671 / 0.673 | 0.118 / 0.112 / 0.102 / 0.102 | +| pedestrian | 3,754 | 0.6839 | 0.664 / 0.681 / 0.690 / 0.702 | 0.698 / 0.705 / 0.711 / 0.717 | 0.121 / 0.117 / 0.117 / 0.117 | +| **ALL** | 16,365 | 0.7392 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5572** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | 
optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 3,018 | 0.7091 | 0.556 / 0.712 / 0.776 / 0.792 | 0.665 / 0.747 / 0.778 / 0.786 | 0.205 / 0.181 / 0.181 / 0.181 | +| truck | 602 | 0.6393 | 0.365 / 0.651 / 0.760 / 0.781 | 0.553 / 0.730 / 0.789 / 0.798 | 0.208 / 0.208 / 0.152 / 0.152 | +| bus | 60 | 0.6121 | 0.420 / 0.637 / 0.696 / 0.696 | 0.583 / 0.725 / 0.765 / 0.765 | 0.275 / 0.197 / 0.197 / 0.197 | +| bicycle | 85 | 0.3386 | 0.244 / 0.355 / 0.378 / 0.378 | 0.446 / 0.514 / 0.524 / 0.524 | 0.181 / 0.181 / 0.137 / 0.137 | +| pedestrian | 1,121 | 0.4870 | 0.473 / 0.483 / 0.490 / 0.502 | 0.579 / 0.586 / 0.591 / 0.593 | 0.137 / 0.137 / 0.137 / 0.137 | +| **ALL** | 4,886 | 0.5572 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8086** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 28,895 | 0.8789 | 0.806 / 0.881 / 0.909 / 0.919 | 0.853 / 0.896 / 0.908 / 0.911 | 0.245 / 0.185 / 0.176 / 0.170 | +| truck | 2,806 | 0.7783 | 0.597 / 0.778 / 0.859 / 0.880 | 0.714 / 0.824 / 0.865 / 0.870 | 0.206 / 0.206 / 0.157 / 0.155 | +| bus | 539 | 0.8898 | 0.718 / 0.931 / 0.952 / 0.958 | 0.808 / 0.931 / 0.937 / 0.937 | 0.382 / 0.354 / 0.354 / 0.354 | +| bicycle | 1,288 | 0.7288 | 0.641 / 0.744 / 0.762 / 0.768 | 0.729 / 0.769 / 0.773 / 0.776 | 0.176 / 0.176 / 0.176 / 0.172 | +| pedestrian | 9,934 | 0.7670 | 0.749 / 0.765 / 0.772 / 0.782 | 0.757 / 0.765 / 0.771 / 0.775 | 0.137 / 0.137 / 0.137 / 0.137 | +| **ALL** | 43,462 | 0.8086 | — | — | — | + +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8776** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | 
---: | :---- | :---- | :---- | +| car | 49,637 | 0.8907 | 0.841 / 0.890 / 0.909 / 0.922 | 0.896 / 0.924 / 0.931 / 0.934 | 0.269 / 0.199 / 0.159 / 0.135 | +| truck | 5,754 | 0.8438 | 0.718 / 0.833 / 0.894 / 0.930 | 0.794 / 0.862 / 0.893 / 0.915 | 0.222 / 0.194 / 0.171 / 0.171 | +| bus | 1,939 | 0.9473 | 0.878 / 0.942 / 0.983 / 0.986 | 0.925 / 0.963 / 0.981 / 0.982 | 0.206 / 0.140 / 0.140 / 0.140 | +| bicycle | 639 | 0.8665 | 0.854 / 0.871 / 0.871 / 0.871 | 0.867 / 0.875 / 0.875 / 0.875 | 0.176 / 0.176 / 0.176 / 0.176 | +| pedestrian | 14,362 | 0.8397 | 0.813 / 0.836 / 0.849 / 0.861 | 0.806 / 0.817 / 0.824 / 0.831 | 0.169 / 0.151 / 0.151 / 0.165 | +| **ALL** | 72,331 | 0.8776 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6805** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 47,568 | 0.7957 | 0.662 / 0.795 / 0.851 / 0.875 | 0.760 / 0.838 / 0.866 / 0.874 | 0.212 / 0.184 / 0.164 / 0.164 | +| truck | 4,090 | 0.6451 | 0.451 / 0.622 / 0.729 / 0.778 | 0.606 / 0.711 / 0.768 / 0.789 | 0.234 / 0.205 / 0.176 / 0.165 | +| bus | 1,935 | 0.7955 | 0.571 / 0.760 / 0.912 / 0.938 | 0.694 / 0.815 / 0.906 / 0.916 | 0.345 / 0.240 / 0.182 / 0.168 | +| bicycle | 295 | 0.5394 | 0.494 / 0.552 / 0.554 / 0.557 | 0.628 / 0.669 / 0.669 / 0.669 | 0.137 / 0.138 / 0.138 / 0.138 | +| pedestrian | 6,529 | 0.6266 | 0.591 / 0.622 / 0.639 / 0.654 | 0.661 / 0.676 / 0.682 / 0.689 | 0.140 / 0.140 / 0.140 / 0.140 | +| **ALL** | 60,417 | 0.6805 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4902** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 17,353 | 0.6483 | 0.452 / 0.639 / 0.734 / 0.768 | 0.608 / 0.712 / 0.760 / 0.774 | 0.168 / 0.153 / 0.143 / 0.132 | +| truck | 2,570 | 0.4871 | 0.209 / 0.419 / 0.619 / 
0.702 | 0.425 / 0.578 / 0.700 / 0.746 | 0.199 / 0.127 / 0.126 / 0.124 | +| bus | 316 | 0.5172 | 0.246 / 0.532 / 0.626 / 0.665 | 0.433 / 0.640 / 0.701 / 0.721 | 0.173 / 0.100 / 0.100 / 0.089 | +| bicycle | 70 | 0.4406 | 0.382 / 0.438 / 0.471 / 0.471 | 0.584 / 0.619 / 0.637 / 0.637 | 0.193 / 0.193 / 0.193 / 0.193 | +| pedestrian | 1,673 | 0.3578 | 0.344 / 0.354 / 0.362 / 0.371 | 0.492 / 0.496 / 0.500 / 0.505 | 0.137 / 0.107 / 0.107 / 0.111 | +| **ALL** | 21,982 | 0.4902 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7822** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 114,558 | 0.8292 | 0.725 / 0.826 / 0.872 / 0.894 | 0.800 / 0.859 / 0.881 / 0.888 | 0.232 / 0.194 / 0.164 / 0.158 | +| truck | 12,414 | 0.7169 | 0.534 / 0.691 / 0.795 / 0.847 | 0.665 / 0.760 / 0.816 / 0.843 | 0.251 / 0.194 / 0.166 / 0.151 | +| bus | 4,190 | 0.8590 | 0.703 / 0.840 / 0.938 / 0.955 | 0.790 / 0.874 / 0.929 / 0.936 | 0.345 / 0.186 / 0.182 / 0.168 | +| bicycle | 1,004 | 0.7505 | 0.724 / 0.758 / 0.760 / 0.760 | 0.781 / 0.798 / 0.799 / 0.799 | 0.176 / 0.176 / 0.176 / 0.176 | +| pedestrian | 22,564 | 0.7556 | 0.727 / 0.752 / 0.766 / 0.778 | 0.744 / 0.756 / 0.763 / 0.770 | 0.152 / 0.151 / 0.151 / 0.151 | +| **ALL** | 154,730 | 0.7822 | — | — | — | + +--- + +**JPNTaxi_Gen2**: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (9,975 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8837** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 42,789 | 0.9393 | 0.882 / 0.945 / 0.964 / 0.967 | 0.911 / 0.946 / 0.954 / 0.955 | 0.211 / 0.168 / 0.142 / 0.142 | +| truck | 17,259 | 0.8587 | 0.709 / 0.846 / 0.926 / 0.954 | 0.795 / 0.881 / 0.926 / 0.941 | 0.371 / 0.243 / 0.234 / 0.189 | +| bus | 3,437 | 0.8802 | 0.798 / 0.889 / 0.916 / 0.918 
| 0.850 / 0.886 / 0.898 / 0.899 | 0.369 / 0.146 / 0.128 / 0.128 | +| bicycle | 2,681 | 0.8268 | 0.816 / 0.830 / 0.831 / 0.831 | 0.865 / 0.871 / 0.872 / 0.872 | 0.219 / 0.219 / 0.219 / 0.219 | +| pedestrian | 57,948 | 0.9135 | 0.896 / 0.912 / 0.919 / 0.926 | 0.872 / 0.882 / 0.889 / 0.893 | 0.148 / 0.140 / 0.143 / 0.140 | +| **ALL** | 124,114 | 0.8837 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6901** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 35,518 | 0.8382 | 0.757 / 0.838 / 0.874 / 0.885 | 0.803 / 0.847 / 0.862 / 0.865 | 0.212 / 0.165 / 0.162 / 0.161 | +| truck | 22,550 | 0.6676 | 0.462 / 0.623 / 0.762 / 0.823 | 0.611 / 0.711 / 0.788 / 0.816 | 0.247 / 0.193 / 0.154 / 0.130 | +| bus | 2,683 | 0.5007 | 0.240 / 0.447 / 0.649 / 0.667 | 0.421 / 0.581 / 0.708 / 0.717 | 0.242 / 0.151 / 0.144 / 0.144 | +| bicycle | 1,607 | 0.6794 | 0.635 / 0.692 / 0.695 / 0.697 | 0.719 / 0.740 / 0.742 / 0.743 | 0.146 / 0.141 / 0.141 / 0.141 | +| pedestrian | 27,240 | 0.7645 | 0.745 / 0.762 / 0.772 / 0.780 | 0.753 / 0.764 / 0.769 / 0.773 | 0.156 / 0.144 / 0.145 / 0.145 | +| **ALL** | 89,598 | 0.6901 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5750** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 16,524 | 0.6601 | 0.539 / 0.665 / 0.710 / 0.727 | 0.643 / 0.715 / 0.740 / 0.745 | 0.138 / 0.108 / 0.108 / 0.109 | +| truck | 14,587 | 0.5131 | 0.200 / 0.448 / 0.649 / 0.756 | 0.438 / 0.613 / 0.721 / 0.779 | 0.248 / 0.193 / 0.134 / 0.124 | +| bus | 2,476 | 0.5145 | 0.318 / 0.515 / 0.602 / 0.623 | 0.547 / 0.661 / 0.704 / 0.714 | 0.244 / 0.163 / 0.152 / 0.148 | +| bicycle | 364 | 0.4541 | 0.324 / 0.439 / 0.527 / 0.527 | 0.504 / 0.567 / 0.604 / 0.604 | 0.174 / 0.171 / 0.171 / 0.171 | +| 
pedestrian | 14,297 | 0.7331 | 0.711 / 0.730 / 0.739 / 0.753 | 0.731 / 0.742 / 0.746 / 0.754 | 0.126 / 0.126 / 0.126 / 0.126 | +| **ALL** | 48,248 | 0.5750 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7715** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,831 | 0.8661 | 0.785 / 0.869 / 0.900 / 0.910 | 0.828 / 0.871 / 0.884 / 0.887 | 0.198 / 0.165 / 0.150 / 0.141 | +| truck | 54,396 | 0.7010 | 0.478 / 0.662 / 0.800 / 0.864 | 0.632 / 0.747 / 0.821 / 0.852 | 0.273 / 0.216 / 0.173 / 0.134 | +| bus | 8,596 | 0.6721 | 0.500 / 0.665 / 0.756 / 0.768 | 0.648 / 0.737 / 0.792 / 0.798 | 0.326 / 0.151 / 0.146 / 0.146 | +| bicycle | 4,652 | 0.7611 | 0.731 / 0.766 / 0.773 / 0.775 | 0.790 / 0.805 / 0.809 / 0.809 | 0.186 / 0.187 / 0.187 / 0.187 | +| pedestrian | 99,485 | 0.8573 | 0.838 / 0.855 / 0.864 / 0.872 | 0.820 / 0.830 / 0.836 / 0.841 | 0.145 / 0.143 / 0.145 / 0.143 | +| **ALL** | 261,960 | 0.7715 | — | — | — | + +
+ +--- + ### BEVFusion-LiDAR base/2.6.0
From 07c2e110802ec2537d4c620d9af7f7e1b8120b97 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 17:39:32 +0900 Subject: [PATCH 031/162] Update base docstring --- projects/BEVFusion/docs/BEVFusion-L/v2/base.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 72d47c4b3..ecdd1e9a8 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -277,7 +277,7 @@
Changes -- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping it to fourier features [[1]](https://arxiv.org/pdf/2006.10739). +- Train with min-max normalization of (x, y, z, intensity, time_lag) into [0, 1], followed by a mapping to Fourier features [[1]](https://arxiv.org/pdf/2006.10739).
From 2665b277bda7865a10f04daa37b8eaa8ea6c5606 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 19:15:35 +0900 Subject: [PATCH 032/162] Update j6gen2_base and jpntaxi_base docstring --- .../v2/{j6gen2.md => j6gen2_base.md} | 220 +++++++++++++++++- .../docs/BEVFusion-L/v2/jpntaxi_base.md | 153 ++++++++++++ 2 files changed, 363 insertions(+), 10 deletions(-) rename projects/BEVFusion/docs/BEVFusion-L/v2/{j6gen2.md => j6gen2_base.md} (54%) create mode 100644 projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md similarity index 54% rename from projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md rename to projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md index 8ad986677..54e994313 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md @@ -64,7 +64,8 @@ | Model version | mAP | mAPH | car
(64,520) | truck
(6,947) | bus
(2,275) | bicycle
(1,379) | pedestrian
(19,421) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8828 | 0.8387 | 0.9022 | 0.8627 | 0.9440 | 0.8483 | 0.8569 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 |
@@ -73,7 +74,8 @@ | Model version | mAP | mAPH | car
(58,562) | truck
(5,101) | bus
(2,078) | bicycle
(758) | pedestrian
(10,283) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7193 | 0.6620 | 0.8197 | 0.6856 | 0.8249 | 0.5862 | 0.6801 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 |
@@ -82,7 +84,8 @@ | Model version | mAP | mAPH | car
(20,371) | truck
(3,172) | bus
(376) | bicycle
(155) | pedestrian
(2,794) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5223 | 0.4757 | 0.6814 | 0.5181 | 0.5381 | 0.4165 | 0.4573 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 | @@ -91,6 +94,7 @@ | Model version | mAP | mAPH | car
(143,453) | truck
(15,220) | bus
(4,729) | bicycle
(2,292) | pedestrian
(32,498) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7990 | 0.7487 | 0.8508 | 0.7435 | 0.8711 | 0.7487 | 0.7809 | | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7903 | 0.7413 | 0.8266 | 0.7409 | 0.8510 | 0.7541 | 0.7790 | @@ -112,6 +116,7 @@ | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8947 | 0.8393 | 0.9231 | 0.8893 | 0.9564 | 0.8264 | 0.8782 | | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8985 | 0.8484 | 0.9087 | 0.8974 | 0.9636 | 0.8447 | 0.8780 | @@ -121,7 +126,8 @@ | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7679 | 0.7089 | 0.8567 | 0.7666 | 0.8723 | 0.5955 | 0.7485 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 | @@ -130,7 +136,8 @@ | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5924 | 0.5370 | 0.7238 | 0.6616 | 0.6305 | 0.3964 | 0.5497 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 | @@ -139,7 +146,8 @@ | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8267 | 0.7675 | 0.8888 | 0.8055 | 0.9009 | 0.7334 | 0.8051 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 | @@ -166,7 +174,8 @@ | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8836 | 0.8431 | 0.8942 | 0.8569 | 0.9393 | 0.8780 | 0.8494 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 | @@ -175,7 +184,8 @@ | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7040 | 0.6488 | 0.8118 | 0.6662 | 0.8221 | 0.5781 | 0.6417 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 | @@ -184,7 +194,8 @@ | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5030 | 0.4572 | 0.6739 | 0.4847 | 0.5186 | 0.4430 | 0.3948 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 | @@ -193,7 +204,8 @@ | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7958 | 0.7472 | 0.8408 | 0.7294 | 0.8673 | 0.7710 | 0.7706 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 | @@ -201,6 +213,194 @@ ## Release +### BEVFusion-LiDAR J6Gen2_base/2.7.1 + +
+ Changes + +- Fine-tune from `BEVFusion-LiDAR base/2.7.0` on the j6gen2 base dataset, with intensity. +
+ +
+ Artifacts + +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/ab0f33f5-2c8e-4adf-b122-f8f0c229c91e?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1Sw2UkqsoOP_YhoPpLqaBvHFnBapBV1kw/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1M_Ae0rQ9L1I4NbzSL9tlJ8D0KVGvunKF/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/best_epoch_28.pth) + - [Google drive](https://drive.google.com/file/d/1xsFKCIkqVnt273o2SKjjCayuh_4IV-Vd/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours +- Batch size: 8*8 = 64 +- Training Dataset (frames: 55,714): + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 (43,109 frames) + - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames) + +
+ +
+ Evaluation + +**J6Gen2_base Datasets (5,179 frames)**: + + - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8828** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 64,520 | 0.9022 | 0.853 / 0.901 / 0.921 / 0.933 | 0.904 / 0.931 / 0.937 / 0.939 | 0.260 / 0.193 / 0.180 / 0.172 | +| truck | 6,947 | 0.8627 | 0.736 / 0.863 / 0.910 / 0.942 | 0.800 / 0.877 / 0.903 / 0.920 | 0.244 / 0.191 / 0.188 / 0.166 | +| bus | 2,275 | 0.9440 | 0.866 / 0.940 / 0.983 / 0.986 | 0.912 / 0.958 / 0.978 / 0.980 | 0.203 / 0.177 / 0.163 / 0.138 | +| bicycle | 1,379 | 0.8483 | 0.802 / 0.849 / 0.869 / 0.874 | 0.847 / 0.867 / 0.876 / 0.879 | 0.205 / 0.191 / 0.172 / 0.172 | +| pedestrian | 19,421 | 0.8569 | 0.834 / 0.854 / 0.865 / 0.875 | 0.822 / 0.833 / 0.838 / 0.844 | 0.163 / 0.152 / 0.152 / 0.152 | +| **ALL** | 94,542 | 0.8828 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7193** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 58,562 | 0.8197 | 0.694 / 0.818 / 0.873 / 0.893 | 0.782 / 0.853 / 0.879 / 0.886 | 0.228 / 0.173 / 0.164 / 0.164 | +| truck | 5,101 | 0.6856 | 0.484 / 0.670 / 0.773 / 0.815 | 0.633 / 0.743 / 0.798 / 0.816 | 0.213 / 0.206 / 0.184 / 0.164 | +| bus | 2,078 | 0.8249 | 0.626 / 0.815 / 0.918 / 0.941 | 0.730 / 0.846 / 0.904 / 0.919 | 0.342 / 0.211 / 0.210 / 0.160 | +| bicycle | 758 | 0.5862 | 0.495 / 0.603 / 0.622 / 0.624 | 0.637 / 0.679 / 0.683 / 0.683 | 0.183 / 0.155 / 0.155 / 0.183 | +| pedestrian | 10,283 | 0.6801 | 0.650 / 0.676 / 0.691 / 0.703 | 0.692 / 0.705 / 
0.713 / 0.720 | 0.136 / 0.136 / 0.136 / 0.136 | +| **ALL** | 76,782 | 0.7193 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5223** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 20,371 | 0.6814 | 0.493 / 0.674 / 0.763 / 0.796 | 0.638 / 0.737 / 0.781 / 0.795 | 0.193 / 0.159 / 0.151 / 0.151 | +| truck | 3,172 | 0.5181 | 0.227 / 0.454 / 0.652 / 0.738 | 0.447 / 0.601 / 0.715 / 0.762 | 0.206 / 0.206 / 0.162 / 0.140 | +| bus | 376 | 0.5381 | 0.272 / 0.557 / 0.643 / 0.680 | 0.462 / 0.669 / 0.714 / 0.731 | 0.217 / 0.151 / 0.115 / 0.115 | +| bicycle | 155 | 0.4165 | 0.316 / 0.419 / 0.466 / 0.466 | 0.487 / 0.553 / 0.589 / 0.589 | 0.199 / 0.166 / 0.190 / 0.190 | +| pedestrian | 2,794 | 0.4573 | 0.443 / 0.452 / 0.462 / 0.472 | 0.564 / 0.569 / 0.573 / 0.578 | 0.120 / 0.120 / 0.120 / 0.120 | +| **ALL** | 26,868 | 0.5223 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7990** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 143,453 | 0.8508 | 0.752 / 0.849 / 0.891 / 0.910 | 0.820 / 0.874 / 0.894 / 0.900 | 0.232 / 0.189 / 0.174 / 0.164 | +| truck | 15,220 | 0.7435 | 0.555 / 0.725 / 0.824 / 0.871 | 0.677 / 0.780 / 0.834 / 0.858 | 0.234 / 0.206 / 0.186 / 0.165 | +| bus | 4,729 | 0.8711 | 0.726 / 0.865 / 0.939 / 0.954 | 0.804 / 0.890 / 0.928 / 0.937 | 0.408 / 0.211 / 0.177 / 0.161 | +| bicycle | 2,292 | 0.7487 | 0.682 / 0.754 / 0.777 / 0.781 | 0.760 / 0.789 / 0.799 / 0.801 | 0.191 / 0.189 / 0.189 / 0.190 | +| pedestrian | 32,498 | 0.7809 | 0.756 / 0.777 / 0.790 / 0.801 | 0.760 / 0.772 / 0.778 / 0.784 | 0.151 / 0.136 / 0.136 / 0.136 | +| **ALL** | 198,192 | 0.7990 | — | — | — | + +--- + +**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center 
Distance mAP (eval range = 0.0 - 50.0m): 0.8947** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 14,883 | 0.9231 | 0.884 / 0.925 / 0.937 / 0.946 | 0.923 / 0.947 / 0.952 / 0.953 | 0.234 / 0.178 / 0.178 / 0.178 | +| truck | 1,193 | 0.8893 | 0.754 / 0.905 / 0.938 / 0.961 | 0.832 / 0.922 / 0.939 / 0.945 | 0.269 / 0.201 / 0.188 / 0.116 | +| bus | 336 | 0.9564 | 0.872 / 0.983 / 0.985 / 0.986 | 0.904 / 0.962 / 0.965 / 0.965 | 0.419 / 0.174 / 0.174 / 0.174 | +| bicycle | 740 | 0.8264 | 0.749 / 0.825 / 0.862 / 0.870 | 0.824 / 0.854 / 0.867 / 0.872 | 0.249 / 0.247 / 0.198 / 0.198 | +| pedestrian | 5,059 | 0.8782 | 0.862 / 0.876 / 0.883 / 0.891 | 0.849 / 0.857 / 0.861 / 0.866 | 0.148 / 0.148 / 0.139 / 0.140 | +| **ALL** | 22,211 | 0.8947 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7679** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 10,994 | 0.8567 | 0.759 / 0.860 / 0.897 / 0.911 | 0.824 / 0.881 / 0.898 / 0.901 | 0.210 / 0.164 / 0.160 / 0.160 | +| truck | 1,011 | 0.7666 | 0.593 / 0.770 / 0.843 / 0.860 | 0.710 / 0.818 / 0.851 / 0.854 | 0.234 / 0.219 / 0.166 / 0.150 | +| bus | 143 | 0.8723 | 0.698 / 0.921 / 0.932 / 0.939 | 0.788 / 0.904 / 0.911 / 0.911 | 0.294 / 0.498 / 0.498 / 0.498 | +| bicycle | 463 | 0.5955 | 0.472 / 0.616 / 0.647 / 0.648 | 0.625 / 0.685 / 0.692 / 0.692 | 0.151 / 0.151 / 0.151 / 0.151 | +| pedestrian | 3,754 | 0.7485 | 0.726 / 0.747 / 0.755 / 0.766 | 0.740 / 0.749 / 0.755 / 0.761 | 0.124 / 0.124 / 0.121 / 0.121 | +| **ALL** | 16,365 | 0.7679 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5924** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 
3,018 | 0.7238 | 0.573 / 0.728 / 0.789 / 0.806 | 0.688 / 0.765 / 0.792 / 0.801 | 0.221 / 0.228 / 0.158 / 0.158 | +| truck | 602 | 0.6616 | 0.381 / 0.676 / 0.780 / 0.809 | 0.575 / 0.756 / 0.811 / 0.822 | 0.216 / 0.208 / 0.176 / 0.176 | +| bus | 60 | 0.6305 | 0.434 / 0.626 / 0.730 / 0.732 | 0.608 / 0.745 / 0.793 / 0.793 | 0.217 / 0.217 / 0.087 / 0.087 | +| bicycle | 85 | 0.3964 | 0.298 / 0.382 / 0.452 / 0.453 | 0.468 / 0.544 / 0.595 / 0.595 | 0.166 / 0.166 / 0.166 / 0.166 | +| pedestrian | 1,121 | 0.5497 | 0.536 / 0.546 / 0.552 / 0.565 | 0.624 / 0.629 / 0.633 / 0.638 | 0.120 / 0.118 / 0.118 / 0.118 | +| **ALL** | 4,886 | 0.5924 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8267** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 28,895 | 0.8888 | 0.815 / 0.891 / 0.919 / 0.930 | 0.864 / 0.905 / 0.917 / 0.919 | 0.230 / 0.180 / 0.180 / 0.176 | +| truck | 2,806 | 0.8055 | 0.623 / 0.816 / 0.879 / 0.903 | 0.736 / 0.851 / 0.882 / 0.888 | 0.233 / 0.207 / 0.183 / 0.169 | +| bus | 539 | 0.9009 | 0.783 / 0.929 / 0.945 / 0.948 | 0.838 / 0.921 / 0.929 / 0.929 | 0.430 / 0.208 / 0.208 / 0.208 | +| bicycle | 1,288 | 0.7334 | 0.637 / 0.738 / 0.776 / 0.783 | 0.730 / 0.774 / 0.793 / 0.796 | 0.186 / 0.161 / 0.161 / 0.161 | +| pedestrian | 9,934 | 0.8051 | 0.787 / 0.803 / 0.811 / 0.820 | 0.782 / 0.790 / 0.796 / 0.801 | 0.149 / 0.135 / 0.128 / 0.135 | +| **ALL** | 43,462 | 0.8267 | — | — | — | + +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8836** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 49,637 | 0.8942 | 0.843 / 0.891 / 0.912 / 0.931 | 
0.899 / 0.926 / 0.933 / 0.935 | 0.277 / 0.202 / 0.189 / 0.172 | +| truck | 5,754 | 0.8569 | 0.732 / 0.854 / 0.905 / 0.937 | 0.794 / 0.867 / 0.896 / 0.915 | 0.244 / 0.191 / 0.189 / 0.180 | +| bus | 1,939 | 0.9393 | 0.864 / 0.932 / 0.975 / 0.986 | 0.916 / 0.958 / 0.981 / 0.984 | 0.203 / 0.187 / 0.139 / 0.138 | +| bicycle | 639 | 0.8780 | 0.868 / 0.881 / 0.881 / 0.882 | 0.881 / 0.888 / 0.888 / 0.888 | 0.172 / 0.172 / 0.172 / 0.172 | +| pedestrian | 14,362 | 0.8494 | 0.824 / 0.846 / 0.858 / 0.869 | 0.813 / 0.825 / 0.831 / 0.837 | 0.163 / 0.161 / 0.155 / 0.155 | +| **ALL** | 72,331 | 0.8836 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7040** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 47,568 | 0.8118 | 0.679 / 0.810 / 0.868 / 0.890 | 0.772 / 0.846 / 0.874 / 0.883 | 0.228 / 0.173 / 0.164 / 0.163 | +| truck | 4,090 | 0.6662 | 0.459 / 0.645 / 0.757 / 0.804 | 0.614 / 0.724 / 0.785 / 0.807 | 0.213 / 0.206 / 0.184 / 0.164 | +| bus | 1,935 | 0.8221 | 0.621 / 0.806 / 0.919 / 0.943 | 0.727 / 0.842 / 0.904 / 0.921 | 0.413 / 0.211 / 0.206 / 0.160 | +| bicycle | 295 | 0.5781 | 0.542 / 0.588 / 0.590 / 0.592 | 0.674 / 0.686 / 0.686 / 0.690 | 0.215 / 0.206 / 0.206 / 0.206 | +| pedestrian | 6,529 | 0.6417 | 0.608 / 0.636 / 0.655 / 0.668 | 0.666 / 0.682 / 0.692 / 0.699 | 0.136 / 0.136 / 0.136 / 0.136 | +| **ALL** | 60,417 | 0.7040 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5030** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 17,353 | 0.6739 | 0.479 / 0.664 / 0.759 / 0.794 | 0.631 / 0.732 / 0.780 / 0.794 | 0.193 / 0.159 / 0.146 / 0.146 | +| truck | 2,570 | 0.4847 | 0.194 / 0.401 / 0.621 / 0.723 | 0.414 / 0.562 / 0.692 / 0.751 | 0.206 / 0.179 / 0.130 / 0.128 | +| bus | 316 | 0.5186 
| 0.238 / 0.541 / 0.625 / 0.670 | 0.433 / 0.657 / 0.703 / 0.724 | 0.218 / 0.151 / 0.115 / 0.115 | +| bicycle | 70 | 0.4430 | 0.340 / 0.465 / 0.483 / 0.483 | 0.513 / 0.584 / 0.602 / 0.602 | 0.199 / 0.199 / 0.199 / 0.199 | +| pedestrian | 1,673 | 0.3948 | 0.381 / 0.389 / 0.401 / 0.408 | 0.524 / 0.528 / 0.532 / 0.535 | 0.125 / 0.125 / 0.125 / 0.125 | +| **ALL** | 21,982 | 0.5030 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7958** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 114,558 | 0.8408 | 0.737 / 0.837 / 0.882 / 0.906 | 0.809 / 0.866 / 0.888 / 0.895 | 0.236 / 0.189 / 0.164 / 0.164 | +| truck | 12,414 | 0.7294 | 0.539 / 0.704 / 0.811 / 0.863 | 0.664 / 0.764 / 0.823 / 0.851 | 0.244 / 0.206 / 0.183 / 0.164 | +| bus | 4,190 | 0.8673 | 0.719 / 0.856 / 0.939 / 0.956 | 0.800 / 0.886 / 0.928 / 0.939 | 0.342 / 0.211 / 0.161 / 0.161 | +| bicycle | 1,004 | 0.7710 | 0.747 / 0.778 / 0.780 / 0.780 | 0.801 / 0.813 / 0.814 / 0.815 | 0.191 / 0.191 / 0.191 / 0.191 | +| pedestrian | 22,564 | 0.7706 | 0.743 / 0.766 / 0.781 / 0.792 | 0.751 / 0.764 / 0.771 / 0.778 | 0.152 / 0.146 / 0.136 / 0.146 | +| **ALL** | 154,730 | 0.7958 | — | — | — | + +
+ +--- + ### BEVFusion-LiDAR J6Gen2_base/2.6.1
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md new file mode 100644 index 000000000..fc9e2677d --- /dev/null +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -0,0 +1,153 @@ +# Deployed model for BEVFusion-LiDAR JPNTaxi_base/2.X +## Summary + +### Main Parameters + + - **Range:** [122.40m, 122.40m, 8.0m] + - **Voxel Size:** [0.17, 0.17, 0.2] + - **Grid Size:** [1440, 1440, 40] + - **With Intensity** + +### Testing Datasets + +- **Total Frames: 9,975** + +
+ jpntaxi_gen2 (9,975 frames) + - `db_jpntaxigen2_v1` + - `db_jpntaxigen2_v2` + +
+ +### mAP - JPNTaxi_gen2 + +- **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.8862 | 0.8586 | 0.9397 | 0.8591 | 0.8839 | 0.8264 | 0.9218 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7125 | 0.6854 | 0.8453 | 0.6838 | 0.5362 | 0.6969 | 0.8003 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.6030 | 0.5762 | 0.6947 | 0.5260 | 0.5030 | 0.5321 | 0.7591 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7805 | 0.7527 | 0.8730 | 0.7118 | 0.6785 | 0.7655 | 0.8739 | + +
+ +## Release + +### BEVFusion-LiDAR JPNTaxi_base/2.7.1 + +
+ Changes + +- Finetune from `BEVFusion-LiDAR base/2.7.0` with JPNTaxi_base dataset and intensity. +
+ +
+ Artifacts + +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/47abcab3-34e1-4971-9bdf-5a2af5d2b2e6?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1nQlYrnCjlxXbUamEj7MCL_sKxojoU_wk/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1q_3zj9nF6mnA5IgyO1QRswS7XqnXqvUH/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/best_epoch_30.pth) + - [Google drive](https://drive.google.com/file/d/1K7rDv7fb8T2haXHxttbZN7FUEoLYESTr/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours +- Batch size: 8*8 = 64 +- Training Dataset (frames: 56,287): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames) + +
+ +
+ Evaluation + +**JPNTaxi_gen2 Datasets (9,975 frames)**: + + - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8862** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 42,789 | 0.9397 | 0.891 / 0.943 / 0.960 / 0.965 | 0.918 / 0.946 / 0.953 / 0.954 | 0.284 / 0.175 / 0.175 / 0.164 | +| truck | 17,259 | 0.8591 | 0.701 / 0.842 / 0.935 / 0.958 | 0.792 / 0.882 / 0.932 / 0.946 | 0.409 / 0.321 / 0.241 / 0.241 | +| bus | 3,437 | 0.8839 | 0.796 / 0.888 / 0.925 / 0.927 | 0.853 / 0.897 / 0.910 / 0.910 | 0.296 / 0.184 / 0.104 / 0.104 | +| bicycle | 2,681 | 0.8264 | 0.819 / 0.829 / 0.829 / 0.829 | 0.866 / 0.871 / 0.871 / 0.871 | 0.223 / 0.223 / 0.223 / 0.223 | +| pedestrian | 57,948 | 0.9218 | 0.906 / 0.921 / 0.927 / 0.933 | 0.883 / 0.893 / 0.899 / 0.903 | 0.135 / 0.129 / 0.125 / 0.132 | +| **ALL** | 124,114 | 0.8862 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7125** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 35,518 | 0.8453 | 0.763 / 0.846 / 0.881 / 0.891 | 0.819 / 0.860 / 0.875 / 0.879 | 0.227 / 0.180 / 0.166 / 0.166 | +| truck | 22,550 | 0.6838 | 0.475 / 0.640 / 0.782 / 0.838 | 0.632 / 0.730 / 0.808 / 0.831 | 0.286 / 0.195 / 0.167 / 0.128 | +| bus | 2,683 | 0.5362 | 0.263 / 0.524 / 0.668 / 0.689 | 0.465 / 0.660 / 0.742 / 0.751 | 0.241 / 0.180 / 0.174 / 0.171 | +| bicycle | 1,607 | 0.6969 | 0.656 / 0.709 / 0.710 / 0.713 | 0.745 / 0.770 / 0.771 / 0.772 | 0.145 / 0.138 / 0.138 / 0.138 | +| pedestrian | 27,240 | 0.8003 | 0.782 / 0.798 / 0.807 / 0.814 | 0.782 / 0.790 / 0.795 / 0.799 | 0.163 / 0.163 / 0.163 / 0.164 | +| **ALL** | 89,598 | 0.7125 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 
0.6030** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 16,524 | 0.6947 | 0.580 / 0.698 / 0.744 / 0.757 | 0.692 / 0.755 / 0.778 / 0.781 | 0.202 / 0.154 / 0.151 / 0.144 | +| truck | 14,587 | 0.5260 | 0.229 / 0.469 / 0.639 / 0.767 | 0.464 / 0.630 / 0.726 / 0.793 | 0.288 / 0.185 / 0.169 / 0.130 | +| bus | 2,476 | 0.5030 | 0.305 / 0.486 / 0.597 / 0.624 | 0.530 / 0.636 / 0.703 / 0.719 | 0.297 / 0.201 / 0.149 / 0.156 | +| bicycle | 364 | 0.5321 | 0.381 / 0.521 / 0.613 / 0.613 | 0.563 / 0.631 / 0.670 / 0.670 | 0.219 / 0.219 / 0.219 / 0.219 | +| pedestrian | 14,297 | 0.7591 | 0.737 / 0.756 / 0.766 / 0.778 | 0.750 / 0.760 / 0.765 / 0.771 | 0.134 / 0.127 / 0.129 / 0.132 | +| **ALL** | 48,248 | 0.6030 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7805** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,831 | 0.8730 | 0.799 / 0.875 / 0.905 / 0.914 | 0.845 / 0.884 / 0.896 / 0.899 | 0.235 / 0.189 / 0.165 / 0.165 | +| truck | 54,396 | 0.7118 | 0.490 / 0.674 / 0.809 / 0.875 | 0.645 / 0.757 / 0.831 / 0.862 | 0.314 / 0.240 / 0.178 / 0.153 | +| bus | 8,596 | 0.6785 | 0.504 / 0.674 / 0.761 / 0.775 | 0.655 / 0.761 / 0.807 / 0.813 | 0.285 / 0.180 / 0.168 / 0.168 | +| bicycle | 4,652 | 0.7655 | 0.736 / 0.770 / 0.778 / 0.778 | 0.800 / 0.816 / 0.819 / 0.820 | 0.194 / 0.159 / 0.159 / 0.159 | +| pedestrian | 99,485 | 0.8739 | 0.857 / 0.872 / 0.880 / 0.887 | 0.835 / 0.845 / 0.850 / 0.854 | 0.142 / 0.137 / 0.135 / 0.137 | +| **ALL** | 261,960 | 0.7805 | — | — | — | + +
+ +--- From 08b50e6d71f31577a1053f8792ae381fcafdf524 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 14:47:27 +0900 Subject: [PATCH 033/162] Add the script --- projects/BEVFusion/bevfusion/__init__.py | 3 +- .../BEVFusion/bevfusion/bevfusion_head.py | 22 ++- .../bevfusion/bevfusion_voxel_encoder.py | 184 ++++++++++++++---- ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 --------------- .../default_lidar_second_secfpn_120m.py | 2 + 5 files changed, 168 insertions(+), 204 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 3db358b55..2e9822d76 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder +from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder __all__ = [ "BEVFusion", @@ -30,4 +30,5 @@ "TransFusionBBoxCoder", "BEVFusionVoxelEncoder", "BEVFusionVoxelSinCosEncoder", + "BEVFusionVoxelMeanSinCosEncoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..a8ef7129f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -62,6 +62,7 @@ def __init__( norm_cfg=dict(type="BN1d"), bias="auto", # loss + loss_iou=None, loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"), 
loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), @@ -87,6 +88,7 @@ def __init__( if not self.use_sigmoid_cls: self.num_classes += 1 self.loss_cls = MODELS.build(loss_cls) + self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None self.loss_bbox = MODELS.build(loss_bbox) self.loss_heatmap = MODELS.build(loss_heatmap) @@ -369,8 +371,8 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F for layer_id, preds_dict in enumerate(preds_dicts): batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() - # if self.loss_iou.loss_weight != 0: - # batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 + if self.loss_iou is not None: + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot @@ -679,7 +681,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): ious[None], int(pos_inds.shape[0]), float(mean_iou), - heatmap[None], + heatmap[None] ) def loss(self, batch_feats, batch_data_samples): @@ -711,7 +713,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ious, num_pos, matched_ious, - heatmap, + heatmap ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0]) if hasattr(self, "on_the_image_mask"): label_weights = label_weights * self.on_the_image_mask @@ -798,7 +800,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox - # loss_dict[f'{prefix}_loss_iou'] = layer_loss_iou + + # Output iou for iou-aware loss + if self.loss_iou is not None: + layer_ious = preds_dict["iou"][ + ... 
+ idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + ] # [BS, num_proposals] + + # [BS, num_proposals] + layer_iou_weights = layer_bbox_weights[:, :, 0] + loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index efbc995e8..086acc1e0 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -185,21 +185,21 @@ class BEVFusionVoxelSinCosEncoder(nn.Module): def __init__(self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], + time_lag_channel_index: int = 3, + time_exp_factor: Optional[float] = None, + feat_channels: Optional[tuple] = (16, ), in_channels: Optional[int] = 4, with_distance: Optional[bool] = False, with_cluster_center: Optional[bool] = True, with_voxel_center: Optional[bool] = True, voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1),): + 40, 1), + norm_cfg: Optional[dict] = dict( + type='BN1d', eps=1e-3, momentum=0.01), + mode: Optional[str] = 'max'): super(BEVFusionVoxelSinCosEncoder, self).__init__() - if with_cluster_center: - in_channels += 3 - if with_voxel_center: - in_channels += 3 - if with_distance: - in_channels += 1 self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center @@ -214,11 +214,42 @@ def __init__(self, self.y_offset = self.vy / 2 + point_cloud_range[1] self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range + + self.xyz_channels = 3 + feat_offset_channels = in_channels - self.xyz_channels + if with_cluster_center: + feat_offset_channels += 3 + if with_voxel_center: + feat_offset_channels += 3 + 
if with_distance: + feat_offset_channels += 1 + + feat_channels = [feat_offset_channels] + list(feat_channels) + assert len(feat_channels) > 0, "feat_channels must be greater than 0" + pfn_layers = [] + for i in range(len(feat_channels) - 1): + in_filters = feat_channels[i] + out_filters = feat_channels[i + 1] + if i < len(feat_channels) - 2: + last_layer = False + else: + last_layer = True + pfn_layers.append( + PFNLayer( + in_filters, + out_filters, + norm_cfg=norm_cfg, + last_layer=last_layer, + mode=mode)) + self.pfn_layers = nn.ModuleList(pfn_layers) + self.time_lag_channel_index = time_lag_channel_index + self.time_exp_factor = time_exp_factor + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float())) + self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -232,19 +263,53 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Returns: torch.Tensor: Features of pillars in shape (M, C). 
- """ - features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) - features_ls = [features_norm] + """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + # PFN + # Other features, for example, intensity or time_lag + other_features = features[:, :, self.xyz_channels:] + + # Normalization + other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + + time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels + # exponentiate time_lag features, it's higher when the normlized time lag is lower + # (1.0 when time_lag_features is 0.0) + if self.time_exp_factor is not None: + other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) + else: + # Inverse the time_lag feature + other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] + + # Offsets + voxel_feature_offsets = [other_features_norm] # Find distance of x, y, and z from cluster center if self._with_cluster_center: points_mean = features[:, :, :3].sum( dim=1, 
keepdim=True) / num_points.type_as(features).view( -1, 1, 1) - # Map to [-1, 1] - f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size - # f_cluster = features[:, :, :3] - points_mean - features_ls.append(f_cluster) + # f_cluster = (features[:, :, :3] - points_mean) + f_cluster = features[:, :, :3] - points_mean + voxel_feature_offsets.append(f_cluster) # Find distance of x, y, and z from pillar center dtype = features.dtype @@ -261,35 +326,80 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, self.z_offset) # Map to [-1, 1] - f_center = f_center / (self.voxel_size * 0.5) - features_ls.append(f_center) + # f_center = f_center / (self.voxel_size * 0.5) + voxel_feature_offsets.append(f_center) if self._with_distance: points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - features_ls.append(points_dist) + voxel_feature_offsets.append(points_dist) - # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # SinCos encoding - # (N, M, C) -> (N, M, C, 1) -> (N, M, C, 1) * (1, 1, 1, C) -> (N, M, C, C) - y = features.unsqueeze(-1) * np.pi * self.exponents.unsqueeze(0).unsqueeze(0).unsqueeze(0) - # (N, M, C, C) -> (N, M, C*C) - y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels ** 2) - # (N, M, C*C) -> (N, M, C*C*2) - features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1) - + voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) # The feature decorations were calculated without regard to whether # pillar was empty. Need to ensure that # empty pillars remain set to zeros. 
mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(features) - features *= mask - - # Reduction by mean - # (N, M, C*C*2) -> (N, C*C*2) - features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) - features = features.contiguous() + mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) + voxel_feature_offsets *= mask + # PFN + for pfn in self.pfn_layers: + voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) + + # Concat + features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) return features + + + +@MODELS.register_module() +class BEVFusionVoxelMeanSinCosEncoder(nn.Module): + def __init__(self, + min_norm_values: Tuple[float], + max_norm_values: Tuple[float], + in_channels: Optional[int] = 4, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, + 40, 1), + mode: Optional[str] = 'max'): + super(BEVFusionVoxelSinCosEncoder, self).__init__() + + # Create PillarFeatureNet layers + self.in_channels = in_channels + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C). + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C). 
+ """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + return voxel_fourier_features diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py deleted file mode 100644 index 531a07673..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py +++ /dev/null @@ -1,161 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = 
"bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=100, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters 
-train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..4843f5677 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -94,6 +94,7 @@ ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + # common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", @@ -110,6 +111,7 @@ reduction="mean", loss_weight=1.0, ), + # loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), ), From dead69b6bf0a744cde4fc4db0d410b974ac4f40a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 14:47:39 +0900 Subject: [PATCH 034/162] Add the script --- ...second_secfpn_50e_8xb8_base_120m_sincos.py | 156 +++++++++++++++++ 
...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 +++++++++++++++++ ...b8_base_120m_sincos_timeexp_34_channels.py | 165 ++++++++++++++++++ ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +++++++++++++ 4 files changed, 601 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py new file mode 100644 index 000000000..d856b1d4b --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py @@ -0,0 +1,156 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + 
_base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelMeanSinCosEncoder", + in_channels=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=32, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( +
type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + 
metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py new file mode 100644 index 000000000..54af6be5f --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + 
point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, +
pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + 
backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py new file mode 100644 index 000000000..d7e61102b --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py @@ -0,0 +1,165 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + 
point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + time_lag_channel_index=3, + time_exp_factor=1.0, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), +
dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + 
_base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py new file mode 100644 index 000000000..4c7e996d9 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py @@ -0,0 +1,117 @@ +num_proposals = 500 +max_num_points = 10 +max_voxels = [120000, 160000] + +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=max_num_points, + max_voxels=max_voxels, + voxelize_reduce=True, + ), + data_preprocessor=dict( + type="Det3DDataPreprocessor", + pad_size_divisor=32, + ), + pts_voxel_encoder=dict(type="HardSimpleVFE"), + pts_middle_encoder=dict( + type="BEVFusionSparseEncoder", + in_channels=5, + aug_features_min_values=[], + aug_features_max_values=[], + num_aug_features=0, + order=("conv", "norm", "act"), + norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), + encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), + encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), + block_type="basicblock", + ), + pts_backbone=dict( + type="SECOND", + in_channels=256, + out_channels=[128, 256], + layer_nums=[5, 5], + layer_strides=[1, 2], + norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), + conv_cfg=dict(type="Conv2d", bias=False), + ), + pts_neck=dict( + type="SECONDFPN", + in_channels=[128, 256], + out_channels=[256, 256], + 
upsample_strides=[1, 2], + norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), + upsample_cfg=dict(type="deconv", bias=False), + use_conv_for_no_stride=True, + ), + bbox_head=dict( + type="BEVFusionHead", + num_proposals=num_proposals, + auxiliary=True, + in_channels=512, + hidden_channel=128, + nms_kernel_size=3, + bn_momentum=0.1, + num_decoder_layers=1, + decoder_layer=dict( + type="TransformerDecoderLayer", + self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), + cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), + ffn_cfg=dict( + embed_dims=128, + feedforward_channels=256, + num_fcs=2, + ffn_drop=0.1, + act_cfg=dict(type="ReLU", inplace=True), + ), + norm_cfg=dict(type="LN"), + pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128), + ), + train_cfg=dict( + dataset="t4datasets", + out_size_factor=8, + gaussian_overlap=0.1, + min_radius=2, + pos_weight=-1, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], + assigner=dict( + type="HungarianAssigner3D", + iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"), + cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15), + reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25), + iou_cost=dict(type="IoU3DCost", weight=0.25), + ), + ), + test_cfg=dict( + dataset="t4datasets", + out_size_factor=8, + nms_type=None, # Set to "circle" for circle_nms + # Set NMS for different clusters + nms_clusters=[ + dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms + dict(class_names=["bicycle"], nms_threshold=0.5), + dict(class_names=["pedestrian"], nms_threshold=0.175), + ], + ), + dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), + bbox_coder=dict( + type="TransFusionBBoxCoder", + post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], + score_threshold=0.0, + 
out_size_factor=8, + code_size=10, + ), + loss_cls=dict( + type="mmdet.FocalLoss", + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + reduction="mean", + loss_weight=1.0, + ), + loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), + loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), + loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + ), +) From 1725f79575ff203ae80300504d7b85b5b0f5f796 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 16:36:11 +0900 Subject: [PATCH 035/162] Update dataset name --- .../BEVFusion/bevfusion/bevfusion_head.py | 25 ++++++++++--------- .../bevfusion/bevfusion_voxel_encoder.py | 2 +- ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 13 +++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index a8ef7129f..0852ebf16 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -62,7 +62,7 @@ def __init__( norm_cfg=dict(type="BN1d"), bias="auto", # loss - loss_iou=None, + loss_iou=None, loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), @@ -372,7 +372,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() if self.loss_iou is not None: - batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * 
preds_dict[0]["query_heatmap_score"] * one_hot @@ -801,16 +801,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox - # Output iou for iou-aware loss - if self.loss_iou is not None: - layer_ious = preds_dict["iou"][ - ... - idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ] # [BS, num_proposals] - - # [BS, num_proposals] - layer_iou_weights = layer_bbox_weights[:, :, 0] - loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) + # Output iou for iou-aware loss + if self.loss_iou is not None: + layer_ious = preds_dict["iou"][ + ..., + idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + ].squeeze(1) # [BS, num_proposals] + + # [BS, num_proposals] + layer_iou_weights = layer_bbox_weights[:, :, 0] + # print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape") + loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 086acc1e0..5037113aa 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -361,7 +361,7 @@ def __init__(self, point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1), mode: Optional[str] = 'max'): - super(BEVFusionVoxelSinCosEncoder, self).__init__() + super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() # Create PillarFeatureNet layers self.in_channels = in_channels diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 17f16254d..7c1286df8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -2,7 +2,7 @@ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", + "../default/models/default_lidar_second_secfpn_120m_iou_loss.py", "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_iou_loss" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -29,19 +29,14 @@ ), pts_voxel_encoder=dict( _delete_=True, - type="BEVFusionVoxelSinCosEncoder", + type="BEVFusionVoxelMeanSinCosEncoder", in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), pts_middle_encoder=dict( - in_channels=100, + in_channels=32, sparse_shape=_base_.grid_size, # num_aug_features=4, # min-max normalization 
for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here From 24d780bb788fd25813481007d898c85051c944cb Mon Sep 17 00:00:00 2001 From: KokSeang Date: Fri, 24 Apr 2026 16:31:47 +0900 Subject: [PATCH 036/162] Skip lidarseg annotations when sample data has no info_filename --- tools/detection3d/t4dataset_converters/t4converter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py index 842b0f458..5dfd1dc1f 100644 --- a/tools/detection3d/t4dataset_converters/t4converter.py +++ b/tools/detection3d/t4dataset_converters/t4converter.py @@ -626,6 +626,10 @@ def get_lidarseg_annotations( ) -> dict: if not hasattr(t4, "lidarseg") or not t4.lidarseg: return dict() + + if sd_record.info_filename is None: + print(f"sample {lidar_token} doesn't have lidar info_filename") + return dict() assert i < len(t4.lidarseg), "Index exceeds number of lidarseg records!" assert t4.lidarseg[i].sample_data_token == lidar_token, "Sample data token mismatch!" 
From 8175419ca1604a9fe25b39ab3715616f3c8fc07f Mon Sep 17 00:00:00 2001 From: KokSeang Date: Fri, 24 Apr 2026 16:54:00 +0900 Subject: [PATCH 037/162] Added --- .../configs/detection3d/dataset/t4dataset/base.py | 13 +++++++++---- .../detection3d/dataset/t4dataset/j6gen2.py | 13 +++++++++---- .../detection3d/dataset/t4dataset/j6gen2_base.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/jpntaxi_base.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/jpntaxi_gen2.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/largebus.py | 14 ++++++++++---- 6 files changed, 58 insertions(+), 24 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index d0744a131..3f90e7e0c 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -91,8 +91,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -113,7 +113,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -123,7 +123,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car 
"vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -143,6 +143,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -151,6 +154,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier" ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index 3c8675c13..e737994aa 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -72,8 +72,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -94,7 +94,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -104,7 +104,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -124,6 +124,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + 
"trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -132,6 +135,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index cc3a86d3e..a8f6c6e7d 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -78,8 +78,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -100,7 +100,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -110,7 +110,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -130,14 +130,20 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } + class_names = [ "car", "truck", "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = 
len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index b7ddb799a..229ff7604 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -68,8 +68,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -90,7 +90,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -100,7 +100,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -120,6 +120,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -128,7 +131,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py 
b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index f91bbc22f..411cabe7e 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -65,8 +65,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -87,7 +87,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -97,7 +97,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -117,6 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -125,7 +128,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index b117c3798..a611750d3 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ 
b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -67,8 +67,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -89,7 +89,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -99,7 +99,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -119,6 +119,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -127,7 +130,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) From 33f11cd5db171246654950d2a0afc22a757dcce5 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 14:08:04 +0900 Subject: [PATCH 038/162] Added --- .../download_t4dataset/download_t4dataset.py | 4 ++-- tools/detection3d/create_data_t4dataset.py | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pipelines/webauto/download_t4dataset/download_t4dataset.py 
b/pipelines/webauto/download_t4dataset/download_t4dataset.py index f06f6979d..d06b85717 100644 --- a/pipelines/webauto/download_t4dataset/download_t4dataset.py +++ b/pipelines/webauto/download_t4dataset/download_t4dataset.py @@ -68,8 +68,8 @@ def get_t4dataset_ids(config_path: str) -> list[str]: for key in required_keys: for t4dataset_ids in data_splits[key]: t4dataset_ids = t4dataset_ids.split("/") - if len(t4dataset_ids) == 4: - t4dataset_id, t4dataset_version_id, city, vehicle_type = t4dataset_ids + if len(t4dataset_ids) == 5: + t4dataset_id, t4dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = t4dataset_ids elif len(t4dataset_ids) == 2: t4dataset_id, t4dataset_version_id = t4dataset_ids elif len(t4dataset_ids) == 1: diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 1e61af9d8..e75a36a04 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -104,6 +104,7 @@ def get_info( max_sweeps: int, city: Optional[str] = None, vehicle_type: Optional[str] = None, + traffic_cone_barrier_status: Optional[str] = None, ) -> Dict[str, Any]: lidar_token = get_lidar_token(sample) if lidar_token is None: @@ -129,6 +130,11 @@ def get_info( sd_record: SampleData = t4.get("sample_data", lidar_token) info = get_empty_standard_data_info(cfg.camera_types) + + if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true": + traffic_cone_barrier_status = True + else: + traffic_cone_barrier_status = False basic_info = dict( sample_idx=i, @@ -139,6 +145,7 @@ def get_info( scene_name=scene_record.name, city=city, vehicle_type=vehicle_type, + traffic_cone_barrier_status=traffic_cone_barrier_status, ) for new_info in [ @@ -302,8 +309,8 @@ def main(): f"Creating data info for scene: {scene_id}, steps: {sample_steps}, sweeps: {args.max_sweeps}" ) dataset_scene_info = scene_id.split("/") - if len(dataset_scene_info) == 4: - t4_dataset_id, 
t4_dataset_version_id, city, vehicle_type = dataset_scene_info + if len(dataset_scene_info) == 5: + t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info city = vehicle_type = None @@ -326,7 +333,7 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type) + info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. From 2237522607f97186e9900c4b4884159684ba9fad Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 14:36:45 +0900 Subject: [PATCH 039/162] Added --- .../configs/detection3d/dataset/t4dataset/base.py | 12 ++++++------ .../configs/detection3d/dataset/t4dataset/j6gen2.py | 12 ++++++------ .../detection3d/dataset/t4dataset/j6gen2_base.py | 12 ++++++------ .../detection3d/dataset/t4dataset/jpntaxi_base.py | 12 ++++++------ .../detection3d/dataset/t4dataset/jpntaxi_gen2.py | 12 ++++++------ .../detection3d/dataset/t4dataset/largebus.py | 12 ++++++------ 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 3f90e7e0c..4248c90e6 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -123,7 +123,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": 
"bicycle", "vehicle.bus": "bus", @@ -143,9 +143,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -154,8 +154,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier" + "traffic_cone", + "barrier" ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index e737994aa..0324e7207 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -104,7 +104,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -124,9 +124,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -135,8 +135,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index a8f6c6e7d..b9ec03f27 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ 
b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -110,7 +110,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -130,9 +130,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } @@ -142,8 +142,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index 229ff7604..c08decfa1 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -100,7 +100,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -120,9 +120,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -131,8 +131,8 @@ "bus", "bicycle", 
"pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index 411cabe7e..6b7250673 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -97,7 +97,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -117,9 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -128,8 +128,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index a611750d3..2b54629eb 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -99,7 +99,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -119,9 +119,9 @@ 
"semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -130,8 +130,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) From 60df4c0911f1559db0e917a2f2d9045ab07f83f3 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 23:16:34 +0900 Subject: [PATCH 040/162] Added --- tools/detection3d/create_data_t4dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index e75a36a04..62169bc7a 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -102,9 +102,9 @@ def get_info( sample: Sample, i: int, max_sweeps: int, + traffic_cone_barrier_status: str, city: Optional[str] = None, vehicle_type: Optional[str] = None, - traffic_cone_barrier_status: Optional[str] = None, ) -> Dict[str, Any]: lidar_token = get_lidar_token(sample) if lidar_token is None: @@ -130,8 +130,7 @@ def get_info( sd_record: SampleData = t4.get("sample_data", lidar_token) info = get_empty_standard_data_info(cfg.camera_types) - - if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true": + if traffic_cone_barrier_status == "true": traffic_cone_barrier_status = True else: traffic_cone_barrier_status = False @@ -333,7 +332,7 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status) + info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type) if info is None: continue # info["version"] = dataset_version # used for 
visualizations during debugging. From b5dabf2d53e4cde87b994c723f5233dadb267ec6 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 14:27:49 +0900 Subject: [PATCH 041/162] Added --- .../BEVFusion/bevfusion/bevfusion_head.py | 79 +++++++++++++++---- projects/BEVFusion/bevfusion/utils.py | 11 ++- .../default_lidar_second_secfpn_120m.py | 5 +- .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + 5 files changed, 81 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..9bbc6469c 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -69,6 +69,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, + partial_traffic_cone_barrier=False, ): super().__init__() self.class_names = class_names @@ -82,7 +83,8 @@ def __init__( self.nms_kernel_size = nms_kernel_size self.train_cfg = train_cfg self.test_cfg = test_cfg - + # If true, only compute loss for traffic cone and barrier when it's available in the frame + self.partial_traffic_cone_barrier = partial_traffic_cone_barrier self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if not self.use_sigmoid_cls: self.num_classes += 1 @@ -185,6 +187,13 @@ def __init__( cluster["class_indices"] = sorted( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) + + if self.partial_traffic_cone_barrier: + assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" + self.ignore_labels = [self.class_name_to_indices["traffic_cone"], self.class_name_to_indices["barrier"]] + else: + self.ignore_labels = None + def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -456,7 +465,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F return rets[0] - 
def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict]): + def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]): """Generate training targets. Args: batch_gt_instances_3d (List[InstanceData]): @@ -500,6 +509,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis batch_gt_instances_3d, list_of_pred_dict, np.arange(len(batch_gt_instances_3d)), + batch_metadata, ) labels = torch.cat(res_tuple[0], dim=0) label_weights = torch.cat(res_tuple[1], dim=0) @@ -509,6 +519,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos = np.sum(res_tuple[5]) matched_ious = np.mean(res_tuple[6]) heatmap = torch.cat(res_tuple[7], dim=0) + heatmap_weights = torch.cat(res_tuple[8], dim=0) return ( labels, label_weights, @@ -518,9 +529,10 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos, matched_ious, heatmap, + heatmap_weights, ) - def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): + def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata): """Generate training targets for a single sample. Args: gt_instances_3d (:obj:`InstanceData`): ground truth of instances. 
@@ -563,6 +575,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): num_layer = self.num_decoder_layers else: num_layer = 1 + + traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) + if self.ignore_labels is not None and not traffic_cone_barrier_status: + ignore_labels = self.ignore_labels + else: + ignore_labels = None assign_result_list = [] for idx_layer in range(num_layer): @@ -581,6 +599,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): gt_labels_3d, score_layer, self.train_cfg, + ignore_labels, ) elif self.train_cfg.assigner.type == "HeuristicAssigner": assign_result = self.bbox_assigner.assign( @@ -637,10 +656,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight - + if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 - + # # compute dense heatmap targets device = labels.device gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device) @@ -671,6 +690,15 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int[[1, 0]], radius) mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1) + heatmap_weights = torch.ones_like(heatmap) + + # Ignore labels for traffic cone and barrier + if self.ignore_labels is not None and not traffic_cone_barrier_status: + pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + ignore_preds_masks = pred_labels.isin(self.ignore_labels) + label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals + heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals + return ( labels[None], label_weights[None], @@ -680,6 +708,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): int(pos_inds.shape[0]), float(mean_iou), heatmap[None], + heatmap_weights[None], ) 
def loss(self, batch_feats, batch_data_samples): @@ -698,11 +727,11 @@ def loss(self, batch_feats, batch_data_samples): batch_input_metas.append(data_sample.metainfo) batch_gt_instances_3d.append(data_sample.gt_instances_3d) preds_dicts = self(batch_feats, batch_input_metas) - loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d) + loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d, batch_input_metas) return loss - def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, **kwargs): + def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas): ( labels, label_weights, @@ -712,7 +741,8 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li num_pos, matched_ious, heatmap, - ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0]) + heatmap_weights, + ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0], batch_input_metas) if hasattr(self, "on_the_image_mask"): label_weights = label_weights * self.on_the_image_mask bbox_weights = bbox_weights * self.on_the_image_mask[:, :, None] @@ -721,13 +751,32 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict = dict() # compute heatmap loss - loss_heatmap = self.loss_heatmap( - clip_sigmoid(preds_dict["dense_heatmap"]).float(), - heatmap.float(), - avg_factor=max(heatmap.eq(1).float().sum().item(), 1), - ) - loss_dict["loss_heatmap"] = loss_heatmap - + preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) + num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) + if self.ignore_labels is not None: + loss_heatmap = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + avg_factor=num_pos_dense_heatmap, + ) + loss_dict["loss_heatmap"] = loss_heatmap + else: + # When ignore labels is found, we compute the loss for each class + # heatmap focal loss + loss_heatmap_cls: torch.Tensor = 
self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + + # Prevent loss item to avoid computing gradients twice. This is for logging. + loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..b27d9e681 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -238,7 +238,7 @@ def __init__( self.iou_cost = TASK_UTILS.build(iou_cost) self.iou_calculator = TASK_UTILS.build(iou_calculator) - def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): + def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None): num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0) # 1. 
assign -1 by default @@ -259,9 +259,16 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg) iou = self.iou_calculator(bboxes, gt_bboxes) iou_cost = self.iou_cost(iou) - + # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost + + if ignore_labels is not None: + preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) + ignore_preds_masks = preds_labels.isin(ignore_labels) + cost[ignore_preds_masks] = 10000 + print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) # 3. do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..94fca2829 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -91,9 +91,11 @@ dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms dict(class_names=["bicycle"], nms_threshold=0.5), dict(class_names=["pedestrian"], nms_threshold=0.175), + dict(class_names=["barrier"], nms_threshold=0.25), + dict(class_names=["traffic_cone"], nms_threshold=0.175), ], ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", @@ -112,5 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), 
loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + partial_traffic_cone_barrier=True ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 06d95be16..c3e8e18ee 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -84,6 +84,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +128,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 4e74d3616..a9032fcdc 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -84,6 +84,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +128,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] From 79024cf9f861e086d77b7b1362b62be6c81f6bc5 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 14:41:33 +0900 Subject: [PATCH 042/162] Added --- projects/BEVFusion/bevfusion/bevfusion_head.py | 3 +++ .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 9bbc6469c..d616725d2 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -15,6 +15,7 @@ from mmdet.models.utils import multi_apply from mmengine.structures import InstanceData from torch import nn +from 
mmengine.logging import print_log def clip_sigmoid(x, eps=1e-4): @@ -194,6 +195,8 @@ def __init__( else: self.ignore_labels = None + print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 94fca2829..ec37de42a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -91,7 +91,7 @@ dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms dict(class_names=["bicycle"], nms_threshold=0.5), dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.25), + dict(class_names=["barrier"], nms_threshold=0.5), dict(class_names=["traffic_cone"], nms_threshold=0.175), ], ), From e771a69cb77196e16263b0049919837d0edb275e Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 18:18:08 +0900 Subject: [PATCH 043/162] Added --- .../BEVFusion/bevfusion/bevfusion_head.py | 21 +++++++++++-------- projects/BEVFusion/bevfusion/utils.py | 12 +++++------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index d616725d2..7e62d21a4 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -579,12 +579,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad else: num_layer = 1 - traffic_cone_barrier_status = 
batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) - if self.ignore_labels is not None and not traffic_cone_barrier_status: - ignore_labels = self.ignore_labels - else: - ignore_labels = None - assign_result_list = [] for idx_layer in range(num_layer): bboxes_tensor_layer = bboxes_tensor[ @@ -638,7 +632,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad ious = assign_result_ensemble.max_overlaps ious = torch.clamp(ious, min=0.0, max=1.0) labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) - label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) + label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long) + # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) if gt_labels_3d is not None: # default label is -1 labels += self.num_classes @@ -696,11 +691,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad heatmap_weights = torch.ones_like(heatmap) # Ignore labels for traffic cone and barrier + traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) if self.ignore_labels is not None and not traffic_cone_barrier_status: pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) ignore_preds_masks = pred_labels.isin(self.ignore_labels) - label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals + label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return ( labels[None], @@ -791,10 +787,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ].reshape(-1) + # layer_label_weights = label_weights[ + # ..., + # idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + # ].reshape(-1) layer_label_weights = 
label_weights[ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ].reshape(-1) + ] + # (Batch*num_proposals, num_classes) + layer_label_weights = layer_label_weights.reshape(-1, self.num_classes) + print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current") layer_score = preds_dict["heatmap"][ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index b27d9e681..b6bd2be41 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -263,12 +263,12 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - if ignore_labels is not None: - preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) - ignore_preds_masks = preds_labels.isin(ignore_labels) - cost[ignore_preds_masks] = 10000 - print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) + # if ignore_labels is not None: + # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) + # ignore_preds_masks = preds_labels.isin(ignore_labels) + # cost[ignore_preds_masks] = 10000 + # print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) # 3. 
do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() From 05703cbad1bf353279c4feff7af59ae6926c6281 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 27 Apr 2026 20:07:18 +0900 Subject: [PATCH 044/162] Update configs --- .../BEVFusion/bevfusion/bevfusion_head.py | 30 ++-- ..._secfpn_50e_8xb8_base_120m_traffic_cone.py | 163 ++++++++++++++++++ .../default_lidar_second_secfpn_120m.py | 2 +- .../default/pipelines/default_lidar_120m.py | 2 + 4 files changed, 179 insertions(+), 18 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 7e62d21a4..ace7f26b8 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_traffic_cone_barrier=False, + partial_ignore_labels=None ): super().__init__() self.class_names = class_names @@ -84,8 +84,6 @@ def __init__( self.nms_kernel_size = nms_kernel_size self.train_cfg = train_cfg self.test_cfg = test_cfg - # If true, only compute loss for traffic cone and barrier when it's available in the frame - self.partial_traffic_cone_barrier = partial_traffic_cone_barrier self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if not self.use_sigmoid_cls: self.num_classes += 1 @@ -189,13 +187,14 @@ def __init__( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) - if self.partial_traffic_cone_barrier: + # If true, only compute loss for traffic cone and barrier when it's available in the frame + if partial_ignore_labels is not None: assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" - self.ignore_labels = [self.class_name_to_indices["traffic_cone"], 
self.class_name_to_indices["barrier"]] + self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels] else: - self.ignore_labels = None + self.partial_ignore_labels = None - print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \ + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): @@ -535,7 +534,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis heatmap_weights, ) - def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata): + def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): """Generate training targets for a single sample. Args: gt_instances_3d (:obj:`InstanceData`): ground truth of instances. @@ -596,7 +595,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad gt_labels_3d, score_layer, self.train_cfg, - ignore_labels, ) elif self.train_cfg.assigner.type == "HeuristicAssigner": assign_result = self.bbox_assigner.assign( @@ -691,12 +689,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad heatmap_weights = torch.ones_like(heatmap) # Ignore labels for traffic cone and barrier - traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) - if self.ignore_labels is not None and not traffic_cone_barrier_status: - pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - ignore_preds_masks = pred_labels.isin(self.ignore_labels) - heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier + traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) + if 
self.partial_ignore_labels is not None and not traffic_cone_barrier_status: + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return ( labels[None], @@ -752,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.ignore_labels is not None: + if self.partial_ignore_labels is not None: loss_heatmap = self.loss_heatmap( preds_dense_heatmap, heatmap.float(), @@ -797,12 +793,12 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ] # (Batch*num_proposals, num_classes) layer_label_weights = layer_label_weights.reshape(-1, self.num_classes) - print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current") layer_score = preds_dict["heatmap"][ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ] layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes) + print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current") layer_loss_cls = self.loss_cls( layer_cls_score.float(), layer_labels, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py new file mode 100644 index 000000000..39c6ddf54 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + 
"../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + 
pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + 
info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index ec37de42a..023c6774d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -114,6 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_traffic_cone_barrier=True + partial_ignore_labels=None ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index c3e8e18ee..455c2761a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + 
"barrier": 120, } # LiDAR parameters From 06ae9c2f75fc2060d102a1731d6365001bd91de8 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 27 Apr 2026 20:53:07 +0900 Subject: [PATCH 045/162] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index ace7f26b8..c58a04dca 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -748,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.partial_ignore_labels is not None: + if self.partial_ignore_labels is None: loss_heatmap = self.loss_heatmap( preds_dense_heatmap, heatmap.float(), @@ -798,7 +798,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ] layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes) - print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current") layer_loss_cls = self.loss_cls( layer_cls_score.float(), layer_labels, From d8c19749f3d54dd9a437b02ce58b2b0ca1af755d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 09:52:10 +0900 Subject: [PATCH 046/162] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index c58a04dca..adbd64835 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -762,6 +762,10 @@ def 
loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li preds_dense_heatmap, heatmap.float(), ) + loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach() + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap From 25e581c902b3ad907da44a5bcbd41d9d249a65d3 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:02:59 +0900 Subject: [PATCH 047/162] Update configs --- tools/detection3d/create_data_t4dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 62169bc7a..9550b2872 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -273,7 +273,8 @@ def main(): if cfg.filter_attributes is None: print_log("No attribute filtering is applied!") - + + remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False) # Get every pair of min-max distance filtering thresholds bev_distance_ranges = [] if hasattr(cfg, "evaluator_metric_configs"): @@ -310,6 +311,9 @@ def main(): dataset_scene_info = scene_id.split("/") if len(dataset_scene_info) == 5: t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info + if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false": + print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current") + continue elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info city = vehicle_type = None From 
9ba440a640eb8f9742cc47425e3e4d6977f58705 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:05:29 +0900 Subject: [PATCH 048/162] Update configs --- .../t4dataset/j6gen2_base_traffic_cone.py | 211 ++++++++++++++++++ .../t4dataset/jpntaxi_base_traffic_cone.py | 202 +++++++++++++++++ 2 files changed, 413 insertions(+) create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py new file mode 100644 index 000000000..8c57cf4fa --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py @@ -0,0 +1,211 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl" +info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl" +info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet" + +# dataset scene setting +dataset_version_list = [ + "db_j6gen2_v1", + "db_j6gen2_v2", + "db_j6gen2_v3", + "db_j6gen2_v4", + "db_j6gen2_v5", + "db_j6gen2_v6", + "db_j6gen2_v7", + "db_j6gen2_v8", + "db_j6gen2_v9", + "db_largebus_v1", + "db_largebus_v2", + "db_largebus_v3", +] + +dataset_test_groups = { + "largebus": 
("t4dataset_largebus_traffic_cone_infos_test.pkl", False), + "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False), + "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True), +} + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) + +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + 
"pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + "vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + 
("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + ("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", "motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] 
when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) + +remove_non_traffic_cone_barrier = True \ No newline at end of file diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py new file mode 100644 index 000000000..3643b4475 --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -0,0 +1,202 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl" +info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl" +info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet" + +# dataset scene setting +dataset_test_groups = { + "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True), +} + +dataset_version_list = [ + "db_jpntaxigen2_v1", + "db_jpntaxigen2_v2", + "db_jpntaxi_v1", + "db_jpntaxi_v2", + "db_jpntaxi_v4", +] + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + 
"CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + 
"vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] + +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +# Add filter attributes +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + ("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + ("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", 
"motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) + +remove_non_traffic_cone_barrier = True \ No newline at end of file From aac1e1d3fa8c342d628ccaf723fe7a2419cf0a8a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:14:25 +0900 Subject: [PATCH 049/162] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index adbd64835..b0a42249a 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -692,7 +692,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier + label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return 
( labels[None], From 09c06d79ee992ef0479876b374271b6276aa632c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:17:53 +0900 Subject: [PATCH 050/162] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index b0a42249a..226237ff3 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -762,7 +762,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li preds_dense_heatmap, heatmap.float(), ) - loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach() + loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() for cls_i, class_name in enumerate(self.class_names): loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] From 5279b178195f4a8aa7290eb7cec905aeb354576c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:18:52 +0900 Subject: [PATCH 051/162] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 226237ff3..3f857e22d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -692,7 +692,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to 
ignore traffic_cone and barrier + if len(neg_inds) > 0: + # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. + _cols = torch.as_tensor( + self.partial_ignore_labels, device=label_weights.device, dtype=torch.long + ) + label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 return ( labels[None], From 0ce7a720a12904400e236281b8a4e3f4c17fd9a7 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:19:27 +0900 Subject: [PATCH 052/162] Add the script --- ...0m_sincos_timeexp_34_channels_32_points.py | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py new file mode 100644 index 000000000..f784b2386 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py @@ -0,0 +1,166 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = 
"info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=32, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + time_lag_channel_index=3, + time_exp_factor=1.0, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + 
voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + 
ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 9b4c2f292c10bc3561110541089c3c7bb0d0dcb9 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Tue, 28 Apr 2026 12:48:58 +0900 Subject: [PATCH 053/162] Added --- .../detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py index 3643b4475..c7e631458 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -18,7 +18,7 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True), + "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True), } dataset_version_list = [ From 5c0ada87126561c3483253ebd0a684ece02b332c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:49:32 +0900 Subject: [PATCH 054/162] Add the script --- 
.../BEVFusion/bevfusion/bevfusion_head.py | 71 ++++++++++++------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 3f857e22d..a1819b309 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -753,34 +753,53 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.partial_ignore_labels is None: - loss_heatmap = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - avg_factor=num_pos_dense_heatmap, - ) - loss_dict["loss_heatmap"] = loss_heatmap - else: - # When ignore labels is found, we compute the loss for each class - # heatmap focal loss - loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - ) - loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # (Batch, num_classes) - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + # if self.partial_ignore_labels is None: + # loss_heatmap = self.loss_heatmap( + # preds_dense_heatmap, + # heatmap.float(), + # avg_factor=num_pos_dense_heatmap, + # ) + # loss_dict["loss_heatmap"] = loss_heatmap + # else: + # # When ignore labels is found, we compute the loss for each class + # # heatmap focal loss + # 
loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + # preds_dense_heatmap, + # heatmap.float(), + # ) + # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() + # # for cls_i, class_name in enumerate(self.class_names): + # # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + + # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + # loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # # (Batch, num_classes) + # for cls_i, class_name in enumerate(self.class_names): + # loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # # Prevent loss item to avoid computing gradients twice. This is for logging. + # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + + # # When ignore labels is found, we compute the loss for each class + # # heatmap focal loss + loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() + # for cls_i, class_name in enumerate(self.class_names): + # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + # Prevent loss item to avoid computing gradients twice. This is for logging. 
+ loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): From ad6b07a14afd1bd08a3178342856660fe364185a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:49:46 +0900 Subject: [PATCH 055/162] Add the script --- ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py new file mode 100644 index 000000000..38b1e8ea5 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type +experiment_name = 
"lidar_voxel_second_secfpn_50e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=None, + loss_heatmap=dict( + reduction="none", + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + 
filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, 
max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From a592868d6add0b0e589a1e7dd2ee47d55848be13 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 16:28:38 +0900 Subject: [PATCH 056/162] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 70 +++----- ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 ++++++++++++++++++ 2 files changed, 188 insertions(+), 46 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index a1819b309..0b18803c4 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -753,53 +753,31 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - # if self.partial_ignore_labels is None: - # loss_heatmap = self.loss_heatmap( - # preds_dense_heatmap, - # heatmap.float(), - # avg_factor=num_pos_dense_heatmap, - # ) - # loss_dict["loss_heatmap"] = loss_heatmap - # else: - # # When ignore labels is found, we compute the loss for each class - # # heatmap focal loss - # loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - # preds_dense_heatmap, - # heatmap.float(), - # ) - # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - # # for cls_i, class_name in enumerate(self.class_names): - # # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - # loss_heatmap_cls = 
loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # # (Batch, num_classes) - # for cls_i, class_name in enumerate(self.class_names): - # loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + if self.partial_ignore_labels is None: + loss_heatmap = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + avg_factor=num_pos_dense_heatmap, + ) + loss_dict["loss_heatmap"] = loss_heatmap + else: + # When ignore labels is found, we compute the loss for each class + # heatmap focal loss + loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # # Prevent loss item to avoid computing gradients twice. This is for logging. 
- # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() - - # # When ignore labels is found, we compute the loss for each class - # # heatmap focal loss - loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - ) - # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - # for cls_i, class_name in enumerate(self.class_names): - # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # (Batch, num_classes) - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # Prevent loss item to avoid computing gradients twice. This is for logging. 
+ loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py new file mode 100644 index 000000000..1ca622714 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -0,0 +1,164 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/user_name/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + 
pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), + partial_ignore_labels=None, +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + 
metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = None From 2929ff67b324627b3adbbe50da221e235f004ec5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 20:35:00 +0900 Subject: [PATCH 057/162] Update configs --- ...cond_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 4 ++-- 
.../default/pipelines/default_lidar_intensity_120m.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index 1ca622714..57afc7e75 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" @@ -161,4 +161,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index a9032fcdc..19051a04f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters From 57d6ae6771fac9787e9ac4deeb3f6edca50e5bc3 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 21:18:15 +0900 Subject: [PATCH 058/162] Update configs --- .../datasets/transforms/loading.py | 41 +++++ 
...b8_j6gen2_base_120m_traffic_cone_ignore.py | 167 ++++++++++++++++++ .../default_30e_8xb8_adamw_cosine.py | 2 +- 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py new file mode 100644 index 000000000..3e23218e4 --- /dev/null +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -0,0 +1,41 @@ +from mmcv.transforms import BaseTransform +from mmdet3d.structures.ops import box_np_ops +from mmengine.registry import TRANSFORMS + + +@TRANSFORMS.register_module() +class LoadPointsFromCurrentFileSweep(BaseTransform): + """Load points from the current file and sweep. + This is used to load the points from the current file and sweep for copy-paste augmentation. + + Args: + coord_type (str): The type of coordinates of points cloud. + load_dim (int): The dimension of the loaded points. + use_dim (list[int] | int): Which dimensions of the points to use. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
+ """ + + def __init__(self, + coord_type: str, + load_dim: int = 6, + use_dim: Union[int, List[int]] = [0, 1, 2], + shift_height: bool = False, + use_color: bool = False, + norm_intensity: bool = False, + norm_elongation: bool = False, + backend_args: Optional[dict] = None) -> None: + self.shift_height = shift_height + self.use_color = use_color + if isinstance(use_dim, int): + use_dim = list(range(use_dim)) + assert max(use_dim) < load_dim, \ + f'Expect all used dimensions < {load_dim}, got {use_dim}' + assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH'] + + self.coord_type = coord_type + self.load_dim = load_dim + self.use_dim = use_dim + self.norm_intensity = norm_intensity + self.norm_elongation = norm_elongation + self.backend_args = backend_args \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py new file mode 100644 index 000000000..80bd595dd --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -0,0 +1,167 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = 
"info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + 
data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + 
eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index a2cd2d2e9..388705848 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -3,7 +3,7 @@ lr = 1.4141e-4 t_max = 8 max_epochs = 30 -val_interval = 5 +val_interval = 1 train_gpu_size = 8 test_batch_size = 2 From 2e03655ea03b7b9147c49e30d591ae33df1ee08a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:31:43 +0900 Subject: [PATCH 059/162] Add the script --- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/loading.py | 53 +-- ..._base_120m_traffic_cone_full_copy_paste.py | 312 ++++++++++++++++++ ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 10 +- .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + ...default_lidar_intensity_120m_copy_paste.py | 180 ++++++++++ 7 files changed, 532 insertions(+), 30 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py 
b/autoware_ml/detection3d/datasets/transforms/__init__.py index 6bc932f1a..dc95d27f8 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,3 +1,4 @@ from .object_min_points_filter import ObjectMinPointsFilter +from .loading import LoadPointsFromCurrentFileSweep -__all__ = ["ObjectMinPointsFilter"] +__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index 3e23218e4..d96a87b5a 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,22 +1,24 @@ from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops +from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps from mmengine.registry import TRANSFORMS + @TRANSFORMS.register_module() class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. - This is used to load the points from the current file and sweep for copy-paste augmentation. + """Load points from the current file and sweep. + This is used to load the points from the current file and sweep for copy-paste augmentation. - Args: - coord_type (str): The type of coordinates of points cloud. - load_dim (int): The dimension of the loaded points. - use_dim (list[int] | int): Which dimensions of the points to use. - backend_args (dict, optional): Arguments to instantiate the - corresponding backend. Defaults to None. - """ + Args: + coord_type (str): The type of coordinates of points cloud. + load_dim (int): The dimension of the loaded points. + use_dim (list[int] | int): Which dimensions of the points to use. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
+ """ - def __init__(self, + def __init__(self, coord_type: str, load_dim: int = 6, use_dim: Union[int, List[int]] = [0, 1, 2], @@ -24,18 +26,21 @@ def __init__(self, use_color: bool = False, norm_intensity: bool = False, norm_elongation: bool = False, - backend_args: Optional[dict] = None) -> None: - self.shift_height = shift_height - self.use_color = use_color - if isinstance(use_dim, int): - use_dim = list(range(use_dim)) - assert max(use_dim) < load_dim, \ - f'Expect all used dimensions < {load_dim}, got {use_dim}' - assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH'] + backend_args: Optional[dict] = None, + sweeps_num: int = 10, + pad_empty_sweeps: bool = False, + remove_close: bool = False, + test_mode: bool = False + ) -> None: + + self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args) + if sweeps_num > 0: + self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, pad_empty_sweeps=pad_empty_sweeps, remove_close=remove_close, test_mode=test_mode) + else: + self.points_from_multi_sweeps_loader = None - self.coord_type = coord_type - self.load_dim = load_dim - self.use_dim = use_dim - self.norm_intensity = norm_intensity - self.norm_elongation = norm_elongation - self.backend_args = backend_args \ No newline at end of file + def transform(self, results: dict) -> dict: + points = self.points_loader(results) + if self.points_from_multi_sweeps_loader is not None: + points = self.points_from_multi_sweeps_loader(points) + return points diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py new file mode 100644 index 000000000..6c7fb78a8 --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -0,0 +1,312 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to 
identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ) + ), +) + +db_sampler = dict( + data_root=data_root, + info_path=info_directory_path + _base_.info_train_file_name, + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + traffic_cone=5, + barrier=5, + bicycle=5, + pedestrian=5)), + classes=_base_.class_names, + sample_groups=dict( + car=0, + truck=0, + bus=0, + barrier=2, + traffic_cone=4), + points_loader=dict( + type='LoadPointsFromCurrentFileSweep', + coord_type='LIDAR', + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + sweeps_num=_base_.sweeps_num, + pad_empty_sweeps=True, + remove_close=True, + test_mode=False, + )) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict(type="ObjectSample", db_sampler=db_sampler), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeFilter", 
point_cloud_range=_base_.point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + 
_base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + 
filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" + +custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py index 80bd595dd..68c736749 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -64,11 +64,11 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( reduction="none", ), + ), ) # Dataset parameters @@ -164,4 +164,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 455c2761a..09b9f7b26 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -59,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 19051a04f..9c7e02977 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -59,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py new file mode 100644 index 000000000..a7c7cddfe --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py @@ -0,0 +1,180 @@ +# Dataset parameters +backend_args = None +num_workers = 32 +input_modality = dict(use_lidar=True, use_camera=False) + +# range setting +point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] +eval_class_range = { + "car": 120, + "truck": 120, + "bus": 120, + "bicycle": 120, + "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, +} + +# LiDAR parameters +point_load_dim = 5 # x, y, z, intensity, ring_id +point_use_dim = 5 +lidar_sweep_dims = [0, 1, 2, 3, 4] # x, y, z, intensity, time_lag +sweeps_num = 1 + +db_sampler = dict( + 
data_root=data_root, + info_path=data_root + 'nuscenes_dbinfos_train.pkl', + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + construction_vehicle=5, + traffic_cone=5, + barrier=5, + motorcycle=5, + bicycle=5, + pedestrian=5)), + classes=class_names, + sample_groups=dict( + car=2, + truck=3, + construction_vehicle=7, + bus=4, + trailer=6, + barrier=2, + motorcycle=6, + bicycle=6, + pedestrian=2, + traffic_cone=2), + points_loader=dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=5, + use_dim=[0, 1, 2, 3, 4], + backend_args=backend_args)) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + 
"pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Filtering configuration +# Note: +# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, +# e.g., dict(filter_frames_with_missing_image=True). +# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so +# image-based filtering does not apply and `filter_cfg` is intentionally None. 
+filter_cfg = None From e2a69c1851b6149f256e32f48803b968756f018d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:35:46 +0900 Subject: [PATCH 060/162] Add the script --- .../datasets/transforms/loading.py | 2 + ..._base_120m_traffic_cone_full_copy_paste.py | 2 + ...default_lidar_intensity_120m_copy_paste.py | 180 ------------------ 3 files changed, 4 insertions(+), 180 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index d96a87b5a..535653d9b 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,3 +1,5 @@ +from typing import List, Optional, Union + from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 6c7fb78a8..6e0d7445b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -138,6 +138,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py 
deleted file mode 100644 index a7c7cddfe..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py +++ /dev/null @@ -1,180 +0,0 @@ -# Dataset parameters -backend_args = None -num_workers = 32 -input_modality = dict(use_lidar=True, use_camera=False) - -# range setting -point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] -eval_class_range = { - "car": 120, - "truck": 120, - "bus": 120, - "bicycle": 120, - "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, -} - -# LiDAR parameters -point_load_dim = 5 # x, y, z, intensity, ring_id -point_use_dim = 5 -lidar_sweep_dims = [0, 1, 2, 3, 4] # x, y, z, intensity, time_lag -sweeps_num = 1 - -db_sampler = dict( - data_root=data_root, - info_path=data_root + 'nuscenes_dbinfos_train.pkl', - rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - construction_vehicle=5, - traffic_cone=5, - barrier=5, - motorcycle=5, - bicycle=5, - pedestrian=5)), - classes=class_names, - sample_groups=dict( - car=2, - truck=3, - construction_vehicle=7, - bus=4, - trailer=6, - barrier=2, - motorcycle=6, - bicycle=6, - pedestrian=2, - traffic_cone=2), - points_loader=dict( - type='LoadPointsFromFile', - coord_type='LIDAR', - load_dim=5, - use_dim=[0, 1, 2, 3, 4], - backend_args=backend_args)) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 
1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Filtering configuration -# Note: -# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, -# e.g., dict(filter_frames_with_missing_image=True). 
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so -# image-based filtering does not apply and `filter_cfg` is intentionally None. -filter_cfg = None From ebc80340e07dac07833f2b2b5d7bd9df15fe3450 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:37:23 +0900 Subject: [PATCH 061/162] Update configs --- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index 57afc7e75..b9fafe7a9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -65,7 +65,6 @@ voxel_size=_base_.voxel_size[0:2], ), ), - partial_ignore_labels=None, ) # Dataset parameters From bb35205a445beb1ad37bfe075a9400d0a8fb960a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 10:33:08 +0900 Subject: [PATCH 062/162] Add the script --- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 388705848..a2cd2d2e9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -3,7 +3,7 @@ lr = 1.4141e-4 t_max = 8 max_epochs = 30 -val_interval = 1 +val_interval = 5 train_gpu_size = 8 test_batch_size = 2 From 
f343fbeae0811127e1d54870e21f7eb5850af3b2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 10:42:46 +0900 Subject: [PATCH 063/162] Update configs --- autoware_ml/detection3d/datasets/t4dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..526150755 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -191,5 +191,8 @@ def parse_data_info(self, info: dict) -> dict: info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"]) else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] - + + # Default difficulty to 0 if not present + if 'difficulty' not in info: + info['difficulty'] = 0 return info From 6f55027b662c778c264d5ebe967d8d2f34813676 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 03:22:00 +0900 Subject: [PATCH 064/162] Update configs --- ...xel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py index 39c6ddf54..90136a748 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter From d99abd97ed98a0bdfcc90c1b26383aded31eedcf Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 12:25:17 +0900 Subject: [PATCH 065/162] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 9 +- ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 1 + ..._base_120m_traffic_cone_full_copy_paste.py | 21 +- ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 1 + ...ase_120m_traffic_cone_ignore_copy_paste.py | 317 ++++++++++++++++++ 5 files changed, 336 insertions(+), 13 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b18803c4..4894ad2e7 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,8 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None + partial_ignore_labels=None, + partial_ignore_dense_heatmap=False ): super().__init__() self.class_names = class_names @@ -194,7 +195,8 @@ def __init__( else: self.partial_ignore_labels = None - print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ + self.partial_ignore_dense_heatmap = 
partial_ignore_dense_heatmap + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): @@ -691,7 +693,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + if self.partial_ignore_dense_heatmap: + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals if len(neg_inds) > 0: # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. _cols = torch.as_tensor( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index b9fafe7a9..88e3cbc54 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -64,6 +64,7 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_dense_heatmap=False ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 6e0d7445b..903df577c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -63,7 +63,8 @@ bbox_coder=dict( pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], - ) + ), + partial_ignore_dense_heatmap=False ), ) @@ -113,7 +114,7 @@ type="LoadPointsFromMultiSweeps", sweeps_num=_base_.sweeps_num, load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, + use_dim=_base_.lidar_sweep_dims, pad_empty_sweeps=True, remove_close=True, backend_args=_base_.backend_args, @@ -176,21 +177,21 @@ dict( type="LoadPointsFromFile", coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, ), dict( type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, pad_empty_sweeps=True, remove_close=True, - backend_args=backend_args, + backend_args=_base_.backend_args, test_mode=True, ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py index 68c736749..bb10d484d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -65,6 +65,7 @@ voxel_size=_base_.voxel_size[0:2], ), partial_ignore_labels=["traffic_cone", "barrier"], + partial_ignore_dense_heatmap=True, loss_heatmap=dict( reduction="none", ), diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py new file mode 100644 index 000000000..61b9d35f3 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -0,0 +1,317 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name 
= "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=["traffic_cone", "barrier"], + partial_ignore_dense_heatmap=False, + loss_heatmap=dict( + reduction="none", + ), + ), +) + +db_sampler = dict( + data_root=data_root, + info_path=info_directory_path + _base_.info_train_file_name, + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + traffic_cone=5, + barrier=5, + bicycle=5, + pedestrian=5)), + classes=_base_.class_names, + sample_groups=dict( + car=0, + truck=0, + bus=0, + barrier=2, + traffic_cone=4), + points_loader=dict( + 
type='LoadPointsFromCurrentFileSweep', + coord_type='LIDAR', + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + sweeps_num=_base_.sweeps_num, + pad_empty_sweeps=True, + remove_close=True, + test_mode=False, + )) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict(type="ObjectSample", db_sampler=db_sampler), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + 
load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + 
), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" From b1b247a148f1a060eec5f70f7d8f1d8911524ebe Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 12:28:55 +0900 Subject: [PATCH 066/162] Add the script --- ...pn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py | 2 ++ ..._30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py | 2 ++ 2 files changed, 4 insertions(+) diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 903df577c..7fef2db47 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -88,6 +88,8 @@ car=0, truck=0, bus=0, + bicycle=0, + pedestrian=0, barrier=2, traffic_cone=4), points_loader=dict( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py index 61b9d35f3..e5e9c9ff3 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -92,6 +92,8 @@ car=0, truck=0, bus=0, + bicycle=0, + pedestrian=0, barrier=2, traffic_cone=4), points_loader=dict( From 36e3811139144ea84899aadfa8b94f66e421dd9d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 20:38:01 +0900 Subject: [PATCH 067/162] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 3 + ...ase_120m_traffic_cone_ignore_copy_paste.py | 73 ++++--------------- 2 files changed, 19 insertions(+), 57 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 4894ad2e7..5b0e156d0 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -701,6 +701,9 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): self.partial_ignore_labels, device=label_weights.device, dtype=torch.long ) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 + + print("heatmap with traffic cone: ", heatmap[5].sum()) + print("heatmap with barrier: ", heatmap[6].sum()) return ( labels[None], diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py index e5e9c9ff3..41629bb17 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -74,19 +74,20 @@ db_sampler = dict( data_root=data_root, - info_path=info_directory_path + _base_.info_train_file_name, + info_path=data_root + info_directory_path + _base_.info_train_file_name, rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - traffic_cone=5, - barrier=5, - bicycle=5, - pedestrian=5)), + prepare=dict(), + # prepare=dict( + # filter_by_difficulty=[-1], + # filter_by_min_points=dict( + # car=5, + # truck=5, + # bus=5, + # trailer=5, + # traffic_cone=5, + # barrier=5, + # bicycle=5, + # pedestrian=5)), classes=_base_.class_names, sample_groups=dict( car=0, @@ -179,50 +180,6 @@ ), ] -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - 
type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - # Dataset parameters train_dataloader = dict( batch_size=_base_.train_batch_size, @@ -231,7 +188,7 @@ sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( type=_base_.dataset_type, - pipeline=_base_.train_pipeline, + pipeline=train_pipeline, modality=_base_.input_modality, backend_args=_base_.backend_args, data_root=data_root, @@ -317,3 +274,5 @@ log_processor = dict(window_size=50) load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" + +custom_hooks = [] From e9052633e80ae8ba8b36fb6554ded9c04e7bf672 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 15:29:37 +0900 Subject: [PATCH 068/162] Add traffic cone and barrier --- .../BEVFusion/bevfusion/bevfusion_head.py | 13 +- ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 + ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 --------- ..._base_120m_traffic_cone_full_copy_paste.py | 317 ------------------ ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 168 ---------- ...ase_120m_traffic_cone_ignore_copy_paste.py | 278 --------------- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 + ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 4 + ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 --------- ..._50e_8xb8_base_120m_traffic_cone_ignore.py | 163 --------- 10 files 
changed, 14 insertions(+), 1264 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 5b0e156d0..da056efcc 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -633,7 +633,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): ious = torch.clamp(ious, min=0.0, max=1.0) labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long) - # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) if gt_labels_3d is not None: # default label is -1 labels += self.num_classes @@ -693,17 +692,13 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - if 
self.partial_ignore_dense_heatmap: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids if len(neg_inds) > 0: - # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. + # neg_inds [N] and column indices [K] must broadcast (not pair); _cols = torch.as_tensor( self.partial_ignore_labels, device=label_weights.device, dtype=torch.long ) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 - - print("heatmap with traffic cone: ", heatmap[5].sum()) - print("heatmap with barrier: ", heatmap[6].sum()) return ( labels[None], @@ -795,10 +790,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ].reshape(-1) - # layer_label_weights = label_weights[ - # ..., - # idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - # ].reshape(-1) layer_label_weights = label_weights[ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 9da67036e..d32dc9c70 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py deleted file mode 100644 index 88e3cbc54..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ /dev/null @@ -1,164 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - 
_base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_dense_heatmap=False - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - 
type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py deleted file mode 100644 index 7fef2db47..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ /dev/null @@ -1,317 +0,0 @@ -_base_ = [ - 
"../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - 
test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_dense_heatmap=False - ), -) - -db_sampler = dict( - data_root=data_root, - info_path=info_directory_path + _base_.info_train_file_name, - rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - traffic_cone=5, - barrier=5, - bicycle=5, - pedestrian=5)), - classes=_base_.class_names, - sample_groups=dict( - car=0, - truck=0, - bus=0, - bicycle=0, - pedestrian=0, - barrier=2, - traffic_cone=4), - points_loader=dict( - type='LoadPointsFromCurrentFileSweep', - coord_type='LIDAR', - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - sweeps_num=_base_.sweeps_num, - pad_empty_sweeps=True, - remove_close=True, - test_mode=False, - )) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict(type="ObjectSample", db_sampler=db_sampler), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - 
"car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + 
_base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - 
filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" - -custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py deleted file mode 100644 index bb10d484d..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ /dev/null @@ -1,168 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - 
voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - partial_ignore_dense_heatmap=True, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - 
persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = 
"work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py deleted file mode 100644 index 41629bb17..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ /dev/null @@ -1,278 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, 
time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - partial_ignore_dense_heatmap=False, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -db_sampler = dict( - data_root=data_root, - info_path=data_root + info_directory_path + _base_.info_train_file_name, - rate=1.0, - prepare=dict(), - # prepare=dict( - # filter_by_difficulty=[-1], - # filter_by_min_points=dict( - # car=5, - # truck=5, - # bus=5, - # trailer=5, - # traffic_cone=5, - # barrier=5, - # bicycle=5, - # pedestrian=5)), - classes=_base_.class_names, - sample_groups=dict( - car=0, - truck=0, - bus=0, - bicycle=0, - pedestrian=0, - barrier=2, - traffic_cone=4), - points_loader=dict( - type='LoadPointsFromCurrentFileSweep', - coord_type='LIDAR', - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - sweeps_num=_base_.sweeps_num, - pad_empty_sweeps=True, - remove_close=True, - test_mode=False, - )) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - 
sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict(type="ObjectSample", db_sampler=db_sampler), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - 
filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, 
max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" - -custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index c884c0aef..406e87655 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 79337d976..e8068332a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -62,6 +62,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py deleted file mode 100644 index 38b1e8ea5..000000000 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - 
point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=None, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - 
modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py deleted file mode 100644 index 90136a748..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = 
dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - 
persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - 
data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) From dc2265e33d56726313862e0f42fbd87f8fd65fde Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:32:44 +0000 Subject: [PATCH 069/162] ci(pre-commit): autofix --- .../detection3d/dataset/t4dataset/base.py | 14 ++---- .../t4dataset/j6gen2_base_traffic_cone.py | 2 +- .../t4dataset/jpntaxi_base_traffic_cone.py | 2 +- .../dataset/t4dataset/jpntaxi_gen2.py | 6 +-- .../detection3d/dataset/t4dataset/largebus.py | 4 +- autoware_ml/detection3d/datasets/t4dataset.py | 6 +-- .../datasets/transforms/__init__.py | 2 +- .../datasets/transforms/loading.py | 47 +++++++++++-------- .../BEVFusion/bevfusion/bevfusion_head.py | 45 +++++++++++------- projects/BEVFusion/bevfusion/utils.py | 4 +- .../default_lidar_second_secfpn_120m.py | 2 +- .../pipelines/default_lidar_intensity_120m.py | 6 +-- tools/detection3d/create_data_t4dataset.py | 15 ++++-- .../t4dataset_converters/t4converter.py | 2 +- 14 files changed, 86 insertions(+), 71 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 4248c90e6..3be587072 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -143,20 +143,12 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": 
"traffic_cone", - "trafficcone": "traffic_cone", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", "barrier": "barrier", } -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier" -] +class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py index 8c57cf4fa..176763b54 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py @@ -208,4 +208,4 @@ matching_class_agnostic_fps=False, ) -remove_non_traffic_cone_barrier = True \ No newline at end of file +remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py index c7e631458..61e9e915c 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -199,4 +199,4 @@ matching_class_agnostic_fps=False, ) -remove_non_traffic_cone_barrier = True \ No newline at end of file +remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index 6b7250673..dbd6e2813 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -117,9 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": 
"barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index 2b54629eb..2212b8e56 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -130,8 +130,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index 526150755..d7fed6256 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -191,8 +191,8 @@ def parse_data_info(self, info: dict) -> dict: info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"]) else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] - + # Default difficulty to 0 if not present - if 'difficulty' not in info: - info['difficulty'] = 0 + if "difficulty" not in info: + info["difficulty"] = 0 return info diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index dc95d27f8..b517bf1ea 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,4 @@ -from .object_min_points_filter import ObjectMinPointsFilter from .loading import LoadPointsFromCurrentFileSweep +from .object_min_points_filter import ObjectMinPointsFilter __all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index 535653d9b..09beddc34 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ 
b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,15 +1,14 @@ from typing import List, Optional, Union from mmcv.transforms import BaseTransform -from mmdet3d.structures.ops import box_np_ops from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps +from mmdet3d.structures.ops import box_np_ops from mmengine.registry import TRANSFORMS - @TRANSFORMS.register_module() class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. + """Load points from the current file and sweep. This is used to load the points from the current file and sweep for copy-paste augmentation. Args: @@ -20,24 +19,32 @@ class LoadPointsFromCurrentFileSweep(BaseTransform): corresponding backend. Defaults to None. """ - def __init__(self, - coord_type: str, - load_dim: int = 6, - use_dim: Union[int, List[int]] = [0, 1, 2], - shift_height: bool = False, - use_color: bool = False, - norm_intensity: bool = False, - norm_elongation: bool = False, - backend_args: Optional[dict] = None, - sweeps_num: int = 10, - pad_empty_sweeps: bool = False, - remove_close: bool = False, - test_mode: bool = False - ) -> None: - - self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args) + def __init__( + self, + coord_type: str, + load_dim: int = 6, + use_dim: Union[int, List[int]] = [0, 1, 2], + shift_height: bool = False, + use_color: bool = False, + norm_intensity: bool = False, + norm_elongation: bool = False, + backend_args: Optional[dict] = None, + sweeps_num: int = 10, + pad_empty_sweeps: bool = False, + remove_close: bool = False, + test_mode: bool = False, + ) -> None: + + self.points_loader = LoadPointsFromFile( + coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args + ) if sweeps_num > 0: - self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, pad_empty_sweeps=pad_empty_sweeps, 
remove_close=remove_close, test_mode=test_mode) + self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps( + sweeps_num=sweeps_num, + pad_empty_sweeps=pad_empty_sweeps, + remove_close=remove_close, + test_mode=test_mode, + ) else: self.points_from_multi_sweeps_loader = None diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index da056efcc..b62113f65 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -13,9 +13,9 @@ from mmdet3d.structures import xywhr2xyxyr from mmdet.models.task_modules import AssignResult, PseudoSampler, build_assigner, build_bbox_coder, build_sampler from mmdet.models.utils import multi_apply +from mmengine.logging import print_log from mmengine.structures import InstanceData from torch import nn -from mmengine.logging import print_log def clip_sigmoid(x, eps=1e-4): @@ -71,7 +71,7 @@ def __init__( test_cfg=None, bbox_coder=None, partial_ignore_labels=None, - partial_ignore_dense_heatmap=False + partial_ignore_dense_heatmap=False, ): super().__init__() self.class_names = class_names @@ -187,17 +187,24 @@ def __init__( cluster["class_indices"] = sorted( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) - + # If true, only compute loss for traffic cone and barrier when it's available in the frame if partial_ignore_labels is not None: - assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" - self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels] + assert ( + loss_heatmap["reduction"] == "none" + ), "Loss reduction must be 'none' for partial traffic cone and barrier" + self.partial_ignore_labels = [ + self.class_name_to_indices[class_name] for class_name in partial_ignore_labels + ] else: self.partial_ignore_labels = None - + self.partial_ignore_dense_heatmap = 
partial_ignore_dense_heatmap - print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") + print_log( + f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", + logger="current", + ) def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -469,7 +476,9 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F return rets[0] - def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]): + def get_targets( + self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict] + ): """Generate training targets. 
Args: batch_gt_instances_3d (List[InstanceData]): @@ -579,7 +588,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): num_layer = self.num_decoder_layers else: num_layer = 1 - + assign_result_list = [] for idx_layer in range(num_layer): bboxes_tensor_layer = bboxes_tensor[ @@ -653,10 +662,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight - + if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 - + # # compute dense heatmap targets device = labels.device gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device) @@ -692,12 +701,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids if len(neg_inds) > 0: # neg_inds [N] and column indices [K] must broadcast (not pair); - _cols = torch.as_tensor( - self.partial_ignore_labels, device=label_weights.device, dtype=torch.long - ) + _cols = torch.as_tensor(self.partial_ignore_labels, device=label_weights.device, dtype=torch.long) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 return ( @@ -732,7 +739,9 @@ def loss(self, batch_feats, batch_data_samples): return loss - def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas): + def loss_by_feat( + self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas + ): ( labels, label_weights, @@ -775,7 +784,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], 
batch_gt_instances_3d: Li # (Batch, num_classes) for cls_i, class_name in enumerate(self.class_names): loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - + # Prevent loss item to avoid computing gradients twice. This is for logging. loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index b6bd2be41..8fd83a0c5 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -259,10 +259,10 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg) iou = self.iou_calculator(bboxes, gt_bboxes) iou_cost = self.iou_cost(iou) - + # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - + # if ignore_labels is not None: # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 023c6774d..809179b20 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -114,6 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_ignore_labels=None + partial_ignore_labels=None, ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 9c7e02977..e2de195e9 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -13,8 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -132,7 +132,7 @@ "timestamp", "vehicle_type", "city", - "traffic_cone_barrier_status", + "traffic_cone_barrier_status", ], ), ] diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 9550b2872..3b02017e0 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -273,7 +273,7 @@ def main(): if cfg.filter_attributes is None: print_log("No attribute filtering is applied!") - + remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False) # Get every pair of min-max distance filtering thresholds bev_distance_ranges = [] @@ -310,9 +310,14 @@ def main(): ) dataset_scene_info = scene_id.split("/") if len(dataset_scene_info) == 5: - t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info + t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = ( + dataset_scene_info + ) if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false": - print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current") + print_log( + f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", + logger="current", + ) continue elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info @@ -336,7 +341,9 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type) + info = get_info( + cfg, t4, sample, 
i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type + ) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py index 5dfd1dc1f..ccc88b2d1 100644 --- a/tools/detection3d/t4dataset_converters/t4converter.py +++ b/tools/detection3d/t4dataset_converters/t4converter.py @@ -626,7 +626,7 @@ def get_lidarseg_annotations( ) -> dict: if not hasattr(t4, "lidarseg") or not t4.lidarseg: return dict() - + if sd_record.info_filename is None: print(f"sample {lidar_token} doesn't have lidar info_filename") return dict() From 15bbf0ef0920f41f6e267b312447d2900b5bd8fd Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 15:35:46 +0900 Subject: [PATCH 070/162] remove unecessary changes --- .../t4dataset/j6gen2_base_traffic_cone.py | 211 ------------------ .../t4dataset/jpntaxi_base_traffic_cone.py | 202 ----------------- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/loading.py | 55 ----- .../BEVFusion/bevfusion/bevfusion_head.py | 14 +- projects/BEVFusion/bevfusion/utils.py | 9 +- 6 files changed, 6 insertions(+), 488 deletions(-) delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py delete mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py deleted file mode 100644 index 176763b54..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py +++ /dev/null @@ -1,211 +0,0 @@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - 
"autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl" -info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl" -info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet" - -# dataset scene setting -dataset_version_list = [ - "db_j6gen2_v1", - "db_j6gen2_v2", - "db_j6gen2_v3", - "db_j6gen2_v4", - "db_j6gen2_v5", - "db_j6gen2_v6", - "db_j6gen2_v7", - "db_j6gen2_v8", - "db_j6gen2_v9", - "db_largebus_v1", - "db_largebus_v2", - "db_largebus_v3", -] - -dataset_test_groups = { - "largebus": ("t4dataset_largebus_traffic_cone_infos_test.pkl", False), - "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False), - "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True), -} - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) - -camera_types = { - "CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - 
"pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - "movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": "bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": 
"traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of ["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": (100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", "cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in 
[(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) - -remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py deleted file mode 100644 index 61e9e915c..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ /dev/null @@ -1,202 +0,0 @@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - "autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl" -info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl" -info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet" - -# dataset scene setting -dataset_test_groups = { - "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True), -} - -dataset_version_list = [ - "db_jpntaxigen2_v1", - "db_jpntaxigen2_v2", - "db_jpntaxi_v1", - "db_jpntaxi_v2", - "db_jpntaxi_v4", -] - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) -camera_types = { - 
"CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - "movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": "bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency 
(ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] - -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of ["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": (100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -# Add filter attributes -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", 
"cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) - -remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index b517bf1ea..6bc932f1a 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,3 @@ -from .loading import LoadPointsFromCurrentFileSweep from .object_min_points_filter import ObjectMinPointsFilter -__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] +__all__ = ["ObjectMinPointsFilter"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py deleted file mode 100644 index 09beddc34..000000000 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import List, Optional, Union - -from mmcv.transforms import BaseTransform -from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps -from mmdet3d.structures.ops import box_np_ops -from mmengine.registry import TRANSFORMS - - 
-@TRANSFORMS.register_module() -class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. - This is used to load the points from the current file and sweep for copy-paste augmentation. - - Args: - coord_type (str): The type of coordinates of points cloud. - load_dim (int): The dimension of the loaded points. - use_dim (list[int] | int): Which dimensions of the points to use. - backend_args (dict, optional): Arguments to instantiate the - corresponding backend. Defaults to None. - """ - - def __init__( - self, - coord_type: str, - load_dim: int = 6, - use_dim: Union[int, List[int]] = [0, 1, 2], - shift_height: bool = False, - use_color: bool = False, - norm_intensity: bool = False, - norm_elongation: bool = False, - backend_args: Optional[dict] = None, - sweeps_num: int = 10, - pad_empty_sweeps: bool = False, - remove_close: bool = False, - test_mode: bool = False, - ) -> None: - - self.points_loader = LoadPointsFromFile( - coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args - ) - if sweeps_num > 0: - self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps( - sweeps_num=sweeps_num, - pad_empty_sweeps=pad_empty_sweeps, - remove_close=remove_close, - test_mode=test_mode, - ) - else: - self.points_from_multi_sweeps_loader = None - - def transform(self, results: dict) -> dict: - points = self.points_loader(results) - if self.points_from_multi_sweeps_loader is not None: - points = self.points_from_multi_sweeps_loader(points) - return points diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index b62113f65..dd566eab1 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,9 +70,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None, - partial_ignore_dense_heatmap=False, - ): + partial_ignore_labels=None): super().__init__() 
self.class_names = class_names self.num_classes = len(self.class_names) @@ -198,13 +196,9 @@ def __init__( ] else: self.partial_ignore_labels = None - - self.partial_ignore_dense_heatmap = partial_ignore_dense_heatmap - print_log( - f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", - logger="current", - ) + + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 8fd83a0c5..c47604dbd 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -238,7 +238,7 @@ def __init__( self.iou_cost = TASK_UTILS.build(iou_cost) self.iou_calculator = TASK_UTILS.build(iou_calculator) - def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None): + def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0) # 1. assign -1 by default @@ -263,13 +263,6 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - # if ignore_labels is not None: - # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) - # ignore_preds_masks = preds_labels.isin(ignore_labels) - # cost[ignore_preds_masks] = 10000 - # print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) - # 3. 
do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() if linear_sum_assignment is None: From 1323d4ed662678fc225ca43ef7baaf5a8b144cc1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:36:38 +0000 Subject: [PATCH 071/162] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/bevfusion_head.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index dd566eab1..c37c5a538 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,8 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None): + partial_ignore_labels=None, + ): super().__init__() self.class_names = class_names self.num_classes = len(self.class_names) @@ -196,7 +197,7 @@ def __init__( ] else: self.partial_ignore_labels = None - + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") From 585a0b2b068d5c721d657a93e15bbbe6f904cf45 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 20:15:05 +0900 Subject: [PATCH 072/162] remove unecessary changes --- Dockerfile | 6 +- .../dataset/t4dataset/j6gen2_v2.py | 194 ++++++++++++++++++ ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 +- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 8 +- ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 4 +- 6 files changed, 207 insertions(+), 11 deletions(-) create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py diff --git a/Dockerfile b/Dockerfile index 3e9caecb9..2fbcaa620 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,13 +61,15 @@ RUN python3 -m pip --no-cache-dir 
install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@9d8c9773d35177bb0b7f2606f429f58a5fb708ca +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@3c9577dc23fd76a049559b42656ca46c1c32fa66 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ setuptools==60.2.0 \ transformers==4.51.3 \ - polars==1.37.1 + polars==1.37.1 \ + onnx_graphsurgeon==0.5.8 \ + spconv-cu126==2.3.8 # NOTE(knzo25): this patch is needed to use numpy versions over 1.23.5 (version used in mmdet3d 1.4.0) # It can be safely deleted when mmdet3d updates the numpy version diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py new file mode 100644 index 000000000..e4375d576 --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py @@ -0,0 +1,194 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl" +info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl" +info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet" + +# dataset scene setting +dataset_version_list = [ + "db_j6gen2_v2", +] + +dataset_test_groups = { + "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True), +} + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + 
CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle 
rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + "vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + ("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + 
("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", "motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index d32dc9c70..380a4ba81 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 39462b1f6..e3f7d5146 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 406e87655..eec87a585 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" +experiment_group_name = 
"bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -164,4 +164,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b50b093f7..5190182cc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) From a47646ea6476603518857c0a60cf18b30d5720a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 11:15:34 +0000 Subject: [PATCH 073/162] ci(pre-commit): autofix --- 
...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 5190182cc..213f0041b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) From 16eb517a21911d55513ad85863b47c0a6576a200 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 12:04:10 +0900 Subject: [PATCH 074/162] Add the script --- .../bevfusion/bevfusion_voxel_encoder.py | 66 ++++--- ..._base_120m_sincos_48_channels_32_points.py | 164 ++++++++++++++++++ 2 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 5037113aa..2cde57cc5 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -57,18 +57,19 @@ def __init__(self, super(BEVFusionVoxelEncoder, self).__init__() assert len(feat_channels) > 0 self.legacy = legacy + pfn_in_channels = 0 if with_cluster_center: - 
in_channels += 3 + pfn_in_channels += 3 if with_voxel_center: - in_channels += 3 + pfn_in_channels += 3 if with_distance: - in_channels += 1 + pfn_in_channels += 1 self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center # Create PillarFeatureNet layers self.in_channels = in_channels - feat_channels = [in_channels] + list(feat_channels) + feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] for i in range(len(feat_channels) - 1): in_filters = feat_channels[i] @@ -97,7 +98,8 @@ def __init__(self, self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -112,12 +114,26 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Returns: torch.Tensor: Features of pillars in shape (M, C). 
""" - if self.min_norm_values is not None and self.max_norm_values is not None: - features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) - else: - features_norm = features + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() - features_ls = [features_norm] + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + features_ls = [] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: points_mean = features[:, :, :3].sum( @@ -125,9 +141,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, -1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Map to [0, 1] if available - if self.min_norm_values is not None and self.max_norm_values is not None: - voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) - f_cluster = f_cluster / voxel_size + # if self.min_norm_values is not None and self.max_norm_values is not None: + # voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) + # f_cluster = f_cluster / voxel_size features_ls.append(f_cluster) # Find distance of x, y, and z from pillar center @@ -156,8 +172,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset) - if self.min_norm_values is not None and self.max_norm_values is 
not None: - f_center = f_center / (voxel_size * 0.5) + # if self.min_norm_values is not None and self.max_norm_values is not None: + # f_center = f_center / (voxel_size * 0.5) features_ls.append(f_center) if self._with_distance: @@ -165,19 +181,23 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, features_ls.append(points_dist) # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) + voxel_feature_offsets = torch.cat(features_ls, dim=-1) + # The feature decorations were calculated without regard to whether # pillar was empty. Need to ensure that # empty pillars remain set to zeros. - voxel_count = features.shape[1] - mask = get_paddings_indicator(num_points, voxel_count, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(features) - features *= mask - + mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) + voxel_feature_offsets *= mask + + # PFN for pfn in self.pfn_layers: - features = pfn(features, num_points) + voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) + + # Concat + features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) - return features.squeeze(1) + return features @MODELS.register_module() diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py new file mode 100644 index 000000000..073249a3e --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py @@ -0,0 +1,164 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + 
"../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=32, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=48, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], 
_base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + 
dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 731f6d9fea6db8f90a4e4c36c9dedb541292da4a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 16:58:35 +0900 Subject: [PATCH 075/162] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion.py | 19 +- .../bevfusion/bevfusion_voxel_encoder.py | 6 +- .../BEVFusion/bevfusion/sparse_encoder.py | 20 --- ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 28 +-- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 32 ++-- ...oxel_second_secfpn_50e_8xb16_base_120m.py} | 19 +- ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 160 ----------------- ...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 ----------------- 
...b8_base_120m_sincos_timeexp_34_channels.py | 165 ----------------- ...0m_sincos_timeexp_34_channels_32_points.py | 166 ------------------ ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- .../default_lidar_second_secfpn_120m.py | 15 +- ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +----------- .../default_camera_lidar_intensity_120m.py | 15 +- .../pipelines/default_lidar_intensity_120m.py | 2 +- ...e.py => default_30e_8xb16_adamw_cosine.py} | 9 +- ...e.py => default_50e_8xb16_adamw_cosine.py} | 9 +- 17 files changed, 59 insertions(+), 888 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py} (85%) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb8_adamw_cosine.py => default_30e_8xb16_adamw_cosine.py} (95%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb8_adamw_cosine.py => default_50e_8xb16_adamw_cosine.py} (95%) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index bc3f1b094..b113bb566 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -56,12 +56,10 @@ def __init__( super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg) if voxelize_cfg is not None: - 
self.voxelize_reduce = voxelize_cfg.pop("voxelize_reduce") self.pts_voxel_layer = Voxelization(**voxelize_cfg) self.pts_voxel_encoder = MODELS.build(pts_voxel_encoder) self.pts_middle_encoder = MODELS.build(pts_middle_encoder) else: - self.voxelize_reduce = False self.pts_voxel_layer = None self.pts_voxel_encoder = None self.pts_middle_encoder = None @@ -207,10 +205,6 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 - - if self.pts_voxel_encoder is not None: - assert not self.voxelize_reduce - feats = self.pts_voxel_encoder(feats, sizes, coords) else: # NOTE(knzo25): onnx inference. Voxelization happens outside the graph with torch.cuda.amp.autocast(enabled=False): @@ -224,12 +218,7 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: # batch_size = coords[-1, 0] + 1 batch_size = 1 print("Run onnx point_eSpConvst") - if self.pts_voxel_encoder is not None: - feats = self.pts_voxel_encoder(feats, sizes, coords) - else: - assert self.voxelize_reduce - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + feats = self.pts_voxel_encoder(feats, sizes, coords) x = self.pts_middle_encoder(feats, coords, batch_size) return x @@ -255,9 +244,9 @@ def voxelize(self, points): assert len(sizes) > 0, "No points in the voxel" sizes = torch.cat(sizes, dim=0) - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) - feats = feats.contiguous() + # if self.voxelize_reduce: + # feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + # feats = feats.contiguous() return feats, coords, sizes diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 2cde57cc5..6c41234c5 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -376,11 +376,7 @@ class BEVFusionVoxelMeanSinCosEncoder(nn.Module): def __init__(self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], - in_channels: Optional[int] = 4, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - mode: Optional[str] = 'max'): + in_channels: Optional[int] = 4): super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() # Create PillarFeatureNet layers diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 019cb630c..ce45d4536 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -47,9 +47,6 @@ class BEVFusionSparseEncoder(SparseEncoder): def __init__( self, in_channels, - aug_features_min_values, - aug_features_max_values, - num_aug_features, sparse_shape, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), @@ -64,9 +61,6 @@ def __init__( assert block_type in ["conv_module", "basicblock"] self.sparse_shape = sparse_shape self.in_channels = in_channels - self.register_buffer("aug_features_min_values", torch.tensor(aug_features_min_values)) - self.register_buffer("aug_features_max_values", torch.tensor(aug_features_max_values)) - self.num_aug_features = num_aug_features self.order = order self.base_channels = base_channels self.output_channels = output_channels @@ -77,10 +71,6 @@ def __init__( self.return_middle_feats = return_middle_feats # Spconv init all weight on its own - if num_aug_features: - self.in_channels = in_channels * num_aug_features * 2 - self.register_buffer("exponents", (2 ** torch.arange(0, num_aug_features).float())) - assert isinstance(order, tuple) and len(order) == 3 assert set(order) == {"conv", "norm", "act"} @@ -140,16 +130,6 @@ def forward(self, 
voxel_features, coors, batch_size): output features. When self.return_middle_feats is True, the module returns middle features. """ - - if self.num_aug_features: - num_points = voxel_features.shape[0] - x = (voxel_features - self.aug_features_min_values.view(1, -1)) / ( - self.aug_features_max_values - self.aug_features_min_values - ).view(1, -1) - y = x.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - y = y.reshape(num_points, -1) - voxel_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - coors = coors.int() input_sp_tensor = SparseConvTensor(voxel_features, coors, self.sparse_shape, batch_size) x = self.conv_input(input_sp_tensor) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 380a4ba81..4cf51faa5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -25,28 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_voxel_encoder=dict( + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + 
min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + ), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=50, sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index eec87a585..3b7c23b18 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type -experiment_name = 
"lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,28 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_voxel_encoder=dict( + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + ), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=50, sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py similarity index 85% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index d856b1d4b..6d3a1f93b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos" +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,23 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=False, ), pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelMeanSinCosEncoder", - in_channels=4, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], 
_base_.point_cloud_range[5], 0.2], ), pts_middle_encoder=dict( in_channels=32, sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py deleted file mode 100644 index 1f52662a4..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ /dev/null @@ -1,160 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m_iou_loss.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - 
voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelMeanSinCosEncoder", - in_channels=4, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=32, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - 
data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py deleted file mode 100644 index 54af6be5f..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - 
type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + 
_base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - 
filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py deleted file mode 100644 index d7e61102b..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py +++ /dev/null @@ -1,165 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - 
type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - time_lag_channel_index=3, - time_exp_factor=1.0, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - 
data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py deleted file mode 100644 index f784b2386..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py +++ /dev/null @@ -1,166 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - max_num_points=32, - point_cloud_range=_base_.point_cloud_range, - 
voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - time_lag_channel_index=3, - time_exp_factor=1.0, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - 
pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - 
backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index efcd091f5..98a65a3f9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 11a1b42b7..c097d10bf 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -1,5 +1,5 @@ num_proposals = 500 -max_num_points = 10 +max_num_points = 32 max_voxels = [120000, 160000] model = dict( @@ -7,19 +7,18 @@ voxelize_cfg=dict( max_num_points=max_num_points, max_voxels=max_voxels, - voxelize_reduce=True, ), data_preprocessor=dict( 
type="Det3DDataPreprocessor", pad_size_divisor=32, ), - pts_voxel_encoder=dict(type="HardSimpleVFE"), + pts_voxel_encoder=dict( + type="BEVFusionVoxelMeanSinCosEncoder", + in_channels=4, + ), pts_middle_encoder=dict( type="BEVFusionSparseEncoder", in_channels=5, - aug_features_min_values=[], - aug_features_max_values=[], - num_aug_features=0, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), @@ -112,9 +111,7 @@ reduction="mean", loss_weight=1.0, ), - # loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), - loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), + loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_ignore_labels=None, ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py index 792392c09..e90687fe3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py @@ -1,119 +1,10 @@ -num_proposals = 500 -max_num_points = 10 -max_voxels = [120000, 160000] +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] model = dict( - type="BEVFusion", - voxelize_cfg=dict( - max_num_points=max_num_points, - max_voxels=max_voxels, - voxelize_reduce=True, - ), - data_preprocessor=dict( - type="Det3DDataPreprocessor", - pad_size_divisor=32, - ), - pts_voxel_encoder=dict(type="HardSimpleVFE"), - pts_middle_encoder=dict( - type="BEVFusionSparseEncoder", - in_channels=5, - aug_features_min_values=[], - aug_features_max_values=[], - num_aug_features=0, - order=("conv", "norm", "act"), - 
norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), - encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), - encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), - block_type="basicblock", - ), - pts_backbone=dict( - type="SECOND", - in_channels=256, - out_channels=[128, 256], - layer_nums=[5, 5], - layer_strides=[1, 2], - norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), - conv_cfg=dict(type="Conv2d", bias=False), - ), - pts_neck=dict( - type="SECONDFPN", - in_channels=[128, 256], - out_channels=[256, 256], - upsample_strides=[1, 2], - norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), - upsample_cfg=dict(type="deconv", bias=False), - use_conv_for_no_stride=True, - ), bbox_head=dict( - type="BEVFusionHead", - num_proposals=num_proposals, - auxiliary=True, - in_channels=512, - hidden_channel=128, - nms_kernel_size=3, - bn_momentum=0.1, - num_decoder_layers=1, - decoder_layer=dict( - type="TransformerDecoderLayer", - self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), - cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), - ffn_cfg=dict( - embed_dims=128, - feedforward_channels=256, - num_fcs=2, - ffn_drop=0.1, - act_cfg=dict(type="ReLU", inplace=True), - ), - norm_cfg=dict(type="LN"), - pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128), - ), - train_cfg=dict( - dataset="t4datasets", - out_size_factor=8, - gaussian_overlap=0.1, - min_radius=2, - pos_weight=-1, - code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], - assigner=dict( - type="HungarianAssigner3D", - iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"), - cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15), - reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25), - iou_cost=dict(type="IoU3DCost", weight=0.25), - ), - ), - test_cfg=dict( - dataset="t4datasets", - out_size_factor=8, - nms_type=None, # Set to "circle" for circle_nms - # Set NMS for different clusters - nms_clusters=[ - 
dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], nms_threshold=0.5), - dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.5), - dict(class_names=["traffic_cone"], nms_threshold=0.175), - ], - ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), - bbox_coder=dict( - type="TransFusionBBoxCoder", - post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, - out_size_factor=8, - code_size=10, - ), - loss_cls=dict( - type="mmdet.FocalLoss", - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - reduction="mean", - loss_weight=1.0, - ), - loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), - loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), - loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 963a218e1..0b0f44c08 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=True) # range setting @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -74,14 +76,11 @@ classes=[ "car", "truck", - "construction_vehicle", "bus", - "trailer", - "barrier", - 
"motorcycle", "bicycle", "pedestrian", "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), @@ -107,6 +106,9 @@ "img_aug_matrix", "lidar_aug_matrix", "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", ], ), ] @@ -164,6 +166,9 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index e2de195e9..1ce2aa2be 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index a2cd2d2e9..1e1ce37ea 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -1,13 +1,12 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 -t_max = 8 +lr = 2.0e-4 +t_max = 3 max_epochs = 30 val_interval = 5 train_gpu_size = 8 -test_batch_size = 2 -train_batch_size = 8 +test_batch_size = 4 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py index 87571d0b3..5be98b3d9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py @@ -1,13 +1,12 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 -t_max = 15 +lr = 2.0e-4 +t_max = 5 max_epochs = 50 val_interval = 5 train_gpu_size = 8 -test_batch_size = 2 -train_batch_size = 8 +test_batch_size = 4 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler From ad4f746d8e4150a826e82009a80beedab991c7ad Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 17:00:43 +0900 Subject: [PATCH 076/162] Resolve conflict --- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 6 +++--- .../default/schedulers/default_50e_8xb16_adamw_cosine.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index 1e1ce37ea..e3975f6eb 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -10,7 +10,7 @@ param_scheduler = [ # learning rate scheduler - # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10 + # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10 # during the next epochs, learning rate decreases from lr * 10 to # lr * 1e-4 dict( @@ -23,7 +23,7 @@ convert_to_iter_based=True, ), 
dict( - type="CosineAnnealingLR", + type="CosineAnnealingLR", T_max=(max_epochs - t_max), eta_min=lr * 1e-4, begin=t_max, @@ -32,7 +32,7 @@ convert_to_iter_based=True, ), # momentum scheduler - # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 + # During the first (max_epochs * 0.10) epochs, momentum increases from 0 to 0.85 / 0.95 # during the next epochs, momentum increases from 0.85 / 0.95 to 1 dict( type="CosineAnnealingMomentum", diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py index 5be98b3d9..d209d0c1b 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py @@ -10,7 +10,7 @@ param_scheduler = [ # learning rate scheduler - # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10 + # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10 # during the next epochs, learning rate decreases from lr * 10 to # lr * 1e-4 dict( @@ -32,7 +32,7 @@ convert_to_iter_based=True, ), # momentum scheduler - # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 + # During the first (0.10 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 # during the next epochs, momentum increases from 0.85 / 0.95 to 1 dict( type="CosineAnnealingMomentum", From be69b11cb6732c96d5ae185db7b5c6521a65708d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 17:03:29 +0900 Subject: [PATCH 077/162] Resolve conflict --- ...ion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} | 0 ...xel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} | 0 ...on_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} | 0
...el_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} | 0 ..._secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} | 2 +- ...idar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} | 0 6 files changed, 1 insertion(+), 1 deletion(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} (100%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py index 073249a3e..44acb083c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py From 258e64c8ce89af6e174d9eccb17a45737e63c0cb Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 18:13:14 +0900 Subject: [PATCH 078/162] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_head.py | 5 +++-- .../default/models/default_lidar_second_secfpn_120m.py | 3 +++ 2 files changed, 6 insertions(+), 2 
deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b510eae7..1de3af05f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -782,8 +782,9 @@ def loss_by_feat( for cls_i, class_name in enumerate(self.class_names): loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # Logging-only aggregate. Detach so it does not retain the autograd graph; + # the per-class `loss_heatmap_{class_name}` entries are what drive gradients. + loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum().detach() # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index c097d10bf..d56e6d1a3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -111,7 +111,10 @@ reduction="mean", loss_weight=1.0, ), + loss_iou=None, loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + # partial_ + partial_ignore_labels=["traffic_cone", "barrier"], ), ) From 75a46d3cfe983dc76fc4e3e478cc82b0ffe02e86 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 19:07:24 +0900 Subject: [PATCH 079/162] Updated --- .../bevfusion/bevfusion_voxel_encoder.py | 2 +- ...ond_secfpn_50e_8xb16_base_120m_48_channels.py} | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) rename 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py} (88%) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 6c41234c5..06ca2e434 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -107,7 +107,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Args: features (torch.Tensor): Point features or raw points in shape - (N, M, C). + (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4. num_points (torch.Tensor): Number of points in each pillar in shape (M). coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py similarity index 88% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 44acb083c..02f9642f2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -13,20 +13,18 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = 
"bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points" +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter model = dict( type="BEVFusion", voxelize_cfg=dict( - max_num_points=32, point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=False, ), pts_voxel_encoder=dict( _delete_=True, @@ -35,21 +33,18 @@ with_distance=False, with_cluster_center=True, with_voxel_center=True, - feat_channels=[16], + feat_channels=[16, 16], point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + legacy=False ), pts_middle_encoder=dict( in_channels=48, sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 16fe09be0b4acd4bce50017351c816327f11d3fc Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:26:29 +0900 Subject: [PATCH 080/162] Updated --- 
projects/BEVFusion/bevfusion/__init__.py | 2 +- .../bevfusion/bevfusion_voxel_encoder.py | 489 +++++++++--------- 2 files changed, 239 insertions(+), 252 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 2e9822d76..ce9b31aa5 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder +from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder __all__ = [ "BEVFusion", diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 06ca2e434..843624b56 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -11,32 +11,80 @@ @MODELS.register_module() -class BEVFusionVoxelEncoder(nn.Module): +class HardSimpleVoxelSinCosEncoder(nn.Module): + def __init__(self, + min_norm_values: Tuple[float], + max_norm_values: Tuple[float], + in_channels: Optional[int] = 4) -> None: + """ + Simple voxel encoder that only performs mean pooling on the normalize features, and then + performs sin-cos (fourier encoding) on each voxel channels. + + The output shape of each voxel is (N, feature_channels*2). + Args: + min_norm_values (Tuple[float]): Minimum values for the features. + max_norm_values (Tuple[float]): Maximum values for the features. + in_channels (int): Number of input channels. 
+ """ + super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() + + # Create PillarFeatureNet layers + self.in_channels = in_channels + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4. + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C*C*2). + + """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + return voxel_fourier_features + + +@MODELS.register_module() +class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. - The network is same as pillar featuer net. - The network prepares the pillar features and performs forward pass - through PFNLayers. 
+ The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the + offset features, for example, distances. After that, it concatenates the fourier features and the offset features + along the channel dimension for each voxel. Args: - in_channels (int, optional): Number of input features, - either x, y, z or x, y, z, r. Defaults to 4. + min_norm_values (Tuple[float]): Minimum values for the features. + max_norm_values (Tuple[float]): Maximum values for the features. + in_channels (int): Number of input channels. feat_channels (tuple, optional): Number of features in each of the N PFNLayers. Defaults to (64, ). - with_distance (bool, optional): Whether to include Euclidean distance - to points. Defaults to False. - with_cluster_center (bool, optional): [description]. Defaults to True. - with_voxel_center (bool, optional): [description]. Defaults to True. - voxel_size (tuple[float], optional): Size of voxels, only utilize x - and y size. Defaults to (0.2, 0.2, 4). - point_cloud_range (tuple[float], optional): Point cloud range, only - utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1). - norm_cfg ([type], optional): [description]. - Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). - mode (str, optional): The mode to gather point features. Options are - 'max' or 'avg'. Defaults to 'max'. - legacy (bool, optional): Whether to use the new behavior or - the original behavior. Defaults to True. 
""" def __init__(self, @@ -54,7 +102,11 @@ def __init__(self, type='BN1d', eps=1e-3, momentum=0.01), mode: Optional[str] = 'max', legacy: Optional[bool] = True): - super(BEVFusionVoxelEncoder, self).__init__() + + super(BEVFusionVoxelEncoder, self).__init__( + min_norm_values=min_norm_values, + max_norm_values=max_norm_values, in_channels=in_channels + ) assert len(feat_channels) > 0 self.legacy = legacy pfn_in_channels = 0 @@ -64,11 +116,13 @@ def __init__(self, pfn_in_channels += 3 if with_distance: pfn_in_channels += 1 + + assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0" self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center - # Create PillarFeatureNet layers - self.in_channels = in_channels + + # Create VoxelFeatureNet layers feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] for i in range(len(feat_channels) - 1): @@ -96,11 +150,6 @@ def __init__(self, self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) - # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. @@ -112,26 +161,13 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). Returns: - torch.Tensor: Features of pillars in shape (M, C). + torch.Tensor: Features of pillars in shape (M, C*C*2 + feat_channels[-1]). 
""" - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + # (M, C*C*2) + voxel_fourier_features = super().forward(features, num_points, coors) - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + # Offset features + max_points_per_voxel = features.shape[1] features_ls = [] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available @@ -200,222 +236,173 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, return features -@MODELS.register_module() -class BEVFusionVoxelSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - time_lag_channel_index: int = 3, - time_exp_factor: Optional[float] = None, - feat_channels: Optional[tuple] = (16, ), - in_channels: Optional[int] = 4, - with_distance: Optional[bool] = False, - with_cluster_center: Optional[bool] = True, - with_voxel_center: Optional[bool] = True, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - norm_cfg: Optional[dict] = dict( - type='BN1d', eps=1e-3, momentum=0.01), - mode: Optional[str] = 'max'): - super(BEVFusionVoxelSinCosEncoder, self).__init__() - - self._with_distance = with_distance - self._with_cluster_center = with_cluster_center - self._with_voxel_center = with_voxel_center - # Create 
PillarFeatureNet layers - self.in_channels = in_channels - - # Need pillar (voxel) size and x/y offset in order to calculate offset - self.vx = voxel_size[0] - self.vy = voxel_size[1] - self.vz = voxel_size[2] - self.x_offset = self.vx / 2 + point_cloud_range[0] - self.y_offset = self.vy / 2 + point_cloud_range[1] - self.z_offset = self.vz / 2 + point_cloud_range[2] - self.point_cloud_range = point_cloud_range +# @MODELS.register_module() +# class BEVFusionVoxelSinCosEncoder(nn.Module): +# def __init__(self, +# min_norm_values: Tuple[float], +# max_norm_values: Tuple[float], +# time_lag_channel_index: int = 3, +# time_exp_factor: Optional[float] = None, +# feat_channels: Optional[tuple] = (16, ), +# in_channels: Optional[int] = 4, +# with_distance: Optional[bool] = False, +# with_cluster_center: Optional[bool] = True, +# with_voxel_center: Optional[bool] = True, +# voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), +# point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, +# 40, 1), +# norm_cfg: Optional[dict] = dict( +# type='BN1d', eps=1e-3, momentum=0.01), +# mode: Optional[str] = 'max'): +# super(BEVFusionVoxelSinCosEncoder, self).__init__() + +# self._with_distance = with_distance +# self._with_cluster_center = with_cluster_center +# self._with_voxel_center = with_voxel_center +# # Create PillarFeatureNet layers +# self.in_channels = in_channels + +# # Need pillar (voxel) size and x/y offset in order to calculate offset +# self.vx = voxel_size[0] +# self.vy = voxel_size[1] +# self.vz = voxel_size[2] +# self.x_offset = self.vx / 2 + point_cloud_range[0] +# self.y_offset = self.vy / 2 + point_cloud_range[1] +# self.z_offset = self.vz / 2 + point_cloud_range[2] +# self.point_cloud_range = point_cloud_range - self.xyz_channels = 3 - feat_offset_channels = in_channels - self.xyz_channels - if with_cluster_center: - feat_offset_channels += 3 - if with_voxel_center: - feat_offset_channels += 3 - if with_distance: - feat_offset_channels += 1 - - feat_channels 
= [feat_offset_channels] + list(feat_channels) - assert len(feat_channels) > 0, "feat_channels must be greater than 0" - pfn_layers = [] - for i in range(len(feat_channels) - 1): - in_filters = feat_channels[i] - out_filters = feat_channels[i + 1] - if i < len(feat_channels) - 2: - last_layer = False - else: - last_layer = True - pfn_layers.append( - PFNLayer( - in_filters, - out_filters, - norm_cfg=norm_cfg, - last_layer=last_layer, - mode=mode)) - self.pfn_layers = nn.ModuleList(pfn_layers) - - self.time_lag_channel_index = time_lag_channel_index - self.time_exp_factor = time_exp_factor +# self.xyz_channels = 3 +# feat_offset_channels = in_channels - self.xyz_channels +# if with_cluster_center: +# feat_offset_channels += 3 +# if with_voxel_center: +# feat_offset_channels += 3 +# if with_distance: +# feat_offset_channels += 1 + +# feat_channels = [feat_offset_channels] + list(feat_channels) +# assert len(feat_channels) > 0, "feat_channels must be greater than 0" +# pfn_layers = [] +# for i in range(len(feat_channels) - 1): +# in_filters = feat_channels[i] +# out_filters = feat_channels[i + 1] +# if i < len(feat_channels) - 2: +# last_layer = False +# else: +# last_layer = True +# pfn_layers.append( +# PFNLayer( +# in_filters, +# out_filters, +# norm_cfg=norm_cfg, +# last_layer=last_layer, +# mode=mode)) +# self.pfn_layers = nn.ModuleList(pfn_layers) + +# self.time_lag_channel_index = time_lag_channel_index +# self.time_exp_factor = time_exp_factor - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) - - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: - """Forward function. 
- - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - num_points (torch.Tensor): Number of points in each pillar in shape (M). - coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). - - Returns: - torch.Tensor: Features of pillars in shape (M, C). - """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] +# self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) +# self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) +# self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) +# self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) + +# def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, +# *args, **kwargs) -> Tensor: +# """Forward function. + +# Args: +# features (torch.Tensor): Point features or raw points in shape +# (N, M, C). +# num_points (torch.Tensor): Number of points in each pillar in shape (M). +# coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + +# Returns: +# torch.Tensor: Features of pillars in shape (M, C). 
+# """ +# num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) +# # Mean in the voxel +# # (N, M, 3) -> (N, 3) +# voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( +# -1, 1)).contiguous() + +# # min-max normalization, (N, 3) -> (N, 3) +# voxel_features_norm = (voxel_features - \ +# self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - - # PFN - # Other features, for example, intensity or time_lag - other_features = features[:, :, self.xyz_channels:] +# # SinCos encoding +# # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) +# y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) +# # (N*3, 3) -> (N, 3*3) +# y = y.reshape(num_voxels, -1) +# # (N, 3*3) -> (N, 3*3*2) +# voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + +# # PFN +# # Other features, for example, intensity or time_lag +# other_features = features[:, :, self.xyz_channels:] - # Normalization - other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - 
self.min_norm_values[self.xyz_channels:]) - - time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels - # exponentiate time_lag features, it's higher when the normlized time lag is lower - # (1.0 when time_lag_features is 0.0) - if self.time_exp_factor is not None: - other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) - else: - # Inverse the time_lag feature - other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] +# # Normalization +# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + +# time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels +# # exponentiate time_lag features, it's higher when the normlized time lag is lower +# # (1.0 when time_lag_features is 0.0) +# if self.time_exp_factor is not None: +# other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) +# else: +# # Inverse the time_lag feature +# other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] - # Offsets - voxel_feature_offsets = [other_features_norm] - # Find distance of x, y, and z from cluster center - if self._with_cluster_center: - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_points.type_as(features).view( - -1, 1, 1) +# # Offsets +# voxel_feature_offsets = [other_features_norm] +# # Find distance of x, y, and z from cluster center +# if self._with_cluster_center: +# points_mean = features[:, :, :3].sum( +# dim=1, keepdim=True) / num_points.type_as(features).view( +# -1, 1, 1) - # f_cluster = (features[:, :, :3] - points_mean) - f_cluster = features[:, :, :3] - points_mean - voxel_feature_offsets.append(f_cluster) - - # Find distance of x, y, and 
z from pillar center - dtype = features.dtype - if self._with_voxel_center: - f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, :, 0] - ( - coors[:, 3].to(dtype).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vy + - self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - ( - coors[:, 1].to(dtype).unsqueeze(1) * self.vz + - self.z_offset) +# # f_cluster = (features[:, :, :3] - points_mean) +# f_cluster = features[:, :, :3] - points_mean +# voxel_feature_offsets.append(f_cluster) + +# # Find distance of x, y, and z from pillar center +# dtype = features.dtype +# if self._with_voxel_center: +# f_center = torch.zeros_like(features[:, :, :3]) +# f_center[:, :, 0] = features[:, :, 0] - ( +# coors[:, 3].to(dtype).unsqueeze(1) * self.vx + +# self.x_offset) +# f_center[:, :, 1] = features[:, :, 1] - ( +# coors[:, 2].to(dtype).unsqueeze(1) * self.vy + +# self.y_offset) +# f_center[:, :, 2] = features[:, :, 2] - ( +# coors[:, 1].to(dtype).unsqueeze(1) * self.vz + +# self.z_offset) - # Map to [-1, 1] - # f_center = f_center / (self.voxel_size * 0.5) - voxel_feature_offsets.append(f_center) +# # Map to [-1, 1] +# # f_center = f_center / (self.voxel_size * 0.5) +# voxel_feature_offsets.append(f_center) - if self._with_distance: - points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - voxel_feature_offsets.append(points_dist) +# if self._with_distance: +# points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) +# voxel_feature_offsets.append(points_dist) - voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) - # The feature decorations were calculated without regard to whether - # pillar was empty. Need to ensure that - # empty pillars remain set to zeros. 
- mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) - voxel_feature_offsets *= mask +# voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) +# # The feature decorations were calculated without regard to whether +# # pillar was empty. Need to ensure that +# # empty pillars remain set to zeros. +# mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) +# mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) +# voxel_feature_offsets *= mask - # PFN - for pfn in self.pfn_layers: - voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) +# # PFN +# for pfn in self.pfn_layers: +# voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - # Concat - features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) - return features - - - -@MODELS.register_module() -class BEVFusionVoxelMeanSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - in_channels: Optional[int] = 4): - super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() - - # Create PillarFeatureNet layers - self.in_channels = in_channels +# # Concat +# features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) +# return features - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: - """Forward function. - - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - num_points (torch.Tensor): Number of points in each pillar in shape (M). - coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). 
- - Returns: - torch.Tensor: Features of pillars in shape (M, C). - """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - - return voxel_fourier_features From db8e7f8b2193e883ac806923a6f335dc127c8c8b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:41:59 +0900 Subject: [PATCH 081/162] Updated --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 6 +++--- ...r_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 843624b56..6c1955505 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -26,7 +26,7 @@ def __init__(self, max_norm_values (Tuple[float]): Maximum values for the features. in_channels (int): Number of input channels. """ - super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() + super(HardSimpleVoxelSinCosEncoder, self).__init__() # Create PillarFeatureNet layers self.in_channels = in_channels @@ -75,8 +75,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. 
- The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the - offset features, for example, distances. After that, it concatenates the fourier features and the offset features + The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the + offset features, for example, distances. After that, it concatenates the fourier features and the PFN features along the channel dimension for each voxel. Args: diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 02f9642f2..72e73c036 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -27,7 +27,6 @@ voxel_size=_base_.voxel_size, ), pts_voxel_encoder=dict( - _delete_=True, type="BEVFusionVoxelEncoder", in_channels=4, with_distance=False, From 4c907aa1af5f986eaa21ac91a6664b68b7c7de07 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:43:23 +0900 Subject: [PATCH 082/162] Updated --- projects/BEVFusion/bevfusion/__init__.py | 7 +++---- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 2 +- ..._voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index ce9b31aa5..fa23d120c 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, 
HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder +from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet __all__ = [ "BEVFusion", @@ -28,7 +28,6 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", - "BEVFusionVoxelEncoder", - "BEVFusionVoxelSinCosEncoder", - "BEVFusionVoxelMeanSinCosEncoder", + "HardSimpleVoxelSinCosEncoder", + "BEVFusionVoxelFeatureNet", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 6c1955505..f7a5c481c 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -72,7 +72,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, @MODELS.register_module() -class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): +class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. 
The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 72e73c036..b6ad6cac2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -27,7 +27,7 @@ voxel_size=_base_.voxel_size, ), pts_voxel_encoder=dict( - type="BEVFusionVoxelEncoder", + type="BEVFusionVoxelFeatureNet", in_channels=4, with_distance=False, with_cluster_center=True, From 8cb422d5ab8a3c35943ef089169fb8fb89046b3e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:45:03 +0900 Subject: [PATCH 083/162] Updated --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 2 +- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index f7a5c481c..efed0ce5d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -103,7 +103,7 @@ def __init__(self, mode: Optional[str] = 'max', legacy: Optional[bool] = True): - super(BEVFusionVoxelEncoder, self).__init__( + super(BEVFusionVoxelFeatureNet, self).__init__( min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index b6ad6cac2..83a607386 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -28,7 +28,7 @@ ), pts_voxel_encoder=dict( type="BEVFusionVoxelFeatureNet", - in_channels=4, + in_channels=len(_base_.lidar_sweep_dims), with_distance=False, with_cluster_center=True, with_voxel_center=True, diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index d56e6d1a3..5a880d975 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -13,7 +13,7 @@ pad_size_divisor=32, ), pts_voxel_encoder=dict( - type="BEVFusionVoxelMeanSinCosEncoder", + type="HardSimpleVoxelSinCosEncoder", in_channels=4, ), pts_middle_encoder=dict( From 5635a003821391a44d67eed601be99c88a58a84d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:48:59 +0900 Subject: [PATCH 084/162] Updated --- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 83a607386..36c39dd5a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -32,7 +32,7 @@ with_distance=False, with_cluster_center=True, with_voxel_center=True, - feat_channels=[16, 16], + feat_channels=[16], point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), From fb27f498e2312bf60dee8ecb0c1e5c4b489bba39 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 12 May 2026 05:56:51 +0900 Subject: [PATCH 085/162] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index efed0ce5d..83cd70482 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -109,7 +109,7 @@ def __init__(self, ) assert len(feat_channels) > 0 self.legacy = legacy - pfn_in_channels = 0 + pfn_in_channels = in_channels if with_cluster_center: pfn_in_channels += 3 if with_voxel_center: @@ -165,11 +165,14 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, """ # (M, C*C*2) voxel_fourier_features = super().forward(features, num_points, coors) + + # Normalize the features + norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) # Offset features max_points_per_voxel = features.shape[1] - features_ls = [] + features_ls = [norm_features] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: points_mean = features[:, :, :3].sum( From bb5d7579e5d9906bf89e3ec9a88f54802992bd49 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 12 May 2026 22:24:10 +0900 Subject: [PATCH 086/162] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 1de3af05f..69417347b 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -387,7 +387,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() if self.loss_iou is not None: - batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501 + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot From 3f64c2c5323efc1ee5d0283f44f1255bd53dc3e5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 13 May 2026 19:31:59 +0900 Subject: [PATCH 087/162] Resolve conflict --- ..._lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 6 +++--- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 4cf51faa5..71c1829d4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", 
"../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index e3975f6eb..d28468f71 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -23,7 +23,7 @@ convert_to_iter_based=True, ), dict( - type="CosineAnnealingLR + type="CosineAnnealingLR", T_max=(max_epochs - t_max), eta_min=lr * 1e-4, begin=t_max, From 1a81b03587668e4009a34d80fe050878f895a757 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 14 May 2026 16:23:50 +0900 Subject: [PATCH 088/162] Add local 3d box expand --- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/local_3d_bbox.py | 57 +++++++ .../pipelines/default_lidar_120m_width.py | 150 ++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py diff --git 
a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index 6bc932f1a..b20961db6 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,3 +1,4 @@ from .object_min_points_filter import ObjectMinPointsFilter +from .local_3d_bbox import Local3DBBoxExpand -__all__ = ["ObjectMinPointsFilter"] +__all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"] diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py new file mode 100644 index 000000000..e417c4bfb --- /dev/null +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -0,0 +1,57 @@ +import numpy as np + +from mmcv.transforms import BaseTransform +from mmdet3d.structures.ops import box_np_ops +from mmengine.registry import TRANSFORMS + + +@TRANSFORMS.register_module() +class Local3DBBoxExpand(BaseTransform): + """Locally expand the 3D bounding boxes by enlarging their width; the points themselves are not modified. + + Args: + expand_widths: (List[float]): Uniformly sampled expand width. + width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D + bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the + 4th dimension. + label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. + """ + + def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: + assert isinstance(expand_widths, list) + assert len(expand_widths) == 2 + assert expand_widths[0] < expand_widths[1] + self.expand_widths = expand_widths + self.width_dim = width_dim + self.label_ids = label_ids + + def transform(self, input_dict: dict) -> dict: + """Call function to locally augment the 3D bounding boxes by scaling the width. 
+ + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ + key is updated in the result dict. + """ + # Label mask + if self.label_ids is not None: + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + else: + label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) + + for i in range(len(input_dict["gt_bboxes_3d"])): + if not label_masks[i]: + continue + + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) + input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f"(expand_widths={self.expand_widths}, width_dim={self.width_dim}, label_ids={self.label_ids})" + return repr_str diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py new file mode 100644 index 000000000..0b32cc86a --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py @@ -0,0 +1,150 @@ +# Dataset parameters +backend_args = None +num_workers = 16 +input_modality = dict(use_lidar=True, use_camera=False) + +# range setting +point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] +eval_class_range = { + "car": 120, + "truck": 120, + "bus": 120, + "bicycle": 120, + "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, +} + +# LiDAR parameters +point_load_dim = 5 # x, y, z, intensity, ring_id +point_use_dim = 4 +lidar_sweep_dims = [0, 1, 2, 4] # x, y, z, time_lag +sweeps_num = 1 + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + 
use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + # For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirrors + dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]), + # For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70 + dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", +
sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Filtering configuration +# Note: +# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, +# e.g., dict(filter_frames_with_missing_image=True). +# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so +# image-based filtering does not apply and `filter_cfg` is intentionally None. +filter_cfg = None From 5257c01a90aebb4f6aea4343717073c660089885 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 15 May 2026 00:26:00 +0900 Subject: [PATCH 089/162] Resolve conflict --- .../datasets/transforms/local_3d_bbox.py | 42 ++++++++++--------- ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py index e417c4bfb..96772cf44 100644 --- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -1,3 +1,5 @@ +from typing import List + import numpy as np from mmcv.transforms import BaseTransform @@ -12,20 +14,20 @@ class Local3DBBoxExpand(BaseTransform): Args: expand_widths: (List[float]): Uniformly sampled expand width. width_dim: (int): The dimension of the width. 
Default is 4, which is the width dimension of the 3D - bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the - 4th dimension. - label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. - """ + bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the + 4th dimension. + label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. + """ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: assert isinstance(expand_widths, list) assert len(expand_widths) == 2 assert expand_widths[0] < expand_widths[1] self.expand_widths = expand_widths - self.width_dim = width_dim - self.label_ids = label_ids + self.width_dim = width_dim + self.label_ids = label_ids - def transform(self, input_dict: dict) -> dict: + def transform(self, input_dict: dict) -> dict: """Call function to locally augment the 3D bounding boxes by scaling the width. Args: @@ -35,20 +37,20 @@ def transform(self, input_dict: dict) -> dict: dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ key is updated in the result dict. 
""" - # Label mask - if self.label_ids is not None: - label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] - else: - label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) - - for i in range(len(input_dict["gt_bboxes_3d"])): - if not label_masks[i]: - continue - - expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) - input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width + # Label mask + if self.label_ids is not None: + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + else: + label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) + + for i in range(len(input_dict["gt_bboxes_3d"])): + if not label_masks[i]: + continue + + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) + input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width - return input_dict + return input_dict def __repr__(self) -> str: """str: Return a string that describes the module.""" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 3b7c23b18..02ed7542a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth" +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" From 6dde84dd661ce4f48b23c7c9286d17f1f18be82e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan 
Date: Sat, 16 May 2026 18:29:09 +0900 Subject: [PATCH 090/162] Add local 3d box expand --- ..._second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index e3f7d5146..3bdda213e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py", ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs From 5213d864533cb7f879895d9552b7b04a7423b7ab Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 17 May 2026 16:46:20 +0900 Subject: [PATCH 091/162] Add local 3d box expand --- .../detection3d/dataset/t4dataset/base.py | 3 + .../detection3d/dataset/t4dataset/j6gen2.py | 3 + .../dataset/t4dataset/j6gen2_base.py | 3 + .../dataset/t4dataset/j6gen2_v2.py | 194 ------------------ ...30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 6 +- 5 files changed, 12 insertions(+), 197 deletions(-) delete mode 100644 
autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 3be587072..8e49f2396 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -29,6 +29,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", "db_largebus_v1", "db_largebus_v2", "db_largebus_v3", diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index 0324e7207..a93bf56af 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -27,6 +27,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", ] dataset_test_groups = { diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index b9ec03f27..170086752 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -27,6 +27,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", "db_largebus_v1", "db_largebus_v2", "db_largebus_v3", diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py deleted file mode 100644 index e4375d576..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py +++ /dev/null @@ -1,194 +0,0 @@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - "autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset 
type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl" -info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl" -info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet" - -# dataset scene setting -dataset_version_list = [ - "db_j6gen2_v2", -] - -dataset_test_groups = { - "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True), -} - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) -camera_types = { - "CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - "movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": 
"bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of ["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": 
(100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", "cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] 
when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 213f0041b..64d494655 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py", ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs From e616c02adf9a2451a9e9e26088a2c7469b531435 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 18 May 2026 23:35:01 +0900 Subject: [PATCH 092/162] Add local 3d box expand --- projects/BEVFusion/bevfusion/utils.py | 28 ++-- .../default_lidar_second_secfpn_120m.py | 16 +- .../pipelines/default_lidar_120m_width.py | 150 ------------------ 3 files changed, 25 insertions(+), 169 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py diff --git 
a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..84797cc51 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -85,26 +85,31 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1) predictions_dicts = [] - for i in range(heatmap.shape[0]): - boxes3d = final_box_preds[i] - scores = final_scores[i] - labels = final_preds[i] - predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} - predictions_dicts.append(predictions_dict) - - if filter is False: + if not filter: + for i in range(heatmap.shape[0]): + boxes3d = final_box_preds[i] + scores = final_scores[i] + labels = final_preds[i] + predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} + predictions_dicts.append(predictions_dict) return predictions_dicts - + # use score threshold if self.score_threshold is not None: - thresh_mask = final_scores > self.score_threshold + if isinstance(self.score_threshold, float): + thresh_mask = final_scores > self.score_threshold + elif isinstance(self.score_threshold, (list, tuple)): + score_threshold = final_scores.new_tensor(self.score_threshold) + thresh_mask = final_scores > score_threshold[final_preds] + else: + raise ValueError("score_threshold must be a float or list") + predictions_dicts = [] if self.post_center_range is not None: self.post_center_range = torch.tensor(self.post_center_range, device=heatmap.device) mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(2) mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(2) - predictions_dicts = [] for i in range(heatmap.shape[0]): cmask = mask[i, :] if self.score_threshold: @@ -114,7 +119,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): scores = final_scores[i, cmask] labels = final_preds[i, cmask] predictions_dict = {"bboxes": 
boxes3d, "scores": scores, "labels": labels} - predictions_dicts.append(predictions_dict) else: raise NotImplementedError( diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 5a880d975..f1fa5a90d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -84,14 +84,14 @@ test_cfg=dict( dataset="t4datasets", out_size_factor=8, - nms_type=None, # Set to "circle" for circle_nms + nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], nms_threshold=0.5), - dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.5), - dict(class_names=["traffic_cone"], nms_threshold=0.175), + dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5), # It's radius if using circle_nms + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0), + dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0), + dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0), ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling @@ -99,7 +99,9 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, + # score_threshold=0.03, + # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER + score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], out_size_factor=8, code_size=10, ), diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py deleted file mode 100644 index 0b32cc86a..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py +++ /dev/null @@ -1,150 +0,0 @@ -# Dataset parameters -backend_args = None -num_workers = 16 -input_modality = dict(use_lidar=True, use_camera=False) - -# range setting -point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] -eval_class_range = { - "car": 120, - "truck": 120, - "bus": 120, - "bicycle": 120, - "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, -} - -# LiDAR parameters -point_load_dim = 5 # x, y, z, intensity, ring_id -point_use_dim = 4 -lidar_sweep_dims = [0, 1, 2, 4] # x, y, z, time_lag -sweeps_num = 1 - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - # For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirros - dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]), - # For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70 - dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", 
point_cloud_range=point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Filtering configuration -# Note: -# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, -# e.g., dict(filter_frames_with_missing_image=True). 
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so -# image-based filtering does not apply and `filter_cfg` is intentionally None. -filter_cfg = None From b5036550910474284c7346d746060eff066d4b95 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 15 May 2026 22:38:53 +0900 Subject: [PATCH 093/162] Updated --- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 36c39dd5a..3208a592c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter From 6a9a47578d58018db28cab42b1e5364666bd4302 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 18 May 2026 23:41:31 +0900 Subject: [PATCH 094/162] Updated --- ..._secfpn_50e_8xb16_base_120m_48_channels.py | 158 ------------------ 1 file changed, 158 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py deleted file mode 100644 index 3208a592c..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ /dev/null @@ -1,158 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb16_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - ), - pts_voxel_encoder=dict( - type="BEVFusionVoxelFeatureNet", - in_channels=len(_base_.lidar_sweep_dims), - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 
0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - legacy=False - ), - pts_middle_encoder=dict( - in_channels=48, - sparse_shape=_base_.grid_size, - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - 
sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) From f0f4542b29768388482152b5f5ff5b127cd795b7 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:22:48 +0900 Subject: [PATCH 095/162] Updated --- projects/BEVFusion/bevfusion/sparse_encoder.py | 15 +++++++++++---- ...dar_voxel_second_secfpn_50e_8xb16_base_120m.py | 4 ++-- .../models/default_lidar_second_secfpn_120m.py | 9 ++++++--- .../default/pipelines/default_lidar_120m.py | 4 ++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index ce45d4536..6e98a73ab 100644 --- 
a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -1,4 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. + +from typing import Dict, Optional + +import numpy as np +import torch + from mmdet3d.models.layers import make_sparse_convmodule from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE from mmdet3d.models.middle_encoders import SparseEncoder @@ -9,8 +15,6 @@ else: from mmcv.ops import SparseConvTensor -import numpy as np -import torch @MODELS.register_module() @@ -56,6 +60,8 @@ def __init__( encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), block_type="conv_module", return_middle_feats=False, + encoder_strides=(2, 2, 2, -1), + output_stride=2, ): super(SparseEncoder, self).__init__() assert block_type in ["conv_module", "basicblock"] @@ -66,6 +72,7 @@ def __init__( self.output_channels = output_channels self.encoder_channels = encoder_channels self.encoder_paddings = encoder_paddings + self.encoder_strides = encoder_strides self.stage_num = len(self.encoder_channels) self.fp16_enabled = False self.return_middle_feats = return_middle_feats @@ -110,7 +117,7 @@ def __init__( indice_key="spconv_down2", conv_type="SparseConv3d", ) - + def forward(self, voxel_features, coors, batch_size): """Forward of SparseEncoder. 
@@ -138,7 +145,7 @@ def forward(self, voxel_features, coors, batch_size): for encoder_layer in self.encoder_layers: x = encoder_layer(x) encode_features.append(x) - + # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 6d3a1f93b..28499b4f9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" +info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index f1fa5a90d..e3297de3d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -1,6 +1,7 @@ num_proposals = 500 max_num_points = 32 max_voxels = [120000, 160000] +out_size_factor = 8 model = dict( type="BEVFusion", @@ -23,7 +24,9 @@ norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), + encoder_strides=(2, 2, 2, -1), 
# No stride for the last stage block_type="basicblock", + output_stride=2, # downsample stride ), pts_backbone=dict( type="SECOND", @@ -68,7 +71,7 @@ ), train_cfg=dict( dataset="t4datasets", - out_size_factor=8, + out_size_factor=out_size_factor, gaussian_overlap=0.1, min_radius=2, pos_weight=-1, @@ -83,7 +86,7 @@ ), test_cfg=dict( dataset="t4datasets", - out_size_factor=8, + out_size_factor=out_size_factor, nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ @@ -102,7 +105,7 @@ # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], - out_size_factor=8, + out_size_factor=out_size_factor, code_size=10, ), loss_cls=dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 09b9f7b26..317c594c1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -5,8 +5,8 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] +voxel_size = [0.15, 0.15, 0.2] +grid_size = [1632, 1632, 41] eval_class_range = { "car": 120, "truck": 120, From c9c34bb3c35c39bd452dde6d35e5936072d66fc6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:24:16 +0900 Subject: [PATCH 096/162] Add local 3d box expand --- projects/BEVFusion/bevfusion/bevfusion_head.py | 1 + ..._secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../models/default_lidar_second_secfpn_120m.py | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 69417347b..96c38658b 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -435,6 +435,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F circle_nms( boxes_for_nms.detach().cpu().numpy(), nms_cluster["nms_threshold"], + post_max_size=nms_cluster["post_max_size"], ) ) else: diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 3bdda213e..4a32f99a0 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index e3297de3d..8b450d72a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,11 +90,11 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], class_indices=[3], 
nms_threshold=0.0), - dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0), - dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0), - dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0), + dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), + dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), + dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010], out_size_factor=out_size_factor, code_size=10, ), From a8073f8ddf7bf1c876b87c2eb1a489bfc644b3f5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:24:39 +0900 Subject: [PATCH 097/162] Add local 3d box expand --- ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 71c1829d4..b3858fb06 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 8b450d72a..e871fce58 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,8 +104,8 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010], - out_size_factor=out_size_factor, + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015], + out_size_factor=8, code_size=10, ), loss_cls=dict( From be70f2fbb991e5d1272df5689bdeeb775df3d02f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:23:07 +0900 Subject: [PATCH 098/162] Add local 3d box expand --- ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 
b3858fb06..71c1829d4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 4a32f99a0..c77e0332b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From a5b406540c1495081c62ed8e4ddf338914fbee42 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 20:14:30 +0900 Subject: [PATCH 099/162] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index e871fce58..4e4e7dde7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015], + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015], out_size_factor=8, code_size=10, ), From 0b547bf528d8ec474856a40e0697137c75bab6b6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 20:27:56 +0900 Subject: [PATCH 100/162] Add local 3d box expand --- ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index c77e0332b..288cb4d1b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" 
work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 4e4e7dde7..796fef3e8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015], + score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020], out_size_factor=8, code_size=10, ), From 6949e4154493164b950e56b2c85d38abbca7c29c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 22:37:21 +0900 Subject: [PATCH 101/162] Add local 3d box expand --- ...l_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 288cb4d1b..71a60c0d5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type 
experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 796fef3e8..6b5f28e31 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,9 +90,10 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ + # Sqrt(0.25) = 0.5 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), - dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), + # Sqrt(0.04) = 0.2 + dict(class_names=["bicycle". 
"pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], From bc6d024bc2b5f35dddc08ad5d582e279c358e19c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 22:45:08 +0900 Subject: [PATCH 102/162] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 6b5f28e31..44744c1dd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -93,7 +93,7 @@ # Sqrt(0.25) = 0.5 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms # Sqrt(0.04) = 0.2 - dict(class_names=["bicycle". 
"pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), + dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], @@ -105,7 +105,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020], + score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025], out_size_factor=8, code_size=10, ), From 81e26bbc0fca82c9174f593c2877e48a31436d55 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 20 May 2026 16:55:27 +0900 Subject: [PATCH 103/162] Add local 3d box expand --- ...cond_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 71a60c0d5..245ae0814 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + 
experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 44744c1dd..979dd31f2 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,10 +90,11 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - # Sqrt(0.25) = 0.5 + # Sqrt(0.25) = 0.50 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - # Sqrt(0.04) = 0.2 - dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), + # Sqrt(0.001) = 0.0316 + dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.001, post_max_size=200), + # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], @@ -105,7 +106,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025], + score_threshold=[0.015, 0.010, 0.010, 0.020, 0.030, 0.040, 0.020], out_size_factor=8, code_size=10, ), From 770be09224b0536ab40505abe3aa1a0622bbb1cb Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 20 May 2026 18:52:18 +0900 Subject: [PATCH 104/162] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 979dd31f2..bad602cb7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -92,9 +92,8 @@ nms_clusters=[ # Sqrt(0.25) = 0.50 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - # Sqrt(0.001) = 0.0316 - dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.001, post_max_size=200), - # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200), + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], From 89ab9c329e29afc3f8ae15f217664c3ac3421aa2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 21 May 2026 01:22:59 +0900 Subject: [PATCH 105/162] Updated --- autoware_ml/configs/detection3d/dataset/t4dataset/base.py | 2 ++ autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py | 2 ++ .../configs/detection3d/dataset/t4dataset/j6gen2_base.py | 2 ++ .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 2 ++ .../configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py | 2 ++ autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py | 2 ++ ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- 7 files changed, 13 insertions(+), 1 deletion(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py 
b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 8e49f2396..7f4be6293 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -149,6 +149,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"] diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index a93bf56af..a87166019 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -130,6 +130,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index 170086752..ef0141a5b 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -136,6 +136,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index c08decfa1..0f00a651d 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -123,6 +123,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git 
a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index dbd6e2813..9995cd9b7 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -120,6 +120,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index 2212b8e56..cd42362b5 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -122,6 +122,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 245ae0814..3bdda213e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 9594f3b46afc2a94f59664e3439926ea7e3bac32 Mon 
Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 2026 11:24:43 +0900 Subject: [PATCH 106/162] Updated --- ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 4 ++-- ...n_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 4 ++-- ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 71c1829d4..08df461a4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" +info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 02ed7542a..44785da25 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = 
"info/kokseang_2_8/" +info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_2/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 28499b4f9..3499885e5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -147,3 +147,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file From 096f44389f0f359b9bf4be0ff7ee4da86dc5c888 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 2026 15:41:52 +0900 Subject: [PATCH 107/162] Updated --- projects/BEVFusion/bevfusion/bevfusion.py | 8 +- .../BEVFusion/bevfusion/bevfusion_head.py | 94 ++++++++++++++----- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 3 files changed, 78 insertions(+), 26 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index b113bb566..aeddc09fa 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -179,7 +179,7 @@ def extract_img_feat( if not using_image_features: x = self.get_image_backbone_features(x) - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda",enabled=False): # with torch.autocast(device_type='cuda', dtype=torch.float32): x 
= self.view_transform( x, @@ -200,14 +200,14 @@ def extract_img_feat( def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: if points is not None: # NOTE(knzo25): training and normal inference - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 else: - # NOTE(knzo25): onnx inference. Voxelization happens outside the graph - with torch.cuda.amp.autocast(enabled=False): + # NOTE: (knzo25): onnx inference. Voxelization happens outside the graph + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): # NOTE(knzo25): onnx demmands this diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 96c38658b..df61ff629 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -175,12 +175,22 @@ def __init__( self.dense_heatmap_exclude_pooling_classes = sorted( list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices)) - ) + ) + # Pre-compute the correct order of the classes for the final local_max + heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes + local_concat_class_remapping = [ + heatmap_concat_order.index(i) + for i in range(self.num_classes) + ] else: self.dense_heatmap_pooling_class_indices = None self.dense_heatmap_exclude_pooling_classes = None - + local_concat_class_remapping = [i for i in range(self.num_classes)] + + # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. 
+ self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping)) self.local_heatmap_padding = self.nms_kernel_size // 2 + # NMS clusters self.nms_clusters = self.test_cfg.get("nms_clusters", []) # Add class indices for nms @@ -201,7 +211,8 @@ def __init__( self.partial_ignore_labels = None print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}, \ + local_concat_class_remapping: {self.local_concat_class_remapping}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -261,14 +272,12 @@ def forward_single(self, inputs, metas): ################################# # query initialization ################################# - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): dense_heatmap = self.heatmap_head(fusion_feat.float()) heatmap = dense_heatmap.detach().sigmoid() - local_max = torch.zeros_like(heatmap) - # equals to nms radius = voxel_size * out_size_factor * kenel_size if self.dense_heatmap_pooling_class_indices is not None: - # Pooling + # Pooling selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] local_max_inner = F.max_pool2d( selected_heatmap, @@ -276,31 +285,74 @@ def forward_single(self, inputs, metas): stride=1, padding=0, ) - local_max[ - :, - self.dense_heatmap_pooling_class_indices, - self.local_heatmap_padding : (-self.local_heatmap_padding), - self.local_heatmap_padding : (-self.local_heatmap_padding), - ] = local_max_inner - # Non-pooling classes + + # 2. 
Restore spatial size using F.pad instead of slice mutation + local_max = F.pad( + local_max_inner, + (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, + self.local_heatmap_padding), + mode="constant", + value=0.0 + ) + + # 3. Any non-pooling classes if self.dense_heatmap_exclude_pooling_classes: - local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ - :, self.dense_heatmap_exclude_pooling_classes - ] + excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :] + local_max = torch.cat([local_max, excluded_local_max], dim=1) + local_max = local_max[:, self.local_concat_class_remapping, :, :] else: - local_max = heatmap + local_max = heatmap + + # local_max = torch.zeros_like(heatmap) + # # equals to nms radius = voxel_size * out_size_factor * kenel_size + # if self.dense_heatmap_pooling_class_indices is not None: + # # Pooling + # selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] + # local_max_inner = F.max_pool2d( + # selected_heatmap, + # kernel_size=self.nms_kernel_size, + # stride=1, + # padding=0, + # ) + # local_max[ + # :, + # self.dense_heatmap_pooling_class_indices, + # self.local_heatmap_padding : (-self.local_heatmap_padding), + # self.local_heatmap_padding : (-self.local_heatmap_padding), + # ] = local_max_inner + # # Non-pooling classes + # if self.dense_heatmap_exclude_pooling_classes: + # local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ + # :, self.dense_heatmap_exclude_pooling_classes + # ] + # else: + # local_max = heatmap heatmap = heatmap * (heatmap == local_max) heatmap = heatmap.view(batch_size, heatmap.shape[1], -1) # top num_proposals among all classes - top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] - top_proposals_class = top_proposals // heatmap.shape[-1] - top_proposals_index = top_proposals % heatmap.shape[-1] + flattened_heatmap = heatmap.view(batch_size, -1) + + 
# Use topk instead or argsort to avoid sorting the entire flattened heatmap. + _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) + + # 2. Calculate class and spatial indices + # Use shape[-1] dynamically to handle grid sizes safely. + spatial_dim = heatmap.shape[-1] + top_proposals_class = top_proposals // spatial_dim + top_proposals_index = top_proposals % spatial_dim query_feat = fusion_feat_flatten.gather( index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), dim=-1, ) + # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] + # top_proposals_class = top_proposals // heatmap.shape[-1] + # top_proposals_index = top_proposals % heatmap.shape[-1] + # query_feat = fusion_feat_flatten.gather( + # index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), + # dim=-1, + # ) self.query_labels = top_proposals_class # add category embedding diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 08df461a4..5a4bc00fc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = "work_dirs/bevfusion_lidar_2_8_2/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/epoch_50.pth" From c6c79f80d723bd0550baccbc0d1a896bfedbf089 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 2026 16:49:15 +0900 Subject: [PATCH 108/162] Updated --- 
projects/BEVFusion/bevfusion/bevfusion_head.py | 4 ++-- .../default/pipelines/default_lidar_intensity_120m.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index df61ff629..0b59f274d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -188,7 +188,7 @@ def __init__( local_concat_class_remapping = [i for i in range(self.num_classes)] # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. - self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping)) + self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False) self.local_heatmap_padding = self.nms_kernel_size // 2 # NMS clusters @@ -334,7 +334,7 @@ def forward_single(self, inputs, metas): # top num_proposals among all classes flattened_heatmap = heatmap.view(batch_size, -1) - # Use topk instead or argsort to avoid sorting the entire flattened heatmap. + # Use topk instead of argsort to avoid sorting the entire flattened heatmap. _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) # 2. 
Calculate class and spatial indices diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 1ce2aa2be..ecf983c37 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -5,8 +5,8 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] +voxel_size = [0.15, 0.15, 0.2] +grid_size = [1632, 1632, 41] eval_class_range = { "car": 120, "truck": 120, From 1e639c70d0e3cd9e92bdf226e0e0906c3e9714b0 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 24 May 2026 01:46:53 +0900 Subject: [PATCH 109/162] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 62 +++++-------------- .../bevfusion/bevfusion_voxel_encoder.py | 48 +++++++------- 2 files changed, 43 insertions(+), 67 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b59f274d..8af826d47 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -92,6 +92,7 @@ def __init__( self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None self.loss_bbox = MODELS.build(loss_bbox) self.loss_heatmap = MODELS.build(loss_heatmap) + self.share_conv_out_channels = hidden_channel self.bbox_coder = build_bbox_coder(bbox_coder) self.sampling = False @@ -157,7 +158,11 @@ def __init__( # Position Embedding for Cross-Attention, which is re-used during training # noqa: E501 x_size = self.test_cfg["grid_size"][0] // self.test_cfg["out_size_factor"] y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"] - self.bev_pos = self.create_2D_grid(x_size, y_size) + self.spatial_dim = x_size * y_size + bev_pos = 
self.create_2D_grid(x_size, y_size) + + # Register the bev_pos as a buffer so it moves to the GPU automatically. + self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) self.img_feat_pos = None self.img_feat_collapsed_pos = None @@ -266,8 +271,7 @@ def forward_single(self, inputs, metas): ################################# # image to BEV ################################# - fusion_feat_flatten = fusion_feat.view(batch_size, fusion_feat.shape[1], -1) # [BS, C, H*W] - bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(fusion_feat.device) + fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1) # [BS, C, H*W] ################################# # query initialization @@ -302,34 +306,10 @@ def forward_single(self, inputs, metas): local_max = local_max[:, self.local_concat_class_remapping, :, :] else: local_max = heatmap - - # local_max = torch.zeros_like(heatmap) - # # equals to nms radius = voxel_size * out_size_factor * kenel_size - # if self.dense_heatmap_pooling_class_indices is not None: - # # Pooling - # selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] - # local_max_inner = F.max_pool2d( - # selected_heatmap, - # kernel_size=self.nms_kernel_size, - # stride=1, - # padding=0, - # ) - # local_max[ - # :, - # self.dense_heatmap_pooling_class_indices, - # self.local_heatmap_padding : (-self.local_heatmap_padding), - # self.local_heatmap_padding : (-self.local_heatmap_padding), - # ] = local_max_inner - # # Non-pooling classes - # if self.dense_heatmap_exclude_pooling_classes: - # local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ - # :, self.dense_heatmap_exclude_pooling_classes - # ] - # else: - # local_max = heatmap heatmap = heatmap * (heatmap == local_max) - heatmap = heatmap.view(batch_size, heatmap.shape[1], -1) + # (BS, num_classes, H*W) + heatmap = heatmap.view(batch_size, self.num_classes, -1) # top num_proposals among all classes flattened_heatmap = 
heatmap.view(batch_size, -1) @@ -339,31 +319,21 @@ def forward_single(self, inputs, metas): # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. - spatial_dim = heatmap.shape[-1] - top_proposals_class = top_proposals // spatial_dim - top_proposals_index = top_proposals % spatial_dim + top_proposals_class = top_proposals // self.spatial_dim + top_proposals_index = top_proposals % self.spatial_dim query_feat = fusion_feat_flatten.gather( - index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), + index=top_proposals_index[:, None, :].expand(-1, self.share_conv_out_channels, -1), dim=-1, ) - # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] - # top_proposals_class = top_proposals // heatmap.shape[-1] - # top_proposals_index = top_proposals % heatmap.shape[-1] - # query_feat = fusion_feat_flatten.gather( - # index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), - # dim=-1, - # ) self.query_labels = top_proposals_class # add category embedding one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1) query_cat_encoding = self.class_encoding(one_hot.float()) query_feat += query_cat_encoding - - query_pos = bev_pos.gather( - index=top_proposals_index[:, None, :].permute(0, 2, 1).expand(-1, -1, bev_pos.shape[-1]), - dim=1, - ) + + # (B, N, 2) + query_pos = self.bev_pos.squeeze(0)[top_proposals_index] ################################# # transformer decoder layer (Fusion feature as K,V) ################################# @@ -371,7 +341,7 @@ def forward_single(self, inputs, metas): for i in range(self.num_decoder_layers): # Transformer Decoder Layer # :param query: B C Pq :param query_pos: B Pq 3/6 - query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=bev_pos) + query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, 
key_pos=self.bev_pos) # Prediction res_layer = self.prediction_heads[i](query_feat) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 83cd70482..30afdc41d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -2,8 +2,6 @@ import torch import numpy as np -from mmcv.cnn import build_norm_layer -from mmcv.ops import DynamicScatter from torch import Tensor, nn from mmdet3d.registry import MODELS @@ -26,14 +24,28 @@ def __init__(self, max_norm_values (Tuple[float]): Maximum values for the features. in_channels (int): Number of input channels. """ - super(HardSimpleVoxelSinCosEncoder, self).__init__() + super().__init__() # Create PillarFeatureNet layers self.in_channels = in_channels - - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP + min_norm_values = torch.tensor(min_norm_values) + max_norm_values = torch.tensor(max_norm_values) + # Let alpha = pi * exponents, beta = max - min + # y = ((x - min) / beta) * alpha + # y = alpha / beta * (x - min) + # y = (alpha / beta) * x - (alpha / beta) * min + # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta + # y = scale * x + bias + exponents = (2 ** torch.arange(0, self.in_channels)).float() + alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) + beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) + scale = alpha / beta + bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) + + self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) + self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # 
(1, C, C) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -49,23 +61,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, torch.Tensor: Features of pillars in shape (M, C*C*2). """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() + # (N, M, C) -> (N, C) + voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous() - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - + # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C) + # FMA (fused multiply-add): y = bias + scale * voxel_mean_features + y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1)) # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) + # (N*C, C) -> (N, C*C) + y = y.reshape(-1, self.in_channels*self.in_channels) + # (N, C*C) -> (N, C*C*2) voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) return voxel_fourier_features From 03d94164b806d5596e423b2592c10feef50b8d85 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 24 May 2026 19:51:48 +0900 Subject: [PATCH 110/162] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 8 +++--- .../BEVFusion/bevfusion/sparse_encoder.py | 27 ++++++++++++------- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 1 + ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 1 + ...voxel_second_secfpn_50e_8xb16_base_120m.py | 1 + .../default_lidar_second_secfpn_120m.py | 2 -- 
.../default/pipelines/default_lidar_120m.py | 3 +++ .../pipelines/default_lidar_intensity_120m.py | 3 +++ 8 files changed, 31 insertions(+), 15 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 8af826d47..e3f5610c8 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -265,13 +265,13 @@ def forward_single(self, inputs, metas): Returns: list[dict]: Output results for tasks. """ - batch_size = inputs.shape[0] + # batch_size = inputs.shape[0] fusion_feat = self.shared_conv(inputs) ################################# # image to BEV ################################# - fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1) # [BS, C, H*W] + fusion_feat_flatten = fusion_feat.view(-1, self.share_conv_out_channels, self.spatial_dim) # [BS, C, H*W] ################################# # query initialization @@ -309,10 +309,10 @@ def forward_single(self, inputs, metas): heatmap = heatmap * (heatmap == local_max) # (BS, num_classes, H*W) - heatmap = heatmap.view(batch_size, self.num_classes, -1) + heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim) # top num_proposals among all classes - flattened_heatmap = heatmap.view(batch_size, -1) + flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. 
_, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 6e98a73ab..cd2ffb50b 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -11,7 +11,7 @@ from mmdet3d.registry import MODELS if IS_SPCONV2_AVAILABLE: - from spconv.pytorch import SparseConvTensor + from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor else: from mmcv.ops import SparseConvTensor @@ -28,6 +28,7 @@ class BEVFusionSparseEncoder(SparseEncoder): Args: in_channels (int): The number of input channels. sparse_shape (list[int]): The sparse shape of input tensor. + dense_output_shape (list[int]): The final shape of the dense output tensor. order (list[str], optional): Order of conv module. Defaults to ('conv', 'norm', 'act'). norm_cfg (dict, optional): Config of normalization layer. 
Defaults to @@ -52,6 +53,7 @@ def __init__( self, in_channels, sparse_shape, + dense_output_shapes, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), base_channels=16, @@ -60,19 +62,17 @@ def __init__( encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), block_type="conv_module", return_middle_feats=False, - encoder_strides=(2, 2, 2, -1), - output_stride=2, ): super(SparseEncoder, self).__init__() assert block_type in ["conv_module", "basicblock"] self.sparse_shape = sparse_shape + self.dense_output_shapes = dense_output_shapes self.in_channels = in_channels self.order = order self.base_channels = base_channels self.output_channels = output_channels self.encoder_channels = encoder_channels self.encoder_paddings = encoder_paddings - self.encoder_strides = encoder_strides self.stage_num = len(self.encoder_channels) self.fp16_enabled = False self.return_middle_feats = return_middle_feats @@ -149,11 +149,20 @@ def forward(self, voxel_features, coors, batch_size): # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - spatial_features = out.dense() - - N, C, H, W, D = spatial_features.shape - spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() - spatial_features = spatial_features.view(N, C * D, H, W) + # Return (N, H, W, D, C) instead of (N, C, H, W, D) + spatial_features = out.dense(channels_first=False) + + # Reshape to (N, C, D, H, W) + spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() + spatial_features = spatial_features.view( + batch_size, + self.output_channels * self.dense_output_shapes[2], + self.dense_output_shapes[0], + self.dense_output_shapes[1], + ) + # N, C, H, W, D = spatial_features.shape + # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() + # spatial_features = spatial_features.view(N, C * D, H, W) if self.return_middle_feats: return spatial_features, encode_features diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 5a4bc00fc..269a0f00e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=50, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 44785da25..d3c5154c6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=50, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 3499885e5..70c27f0a7 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=32, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index bad602cb7..aa275f558 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -24,9 +24,7 @@ norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), - encoder_strides=(2, 2, 2, -1), # No stride for the last stage block_type="basicblock", - output_stride=2, # downsample stride ), pts_backbone=dict( type="SECOND", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 317c594c1..613ff8d0c 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -7,6 +7,9 @@ point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] voxel_size = [0.15, 0.15, 0.2] grid_size = [1632, 1632, 41] +# Sparse dense output shapes +sparse_dense_output_shapes = [204, 204, 2] + eval_class_range = { "car": 120, "truck": 120, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 
ecf983c37..e7b78955a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -7,6 +7,9 @@ point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] voxel_size = [0.15, 0.15, 0.2] grid_size = [1632, 1632, 41] +# Sparse dense output shapes +sparse_dense_output_shapes = [204, 204, 2] + eval_class_range = { "car": 120, "truck": 120, From 94e175279116c6e34cb570b9b003f0b339691f4d Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 24 May 2026 21:51:41 +0900 Subject: [PATCH 111/162] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 68 +++++++++++++++++++ ...y_lidar_only_intensity_tensorrt_dynamic.py | 1 + .../BEVFusion/deploy/rewriters/__init__.py | 3 + .../BEVFusion/deploy/rewriters/layer_norm.py | 15 ++++ 4 files changed, 87 insertions(+) create mode 100644 projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py create mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py create mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py new file mode 100644 index 000000000..c6841f64c --- /dev/null +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -0,0 +1,68 @@ +""" +Custom SparseConvTensor for BEVFusion. +This customiztion is used to support cleaner ONNX export of sparse convolutions. 
+""" + +from typing import Union, List, Optional + +import torch +from spconv.pytorch import SparseConvTensor +from spconv.core import ConvAlgo + + +class CustomSparseConvTensor(SparseConvTensor): + def __init__(self, + features: torch.Tensor, + indices: torch.Tensor, + spatial_shape: Union[List[int], np.ndarray], + batch_size: int, + grid: Optional[torch.Tensor] = None, + voxel_num: Optional[torch.Tensor] = None, + indice_dict: Optional[dict] = None, + benchmark: bool = False, + permanent_thrust_allocator: bool = False, + enable_timer: bool = False, + force_algo: Optional[ConvAlgo] = None): + """ + Check the superclass documentation for more details. + """ + + super().__init__( + features=features, + indices=indices, + spatial_shape=spatial_shape, + batch_size=batch_size, + grid=grid, + voxel_num=voxel_num, + indice_dict=indice_dict, + benchmark=benchmark, + permanent_thrust_allocator=permanent_thrust_allocator, + enable_timer=enable_timer, + force_algo=force_algo) + + # Precomputation for dense output shape. + self.spatial_shape_list = list(self.spatial_shape) + self.spatial_ndim = len(self.spatial_shape_list) + self.trans_params = list(range(0, self.spatial_ndim + 1)) + self.trans_params.insert(1, self.spatial_ndim + 1) + + def dense(self, channels_first: bool = True): + """ + Convert the sparse tensor to a dense tensor. 
+ """ + C = self.features.shape[1] + out = self.features.zeros( + [ + self.batch_size, + *self.spatial_shape_list, + C, + ] + ) + idx = self.indices.to(self.features.device).long() # [N, 1+D] + out.index_put_(idx.unbind(1), self.features) + if not channels_first: + return out + + out = out.permute(*self.trans_params).contiguous() + return out + \ No newline at end of file diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index e22e0f41b..0936cf8a0 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -5,6 +5,7 @@ "projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", "projects.SparseConvolution", + "projects.BEVFusion.deploy.rewriters", ], allow_failed_imports=False, ) diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py new file mode 100644 index 000000000..1eb59b5c1 --- /dev/null +++ b/projects/BEVFusion/deploy/rewriters/__init__.py @@ -0,0 +1,3 @@ +from .layer_norm import layer_norm__passthrough + +__all__ = ["layer_norm__passthrough"] \ No newline at end of file diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py new file mode 100644 index 000000000..0b8cc09c7 --- /dev/null +++ b/projects/BEVFusion/deploy/rewriters/layer_norm.py @@ -0,0 +1,15 @@ +import torch.nn.functional as F +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name="torch.nn.functional.layer_norm", backend="tensorrt" +) +@FUNCTION_REWRITER.register_rewriter( + func_name="torch.nn.functional.layer_norm", backend="default" +) +def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5): + # 
Call the *original* op so the ONNX exporter sees aten::layer_norm + # and emits a single LayerNormalization node at opset >= 17. + ctx = FUNCTION_REWRITER.get_context() + return ctx.origin_func(input, normalized_shape, weight, bias, eps) \ No newline at end of file From 36bfd4ed24898d13deb977a926d36329f8fc9b22 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 08:15:29 +0900 Subject: [PATCH 112/162] Updated --- ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 269a0f00e..238690492 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 26e6bb0d8337d3b12a0fc94cfad66442ced9eca6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 09:43:58 +0900 Subject: [PATCH 113/162] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 2 ++ ...y_lidar_only_intensity_tensorrt_dynamic.py | 7 ++-- projects/BEVFusion/deploy/exporter.py | 32 +++++++++++++++++-- .../BEVFusion/deploy/rewriters/__init__.py | 3 -- .../BEVFusion/deploy/rewriters/layer_norm.py | 15 --------- 5 files changed, 35 insertions(+), 24 deletions(-) delete 
mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py delete mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index c6841f64c..8481e4853 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -6,6 +6,7 @@ from typing import Union, List, Optional import torch +import numpy as np from spconv.pytorch import SparseConvTensor from spconv.core import ConvAlgo @@ -58,6 +59,7 @@ def dense(self, channels_first: bool = True): C, ] ) + print("out.shape: ", out.shape) idx = self.indices.to(self.features.device).long() # [N, 1+D] out.index_put_(idx.unbind(1), self.features) if not channels_first: diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index 0936cf8a0..0863889bb 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -4,8 +4,7 @@ imports=[ "projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", - "projects.SparseConvolution", - "projects.BEVFusion.deploy.rewriters", + "projects.SparseConvolution" ], allow_failed_imports=False, ) @@ -30,7 +29,7 @@ type="onnx", export_params=True, keep_initializers_as_inputs=False, - opset_version=17, + opset_version=18, save_file="bevfusion_lidar_intensity.onnx", input_names=["voxels", "coors", "num_points_per_voxel"], output_names=["bbox_pred", "score", "label_pred"], @@ -46,5 +45,5 @@ }, }, input_shape=None, - verbose=True, + verbose=False, ) diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index b1a430eb9..ab5489a7e 100644 --- 
a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -2,7 +2,7 @@ import logging import os.path as osp -from typing import Optional +from typing import Optional, Any import numpy as np import onnx @@ -11,12 +11,33 @@ from builder import ExportBuilder from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer from data_classes import ModelData, SetupConfigs -from mmdeploy.core import RewriterContext +from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER from mmdeploy.utils import ( get_root_logger, ) +def purge_mmdeploy_symbolics(op_names: list[str]) -> dict: + """Delete mmdeploy's symbolic records for the given op names. + Both the op-name key (e.g. `"layer_norm"`) and the function-path + bookkeeping key (e.g. `"mmdeploy.pytorch.symbolics.layer_norm.layer_norm__default"`) + are removed. Returns a snapshot of what was deleted for optional restore. + """ + records = SYMBOLIC_REWRITER._registry._rewrite_records + removed: dict = {} + for key in list(records.keys()): + # Primary key: the aten op name itself. + if key in op_names: + removed[key] = records.pop(key) + continue + # Bookkeeping key: full Python path of an implementer function. + # Match by "...symbolics.." or "...symbolics.__" + if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key + for op in op_names): + removed[key] = records.pop(key) + return removed + + class Torch2OnnxExporter: def __init__(self, setup_configs: SetupConfigs, log_level: str): @@ -62,7 +83,14 @@ def _export_model( patched_model (torch.nn.Module): Patched Pytorch model. ir_configs (dict): Configs for intermediate representations in ONNX. 
""" + # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported + # in the tensorrt version + removed = purge_mmdeploy_symbolics(["layer_norm"]) + self.logger.info( + f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" + ) with RewriterContext(**context_info), torch.no_grad(): + list_layer_norm_rewriters() image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: image_feats = self._export_image_backbone(model_data, ir_configs, patched_model) diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py deleted file mode 100644 index 1eb59b5c1..000000000 --- a/projects/BEVFusion/deploy/rewriters/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .layer_norm import layer_norm__passthrough - -__all__ = ["layer_norm__passthrough"] \ No newline at end of file diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py deleted file mode 100644 index 0b8cc09c7..000000000 --- a/projects/BEVFusion/deploy/rewriters/layer_norm.py +++ /dev/null @@ -1,15 +0,0 @@ -import torch.nn.functional as F -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - func_name="torch.nn.functional.layer_norm", backend="tensorrt" -) -@FUNCTION_REWRITER.register_rewriter( - func_name="torch.nn.functional.layer_norm", backend="default" -) -def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5): - # Call the *original* op so the ONNX exporter sees aten::layer_norm - # and emits a single LayerNormalization node at opset >= 17. 
- ctx = FUNCTION_REWRITER.get_context() - return ctx.origin_func(input, normalized_shape, weight, bias, eps) \ No newline at end of file From b096792a9e87f1307c5fe6b5d9f99ad6bde25438 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 12:42:26 +0900 Subject: [PATCH 114/162] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 92 +++++++------------ .../BEVFusion/bevfusion/sparse_encoder.py | 7 +- projects/BEVFusion/deploy/exporter.py | 1 - 3 files changed, 35 insertions(+), 65 deletions(-) diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 8481e4853..b16191ca8 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -2,69 +2,39 @@ Custom SparseConvTensor for BEVFusion. This customiztion is used to support cleaner ONNX export of sparse convolutions. """ +import torch -from typing import Union, List, Optional +from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE -import torch -import numpy as np -from spconv.pytorch import SparseConvTensor -from spconv.core import ConvAlgo +if IS_SPCONV2_AVAILABLE: + from spconv.pytorch import SparseConvTensor +else: + from mmcv.ops import SparseConvTensor -class CustomSparseConvTensor(SparseConvTensor): - def __init__(self, - features: torch.Tensor, - indices: torch.Tensor, - spatial_shape: Union[List[int], np.ndarray], - batch_size: int, - grid: Optional[torch.Tensor] = None, - voxel_num: Optional[torch.Tensor] = None, - indice_dict: Optional[dict] = None, - benchmark: bool = False, - permanent_thrust_allocator: bool = False, - enable_timer: bool = False, - force_algo: Optional[ConvAlgo] = None): - """ - Check the superclass documentation for more details. 
- """ - - super().__init__( - features=features, - indices=indices, - spatial_shape=spatial_shape, - batch_size=batch_size, - grid=grid, - voxel_num=voxel_num, - indice_dict=indice_dict, - benchmark=benchmark, - permanent_thrust_allocator=permanent_thrust_allocator, - enable_timer=enable_timer, - force_algo=force_algo) - - # Precomputation for dense output shape. - self.spatial_shape_list = list(self.spatial_shape) - self.spatial_ndim = len(self.spatial_shape_list) - self.trans_params = list(range(0, self.spatial_ndim + 1)) - self.trans_params.insert(1, self.spatial_ndim + 1) +def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_shapes: list[int], out_channels: int): + """ + Convert the sparse tensor to a dense tensor. + """ + H, W, D = spatial_shapes + num_cells = batch_size * H * W * D + idx = sparse_tensor.indices.to(sparse_tensor.features.device).long() # [N, 1+D] + b, h, w, d = idx.unbind(1) + # b * (H * W * D) + h*(W*D) + w*D + d + # Factor out the common terms D and W + # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d + linear_idx = ((b * H + h) * W + w) * D + d # [N] - def dense(self, channels_first: bool = True): - """ - Convert the sparse tensor to a dense tensor. 
- """ - C = self.features.shape[1] - out = self.features.zeros( - [ - self.batch_size, - *self.spatial_shape_list, - C, - ] - ) - print("out.shape: ", out.shape) - idx = self.indices.to(self.features.device).long() # [N, 1+D] - out.index_put_(idx.unbind(1), self.features) - if not channels_first: - return out - - out = out.permute(*self.trans_params).contiguous() - return out - \ No newline at end of file + out = torch.zeros( + [ + num_cells, + out_channels + ], + device=sparse_tensor.features.device, + dtype=sparse_tensor.features.dtype, + ) + # out = out.index_copy(0, linear_idx, sparse_tensor.features) + # out = out.scatter(0, linear_idx, sparse_tensor.features) + scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] + out = out.scatter(0, scatter_idx, sparse_tensor.features) + return out.view(batch_size, H, W, D, out_channels) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index cd2ffb50b..f46ca621b 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -11,10 +11,11 @@ from mmdet3d.registry import MODELS if IS_SPCONV2_AVAILABLE: - from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor + from spconv.pytorch import SparseConvTensor else: from mmcv.ops import SparseConvTensor +from .custom_sparse_conv_tensor import sparse_to_dense @MODELS.register_module() @@ -150,8 +151,8 @@ def forward(self, voxel_features, coors, batch_size): # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) # Return (N, H, W, D, C) instead of (N, C, H, W, D) - spatial_features = out.dense(channels_first=False) - + # spatial_features = out.dense(channels_first=False) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # Reshape to (N, C, D, H, W) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = 
spatial_features.view( diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index ab5489a7e..4c5e72ac7 100644 --- a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -90,7 +90,6 @@ def _export_model( f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" ) with RewriterContext(**context_info), torch.no_grad(): - list_layer_norm_rewriters() image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: image_feats = self._export_image_backbone(model_data, ir_configs, patched_model) From 0af40fc3ad622b7dbe1198d77329072512127264 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 18:59:46 +0900 Subject: [PATCH 115/162] Updated --- .../BEVFusion/bevfusion/sparse_encoder.py | 38 +++++++++++++++---- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index f46ca621b..e046d8917 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import os from typing import Dict, Optional import numpy as np @@ -152,18 +153,39 @@ def forward(self, voxel_features, coors, batch_size): out = self.conv_out(encode_features[-1]) # Return (N, H, W, D, C) instead of (N, C, H, W, D) # spatial_features = out.dense(channels_first=False) - spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) + # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # Reshape to (N, C, D, H, W) + # spatial_features = out.dense(channels_first=False) + + # with torch.no_grad(): + # ref = spatial_features + # cand = sparse_to_dense( + # out, batch_size, self.dense_output_shapes, self.output_channels + # ) + # assert ref.shape == cand.shape, ( + # f"shape mismatch: dense={tuple(ref.shape)} " + # f"sparse_to_dense={tuple(cand.shape)}" + # ) + # max_abs = (ref - cand).abs().max().item() + # num_mismatch = (ref != cand).sum().item() + # allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6) + # print( + # f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: " + # f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} " + # f"num_mismatch={num_mismatch} allclose={allclose}" + # ) + # assert allclose, ( + # "sparse_to_dense disagrees with out.dense(channels_first=False) " + # "-- index/order mismatch in BEVFusionSparseEncoder." 
+ # ) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = spatial_features.view( - batch_size, - self.output_channels * self.dense_output_shapes[2], - self.dense_output_shapes[0], - self.dense_output_shapes[1], + batch_size, + self.output_channels * self.dense_output_shapes[2], + self.dense_output_shapes[0], + self.dense_output_shapes[1], ) - # N, C, H, W, D = spatial_features.shape - # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() - # spatial_features = spatial_features.view(N, C * D, H, W) if self.return_middle_feats: return spatial_features, encode_features diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 238690492..7a8afad3e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base_normal_dense/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From f871a14e2cb15b9c9a8a9c0f96f53b612ae6585e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 22:08:46 +0900 Subject: [PATCH 116/162] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 2 +- .../bevfusion/custom_sparse_conv_tensor.py | 5 ++-- 
.../BEVFusion/bevfusion/sparse_encoder.py | 29 ++----------------- .../default/pipelines/default_lidar_120m.py | 6 ++-- .../pipelines/default_lidar_intensity_120m.py | 6 ++-- projects/BEVFusion/deploy/containers.py | 9 ++++-- 6 files changed, 19 insertions(+), 38 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index e3f5610c8..d06305db9 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -315,7 +315,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) + _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. 
diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index b16191ca8..054b0609b 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -24,11 +24,12 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # Factor out the common terms D and W # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d linear_idx = ((b * H + h) * W + w) * D + d # [N] - + + # out_channels = sparse_tensor.features.shape[1] out = torch.zeros( [ num_cells, - out_channels + sparse_tensor.features.shape[1] ], device=sparse_tensor.features.device, dtype=sparse_tensor.features.dtype, diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index e046d8917..0fc20cd19 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -151,34 +151,9 @@ def forward(self, voxel_features, coors, batch_size): # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - # Return (N, H, W, D, C) instead of (N, C, H, W, D) - # spatial_features = out.dense(channels_first=False) - # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) - # Reshape to (N, C, D, H, W) - # spatial_features = out.dense(channels_first=False) - - # with torch.no_grad(): - # ref = spatial_features - # cand = sparse_to_dense( - # out, batch_size, self.dense_output_shapes, self.output_channels - # ) - # assert ref.shape == cand.shape, ( - # f"shape mismatch: dense={tuple(ref.shape)} " - # f"sparse_to_dense={tuple(cand.shape)}" - # ) - # max_abs = (ref - cand).abs().max().item() - # num_mismatch = (ref != cand).sum().item() - # allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6) - # print( - # f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: " - # 
f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} " - # f"num_mismatch={num_mismatch} allclose={allclose}" - # ) - # assert allclose, ( - # "sparse_to_dense disagrees with out.dense(channels_first=False) " - # "-- index/order mismatch in BEVFusionSparseEncoder." - # ) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) + # spatial_features = out.dense(channels_first=False) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = spatial_features.view( batch_size, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 613ff8d0c..d384bb4e0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -5,10 +5,10 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.15, 0.15, 0.2] -grid_size = [1632, 1632, 41] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] # Sparse dense output shapes -sparse_dense_output_shapes = [204, 204, 2] +sparse_dense_output_shapes = [180, 180, 2] eval_class_range = { "car": 120, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index e7b78955a..394647684 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -5,10 +5,10 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.15, 0.15, 0.2] -grid_size = [1632, 1632, 41] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] # Sparse dense output shapes -sparse_dense_output_shapes = [204, 204, 2] 
+sparse_dense_output_shapes = [180, 180, 2] eval_class_range = { "car": 120, diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 51f2316df..018b5db7e 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -2,7 +2,7 @@ import torch.nn.functional as F # Wrapper Classes for onnx conversion - +import numpy as np class TrtBevFusionImageBackboneContainer(torch.nn.Module): def __init__(self, mod, mean, std) -> None: @@ -49,8 +49,13 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, + } - + + voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") + coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") + num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") + print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape) if points is not None: batch_inputs_dict["points"] = [points] From 9a4642a59d8f869cb389d7317adbdc7267e069da Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:12:53 +0900 Subject: [PATCH 117/162] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 4 +- .../bevfusion/custom_sparse_conv_tensor.py | 1 - .../BEVFusion/bevfusion/ops/topk/__init__.py | 0 projects/BEVFusion/bevfusion/ops/topk/topk.py | 45 +++++++++++++++++++ ...voxel_second_secfpn_50e_8xb16_base_120m.py | 4 +- .../default_camera_lidar_intensity_120m.py | 2 + .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + 8 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 projects/BEVFusion/bevfusion/ops/topk/__init__.py create mode 100644 projects/BEVFusion/bevfusion/ops/topk/topk.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py 
b/projects/BEVFusion/bevfusion/bevfusion_head.py index d06305db9..5b6c5ad92 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -17,6 +17,8 @@ from mmengine.structures import InstanceData from torch import nn +from .ops.topk.topk import topk + def clip_sigmoid(x, eps=1e-4): y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) @@ -315,7 +317,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False) + top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 054b0609b..175c08bed 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -25,7 +25,6 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d linear_idx = ((b * H + h) * W + w) * D + d # [N] - # out_channels = sparse_tensor.features.shape[1] out = torch.zeros( [ num_cells, diff --git a/projects/BEVFusion/bevfusion/ops/topk/__init__.py b/projects/BEVFusion/bevfusion/ops/topk/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py new file mode 100644 index 000000000..020e677bf --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -0,0 +1,45 @@ +""" +This file is used to write functions to deploy custom plugins to support Autoware, for example, TopK. 
+""" + +import torch +from torch.autograd import Function +from torch.onnx.symbolic_helper import _get_tensor_sizes + + +class TopK(Function): + + @staticmethod + def symbolic( + g, + x: torch.Tensor, + k: int, + dim: int, + sorted: bool = False, + ): + + output = g.op( + "autoware::Argsort", + x, + outputs=1, + ) + x_shape = _get_tensor_sizes(x) + if x_shape is not None and hasattr(output.type(), "with_sizes"): + output_type = x.type().with_sizes(x_shape) + output.setType(output_type) + # Argsort from Autoware is in ascending order, so we need to return the last k elements. + return output[-k:] + + @staticmethod + def forward( + ctx, + x: torch.Tensor, + k: int, + dim: int, + sorted: bool = False, + ): + indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) + return indices + +def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): + return TopK.apply(x, k, dim, sorted) \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 70c27f0a7..22762bed1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -148,5 +148,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = 
True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 0b0f44c08..1b0f91c3d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -83,6 +83,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index d384bb4e0..929c81c81 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,6 +66,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 394647684..2c95445ea 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,6 +66,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), 
dict( type="Pack3DDetInputs", From 3cfe9870cef009e69e7e860cfb2c105b6f8e0662 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:35:19 +0900 Subject: [PATCH 118/162] Updated --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 5b6c5ad92..2f0a99a9f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -317,7 +317,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) + top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. 
From 798021d22468eb10d0b89e8b73265807d4871bff Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:48:40 +0900 Subject: [PATCH 119/162] Updated --- projects/BEVFusion/bevfusion/ops/topk/topk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py index 020e677bf..a767bb720 100644 --- a/projects/BEVFusion/bevfusion/ops/topk/topk.py +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -38,7 +38,7 @@ def forward( dim: int, sorted: bool = False, ): - indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) + _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) return indices def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): From 40f3ea943da2d862c7bf42d0fd54bde90f7e1d9d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 29 May 2026 16:56:32 +0900 Subject: [PATCH 120/162] Updated --- ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 +- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 22762bed1..ea4f75569 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_0/base_more_filters/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + 
experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 929c81c81..6472bb582 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,8 +66,8 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", From 7053c2d6942aa0b2ed306602ac606d7186b9501a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 30 May 2026 00:20:35 +0900 Subject: [PATCH 121/162] Updated --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index ea4f75569..171c3076e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -148,3 +148,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file From 9854527d554dbfd7134044da6b06e41d907cd822 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 7 Jun 2026 13:29:07 +0900 Subject: [PATCH 122/162] Updated --- 
..._voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py} | 6 +++--- ...d_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py} | 4 ++-- ...voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py} | 6 +++--- ..._secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py} | 6 +++--- ...n_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py} | 6 +++--- ...l_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py} | 4 ++-- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- .../default_lidar_second_secfpn_120m_iou_loss.py | 10 ---------- .../t4dataset/default/pipelines/default_lidar_120m.py | 4 ++-- .../default/pipelines/default_lidar_intensity_120m.py | 4 ++-- ...amw_cosine.py => default_30e_8xb32_adamw_cosine.py} | 4 ++-- ...amw_cosine.py => default_50e_8xb32_adamw_cosine.py} | 4 ++-- 12 files changed, 25 insertions(+), 35 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py} (95%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py} (95%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py} (96%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py} (92%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py} (95%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py => 
bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py} (94%) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb16_adamw_cosine.py => default_30e_8xb32_adamw_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb16_adamw_cosine.py => default_50e_8xb32_adamw_cosine.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py index 7a8afad3e..772718987 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb16_adamw_cosine.py", + "../default/schedulers/default_30e_8xb32_adamw_cosine.py", "../default/default_misc.py", ] @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base_normal_dense/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" +experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type 
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py index 3bdda213e..ccccc5c93 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py", ] # user setting experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py index d3c5154c6..9403a825f 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb16_adamw_cosine.py", + "../default/schedulers/default_30e_8xb32_adamw_cosine.py", "../default/default_misc.py", ] @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_2/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" +experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py similarity index 92% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py index 64d494655..0b74d816d 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py", ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" +experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py index 171c3076e..1c0facf9e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb16_adamw_cosine.py", + "../default/schedulers/default_50e_8xb32_adamw_cosine.py", 
"../default/default_misc.py", ] @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_0/base_more_filters/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb32_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py similarity index 94% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py index 98a65a3f9..c751bf4db 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py", ] # user setting experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index aa275f558..1a1cd680e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -96,7 +96,7 @@ dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling + dense_heatmap_pooling_classes=["car", "truck", "bus", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py deleted file mode 100644 index e90687fe3..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py +++ /dev/null @@ -1,10 +0,0 @@ -_base_ = [ - "./default_lidar_second_secfpn_120m.py", -] - -model = dict( - bbox_head=dict( - common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), - loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), - ), -) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 6472bb582..ec9bc9f19 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,8 +66,8 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], 
min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 2c95445ea..84510f95f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,8 +66,8 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py index d28468f71..5594d8108 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py @@ -1,12 +1,12 @@ # learning rate lr = 2.0e-4 -t_max = 3 +t_max = 2 max_epochs = 30 val_interval = 5 train_gpu_size = 8 test_batch_size = 4 -train_batch_size = 16 +train_batch_size = 32 param_scheduler = [ # learning rate scheduler diff --git 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py index d209d0c1b..97dcaf3e4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py @@ -1,12 +1,12 @@ # learning rate lr = 2.0e-4 -t_max = 5 +t_max = 3 max_epochs = 50 val_interval = 5 train_gpu_size = 8 test_batch_size = 4 -train_batch_size = 16 +train_batch_size = 32 param_scheduler = [ # learning rate scheduler From d1fc710d06f11f753476e83e7e18b32994c9321e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 7 Jun 2026 13:30:21 +0900 Subject: [PATCH 123/162] Updated --- ...sion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py | 2 +- ...ion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py | 2 +- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py index 772718987..f19daed6d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py @@ -153,4 +153,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_2/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/epoch_50.pth" 
+load_from = None diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py index 9403a825f..280d7876c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py @@ -153,4 +153,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = None diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py index 1c0facf9e..33459b36c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py @@ -148,5 +148,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = True \ No newline at end of file From 6f5dbebec9d5c650280ba3cefed90a260e3c0cc3 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 7 Jun 2026 14:15:35 +0900 Subject: [PATCH 124/162] Updated --- ...n_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} | 4 ++-- ...l_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} | 4 ++-- ..._lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} | 4 ++-- ..._second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} | 4 ++-- ...evfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py} | 4 ++-- 
...ar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} | 4 ++-- ...xb32_adamw_cosine.py => default_30e_8xb16_adamw_cosine.py} | 2 +- ...xb32_adamw_cosine.py => default_50e_8xb16_adamw_cosine.py} | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} (95%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} (95%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} (95%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb32_adamw_cosine.py => default_30e_8xb16_adamw_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb32_adamw_cosine.py => default_50e_8xb16_adamw_cosine.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index f19daed6d..442b3d994 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb32_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8_1/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 
ccccc5c93..3bdda213e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py", ] # user setting experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_j6gen2_base_120m_t4metric_v2" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 280d7876c..a6649532e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb32_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", 
"../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8_1/" experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 0b74d816d..394fd1a5f 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py", ] # user setting experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb32_jpntaxi_base_120m_t4metric_v2" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index d7c9e189b..3debad7d6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb32_adamw_cosine.py", + "../default/schedulers/default_50e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8_1/" experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb32_base_120m" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py index c751bf4db..319fc0da1 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_50e_8xb32_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py", ] # user setting experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb32_base_120m_t4metric_v2" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index 5594d8108..fde20bebd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb32_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -6,7 +6,7 @@ train_gpu_size = 8 test_batch_size = 4 -train_batch_size = 32 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py index 97dcaf3e4..cf8b0e4f2 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb32_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py @@ -6,7 +6,7 @@ train_gpu_size = 8 test_batch_size = 4 -train_batch_size = 32 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler From e9a2a161ac673940ab6c6615a055f7b0db4093b9 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 11 Jun 2026 14:44:16 +0900 Subject: [PATCH 125/162] Updated --- ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 4 ++-- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 442b3d994..fee0b26b9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -153,4 +153,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index fde20bebd..336861ba4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -1,6 +1,6 @@ # learning rate -lr = 2.0e-4 -t_max = 2 +lr = 1.0e-4 +t_max = 3 max_epochs = 30 val_interval = 5 From d2aee28ab8105e714e695f72d66d1e266212c056 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 13 Jun 2026 08:16:32 +0900 Subject: [PATCH 126/162] Updated --- .../default/pipelines/default_lidar_intensity_120m.py | 2 +- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 84510f95f..7538eec2c 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,7 +66,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index 336861ba4..cc5c780bd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.0e-4 +lr = 2.0e-4 t_max = 3 max_epochs 
= 30 val_interval = 5 @@ -10,7 +10,7 @@ param_scheduler = [ # learning rate scheduler - # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10 + # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 5 # during the next epochs, learning rate decreases from lr * 10 to # lr * 1e-4 dict( From f4fa2f0dbd9309f173b52a73d1e8ed1ee4cf9ce7 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 13 Jun 2026 14:10:36 +0900 Subject: [PATCH 127/162] Updated --- .../detection3d/evaluation/t4metric/t4metric_v2.py | 14 ++++++++++++++ ...ecfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 ++ ...cfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 2 ++ 3 files changed, 18 insertions(+) diff --git a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py index c7865320f..25e2f773f 100644 --- a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py +++ b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py @@ -243,6 +243,7 @@ def __init__( experiment_name: str, experiment_group_name: str, write_metric_summary: bool, + min_num_points: int = 0, evaluate_frame_prefix: bool = True, checkpoint_path: Optional[Union[Path, str]] = None, scene_batch_size: int = 128, @@ -268,6 +269,7 @@ def __init__( self.experiment_name = experiment_name self.experiment_group_name = experiment_group_name self.name_mapping = name_mapping + self.min_num_points = min_num_points if name_mapping is not None: self.class_names = [self.name_mapping.get(name, name) for name in self.class_names] @@ -1324,6 +1326,18 @@ def _parse_ground_truth_from_sample(self, time: float, data_sample: Dict[str, An # num_lidar_pts: (N,) array of int, number of LiDAR points inside each GT box num_lidar_pts: np.ndarray = eval_info.get("num_lidar_pts", np.array([])) + + if self.min_num_points > 0 and len(bboxes): + points = data_sample["points"] + indices = box_np_ops.points_in_rbbox( + 
points.tensor.numpy()[:, :3], + bboxes[:, :7] + ) + num_points_in_gt = indices.sum(0) + bboxes_mask = num_points_in_gt >= self.min_num_points + bboxes = bboxes[bboxes_mask] + gt_labels_3d = gt_labels_3d[bboxes_mask] + num_lidar_pts = num_lidar_pts[bboxes_mask] dynamic_objects = [ DynamicObject( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 3bdda213e..06b5321f1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -50,6 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=1 ) test_evaluator = dict( @@ -72,4 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=1 ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 394fd1a5f..db26312de 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -50,6 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=1 ) test_evaluator = dict( @@ -72,4 +73,5 @@ 
name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=1 ) From 0ef784230940c4a5944cb00e66e22927df4f1520 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 13 Jun 2026 15:02:26 +0900 Subject: [PATCH 128/162] Updated --- Dockerfile | 2 +- .../evaluation/t4metric/t4metric_v2.py | 103 ++++++++++++++++-- 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9f1a8c15b..3d872bf88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ RUN python3 -m pip --no-cache-dir install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@f67600293e3e74861f913da3efafd6bf436d2ab6 +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@85b78e52bca312911ea6730163dfeaa63c66c628 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ diff --git a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py index 25e2f773f..7f79b7aaa 100644 --- a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py +++ b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py @@ -30,6 +30,7 @@ from perception_eval.evaluation.result.perception_frame_result import PerceptionFrameResult from perception_eval.manager import PerceptionEvaluationManager from pyquaternion import Quaternion +from mmdet3d.structures.ops import box_np_ops from autoware_ml.detection3d.evaluation.t4metric.t4metric_v2_dataframe import T4MetricV2DataFrame @@ -436,11 +437,12 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: if self.results_pickle_exists: # Skip processing if result pickle already exists return - - for data_sample in data_samples: + + batch_points = 
data_batch['inputs']['points'] + for data_sample, points in zip(data_samples, batch_points): current_time = data_sample["timestamp"] scene_id = self._parse_scene_id(data_sample["lidar_path"]) - frame_ground_truth = self._parse_ground_truth_from_sample(current_time, data_sample) + frame_ground_truth = self._parse_ground_truth_from_sample(current_time, data_sample, points) perception_frame = self._parse_predictions_from_sample(current_time, data_sample, frame_ground_truth) self._save_perception_frame(scene_id, data_sample["sample_idx"], perception_frame) @@ -987,11 +989,27 @@ def _aggregate_metrics_data( # Create precision_interpolate and recall_interpolate keys iterable_metrics[ - f"T4MetricV2_label_detection/{label_name}_precisions_{matching_mode}_{threshold}" + f"T4MetricV2_label_detection/{label_name}_interp-precisions_{matching_mode}_{threshold}" ] = ap.precision_interp.tolist() iterable_metrics[ - f"T4MetricV2_label_detection/{label_name}_recalls_{matching_mode}_{threshold}" + f"T4MetricV2_label_detection/{label_name}_interp-recalls_{matching_mode}_{threshold}" ] = ap.recall_interp.tolist() + iterable_metrics[ + f"T4MetricV2_label_detection/{label_name}_interp-confs_{matching_mode}_{threshold}" + ] = ap.conf_interp.tolist() + + # TP error metrics (e.g. 
ATE, AOE, ASE, AVE, AAE) + if ap.tp_error_metrics is not None: + for tp_error_metric in ap.tp_error_metrics: + mode = tp_error_metric.mode + average_mode = tp_error_metric.average_mode + + iterable_metrics[ + f"T4MetricV2_label_detection/{label_name}_{mode}_values_{matching_mode}_{threshold}" + ] = tp_error_metric.values.tolist() + iterable_metrics[ + f"T4MetricV2_label_detection/{label_name}_{mode}_interp-values_{matching_mode}_{threshold}" + ] = tp_error_metric.interpolated_values.tolist() return iterable_metrics @@ -1046,6 +1064,40 @@ def _process_metrics_for_aggregation(self, metrics_score: MetricsScore, evaluato ap.optimal_precision ) + # Number of prediction matches (TPs) and matches at the optimal confidence threshold + metric_dict[f"T4MetricV2_label/{label_name}_num-match_{matching_mode}_{threshold}"] = ap.num_tp + metric_dict[f"T4MetricV2_label/{label_name}_min-recall-num-match_{matching_mode}_{threshold}"] = ( + ap.num_tp_at_min_recall_conf + ) + metric_dict[ + f"T4MetricV2_label/{label_name}_medium-recall-num-match_{matching_mode}_{threshold}" + ] = ap.num_tp_at_medium_recall_conf + metric_dict[f"T4MetricV2_label/{label_name}_optimal-num-match_{matching_mode}_{threshold}"] = ( + ap.num_tp_at_optimal_conf + ) + + # TP error metrics (e.g. 
ATE, AOE, ASE, AVE, AAE) + if ap.tp_error_metrics is not None: + for tp_error_metric in ap.tp_error_metrics: + mode = tp_error_metric.mode + average_mode = tp_error_metric.average_mode + + metric_dict[ + f"T4MetricV2_label/{label_name}_tp-error_{average_mode}_{matching_mode}_{threshold}" + ] = tp_error_metric.avg_metric + metric_dict[ + f"T4MetricV2_label/{label_name}_tp-error-min-recall-conf_{average_mode}_{matching_mode}_{threshold}" + ] = tp_error_metric.min_recall_conf + metric_dict[ + f"T4MetricV2_label/{label_name}_tp-error-optimal-{average_mode}_{matching_mode}_{threshold}" + ] = tp_error_metric.optimal_avg_metric + metric_dict[ + f"T4MetricV2_label/{label_name}_tp-error-medium-{average_mode}_{matching_mode}_{threshold}" + ] = tp_error_metric.medium_avg_metric + metric_dict[ + f"T4MetricV2_label/{label_name}_tp-error-medium-recall-conf-{average_mode}_{matching_mode}_{threshold}" + ] = tp_error_metric.medium_recall_conf + # Label metadata key metric_dict[f"metadata_label/test_{label_name}_num_predictions"] = label_num_preds metric_dict[f"metadata_label/test_{label_name}_num_ground_truths"] = label_num_gts @@ -1056,6 +1108,41 @@ def _process_metrics_for_aggregation(self, metrics_score: MetricsScore, evaluato metric_dict[map_key] = map_instance.map metric_dict[maph_key] = map_instance.maph + # Add mean TP errors (e.g. 
mATE, mAOE, mASE, mAVE, mAAE) + if map_instance.mean_tp_errors is not None: + for mean_tp_error_name, mean_tp_error_value in map_instance.mean_tp_errors.items(): + metric_dict[f"T4MetricV2/mean-tp-error_{mean_tp_error_name}_{matching_mode}"] = mean_tp_error_value + + optimal_mean_tp_errors = map_instance.optimal_mean_tp_errors.get(mean_tp_error_name, None) + if optimal_mean_tp_errors is not None: + metric_dict[f"T4MetricV2/mean-tp-error-optimal-{mean_tp_error_name}_{matching_mode}"] = ( + optimal_mean_tp_errors + ) + + medium_mean_tp_errors = map_instance.medium_mean_tp_errors.get(mean_tp_error_name, None) + if medium_mean_tp_errors is not None: + metric_dict[f"T4MetricV2/mean-tp-error-medium-{mean_tp_error_name}_{matching_mode}"] = ( + medium_mean_tp_errors + ) + + # Add NuScenes Detection Score (NDS) based on mAP and mAPH + if map_instance.map_based_nds is not None: + metric_dict[f"T4MetricV2/{map_instance.map_based_nds.metric_prefix_name}_nds_{matching_mode}"] = ( + map_instance.map_based_nds.nds + ) + if map_instance.medium_map_based_nds is not None: + metric_dict[ + f"T4MetricV2/{map_instance.medium_map_based_nds.metric_prefix_name}_nds_{matching_mode}" + ] = map_instance.medium_map_based_nds.nds + if map_instance.mapH_based_nds is not None: + metric_dict[f"T4MetricV2/{map_instance.mapH_based_nds.metric_prefix_name}_nds_{matching_mode}"] = ( + map_instance.mapH_based_nds.nds + ) + if map_instance.medium_mapH_based_nds is not None: + metric_dict[ + f"T4MetricV2/{map_instance.medium_mapH_based_nds.metric_prefix_name}_nds_{matching_mode}" + ] = map_instance.medium_mapH_based_nds.nds + total_num_preds = num_preds # Selected evaluator @@ -1297,7 +1384,7 @@ def _parse_scene_id(self, lidar_path: str) -> str: except ValueError: return _UNKNOWN - def _parse_ground_truth_from_sample(self, time: float, data_sample: Dict[str, Any]) -> FrameGroundTruth: + def _parse_ground_truth_from_sample(self, time: float, data_sample: Dict[str, Any], points) -> FrameGroundTruth: 
"""Parses ground truth objects from the given data sample. Args: @@ -1328,9 +1415,9 @@ def _parse_ground_truth_from_sample(self, time: float, data_sample: Dict[str, An num_lidar_pts: np.ndarray = eval_info.get("num_lidar_pts", np.array([])) if self.min_num_points > 0 and len(bboxes): - points = data_sample["points"] + points_cpu = points.cpu().numpy() indices = box_np_ops.points_in_rbbox( - points.tensor.numpy()[:, :3], + points_cpu[:, :3], bboxes[:, :7] ) num_points_in_gt = indices.sum(0) From b3595e0221e974e86dde3819390ff8c701973cb2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 13 Jun 2026 15:39:22 +0900 Subject: [PATCH 129/162] Updated --- .../detection3d/datasets/transforms/object_min_points_filter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py b/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py index 3ed510769..28c9fb9af 100644 --- a/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py +++ b/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py @@ -34,6 +34,7 @@ def transform(self, input_dict: dict) -> dict: indices = box_np_ops.points_in_rbbox( points.tensor.numpy()[:, :3], gt_bboxes_3d.tensor.numpy()[:, :7], + origin=(0.5, 0.5, 0.0) # Always assume it's center_x, center_y and bottom_z ) num_points_in_gt = indices.sum(0) gt_bboxes_mask = num_points_in_gt >= self.min_num_points @@ -107,6 +108,7 @@ def transform(self, input_dict: dict) -> dict: indices = box_np_ops.points_in_rbbox( points.tensor.numpy()[:, :3], gt_bboxes_3d.tensor.numpy()[:, :7], + origin=(0.5, 0.5, 0.0) # Always assume it's center_x, center_y and bottom_z ) num_points_in_gt = indices.sum(0) From e12a2585d413088b76512b40a1ca4975f359e2d0 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 13 Jun 2026 16:04:34 +0900 Subject: [PATCH 130/162] Updated --- ...el_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 06b5321f1..44af5e330 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -50,7 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=1 + min_num_points=2 ) test_evaluator = dict( @@ -73,5 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=1 + min_num_points=2 ) From 0712c0a83528f168789fc2bf4dbcad18e32ce7a3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jun 2026 06:17:50 +0000 Subject: [PATCH 131/162] ci(pre-commit): autofix --- .../datasets/transforms/__init__.py | 2 +- .../datasets/transforms/local_3d_bbox.py | 19 +- .../transforms/object_min_points_filter.py | 4 +- .../evaluation/t4metric/t4metric_v2.py | 13 +- projects/BEVFusion/bevfusion/__init__.py | 2 +- projects/BEVFusion/bevfusion/bevfusion.py | 6 +- .../BEVFusion/bevfusion/bevfusion_head.py | 81 +++++--- .../bevfusion/bevfusion_voxel_encoder.py | 195 ++++++++---------- .../bevfusion/custom_sparse_conv_tensor.py | 13 +- projects/BEVFusion/bevfusion/ops/topk/topk.py | 17 +- .../BEVFusion/bevfusion/sparse_encoder.py | 7 +- projects/BEVFusion/bevfusion/utils.py | 2 +- ...y_lidar_only_intensity_tensorrt_dynamic.py | 6 +- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 20 +- ..._30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 4 +- 
...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 16 +- ...30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 4 +- .../default_lidar_second_secfpn_120m.py | 6 +- .../default_camera_lidar_intensity_120m.py | 2 +- .../default/pipelines/default_lidar_120m.py | 2 +- .../pipelines/default_lidar_intensity_120m.py | 2 +- projects/BEVFusion/deploy/containers.py | 14 +- projects/BEVFusion/deploy/exporter.py | 13 +- 23 files changed, 232 insertions(+), 218 deletions(-) diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index b20961db6..a63ff1eea 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,4 @@ -from .object_min_points_filter import ObjectMinPointsFilter from .local_3d_bbox import Local3DBBoxExpand +from .object_min_points_filter import ObjectMinPointsFilter __all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"] diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py index 96772cf44..ae06d4005 100644 --- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -1,7 +1,6 @@ from typing import List -import numpy as np - +import numpy as np from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops from mmengine.registry import TRANSFORMS @@ -13,11 +12,11 @@ class Local3DBBoxExpand(BaseTransform): Args: expand_widths: (List[float]): Uniformly sampled expand width. - width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D + width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the 4th dimension. label_ids: (List[int]): The label IDs to expand. 
If None, all label IDs will be expanded. - """ + """ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: assert isinstance(expand_widths, list) @@ -26,7 +25,7 @@ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: Li self.expand_widths = expand_widths self.width_dim = width_dim self.label_ids = label_ids - + def transform(self, input_dict: dict) -> dict: """Call function to locally augment the 3D bounding boxes by scaling the width. @@ -37,19 +36,19 @@ def transform(self, input_dict: dict) -> dict: dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ key is updated in the result dict. """ - # Label mask + # Label mask if self.label_ids is not None: - label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] else: label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) for i in range(len(input_dict["gt_bboxes_3d"])): if not label_masks[i]: - continue - + continue + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width - + return input_dict def __repr__(self) -> str: diff --git a/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py b/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py index 28c9fb9af..610a26767 100644 --- a/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py +++ b/autoware_ml/detection3d/datasets/transforms/object_min_points_filter.py @@ -34,7 +34,7 @@ def transform(self, input_dict: dict) -> dict: indices = box_np_ops.points_in_rbbox( points.tensor.numpy()[:, :3], gt_bboxes_3d.tensor.numpy()[:, :7], - origin=(0.5, 0.5, 0.0) # Always assume it's center_x, center_y and bottom_z + origin=(0.5, 0.5, 0.0), # Always assume it's 
center_x, center_y and bottom_z ) num_points_in_gt = indices.sum(0) gt_bboxes_mask = num_points_in_gt >= self.min_num_points @@ -108,7 +108,7 @@ def transform(self, input_dict: dict) -> dict: indices = box_np_ops.points_in_rbbox( points.tensor.numpy()[:, :3], gt_bboxes_3d.tensor.numpy()[:, :7], - origin=(0.5, 0.5, 0.0) # Always assume it's center_x, center_y and bottom_z + origin=(0.5, 0.5, 0.0), # Always assume it's center_x, center_y and bottom_z ) num_points_in_gt = indices.sum(0) diff --git a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py index 7f79b7aaa..ffc5fee4c 100644 --- a/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py +++ b/autoware_ml/detection3d/evaluation/t4metric/t4metric_v2.py @@ -11,6 +11,7 @@ import torch from mmdet3d.registry import METRICS from mmdet3d.structures import LiDARInstance3DBoxes +from mmdet3d.structures.ops import box_np_ops from mmengine.dist import get_world_size from mmengine.evaluator import BaseMetric from mmengine.logging import MessageHub, MMLogger @@ -30,7 +31,6 @@ from perception_eval.evaluation.result.perception_frame_result import PerceptionFrameResult from perception_eval.manager import PerceptionEvaluationManager from pyquaternion import Quaternion -from mmdet3d.structures.ops import box_np_ops from autoware_ml.detection3d.evaluation.t4metric.t4metric_v2_dataframe import T4MetricV2DataFrame @@ -437,8 +437,8 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: if self.results_pickle_exists: # Skip processing if result pickle already exists return - - batch_points = data_batch['inputs']['points'] + + batch_points = data_batch["inputs"]["points"] for data_sample, points in zip(data_samples, batch_points): current_time = data_sample["timestamp"] scene_id = self._parse_scene_id(data_sample["lidar_path"]) @@ -1413,13 +1413,10 @@ def _parse_ground_truth_from_sample(self, time: float, data_sample: Dict[str, An # 
num_lidar_pts: (N,) array of int, number of LiDAR points inside each GT box num_lidar_pts: np.ndarray = eval_info.get("num_lidar_pts", np.array([])) - + if self.min_num_points > 0 and len(bboxes): points_cpu = points.cpu().numpy() - indices = box_np_ops.points_in_rbbox( - points_cpu[:, :3], - bboxes[:, :7] - ) + indices = box_np_ops.points_in_rbbox(points_cpu[:, :3], bboxes[:, :7]) num_points_in_gt = indices.sum(0) bboxes_mask = num_points_in_gt >= self.min_num_points bboxes = bboxes[bboxes_mask] diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index fa23d120c..df0e15eba 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -1,13 +1,13 @@ from .bevfusion import BEVFusion from .bevfusion_head import BEVFusionHead, ConvFuser from .bevfusion_necks import GeneralizedLSSFPN +from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder from .depth_lss import DepthLSSTransform, LSSTransform from .loading import BEVLoadMultiViewImageFromFiles from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet __all__ = [ "BEVFusion", diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index aeddc09fa..a26f61855 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -179,7 +179,7 @@ def extract_img_feat( if not using_image_features: x = self.get_image_backbone_features(x) - with torch.amp.autocast("cuda",enabled=False): + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast(device_type='cuda', 
dtype=torch.float32): x = self.view_transform( x, @@ -243,11 +243,11 @@ def voxelize(self, points): coords = torch.cat(coords, dim=0) assert len(sizes) > 0, "No points in the voxel" sizes = torch.cat(sizes, dim=0) - + # if self.voxelize_reduce: # feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) # feats = feats.contiguous() - + return feats, coords, sizes def predict( diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 2f0a99a9f..74f0eadba 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -162,9 +162,9 @@ def __init__( y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"] self.spatial_dim = x_size * y_size bev_pos = self.create_2D_grid(x_size, y_size) - + # Register the bev_pos as a buffer so it moves to the GPU automatically. - self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) + self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) self.img_feat_pos = None self.img_feat_collapsed_pos = None @@ -182,22 +182,23 @@ def __init__( self.dense_heatmap_exclude_pooling_classes = sorted( list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices)) - ) + ) # Pre-compute the correct order of the classes for the final local_max - heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes - local_concat_class_remapping = [ - heatmap_concat_order.index(i) - for i in range(self.num_classes) - ] + heatmap_concat_order = ( + self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes + ) + local_concat_class_remapping = [heatmap_concat_order.index(i) for i in range(self.num_classes)] else: self.dense_heatmap_pooling_class_indices = None self.dense_heatmap_exclude_pooling_classes = None local_concat_class_remapping = [i for i in range(self.num_classes)] - + 
# Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. - self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False) + self.register_buffer( + "local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False + ) self.local_heatmap_padding = self.nms_kernel_size // 2 - + # NMS clusters self.nms_clusters = self.test_cfg.get("nms_clusters", []) # Add class indices for nms @@ -283,7 +284,7 @@ def forward_single(self, inputs, metas): dense_heatmap = self.heatmap_head(fusion_feat.float()) heatmap = dense_heatmap.detach().sigmoid() if self.dense_heatmap_pooling_class_indices is not None: - # Pooling + # Pooling selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] local_max_inner = F.max_pool2d( selected_heatmap, @@ -294,31 +295,35 @@ def forward_single(self, inputs, metas): # 2. Restore spatial size using F.pad instead of slice mutation local_max = F.pad( - local_max_inner, - (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, - self.local_heatmap_padding), - mode="constant", - value=0.0 + local_max_inner, + ( + self.local_heatmap_padding, + self.local_heatmap_padding, + self.local_heatmap_padding, + self.local_heatmap_padding, + ), + mode="constant", + value=0.0, ) - + # 3. 
Any non-pooling classes if self.dense_heatmap_exclude_pooling_classes: excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :] local_max = torch.cat([local_max, excluded_local_max], dim=1) local_max = local_max[:, self.local_concat_class_remapping, :, :] else: - local_max = heatmap + local_max = heatmap heatmap = heatmap * (heatmap == local_max) # (BS, num_classes, H*W) heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim) # top num_proposals among all classes - flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) - + flattened_heatmap = heatmap.view(-1, self.num_classes * self.spatial_dim) + # Use topk instead of argsort to avoid sorting the entire flattened heatmap. top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) - + # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. top_proposals_class = top_proposals // self.spatial_dim @@ -333,7 +338,7 @@ def forward_single(self, inputs, metas): one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1) query_cat_encoding = self.class_encoding(one_hot.float()) query_feat += query_cat_encoding - + # (B, N, 2) query_pos = self.bev_pos.squeeze(0)[top_proposals_index] ################################# @@ -343,7 +348,9 @@ def forward_single(self, inputs, metas): for i in range(self.num_decoder_layers): # Transformer Decoder Layer # :param query: B C Pq :param query_pos: B Pq 3/6 - query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos) + query_feat = self.decoder[i]( + query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos + ) # Prediction res_layer = self.prediction_heads[i](query_feat) @@ -411,7 +418,9 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., 
-self.num_proposals :].sigmoid() if self.loss_iou is not None: - batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 + batch_score = torch.sqrt( + batch_score * preds_dict[0]["iou"][..., -self.num_proposals :].sigmoid() + ) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot @@ -886,15 +895,19 @@ def loss_by_feat( # Output iou for iou-aware loss if self.loss_iou is not None: - layer_ious = preds_dict["iou"][ - ..., - idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ].squeeze(1) # [BS, num_proposals] - - # [BS, num_proposals] - layer_iou_weights = layer_bbox_weights[:, :, 0] - # print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape") - loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) + layer_ious = preds_dict["iou"][ + ..., + idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + ].squeeze( + 1 + ) # [BS, num_proposals] + + # [BS, num_proposals] + layer_iou_weights = layer_bbox_weights[:, :, 0] + # print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape") + loss_dict[f"{prefix}_loss_iou"] = self.loss_iou( + layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1) + ) loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 30afdc41d..d7801482a 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -1,21 +1,19 @@ from typing import Optional, Tuple -import torch import numpy as np -from torch import Tensor, nn - +import torch +from 
mmdet3d.models.voxel_encoders.utils import PFNLayer, get_paddings_indicator from mmdet3d.registry import MODELS -from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer +from torch import Tensor, nn @MODELS.register_module() class HardSimpleVoxelSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - in_channels: Optional[int] = 4) -> None: + def __init__( + self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], in_channels: Optional[int] = 4 + ) -> None: """ - Simple voxel encoder that only performs mean pooling on the normalize features, and then + Simple voxel encoder that only performs mean pooling on the normalize features, and then performs sin-cos (fourier encoding) on each voxel channels. The output shape of each voxel is (N, feature_channels*2). @@ -25,30 +23,29 @@ def __init__(self, in_channels (int): Number of input channels. """ super().__init__() - + # Create PillarFeatureNet layers self.in_channels = in_channels - - # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP + + # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP min_norm_values = torch.tensor(min_norm_values) max_norm_values = torch.tensor(max_norm_values) # Let alpha = pi * exponents, beta = max - min - # y = ((x - min) / beta) * alpha + # y = ((x - min) / beta) * alpha # y = alpha / beta * (x - min) - # y = (alpha / beta) * x - (alpha / beta) * min - # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta + # y = (alpha / beta) * x - (alpha / beta) * min + # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta # y = scale * x + bias exponents = (2 ** torch.arange(0, self.in_channels)).float() - alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) - beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) + alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) + 
beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) scale = alpha / beta - bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) - - self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) - self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C) + bias = -(alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: + self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) + self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. Args: @@ -63,26 +60,28 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, """ # Mean in the voxel # (N, M, C) -> (N, C) - voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous() + voxel_mean_features = ( + features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) + ).contiguous() # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C) # FMA (fused multiply-add): y = bias + scale * voxel_mean_features y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1)) # SinCos encoding # (N*C, C) -> (N, C*C) - y = y.reshape(-1, self.in_channels*self.in_channels) + y = y.reshape(-1, self.in_channels * self.in_channels) # (N, C*C) -> (N, C*C*2) voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - + return voxel_fourier_features @MODELS.register_module() class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. - - The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the - offset features, for example, distances. 
After that, it concatenates the fourier features and the PFN features + + The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the + offset features, for example, distances. After that, it concatenates the fourier features and the PFN features along the channel dimension for each voxel. Args: @@ -93,25 +92,24 @@ class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): N PFNLayers. Defaults to (64, ). """ - def __init__(self, - min_norm_values: Optional[Tuple[float]] = None, - max_norm_values: Optional[Tuple[float]] = None, - in_channels: Optional[int] = 4, - feat_channels: Optional[tuple] = (64, ), - with_distance: Optional[bool] = False, - with_cluster_center: Optional[bool] = True, - with_voxel_center: Optional[bool] = True, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - norm_cfg: Optional[dict] = dict( - type='BN1d', eps=1e-3, momentum=0.01), - mode: Optional[str] = 'max', - legacy: Optional[bool] = True): - + def __init__( + self, + min_norm_values: Optional[Tuple[float]] = None, + max_norm_values: Optional[Tuple[float]] = None, + in_channels: Optional[int] = 4, + feat_channels: Optional[tuple] = (64,), + with_distance: Optional[bool] = False, + with_cluster_center: Optional[bool] = True, + with_voxel_center: Optional[bool] = True, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1), + norm_cfg: Optional[dict] = dict(type="BN1d", eps=1e-3, momentum=0.01), + mode: Optional[str] = "max", + legacy: Optional[bool] = True, + ): + super(BEVFusionVoxelFeatureNet, self).__init__( - min_norm_values=min_norm_values, - max_norm_values=max_norm_values, in_channels=in_channels + min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels ) assert len(feat_channels) > 0 self.legacy = legacy @@ -122,12 +120,12 @@ def __init__(self, pfn_in_channels += 3 if 
with_distance: pfn_in_channels += 1 - + assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0" self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center - + # Create VoxelFeatureNet layers feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] @@ -138,13 +136,7 @@ def __init__(self, last_layer = False else: last_layer = True - pfn_layers.append( - PFNLayer( - in_filters, - out_filters, - norm_cfg=norm_cfg, - last_layer=last_layer, - mode=mode)) + pfn_layers.append(PFNLayer(in_filters, out_filters, norm_cfg=norm_cfg, last_layer=last_layer, mode=mode)) self.pfn_layers = nn.ModuleList(pfn_layers) # Need pillar (voxel) size and x/y offset in order to calculate offset @@ -156,8 +148,7 @@ def __init__(self, self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. 
Args: @@ -173,17 +164,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, voxel_fourier_features = super().forward(features, num_points, coors) # Normalize the features - norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - + norm_features = (features - self.min_norm_values.view(1, -1)) / ( + (self.max_norm_values - self.min_norm_values).view(1, -1) + ) + # Offset features - max_points_per_voxel = features.shape[1] - + max_points_per_voxel = features.shape[1] + features_ls = [norm_features] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_points.type_as(features).view( - -1, 1, 1) + points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Map to [0, 1] if available # if self.min_norm_values is not None and self.max_norm_values is not None: @@ -196,27 +187,21 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, if self._with_voxel_center: if not self.legacy: f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, :, 0] - ( - coors[:, 3].to(dtype).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vy + - self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - ( - coors[:, 1].to(dtype).unsqueeze(1) * self.vz + - self.z_offset) + f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) + f_center[:, :, 2] = features[:, :, 2] - (coors[:, 1].to(dtype).unsqueeze(1) * self.vz + self.z_offset) else: f_center = features[:, :, :3] f_center[:, :, 0] = f_center[:, :, 0] - ( - coors[:, 
3].type_as(features).unsqueeze(1) * self.vx + - self.x_offset) + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + self.x_offset + ) f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + - self.y_offset) + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + self.y_offset + ) f_center[:, :, 2] = f_center[:, :, 2] - ( - coors[:, 1].type_as(features).unsqueeze(1) * self.vz + - self.z_offset) - + coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset + ) + # if self.min_norm_values is not None and self.max_norm_values is not None: # f_center = f_center / (voxel_size * 0.5) features_ls.append(f_center) @@ -234,12 +219,12 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) voxel_feature_offsets *= mask - + # PFN for pfn in self.pfn_layers: voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - - # Concat + + # Concat features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) return features @@ -247,7 +232,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # @MODELS.register_module() # class BEVFusionVoxelSinCosEncoder(nn.Module): -# def __init__(self, +# def __init__(self, # min_norm_values: Tuple[float], # max_norm_values: Tuple[float], # time_lag_channel_index: int = 3, @@ -279,7 +264,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # self.y_offset = self.vy / 2 + point_cloud_range[1] # self.z_offset = self.vz / 2 + point_cloud_range[2] # self.point_cloud_range = point_cloud_range - + # self.xyz_channels = 3 # feat_offset_channels = in_channels - self.xyz_channels # if with_cluster_center: @@ -310,7 +295,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # self.time_lag_channel_index = time_lag_channel_index # self.time_exp_factor = 
time_exp_factor - + # self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) # self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) @@ -328,9 +313,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # Returns: # torch.Tensor: Features of pillars in shape (M, C). -# """ +# """ # num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - + # # Mean in the voxel # # (N, M, 3) -> (N, 3) # voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( @@ -339,7 +324,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # # min-max normalization, (N, 3) -> (N, 3) # voxel_features_norm = (voxel_features - \ # self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) - + # # SinCos encoding # # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) # y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) @@ -348,22 +333,22 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # # (N, 3*3) -> (N, 3*3*2) # voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) -# # PFN -# # Other features, for example, intensity or time_lag +# # PFN +# # Other features, for example, intensity or time_lag # other_features = features[:, :, self.xyz_channels:] - -# # Normalization -# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + +# # Normalization +# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) # time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels -# # 
exponentiate time_lag features, it's higher when the normlized time lag is lower +# # exponentiate time_lag features, it's higher when the normlized time lag is lower # # (1.0 when time_lag_features is 0.0) # if self.time_exp_factor is not None: # other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) # else: -# # Inverse the time_lag feature +# # Inverse the time_lag feature # other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] - + # # Offsets # voxel_feature_offsets = [other_features_norm] # # Find distance of x, y, and z from cluster center @@ -371,7 +356,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # points_mean = features[:, :, :3].sum( # dim=1, keepdim=True) / num_points.type_as(features).view( # -1, 1, 1) - + # # f_cluster = (features[:, :, :3] - points_mean) # f_cluster = features[:, :, :3] - points_mean # voxel_feature_offsets.append(f_cluster) @@ -389,7 +374,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # f_center[:, :, 2] = features[:, :, 2] - ( # coors[:, 1].to(dtype).unsqueeze(1) * self.vz + # self.z_offset) - + # # Map to [-1, 1] # # f_center = f_center / (self.voxel_size * 0.5) # voxel_feature_offsets.append(f_center) @@ -397,7 +382,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # if self._with_distance: # points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) # voxel_feature_offsets.append(points_dist) - + # voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) # # The feature decorations were calculated without regard to whether # # pillar was empty. 
Need to ensure that @@ -405,13 +390,11 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) # mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) # voxel_feature_offsets *= mask - + # # PFN # for pfn in self.pfn_layers: # voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - -# # Concat + +# # Concat # features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) # return features - - diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 175c08bed..888d2b1c0 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -2,8 +2,8 @@ Custom SparseConvTensor for BEVFusion. This customiztion is used to support cleaner ONNX export of sparse convolutions. """ -import torch +import torch from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE if IS_SPCONV2_AVAILABLE: @@ -23,18 +23,15 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # b * (H * W * D) + h*(W*D) + w*D + d # Factor out the common terms D and W # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d - linear_idx = ((b * H + h) * W + w) * D + d # [N] - + linear_idx = ((b * H + h) * W + w) * D + d # [N] + out = torch.zeros( - [ - num_cells, - sparse_tensor.features.shape[1] - ], + [num_cells, sparse_tensor.features.shape[1]], device=sparse_tensor.features.device, dtype=sparse_tensor.features.dtype, ) # out = out.index_copy(0, linear_idx, sparse_tensor.features) # out = out.scatter(0, linear_idx, sparse_tensor.features) - scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] + scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] out = out.scatter(0, scatter_idx, sparse_tensor.features) return out.view(batch_size, H, W, D, out_channels) 
diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py index a767bb720..f0f9a8779 100644 --- a/projects/BEVFusion/bevfusion/ops/topk/topk.py +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -13,9 +13,9 @@ class TopK(Function): def symbolic( g, x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, + k: int, + dim: int, + sorted: bool = False, ): output = g.op( @@ -27,19 +27,20 @@ def symbolic( if x_shape is not None and hasattr(output.type(), "with_sizes"): output_type = x.type().with_sizes(x_shape) output.setType(output_type) - # Argsort from Autoware is in ascending order, so we need to return the last k elements. + # Argsort from Autoware is in ascending order, so we need to return the last k elements. return output[-k:] @staticmethod def forward( ctx, x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, + k: int, + dim: int, + sorted: bool = False, ): _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) return indices + def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): - return TopK.apply(x, k, dim, sorted) \ No newline at end of file + return TopK.apply(x, k, dim, sorted) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 0fc20cd19..6bf0592b6 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -5,7 +5,6 @@ import numpy as np import torch - from mmdet3d.models.layers import make_sparse_convmodule from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE from mmdet3d.models.middle_encoders import SparseEncoder @@ -119,7 +118,7 @@ def __init__( indice_key="spconv_down2", conv_type="SparseConv3d", ) - + def forward(self, voxel_features, coors, batch_size): """Forward of SparseEncoder. 
@@ -147,11 +146,11 @@ def forward(self, voxel_features, coors, batch_size): for encoder_layer in self.encoder_layers: x = encoder_layer(x) encode_features.append(x) - + # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # spatial_features = out.dense(channels_first=False) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 84797cc51..39c6a0ded 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,7 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} predictions_dicts.append(predictions_dict) return predictions_dicts - + # use score threshold if self.score_threshold is not None: if isinstance(self.score_threshold, float): diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index 0863889bb..2652b3965 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -1,11 +1,7 @@ codebase_config = dict(type="mmdet3d", task="VoxelDetection", model_type="end2end") custom_imports = dict( - imports=[ - "projects.BEVFusion.deploy", - "projects.BEVFusion.bevfusion", - "projects.SparseConvolution" - ], + imports=["projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", "projects.SparseConvolution"], allow_failed_imports=False, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index fee0b26b9..92b06fa47 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -29,8 +29,20 @@ pts_voxel_encoder=dict( in_channels=len(_base_.lidar_sweep_dims), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + min_norm_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + max_norm_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], ), pts_middle_encoder=dict( in_channels=50, @@ -153,4 +165,6 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = ( + "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 44af5e330..c25ce0eb7 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -50,7 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=2 + min_num_points=2, ) test_evaluator = dict( @@ -73,5 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=2 + min_num_points=2, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index a6649532e..f293545a4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -29,8 +29,20 @@ pts_voxel_encoder=dict( in_channels=len(_base_.lidar_sweep_dims), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + min_norm_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + max_norm_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], ), pts_middle_encoder=dict( in_channels=50, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index db26312de..947de5f0b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -50,7 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=1 + min_num_points=1, ) test_evaluator = dict( @@ -73,5 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=1 + min_num_points=1, ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 1a1cd680e..11b57c1cd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -14,7 +14,7 @@ pad_size_divisor=32, ), pts_voxel_encoder=dict( - type="HardSimpleVoxelSinCosEncoder", + type="HardSimpleVoxelSinCosEncoder", in_channels=4, ), pts_middle_encoder=dict( @@ -89,7 +89,9 @@ # Set NMS for different clusters nms_clusters=[ # Sqrt(0.25) = 0.50 - dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms + dict( + class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300 + ), # It's radius if using circle_nms dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), 
dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 1b0f91c3d..c846a9b7e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -83,7 +83,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index ec9bc9f19..b37108873 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,7 +66,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 7538eec2c..7d6b8e506 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,7 +66,7 @@ "barrier", ], ), - 
dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 018b5db7e..55586e0f5 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -1,8 +1,8 @@ +# Wrapper Classes for onnx conversion +import numpy as np import torch import torch.nn.functional as F -# Wrapper Classes for onnx conversion -import numpy as np class TrtBevFusionImageBackboneContainer(torch.nn.Module): def __init__(self, mod, mean, std) -> None: @@ -49,13 +49,17 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, - } - + voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") - print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape) + print( + "voxels.shape, coors.shape, num_points_per_voxel.shape:", + voxels.shape, + coors.shape, + num_points_per_voxel.shape, + ) if points is not None: batch_inputs_dict["points"] = [points] diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index 4c5e72ac7..c7cd9e6b0 100644 --- a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -2,7 +2,7 @@ import logging import os.path as osp -from typing import Optional, Any +from typing import Any, Optional import numpy as np import onnx @@ -11,7 +11,7 @@ from builder import ExportBuilder from containers import 
TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer from data_classes import ModelData, SetupConfigs -from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER +from mmdeploy.core import SYMBOLIC_REWRITER, RewriterContext from mmdeploy.utils import ( get_root_logger, ) @@ -32,8 +32,7 @@ def purge_mmdeploy_symbolics(op_names: list[str]) -> dict: continue # Bookkeeping key: full Python path of an implementer function. # Match by "...symbolics.." or "...symbolics.__" - if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key - for op in op_names): + if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key for op in op_names): removed[key] = records.pop(key) return removed @@ -83,12 +82,10 @@ def _export_model( patched_model (torch.nn.Module): Patched Pytorch model. ir_configs (dict): Configs for intermediate representations in ONNX. """ - # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported + # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported # in the tensorrt version removed = purge_mmdeploy_symbolics(["layer_norm"]) - self.logger.info( - f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" - ) + self.logger.info(f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}") with RewriterContext(**context_info), torch.no_grad(): image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: From d297c99e08a249b7a0a1da86e7163d94b26a987a Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 15 Jun 2026 15:20:09 +0900 Subject: [PATCH 132/162] Added --- projects/BEVFusion/deploy/containers.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 55586e0f5..e9cd8d736 100644 --- a/projects/BEVFusion/deploy/containers.py +++ 
b/projects/BEVFusion/deploy/containers.py @@ -50,16 +50,7 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, } - - voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") - coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") - num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") - print( - "voxels.shape, coors.shape, num_points_per_voxel.shape:", - voxels.shape, - coors.shape, - num_points_per_voxel.shape, - ) + if points is not None: batch_inputs_dict["points"] = [points] From 9044bc0b103ebaf76338349d2888f10ff2129e0e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jun 2026 06:20:41 +0000 Subject: [PATCH 133/162] ci(pre-commit): autofix --- projects/BEVFusion/deploy/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index e9cd8d736..ad9243412 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -50,7 +50,7 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, } - + if points is not None: batch_inputs_dict["points"] = [points] From 247e1c9f09a2e75d52af65a215ecc998746990d2 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 15 Jun 2026 15:21:29 +0900 Subject: [PATCH 134/162] Added --- projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 888d2b1c0..8671532eb 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ 
-30,8 +30,6 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh device=sparse_tensor.features.device, dtype=sparse_tensor.features.dtype, ) - # out = out.index_copy(0, linear_idx, sparse_tensor.features) - # out = out.scatter(0, linear_idx, sparse_tensor.features) scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] out = out.scatter(0, scatter_idx, sparse_tensor.features) return out.view(batch_size, H, W, D, out_channels) From fffec2a148f38d54e972ee1314e47a96d4e0ef83 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 17 Jun 2026 16:46:25 +0900 Subject: [PATCH 135/162] Added --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- ...usion_main_body_lidar_only_intensity_tensorrt_dynamic.py | 2 +- ..._lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 5 +---- ...lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 4 ++-- ...second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 6 +++--- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 74f0eadba..4dc59ae15 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -322,7 +322,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes * self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) + _, top_proposals = torch.topk(flattened_heatmap, k=self.num_proposals, dim=-1, largest=True, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. 
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index 2652b3965..96ccbf583 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -12,7 +12,7 @@ dict( input_shapes=dict( voxels=dict( - min_shape=[1, 10, 5], opt_shape=[64000, 10, 5], max_shape=[256000, 10, 5] + min_shape=[1, 32, 5], opt_shape=[64000, 32, 5], max_shape=[256000, 32, 5] ), # [M, maximum number of points, features] features=5 when using intensity coors=dict(min_shape=[1, 3], opt_shape=[64000, 3], max_shape=[256000, 3]), num_points_per_voxel=dict(min_shape=[1], opt_shape=[64000], max_shape=[256000]), diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 92b06fa47..b91dfa0b4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -164,7 +164,4 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -load_from = ( - "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" -) +load_from = None diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 
f293545a4..c917845a0 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -165,4 +165,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 947de5f0b..598b1bb6f 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -50,7 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - 
min_num_points=1, + min_num_points=2 ) test_evaluator = dict( @@ -73,5 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=1, + min_num_points=2 ) From e5f71ca1da5b8a1e5ca22880d499c4facca3068e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 07:47:02 +0000 Subject: [PATCH 136/162] ci(pre-commit): autofix --- ...n_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 4 +++- ...l_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index c917845a0..94d3efd26 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -165,4 +165,6 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = ( + "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 598b1bb6f..ecf85669b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -50,7 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=2 + min_num_points=2, ) test_evaluator = dict( @@ -73,5 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, - min_num_points=2 + min_num_points=2, ) From 8c02f982fa0f372cbe95db103a72d38e4159c8bf Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 18 Jun 2026 00:06:56 +0900 Subject: [PATCH 137/162] Added --- projects/BEVFusion/deploy/containers.py | 1 - projects/BEVFusion/deploy/utils.py | 2 +- projects/BEVFusion/deploy/voxel_detection.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index ad9243412..bab40d87f 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -46,7 +46,6 @@ def forward( coors = coors.flip(dims=[-1]).contiguous() # [x, y, z] batch_coors = torch.zeros(num_points, 1).to(coors.device) coors = torch.cat([batch_coors, coors], dim=1).contiguous() - batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, } diff --git a/projects/BEVFusion/deploy/utils.py b/projects/BEVFusion/deploy/utils.py index fde859523..d3716d02d 100644 --- a/projects/BEVFusion/deploy/utils.py +++ b/projects/BEVFusion/deploy/utils.py @@ -49,7 +49,7 @@ def setup_configs( # TODO(KokSeang): Remove this # Default voxelize_layer voxelize_cfg = dict( - max_num_points=10, + max_num_points=32, voxel_size=[0.17, 0.17, 0.2], point_cloud_range=[-122.4, -122.4, -3.0, 122.4, 122.4, 5.0], max_voxels=[120000, 160000], diff --git a/projects/BEVFusion/deploy/voxel_detection.py b/projects/BEVFusion/deploy/voxel_detection.py index 
d9c5b3aa1..8b4b740a9 100644 --- a/projects/BEVFusion/deploy/voxel_detection.py +++ b/projects/BEVFusion/deploy/voxel_detection.py @@ -128,7 +128,6 @@ def create_input( camera_mask, img, lidar2image, cam2image, camera2lidar, geom_feats, kept, ranks, indices = ( self.extract_img_inputs(batch=batch, model=model, collate_data=collate_data) ) - inputs = [ feats, coors, From bb19131d04afbef775f84453a37d5fb3e53a2995 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 18 Jun 2026 19:20:14 +0900 Subject: [PATCH 138/162] Added --- ...dar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py index 319fc0da1..37459f748 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -50,6 +50,7 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=2, ) test_evaluator = dict( @@ -72,4 +73,5 @@ name_mapping={{_base_.name_mapping}}, experiment_name=experiment_name, experiment_group_name=_base_.experiment_group_name, + min_num_points=2, ) From b8a020de98df8dfc56f5439f09aedcaf5d8c4084 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 21 Jun 2026 14:00:17 +0900 
Subject: [PATCH 139/162] Added --- .../bevfusion_main_body_lidar_only_tensorrt_dynamic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_tensorrt_dynamic.py index 7c7d9355d..d6ec91051 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_tensorrt_dynamic.py @@ -16,7 +16,7 @@ dict( input_shapes=dict( voxels=dict( - min_shape=[1, 10, 4], opt_shape=[64000, 10, 4], max_shape=[256000, 10, 4] + min_shape=[1, 32, 4], opt_shape=[64000, 32, 4], max_shape=[256000, 32, 4] ), # [M, maximum number of points, features] features=5 when using intensity coors=dict(min_shape=[1, 3], opt_shape=[64000, 3], max_shape=[256000, 3]), num_points_per_voxel=dict(min_shape=[1], opt_shape=[64000], max_shape=[256000]), @@ -29,7 +29,7 @@ type="onnx", export_params=True, keep_initializers_as_inputs=False, - opset_version=17, + opset_version=18, save_file="bevfusion_lidar.onnx", input_names=["voxels", "coors", "num_points_per_voxel"], output_names=["bbox_pred", "score", "label_pred"], @@ -45,5 +45,5 @@ }, }, input_shape=None, - verbose=True, + verbose=False, ) From e07e7cd3fde44f1405f6495ea9b023ae1ef8fe4c Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:16:11 +0900 Subject: [PATCH 140/162] Remove uncessary config changes --- ...idar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 4 ++-- ...econd_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- ...dar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 8 +++----- ...cond_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...usion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 4 ++-- ...voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py | 2 +- 6 files changed, 10 insertions(+), 12 deletions(-) diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index b91dfa0b4..3edd06c92 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8_1/" +info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index c25ce0eb7..8931d3008 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 94d3efd26..045a64a60 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8_1/" +info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -165,6 +165,4 @@ ) log_processor = dict(window_size=50) -load_from = ( - "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" -) +load_from = None diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index ecf85669b..b6c23eeb8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type 
experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 3debad7d6..7716a1508 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8_1/" +info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py index 37459f748..6f1c88ec4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_2_8/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 
25c3852d98b34a52ba90ad0205ae20479ccb94c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Jun 2026 09:16:35 +0000 Subject: [PATCH 141/162] ci(pre-commit): autofix --- ...ion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 045a64a60..05947c2fd 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -165,4 +165,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = None From c5a150c1985014748d0d2bd27fb446217ada4f65 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:17:03 +0900 Subject: [PATCH 142/162] Remove uncessary config changes --- autoware_ml/detection3d/datasets/t4dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index d7fed6256..ce1c78f31 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -192,7 +192,4 @@ def parse_data_info(self, info: dict) -> dict: else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] - # Default difficulty to 0 if not present - if "difficulty" not in info: - info["difficulty"] = 0 return info From c1beda5ef4e759a7352e14e0a951db1aa7fe59bd Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:18:29 +0900 Subject: [PATCH 143/162] Remove uncessary config changes --- .../BEVFusion/bevfusion/ops/topk/__init__.py | 0 
projects/BEVFusion/bevfusion/ops/topk/topk.py | 46 ------------------- 2 files changed, 46 deletions(-) delete mode 100644 projects/BEVFusion/bevfusion/ops/topk/__init__.py delete mode 100644 projects/BEVFusion/bevfusion/ops/topk/topk.py diff --git a/projects/BEVFusion/bevfusion/ops/topk/__init__.py b/projects/BEVFusion/bevfusion/ops/topk/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py deleted file mode 100644 index f0f9a8779..000000000 --- a/projects/BEVFusion/bevfusion/ops/topk/topk.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -This file is used to write functions to deploy custom plugins to support Autoware, for example, TopK. -""" - -import torch -from torch.autograd import Function -from torch.onnx.symbolic_helper import _get_tensor_sizes - - -class TopK(Function): - - @staticmethod - def symbolic( - g, - x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, - ): - - output = g.op( - "autoware::Argsort", - x, - outputs=1, - ) - x_shape = _get_tensor_sizes(x) - if x_shape is not None and hasattr(output.type(), "with_sizes"): - output_type = x.type().with_sizes(x_shape) - output.setType(output_type) - # Argsort from Autoware is in ascending order, so we need to return the last k elements. 
- return output[-k:] - - @staticmethod - def forward( - ctx, - x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, - ): - _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) - return indices - - -def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): - return TopK.apply(x, k, dim, sorted) From 5a0a7311dce5f15bdadfd6881e517e2b95f76740 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:19:17 +0900 Subject: [PATCH 144/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 4dc59ae15..42af8b741 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -17,8 +17,6 @@ from mmengine.structures import InstanceData from torch import nn -from .ops.topk.topk import topk - def clip_sigmoid(x, eps=1e-4): y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) From b3409e2806244668cb7289b957c55762bba94242 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:19:50 +0900 Subject: [PATCH 145/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/bevfusion.py | 7 ------- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index a26f61855..77cf1e3ff 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -201,19 +201,12 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: if points is not None: # NOTE(knzo25): training and normal inference with torch.amp.autocast("cuda", enabled=False): - # with torch.autocast('cuda', enabled=False): points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = 
coords[-1, 0] + 1 else: # NOTE: (knzo25): onnx inference. Voxelization happens outside the graph with torch.amp.autocast("cuda", enabled=False): - # with torch.autocast('cuda', enabled=False): - - # NOTE(knzo25): onnx demmands this - # batch_size = coords[-1, 0] + 1 - # with torch.autocast('cuda', enabled=False): - # NOTE(knzo25): onnx demmands this # batch_size = coords[-1, 0] + 1 batch_size = 1 diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 42af8b741..0195836ef 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -278,8 +278,8 @@ def forward_single(self, inputs, metas): # query initialization ################################# with torch.amp.autocast("cuda", enabled=False): - # with torch.autocast('cuda', enabled=False): dense_heatmap = self.heatmap_head(fusion_feat.float()) + heatmap = dense_heatmap.detach().sigmoid() if self.dense_heatmap_pooling_class_indices is not None: # Pooling From 4382008e4b67620078b3f6ca4472ec06af827d8f Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:20:47 +0900 Subject: [PATCH 146/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/bevfusion.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 77cf1e3ff..709d851a9 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -237,10 +237,6 @@ def voxelize(self, points): assert len(sizes) > 0, "No points in the voxel" sizes = torch.cat(sizes, dim=0) - # if self.voxelize_reduce: - # feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) - # feats = feats.contiguous() - return feats, coords, sizes def predict( From 4a9ec6edb7f00d3ff5c8d212bd456ccacf264fa9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: 
Sun, 21 Jun 2026 09:21:19 +0000 Subject: [PATCH 147/162] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0195836ef..b71a10d0a 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -279,7 +279,7 @@ def forward_single(self, inputs, metas): ################################# with torch.amp.autocast("cuda", enabled=False): dense_heatmap = self.heatmap_head(fusion_feat.float()) - + heatmap = dense_heatmap.detach().sigmoid() if self.dense_heatmap_pooling_class_indices is not None: # Pooling From 862e72be298f85108b1ce7e86926180d3edbe3ec Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:21:37 +0900 Subject: [PATCH 148/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/__init__.py | 5 +- .../bevfusion/bevfusion_voxel_encoder.py | 324 ------------------ 2 files changed, 2 insertions(+), 327 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index df0e15eba..941bacfd6 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -1,7 +1,7 @@ from .bevfusion import BEVFusion from .bevfusion_head import BEVFusionHead, ConvFuser from .bevfusion_necks import GeneralizedLSSFPN -from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder +from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder from .depth_lss import DepthLSSTransform, LSSTransform from .loading import BEVLoadMultiViewImageFromFiles from .sparse_encoder import BEVFusionSparseEncoder @@ -28,6 +28,5 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", - "HardSimpleVoxelSinCosEncoder", - "BEVFusionVoxelFeatureNet", + "HardSimpleVoxelSinCosEncoder" ] diff --git 
a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index d7801482a..02fe76b57 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -74,327 +74,3 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, ** voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) return voxel_fourier_features - - -@MODELS.register_module() -class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): - """BEVFusion Voxel Encoder Feature Net. - - The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the - offset features, for example, distances. After that, it concatenates the fourier features and the PFN features - along the channel dimension for each voxel. - - Args: - min_norm_values (Tuple[float]): Minimum values for the features. - max_norm_values (Tuple[float]): Maximum values for the features. - in_channels (int): Number of input channels. - feat_channels (tuple, optional): Number of features in each of the - N PFNLayers. Defaults to (64, ). 
- """ - - def __init__( - self, - min_norm_values: Optional[Tuple[float]] = None, - max_norm_values: Optional[Tuple[float]] = None, - in_channels: Optional[int] = 4, - feat_channels: Optional[tuple] = (64,), - with_distance: Optional[bool] = False, - with_cluster_center: Optional[bool] = True, - with_voxel_center: Optional[bool] = True, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1), - norm_cfg: Optional[dict] = dict(type="BN1d", eps=1e-3, momentum=0.01), - mode: Optional[str] = "max", - legacy: Optional[bool] = True, - ): - - super(BEVFusionVoxelFeatureNet, self).__init__( - min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels - ) - assert len(feat_channels) > 0 - self.legacy = legacy - pfn_in_channels = in_channels - if with_cluster_center: - pfn_in_channels += 3 - if with_voxel_center: - pfn_in_channels += 3 - if with_distance: - pfn_in_channels += 1 - - assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0" - self._with_distance = with_distance - self._with_cluster_center = with_cluster_center - self._with_voxel_center = with_voxel_center - - # Create VoxelFeatureNet layers - feat_channels = [pfn_in_channels] + list(feat_channels) - pfn_layers = [] - for i in range(len(feat_channels) - 1): - in_filters = feat_channels[i] - out_filters = feat_channels[i + 1] - if i < len(feat_channels) - 2: - last_layer = False - else: - last_layer = True - pfn_layers.append(PFNLayer(in_filters, out_filters, norm_cfg=norm_cfg, last_layer=last_layer, mode=mode)) - self.pfn_layers = nn.ModuleList(pfn_layers) - - # Need pillar (voxel) size and x/y offset in order to calculate offset - self.vx = voxel_size[0] - self.vy = voxel_size[1] - self.vz = voxel_size[2] - self.x_offset = self.vx / 2 + point_cloud_range[0] - self.y_offset = self.vy / 2 + point_cloud_range[1] - self.z_offset = self.vz / 2 + point_cloud_range[2] - self.point_cloud_range = 
point_cloud_range - - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: - """Forward function. - - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4. - num_points (torch.Tensor): Number of points in each pillar in shape (M). - coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). - - Returns: - torch.Tensor: Features of pillars in shape (M, C*C*2 + feat_channels[-1]). - """ - # (M, C*C*2) - voxel_fourier_features = super().forward(features, num_points, coors) - - # Normalize the features - norm_features = (features - self.min_norm_values.view(1, -1)) / ( - (self.max_norm_values - self.min_norm_values).view(1, -1) - ) - - # Offset features - max_points_per_voxel = features.shape[1] - - features_ls = [norm_features] - # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available - if self._with_cluster_center: - points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1) - f_cluster = features[:, :, :3] - points_mean - # Map to [0, 1] if available - # if self.min_norm_values is not None and self.max_norm_values is not None: - # voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) - # f_cluster = f_cluster / voxel_size - features_ls.append(f_cluster) - - # Find distance of x, y, and z from pillar center - dtype = features.dtype - if self._with_voxel_center: - if not self.legacy: - f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - (coors[:, 1].to(dtype).unsqueeze(1) * self.vz + self.z_offset) - else: - f_center = features[:, :, :3] - f_center[:, :, 
0] = f_center[:, :, 0] - ( - coors[:, 3].type_as(features).unsqueeze(1) * self.vx + self.x_offset - ) - f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + self.y_offset - ) - f_center[:, :, 2] = f_center[:, :, 2] - ( - coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset - ) - - # if self.min_norm_values is not None and self.max_norm_values is not None: - # f_center = f_center / (voxel_size * 0.5) - features_ls.append(f_center) - - if self._with_distance: - points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - features_ls.append(points_dist) - - # Combine together feature decorations - voxel_feature_offsets = torch.cat(features_ls, dim=-1) - - # The feature decorations were calculated without regard to whether - # pillar was empty. Need to ensure that - # empty pillars remain set to zeros. - mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) - voxel_feature_offsets *= mask - - # PFN - for pfn in self.pfn_layers: - voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - - # Concat - features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) - - return features - - -# @MODELS.register_module() -# class BEVFusionVoxelSinCosEncoder(nn.Module): -# def __init__(self, -# min_norm_values: Tuple[float], -# max_norm_values: Tuple[float], -# time_lag_channel_index: int = 3, -# time_exp_factor: Optional[float] = None, -# feat_channels: Optional[tuple] = (16, ), -# in_channels: Optional[int] = 4, -# with_distance: Optional[bool] = False, -# with_cluster_center: Optional[bool] = True, -# with_voxel_center: Optional[bool] = True, -# voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), -# point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, -# 40, 1), -# norm_cfg: Optional[dict] = dict( -# type='BN1d', eps=1e-3, momentum=0.01), -# mode: Optional[str] = 'max'): -# 
super(BEVFusionVoxelSinCosEncoder, self).__init__() - -# self._with_distance = with_distance -# self._with_cluster_center = with_cluster_center -# self._with_voxel_center = with_voxel_center -# # Create PillarFeatureNet layers -# self.in_channels = in_channels - -# # Need pillar (voxel) size and x/y offset in order to calculate offset -# self.vx = voxel_size[0] -# self.vy = voxel_size[1] -# self.vz = voxel_size[2] -# self.x_offset = self.vx / 2 + point_cloud_range[0] -# self.y_offset = self.vy / 2 + point_cloud_range[1] -# self.z_offset = self.vz / 2 + point_cloud_range[2] -# self.point_cloud_range = point_cloud_range - -# self.xyz_channels = 3 -# feat_offset_channels = in_channels - self.xyz_channels -# if with_cluster_center: -# feat_offset_channels += 3 -# if with_voxel_center: -# feat_offset_channels += 3 -# if with_distance: -# feat_offset_channels += 1 - -# feat_channels = [feat_offset_channels] + list(feat_channels) -# assert len(feat_channels) > 0, "feat_channels must be greater than 0" -# pfn_layers = [] -# for i in range(len(feat_channels) - 1): -# in_filters = feat_channels[i] -# out_filters = feat_channels[i + 1] -# if i < len(feat_channels) - 2: -# last_layer = False -# else: -# last_layer = True -# pfn_layers.append( -# PFNLayer( -# in_filters, -# out_filters, -# norm_cfg=norm_cfg, -# last_layer=last_layer, -# mode=mode)) -# self.pfn_layers = nn.ModuleList(pfn_layers) - -# self.time_lag_channel_index = time_lag_channel_index -# self.time_exp_factor = time_exp_factor - -# self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) -# self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) -# self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) -# self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) - -# def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, -# *args, **kwargs) -> Tensor: -# """Forward function. 
- -# Args: -# features (torch.Tensor): Point features or raw points in shape -# (N, M, C). -# num_points (torch.Tensor): Number of points in each pillar in shape (M). -# coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). - -# Returns: -# torch.Tensor: Features of pillars in shape (M, C). -# """ -# num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - -# # Mean in the voxel -# # (N, M, 3) -> (N, 3) -# voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( -# -1, 1)).contiguous() - -# # min-max normalization, (N, 3) -> (N, 3) -# voxel_features_norm = (voxel_features - \ -# self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) - -# # SinCos encoding -# # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) -# y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) -# # (N*3, 3) -> (N, 3*3) -# y = y.reshape(num_voxels, -1) -# # (N, 3*3) -> (N, 3*3*2) -# voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - -# # PFN -# # Other features, for example, intensity or time_lag -# other_features = features[:, :, self.xyz_channels:] - -# # Normalization -# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) - -# time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels -# # exponentiate time_lag features, it's higher when the normlized time lag is lower -# # (1.0 when time_lag_features is 0.0) -# if self.time_exp_factor is not None: -# other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) -# else: -# # Inverse the time_lag feature -# other_features_norm[:, :, time_lag_feature_index] = 1.0 - 
other_features_norm[:, :, time_lag_feature_index] - -# # Offsets -# voxel_feature_offsets = [other_features_norm] -# # Find distance of x, y, and z from cluster center -# if self._with_cluster_center: -# points_mean = features[:, :, :3].sum( -# dim=1, keepdim=True) / num_points.type_as(features).view( -# -1, 1, 1) - -# # f_cluster = (features[:, :, :3] - points_mean) -# f_cluster = features[:, :, :3] - points_mean -# voxel_feature_offsets.append(f_cluster) - -# # Find distance of x, y, and z from pillar center -# dtype = features.dtype -# if self._with_voxel_center: -# f_center = torch.zeros_like(features[:, :, :3]) -# f_center[:, :, 0] = features[:, :, 0] - ( -# coors[:, 3].to(dtype).unsqueeze(1) * self.vx + -# self.x_offset) -# f_center[:, :, 1] = features[:, :, 1] - ( -# coors[:, 2].to(dtype).unsqueeze(1) * self.vy + -# self.y_offset) -# f_center[:, :, 2] = features[:, :, 2] - ( -# coors[:, 1].to(dtype).unsqueeze(1) * self.vz + -# self.z_offset) - -# # Map to [-1, 1] -# # f_center = f_center / (self.voxel_size * 0.5) -# voxel_feature_offsets.append(f_center) - -# if self._with_distance: -# points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) -# voxel_feature_offsets.append(points_dist) - -# voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) -# # The feature decorations were calculated without regard to whether -# # pillar was empty. Need to ensure that -# # empty pillars remain set to zeros. 
-# mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) -# mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) -# voxel_feature_offsets *= mask - -# # PFN -# for pfn in self.pfn_layers: -# voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - -# # Concat -# features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) -# return features From 24c88eb1631a24851c5cfac26f23326e07c2dd1b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Jun 2026 09:22:13 +0000 Subject: [PATCH 149/162] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 941bacfd6..e84525651 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -28,5 +28,5 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", - "HardSimpleVoxelSinCosEncoder" + "HardSimpleVoxelSinCosEncoder", ] From b0ae1255f838281aef36d477ad2cb46f40e18266 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:25:14 +0900 Subject: [PATCH 150/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index b71a10d0a..2d713b022 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -266,7 +266,6 @@ def forward_single(self, inputs, metas): Returns: list[dict]: Output results for tasks. 
""" - # batch_size = inputs.shape[0] fusion_feat = self.shared_conv(inputs) ################################# @@ -902,7 +901,6 @@ def loss_by_feat( # [BS, num_proposals] layer_iou_weights = layer_bbox_weights[:, :, 0] - # print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape") loss_dict[f"{prefix}_loss_iou"] = self.loss_iou( layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1) ) From 8fad95989b5423b513ca7c4e2d7c1b652c0c973a Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 21 Jun 2026 18:30:00 +0900 Subject: [PATCH 151/162] Remove uncessary config changes --- projects/BEVFusion/bevfusion/sparse_encoder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 6bf0592b6..4b4fd00f8 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -152,7 +152,6 @@ def forward(self, voxel_features, coors, batch_size): out = self.conv_out(encode_features[-1]) spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) - # spatial_features = out.dense(channels_first=False) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = spatial_features.view( batch_size, From 179ca256a165fd483801bec0a2a95c24866edf70 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 11:41:29 +0900 Subject: [PATCH 152/162] Remove uncessary config changes --- .../default/pipelines/default_camera_lidar_intensity_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index c846a9b7e..4d9a5aa12 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -83,8 +83,8 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=3), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=2), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", From 0f4718699101a41b409d18db361f7be10f92f6b1 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 13:36:12 +0900 Subject: [PATCH 153/162] Update evaluation metrics --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 389 ++++++++++++++---- 1 file changed, 304 insertions(+), 85 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index ecdd1e9a8..e8b5310e4 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -9,10 +9,10 @@ ### Testing Datasets -- **Total Frames: 15,154** +- **Total Frames: 16,597**
- j6gen2 (3,951 frames) + j6gen2 (4,682 frames) - `db_j6gen2_v1` - `db_j6gen2_v2` @@ -23,6 +23,9 @@ - `db_j6gen2_v7` - `db_j6gen2_v8` - `db_j6gen2_v9` + - `db_j6gen2_v10` + - `db_j6gen2_v11` + - `db_j6gen2_v12`
@@ -36,16 +39,183 @@
- jpntaxi_gen2 (9,975 frames) + jpntaxi_gen2 (10,687 frames) - `db_jpntaxigen2_v1` - `db_jpntaxigen2_v2`
+### mAP - Base +- Note that the metrics reported in `traffic_cone/barrier` might not be accurate since some of the evaluation datasets don't have annotations for the two classes. + +- **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(126,168) | truck
(26,897) | bus
(6,559) | bicycle
(5,865) | pedestrian
(93,520) | traffic_cone
(20,835) | barrier
(3,359) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.6763 | 0.6381 | 0.6507 | 0.6062 | 0.6316 | 0.5871 | 0.9065 | 0.8566 | 0.8705 | 0.8157 | 0.8913 | 0.3417 | 0.0521 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(105,914) | truck
(28,864) | bus
(5,290) | bicycle
(3,608) | pedestrian
(48,637) | traffic_cone
(9,819) | barrier
(2,469) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5447 | 0.5067 | 0.5590 | 0.5236 | 0.5400 | 0.5046 | 0.8132 | 0.6652 | 0.6404 | 0.6241 | 0.7502 | 0.3184 | 0.0017 | + +
+
- base (15,154 frames) + Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(39,577) | truck
(18,213) | bus
(3,541) | bicycle
(942) | pedestrian
(20,134) | traffic_cone
(1,231) | barrier
(711) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3779 | 0.3496 | 0.4428 | 0.3903 | 0.4287 | 0.3762 | 0.6979 | 0.5143 | 0.3860 | 0.3610 | 0.6588 | 0.0272 | 0.0002 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(271,659) | truck
(73,974) | bus
(15,390) | bicycle
(10,415) | pedestrian
(162,291) | traffic_cone
(31,885) | barrier
(6,539) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5936 | 0.5554 | 0.6017 | 0.5555 | 0.5826 | 0.5365 | 0.8534 | 0.7110 | 0.6992 | 0.7185 | 0.8315 | 0.3204 | 0.0209 | + +
+ +### Mean TPError - Base +- Recalls: `0.10`, `0.40`, `optimal` + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.1796 | 0.1993 | 0.2024 | 0.2937 | 1.0000 | 0.2857 | 0.2916 | 0.2957 | 0.4466 | 1.0000 | 0.2149 | 0.2196 | 0.2175 | 0.3260 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 126,168) | truck
0.5/1.0/2.0/4.0
(GTs: 26,897) | bus
0.5/1.0/2.0/4.0
(GTs: 6,559) | bicycle
0.5/1.0/2.0/4.0
(GTs: 5,865) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 93,520) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,835) | barrier
0.5/1.0/2.0/4.0
(GTs: 3,359) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 13,878 / 13,878 / 13,878 / 13,878 | 2,958 / 2,958 / 2,958 / 2,958 | 721 / 721 / 721 / 721 | 645 / 645 / 645 / 645 | 10,287 / 10,287 / 10,287 / 10,287 | 2,291 / 2,291 / 2,291 / 2,291 | 369 / 369 / 369 / 369 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 126,168) | truck
0.5/1.0/2.0/4.0
(GTs: 26,897) | bus
0.5/1.0/2.0/4.0
(GTs: 6,559) | bicycle
0.5/1.0/2.0/4.0
(GTs: 5,865) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 93,520) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,835) | barrier
0.5/1.0/2.0/4.0
(GTs: 3,359) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 51,728 / 51,728 / 51,728 / 51,728 | 11,027 / 11,027 / 11,027 / 11,027 | 2,689 / 2,689 / 2,689 / 2,689 | 2,404 / 2,404 / 2,404 / 2,404 | 38,343 / 38,343 / 38,343 / 38,343 | 8,542 / 8,542 / 8,542 / 8,542 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 126,168) | truck
0.5/1.0/2.0/4.0
(GTs: 26,897) | bus
0.5/1.0/2.0/4.0
(GTs: 6,559) | bicycle
0.5/1.0/2.0/4.0
(GTs: 5,865) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 93,520) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,835) | barrier
0.5/1.0/2.0/4.0
(GTs: 3,359) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 109,035 / 113,613 / 115,502 / 116,446 | 20,578 / 23,058 / 24,234 / 24,631 | 5,398 / 5,809 / 5,952 / 5,967 | 4,573 / 4,739 / 4,611 / 4,616 | 78,245 / 79,717 / 80,219 / 80,761 | 10,168 / 11,012 / 11,411 / 11,947 | 452 / 629 / 667 / 715 | + +
+ +
+ + Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2878 | 0.2286 | 0.2487 | 0.3686 | 1.0000 | 0.3596 | 0.3138 | 0.3151 | 0.4991 | 1.0000 | 0.2897 | 0.2224 | 0.2274 | 0.3779 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 105,914) | truck
0.5/1.0/2.0/4.0
(GTs: 28,864) | bus
0.5/1.0/2.0/4.0
(GTs: 5,290) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,608) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 48,637) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 9,819) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,469) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 11,650 / 11,650 / 11,650 / 11,650 | 3,175 / 3,175 / 3,175 / 3,175 | 581 / 581 / 581 / 581 | 396 / 396 / 396 / 396 | 5,350 / 5,350 / 5,350 / 5,350 | 1,080 / 1,080 / 1,080 / 1,080 | 0 / 271 / 271 / 271 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 105,914) | truck
0.5/1.0/2.0/4.0
(GTs: 28,864) | bus
0.5/1.0/2.0/4.0
(GTs: 5,290) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,608) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 48,637) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 9,819) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,469) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 43,424 / 43,424 / 43,424 / 43,424 | 11,834 / 11,834 / 11,834 / 11,834 | 2,168 / 2,168 / 2,168 / 2,168 | 1,479 / 1,479 / 1,479 / 1,479 | 19,941 / 19,941 / 19,941 / 19,941 | 4,025 / 4,025 / 4,025 / 4,025 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 105,914) | truck
0.5/1.0/2.0/4.0
(GTs: 28,864) | bus
0.5/1.0/2.0/4.0
(GTs: 5,290) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,608) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 48,637) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 9,819) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,469) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 76,795 / 84,883 / 88,518 / 89,420 | 15,896 / 19,085 / 21,454 / 22,235 | 2,428 / 3,407 / 3,812 / 3,890 | 2,186 / 2,306 / 2,311 / 2,327 | 34,885 / 35,834 / 35,898 / 36,174 | 4,370 / 4,522 / 4,871 / 4,902 | 140 / 222 / 238 / 244 | +
+ +
+ + Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3878 | 0.2914 | 0.3071 | 0.4752 | 1.0000 | 0.5015 | 0.3966 | 0.4158 | 0.6724 | 1.0000 | 0.3567 | 0.2630 | 0.2646 | 0.4419 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 39,577) | truck
0.5/1.0/2.0/4.0
(GTs: 18,213) | bus
0.5/1.0/2.0/4.0
(GTs: 3,541) | bicycle
0.5/1.0/2.0/4.0
(GTs: 942) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 20,134) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 1,231) | barrier
0.5/1.0/2.0/4.0
(GTs: 711) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 4,353 / 4,353 / 4,353 / 4,353 | 2,003 / 2,003 / 2,003 / 2,003 | 389 / 389 / 389 / 389 | 103 / 103 / 103 / 103 | 2,214 / 2,214 / 2,214 / 2,214 | 135 / 135 / 135 / 135 | 0 / 0 / 78 / 78 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 39,577) | truck
0.5/1.0/2.0/4.0
(GTs: 18,213) | bus
0.5/1.0/2.0/4.0
(GTs: 3,541) | bicycle
0.5/1.0/2.0/4.0
(GTs: 942) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 20,134) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 1,231) | barrier
0.5/1.0/2.0/4.0
(GTs: 711) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 16,226 / 16,226 / 16,226 / 16,226 | 7,467 / 7,467 / 7,467 / 7,467 | 1,451 / 1,451 / 1,451 / 1,451 | 386 / 386 / 386 / 386 | 8,254 / 8,254 / 8,254 / 8,254 | 0 / 0 / 0 / 504 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 39,577) | truck
0.5/1.0/2.0/4.0
(GTs: 18,213) | bus
0.5/1.0/2.0/4.0
(GTs: 3,541) | bicycle
0.5/1.0/2.0/4.0
(GTs: 942) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 20,134) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 1,231) | barrier
0.5/1.0/2.0/4.0
(GTs: 711) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 23,637 / 27,796 / 30,068 / 31,051 | 6,996 / 9,963 / 12,146 / 13,138 | 1,277 / 1,704 / 1,857 / 1,963 | 379 / 421 / 434 / 439 | 13,121 / 13,270 / 13,350 / 13,493 | 247 / 251 / 262 / 280 | 28 / 54 / 67 / 70 | + +
+ +
+ + Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2162 | 0.2040 | 0.2147 | 0.3160 | 1.0000 | 0.3255 | 0.3034 | 0.3079 | 0.4756 | 1.0000 | 0.2567 | 0.2269 | 0.2265 | 0.3571 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 271,659) | truck
0.5/1.0/2.0/4.0
(GTs: 73,974) | bus
0.5/1.0/2.0/4.0
(GTs: 15,390) | bicycle
0.5/1.0/2.0/4.0
(GTs: 10,415) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 162,291) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 31,885) | barrier
0.5/1.0/2.0/4.0
(GTs: 6,539) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 29,883 / 29,882 / 29,882 / 29,882 | 8,137 / 8,137 / 8,137 / 8,137 | 1,692 / 1,692 / 1,692 / 1,692 | 1,145 / 1,145 / 1,145 / 1,145 | 17,852 / 17,852 / 17,852 / 17,852 | 3,507 / 3,507 / 3,507 / 3,507 | 719 / 719 / 719 / 719 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 271,659) | truck
0.5/1.0/2.0/4.0
(GTs: 73,974) | bus
0.5/1.0/2.0/4.0
(GTs: 15,390) | bicycle
0.5/1.0/2.0/4.0
(GTs: 10,415) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 162,291) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 31,885) | barrier
0.5/1.0/2.0/4.0
(GTs: 6,539) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 111,380 / 111,380 / 111,380 / 111,380 | 30,329 / 30,329 / 30,329 / 30,329 | 6,309 / 6,309 / 6,309 / 6,309 | 4,270 / 4,270 / 4,270 / 4,270 | 66,539 / 66,539 / 66,539 / 66,539 | 13,072 / 13,072 / 13,072 / 13,072 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 271,659) | truck
0.5/1.0/2.0/4.0
(GTs: 73,974) | bus
0.5/1.0/2.0/4.0
(GTs: 15,390) | bicycle
0.5/1.0/2.0/4.0
(GTs: 10,415) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 162,291) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 31,885) | barrier
0.5/1.0/2.0/4.0
(GTs: 6,539) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 209,064 / 225,972 / 233,684 / 235,583 | 43,131 / 52,358 / 57,620 / 60,504 | 9,224 / 10,854 / 11,562 / 11,707 | 7,044 / 7,224 / 7,242 / 7,260 | 125,762 / 128,271 / 129,072 / 130,130 | 14,442 / 15,921 / 16,500 / 17,198 | 624 / 886 / 950 / 1,028 | + +
+ + +## Datasets + +
+ J6Gen2 + +- Datasets (4,682 Testing Frames): - `db_j6gen2_v1` - `db_j6gen2_v2` - `db_j6gen2_v3` @@ -55,109 +225,165 @@ - `db_j6gen2_v7` - `db_j6gen2_v8` - `db_j6gen2_v9` - - `db_largebus_v1` - - `db_largebus_v2` - - `db_largebus_v3` - - `db_jpntaxigen2_v1` - - `db_jpntaxigen2_v2` - -
- -### mAP - Base + - `db_j6gen2_v10` + - `db_j6gen2_v11` + - `db_j6gen2_v12` - **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m**
Eval Range: 0.0 - 50.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(60,938) | truck
(7,081) | bus
(2,370) | bicycle
(1,357) | pedestrian
(18,202) | traffic_cone
(8,250) | barrier
(1,350) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.7246 | 0.6765 | 0.6874 | 0.6712 | 0.6633 | 0.6471 | 0.8849 | 0.8325 | 0.9034 | 0.9004 | 0.8381 | 0.4459 | 0.2671 | - | Model version | mAP | mAPH | car
(107,309) | truck
(24,206) | bus
(5,712) | bicycle
(4,060) | pedestrian
(77,369) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8817 | 0.8496 | 0.9131 | 0.8552 | 0.9081 | 0.8357 | 0.8966 | - | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.8443 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 | - -
- + +
Eval Range: 50.0 - 90.0m - | Model version | mAP | mAPH | car
(94,080) | truck
(27,651) | bus
(4,761) | bicycle
(2,365) | pedestrian
(37,523) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7002 | 0.6621 | 0.8174 | 0.6660 | 0.6414 | 0.6430 | 0.7331 | - | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 | - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(54,217) | truck
(4,913) | bus
(2,116) | bicycle
(838) | pedestrian
(8,336) | traffic_cone
(2,632) | barrier
(622) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5590 | 0.5053 | 0.5849 | 0.5656 | 0.5581 | 0.5387 | 0.7864 | 0.6212 | 0.7611 | 0.6674 | 0.6253 | 0.2711 | 0.1807 | +
Eval Range: 90.0 - 121.0m - - | Model version | mAP | mAPH | car
(36,895) | truck
(17,759) | bus
(2,852) | bicycle
(519) | pedestrian
(17,091) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.5600 | 0.5254 | 0.6578 | 0.5131 | 0.5178 | 0.4296 | 0.6815 | - | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(19,301) | truck
(2,906) | bus
(484) | bicycle
(291) | pedestrian
(2,564) | traffic_cone
(462) | barrier
(145) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.4021 | 0.3638 | 0.4870 | 0.4675 | 0.4679 | 0.4484 | 0.6848 | 0.4894 | 0.4972 | 0.4913 | 0.4232 | 0.1266 | 0.1024 |
Eval Range: 0.0 - 121.0m - - | Model version | mAP | mAPH | car
(238,284) | truck
(69,616) | bus
(13,325) | bicycle
(6,944) | pedestrian
(131,983) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7777 | 0.7420 | 0.8504 | 0.7065 | 0.7443 | 0.7538 | 0.8332 | - | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(134,456) | truck
(14,900) | bus
(4,970) | bicycle
(2,486) | pedestrian
(29,102) | traffic_cone
(11,344) | barrier
(2,117) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.6463 | 0.5953 | 0.6403 | 0.6221 | 0.6148 | 0.5966 | 0.8310 | 0.7078 | 0.8174 | 0.7884 | 0.7558 | 0.3971 | 0.2263 |
-## Datasets +- **Mean TPError** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.1731 | 0.1809 | 0.1966 | 0.1987 | 1.0000 | 0.2178 | 0.2153 | 0.2319 | 0.2464 | 1.0000 | 0.2080 | 0.2074 | 0.2153 | 0.2185 | 1.0000 | -
- JPNTaxi Gen2 + Num match summary -- Datasets (9,975 Testing Frames): - - `db_jpntaxigen2_v1` - - `db_jpntaxigen2_v2` + **recall 0.10** -- **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m** + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 6,703 / 6,703 / 6,703 / 6,703 | 778 / 778 / 778 / 778 | 261 / 261 / 260 / 260 | 149 / 149 / 149 / 149 | 2,002 / 2,002 / 2,002 / 2,002 | 907 / 907 / 907 / 907 | 148 / 148 / 148 / 148 | -
- Eval Range: 0.0 - 50.0m + **recall 0.40** - | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 | - | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 24,984 / 24,984 / 24,984 / 24,984 | 2,903 / 2,903 / 2,903 / 2,903 | 971 / 971 / 971 / 971 | 556 / 556 / 556 / 556 | 7,462 / 7,462 / 7,462 / 7,462 | 3,382 / 3,382 / 3,382 / 3,382 | 0 / 553 / 553 / 553 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 51,545 / 53,474 / 54,439 / 55,024 | 5,252 / 5,935 / 6,142 / 6,261 | 1,963 / 2,101 / 2,217 / 2,227 | 1,141 / 1,146 / 1,146 / 1,146 | 14,108 / 14,410 / 14,530 / 14,796 | 4,463 / 4,943 / 5,097 / 5,196 | 452 / 587 / 622 / 638 |
Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2509 | 0.2223 | 0.2170 | 0.2557 | 1.0000 | 0.3081 | 0.2757 | 0.2529 | 0.3025 | 1.0000 | 0.2876 | 0.2507 | 0.2251 | 0.2699 | 1.0000 | - | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 | - | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 5,963 / 5,963 / 5,963 / 5,963 | 540 / 540 / 540 / 540 | 232 / 232 / 232 / 232 | 92 / 92 / 92 / 92 | 916 / 916 / 916 / 916 | 289 / 289 / 289 / 289 | 68 / 68 / 68 / 68 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 22,228 / 22,228 / 22,228 / 22,228 | 2,014 / 2,014 / 2,014 / 2,014 | 867 / 867 / 867 / 867 | 343 / 343 / 343 / 343 | 3,417 / 3,417 / 3,417 / 3,417 | 1,079 / 1,079 / 1,079 / 1,079 | 0 / 255 / 255 / 255 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 37,866 / 42,472 / 44,630 / 45,417 | 2,598 / 3,145 / 3,407 / 3,593 | 1,175 / 1,604 / 1,740 / 1,815 | 524 / 551 / 552 / 557 | 5,196 / 5,378 / 5,448 / 5,510 | 1,014 / 1,077 / 1,237 / 1,290 | 136 / 209 / 227 / 233 |
Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3294 | 0.2250 | 0.2534 | 0.3325 | 1.0000 | 0.3858 | 0.2797 | 0.2836 | 0.3859 | 1.0000 | 0.3505 | 0.2499 | 0.2570 | 0.3449 | 1.0000 | - | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 | - | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 2,123 / 2,123 / 2,123 / 2,123 | 319 / 319 / 319 / 319 | 53 / 53 / 53 / 53 | 32 / 32 / 32 / 32 | 282 / 282 / 282 / 282 | 50 / 50 / 50 / 50 | 15 / 15 / 15 / 15 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 7,913 / 7,913 / 7,913 / 7,913 | 1,191 / 1,191 / 1,191 / 1,191 | 198 / 198 / 198 / 198 | 119 / 119 / 119 / 119 | 1,051 / 1,051 / 1,051 / 1,051 | 189 / 189 / 189 / 189 | 0 / 59 / 59 / 59 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 11,411 / 13,677 / 14,775 / 15,007 | 1,065 / 1,541 / 1,834 / 2,047 | 164 / 233 / 291 / 296 | 147 / 164 / 164 / 164 | 1,358 / 1,329 / 1,371 / 1,342 | 135 / 135 / 133 / 148 | 26 / 40 / 50 / 53 |
-
+
Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2028 | 0.1964 | 0.2072 | 0.2220 | 1.0000 | 0.2571 | 0.2377 | 0.2438 | 0.2713 | 1.0000 | 0.2468 | 0.2298 | 0.2238 | 0.2435 | 1.0000 | - | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 | - | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 | + Num match summary -
+ **recall 0.10** + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 14,790 / 14,790 / 14,790 / 14,790 | 1,639 / 1,639 / 1,639 / 1,639 | 546 / 546 / 546 / 546 | 273 / 273 / 273 / 273 | 3,201 / 3,201 / 3,201 / 3,201 | 1,247 / 1,247 / 1,247 / 1,247 | 232 / 232 / 232 / 232 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 55,126 / 55,126 / 55,126 / 55,126 | 6,109 / 6,109 / 6,109 / 6,109 | 2,037 / 2,037 / 2,037 / 2,037 | 1,019 / 1,019 / 1,019 / 1,019 | 11,931 / 11,931 / 11,931 / 11,931 | 4,651 / 4,651 / 4,651 / 4,651 | 0 / 867 / 867 / 867 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 100,294 / 109,159 / 113,989 / 115,141 | 8,931 / 10,558 / 11,357 / 11,896 | 3,256 / 3,944 / 4,259 / 4,307 | 1,785 / 1,879 / 1,880 / 1,887 | 20,949 / 21,293 / 21,452 / 21,637 | 5,511 / 6,135 / 6,340 / 6,540 | 605 / 817 / 935 / 960 | + +
@@ -213,58 +439,51 @@
- J6Gen2 + JPNTaxi Gen2 -- Datasets (3,951 Testing Frames): - - `db_j6gen2_v1` - - `db_j6gen2_v2` - - `db_j6gen2_v3` - - `db_j6gen2_v4` - - `db_j6gen2_v5` - - `db_j6gen2_v6` - - `db_j6gen2_v7` - - `db_j6gen2_v8` - - `db_j6gen2_v9` +- Datasets (10,687 Testing Frames): + - `db_jpntaxigen2_v1` + - `db_jpntaxigen2_v2` - **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m**
Eval Range: 0.0 - 50.0m - | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | + | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8776 | 0.8370 | 0.8907 | 0.8438 | 0.9473 | 0.8665 | 0.8397 | - | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 | + | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 | + | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 |
Eval Range: 50.0 - 90.0m - | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | + | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.6805 | 0.6279 | 0.7957 | 0.6451 | 0.7955 | 0.5394 | 0.6266 | - | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 | + | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 | + | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 |
Eval Range: 90.0 - 121.0m - | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | + | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.4902 | 0.4491 | 0.6483 | 0.4871 | 0.5172 | 0.4406 | 0.3578 | - | BEVFusion-LiDAR base/2.6.0 | 0.4462 | 0.4042 | 0.6346 | 0.4758 | 0.3215 | 0.4303 | 0.3688 | + | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 | + | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 |
Eval Range: 0.0 - 121.0m - | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | + | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7822 | 0.7349 | 0.8292 | 0.7169 | 0.8590 | 0.7505 | 0.7556 | - | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 | + | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 | + | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 |
From 70fb27a4a349660c831c5a7f1f663667d076e751 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 17:02:56 +0900 Subject: [PATCH 154/162] Update evaluation metrics --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 310 +++++++++++++++--- .../docs/BEVFusion-L/v2/jpntaxi_base.md | 41 ++- 2 files changed, 310 insertions(+), 41 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index e8b5310e4..7374d7a9b 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -246,7 +246,7 @@ | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(54,217) | truck
(4,913) | bus
(2,116) | bicycle
(838) | pedestrian
(8,336) | traffic_cone
(2,632) | barrier
(622) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5590 | 0.5053 | 0.5849 | 0.5656 | 0.5581 | 0.5387 | 0.7864 | 0.6212 | 0.7611 | 0.6674 | 0.6253 | 0.2711 | 0.1807 | - +
@@ -260,7 +260,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(134,456) | truck
(14,900) | bus
(4,970) | bicycle
(2,486) | pedestrian
(29,102) | traffic_cone
(11,344) | barrier
(2,117) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.6463 | 0.5953 | 0.6403 | 0.6221 | 0.6148 | 0.5966 | 0.8310 | 0.7078 | 0.8174 | 0.7884 | 0.7558 | 0.3971 | 0.2263 | @@ -398,42 +398,162 @@
Eval Range: 0.0 - 50.0m - - | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8876 | 0.8447 | 0.9176 | 0.8727 | 0.9443 | 0.8396 | 0.8639 | - | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(14,872) | truck
(1,192) | bus
(336) | bicycle
(740) | pedestrian
(5,055) | traffic_cone
(60) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.6292 | 0.5987 | 0.5796 | 0.5491 | 0.5644 | 0.5339 | 0.9088 | 0.8625 | 0.9253 | 0.8660 | 0.8414 | 0.0000 | 0.0000 |
Eval Range: 50.0 - 90.0m + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(10,929) | truck
(1,009) | bus
(141) | bicycle
(460) | pedestrian
(3,721) | traffic_cone
(4) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5080 | 0.4699 | 0.4842 | 0.4820 | 0.4652 | 0.4630 | 0.8284 | 0.6953 | 0.8101 | 0.5551 | 0.6672 | 0.0000 | 0.0000 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5281 | 0.4877 | 0.4942 | 0.4916 | 0.4740 | 0.4714 | 0.8442 | 0.7108 | 0.8522 | 0.5764 | 0.7129 | 0.0000 | 0.0000 | - | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7392 | 0.6842 | 0.8425 | 0.7288 | 0.8580 | 0.5826 | 0.6839 | - | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 | +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(2,883) | truck
(600) | bus
(60) | bicycle
(85) | pedestrian
(1,092) | traffic_cone
(0) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3869 | 0.3535 | 0.4036 | 0.3922 | 0.3870 | 0.3755 | 0.7338 | 0.6045 | 0.5314 | 0.3490 | 0.4896 | 0.0000 | 0.0000 |
+
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(28,684) | truck
(2,801) | bus
(537) | bicycle
(1,285) | pedestrian
(9,868) | traffic_cone
(64) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5663 | 0.5318 | 0.5398 | 0.5103 | 0.5226 | 0.4931 | 0.8718 | 0.7543 | 0.8572 | 0.7306 | 0.7502 | 0.0000 | 0.0000 | + +
+ +- **Mean TPError** + +
+ Eval Range: 0.0 - 50.0m + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2738 | 0.4304 | 0.3040 | 0.3416 | 1.0000 | 0.3927 | 0.3902 | 0.3987 | 0.4730 | 1.0000 | 0.1903 | 0.3709 | 0.2019 | 0.2298 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 1,635 / 1,635 / 1,635 / 1,635 | 131 / 131 / 131 / 131 | 36 / 36 / 36 / 36 | 81 / 81 / 81 / 81 | 556 / 556 / 556 / 556 | 6 / 6 / 6 / 6 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 6,097 / 6,097 / 6,097 / 6,097 | 488 / 488 / 488 / 488 | 137 / 137 / 137 / 137 | 303 / 303 / 303 / 303 | 2,072 / 2,072 / 2,072 / 2,072 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 6,097 / 6,097 / 6,097 / 6,097 | 488 / 488 / 488 / 488 | 137 / 137 / 137 / 137 | 303 / 303 / 303 / 303 | 2,072 / 2,072 / 2,072 / 2,072 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 13,062 / 13,479 / 13,554 / 13,743 | 932 / 1,043 / 1,066 / 1,074 | 275 / 321 / 324 / 324 | 602 / 607 / 608 / 613 | 4,140 / 4,178 / 4,198 / 4,214 | 23 / 13 / 13 / 13 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,178 / 13,676 / 13,748 / 13,798 | 925 / 1,041 / 1,064 / 1,073 | 254 / 330 / 333 / 333 | 612 / 628 / 640 / 643 | 4,247 / 4,294 / 4,313 / 4,330 | 19 / 20 / 20 / 21 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3215 | 0.6553 | 0.3435 | 0.3779 | 1.0000 | 0.3296 | 0.6554 | 0.3494 | 0.3854 | 1.0000 | 0.2287 | 0.6009 | 0.2438 | 0.2891 | 1.0000 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3091 | 0.6981 | 0.3081 | 0.3833 | 1.0000 | 0.3181 | 0.6966 | 0.3115 | 0.3980 | 1.0000 | 0.2197 | 0.6583 | 0.2015 | 0.3121 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 1,202 / 1,202 / 1,202 / 1,202 | 110 / 110 / 110 / 110 | 15 / 15 / 15 / 15 | 50 / 50 / 50 / 50 | 409 / 409 / 409 / 409 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,202 / 1,202 / 1,202 / 1,202 | 110 / 110 / 110 / 110 | 15 / 15 / 15 / 15 | 50 / 50 / 50 / 50 | 409 / 409 / 409 / 409 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 4,480 / 4,480 / 4,480 / 4,480 | 413 / 413 / 413 / 413 | 57 / 57 / 57 / 57 | 188 / 188 / 188 / 188 | 1,525 / 1,525 / 1,525 / 1,525 | 1 / 1 / 1 / 1 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 4,480 / 4,480 / 4,480 / 4,480 | 413 / 413 / 413 / 413 | 57 / 57 / 57 / 57 | 188 / 188 / 188 / 188 | 1,525 / 1,525 / 1,525 / 1,525 | 1 / 1 / 1 / 1 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 8,159 / 8,918 / 9,145 / 9,193 | 584 / 735 / 782 / 787 | 97 / 115 / 115 / 115 | 243 / 263 / 265 / 265 | 2,464 / 2,492 / 2,508 / 2,524 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,463 / 9,288 / 9,554 / 9,621 | 617 / 739 / 799 / 804 | 103 / 124 / 124 / 124 | 263 / 289 / 292 / 292 | 2,604 / 2,652 / 2,667 / 2,682 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 | + +
+
Eval Range: 90.0 - 121.0m - | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.5572 | 0.5118 | 0.7091 | 0.6393 | 0.6121 | 0.3386 | 0.4870 | - | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 | + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.4740 | 0.4126 | 0.4223 | 0.5892 | 1.0000 | 0.4917 | 0.4497 | 0.4298 | 0.6415 | 1.0000 | 0.2930 | 0.2332 | 0.2012 | 0.4842 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 317 / 317 / 317 / 317 | 66 / 66 / 66 / 66 | 6 / 6 / 6 / 6 | 9 / 9 / 9 / 9 | 120 / 120 / 120 / 120 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 1,182 / 1,182 / 1,182 / 1,182 | 246 / 246 / 246 / 246 | 24 / 24 / 24 / 24 | 34 / 34 / 34 / 34 | 447 / 447 / 447 / 447 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 1,763 / 2,103 / 2,227 / 2,243 | 254 / 358 / 448 / 456 | 24 / 38 / 31 / 31 | 38 / 44 / 46 / 46 | 613 / 619 / 623 / 626 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 |
Eval Range: 0.0 - 121.0m - | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8086 | 0.7594 | 0.8789 | 0.7783 | 0.8898 | 0.7288 | 0.7670 | - | BEVFusion-LiDAR base/2.6.0 | 0.7995 | 0.7514 | 0.8640 | 0.7788 | 0.8608 | 0.7272 | 0.7669 | + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2926 | 0.4708 | 0.3148 | 0.3550 | 1.0000 | 0.4159 | 0.4150 | 0.4074 | 0.4899 | 1.0000 | 0.2138 | 0.4027 | 0.2131 | 0.2586 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 3,155 / 3,155 / 3,155 / 3,155 | 308 / 308 / 308 / 308 | 59 / 59 / 59 / 59 | 141 / 141 / 141 / 141 | 1,085 / 1,085 / 1,085 / 1,085 | 7 / 7 / 7 / 7 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 3,155 / 3,155 / 3,155 / 3,155 | 308 / 308 / 308 / 308 | 59 / 59 / 59 / 59 | 141 / 141 / 141 / 141 | 1,085 / 1,085 / 1,085 / 1,085 | 7 / 7 / 7 / 7 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 11,760 / 11,760 / 11,760 / 11,760 | 1,148 / 1,148 / 1,148 / 1,148 | 220 / 220 / 220 / 220 | 526 / 526 / 526 / 526 | 4,045 / 4,045 / 4,045 / 4,045 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 11,760 / 11,760 / 11,760 / 11,760 | 1,148 / 1,148 / 1,148 / 1,148 | 220 / 220 / 220 / 220 | 526 / 526 / 526 / 526 | 4,045 / 4,045 / 4,045 / 4,045 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 23,001 / 24,465 / 24,972 / 25,071 | 1,757 / 2,148 / 2,295 / 2,313 | 391 / 442 / 445 / 445 | 839 / 875 / 938 / 944 | 7,078 / 7,166 / 7,204 / 7,247 | 13 / 15 / 15 / 15 | 0 / 0 / 0 / 0 | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 23,528 / 24,950 / 25,596 / 25,735 | 1,866 / 2,196 / 2,328 / 2,367 | 379 / 486 / 490 / 490 | 874 / 918 / 941 / 944 | 7,461 / 7,553 / 7,587 / 7,622 | 19 / 22 / 22 / 23 | 0 / 0 / 0 / 0 |
@@ -449,41 +569,157 @@
Eval Range: 0.0 - 50.0m - - | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 | - | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.6595 | 0.6258 | 0.5925 | 0.5885 | 0.5757 | 0.5717 | 0.9193 | 0.8663 | 0.8424 | 0.7784 | 0.9038 | 0.3064 | 0.0000 |
Eval Range: 50.0 - 90.0m - - | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 | - | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(41,196) | truck
(22,942) | bus
(3,033) | bicycle
(2,310) | pedestrian
(36,881) | traffic_cone
(7,183) | barrier
(1,847) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5440 | 0.5111 | 0.5263 | 0.5188 | 0.5099 | 0.5024 | 0.8350 | 0.6741 | 0.5382 | 0.6234 | 0.7829 | 0.3548 | 0.0000 |
Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(17,510) | truck
(14,707) | bus
(2,997) | bicycle
(566) | pedestrian
(16,580) | traffic_cone
(769) | barrier
(566) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3700 | 0.3469 | 0.4109 | 0.3757 | 0.3994 | 0.3641 | 0.7043 | 0.5157 | 0.3679 | 0.2959 | 0.7000 | 0.0063 | 0.0000 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(109,660) | truck
(56,273) | bus
(9,883) | bicycle
(6,644) | pedestrian
(124,160) | traffic_cone
(20,477) | barrier
(4,422) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.5785 | 0.5444 | 0.5467 | 0.5405 | 0.5296 | 0.5234 | 0.8675 | 0.7091 | 0.6251 | 0.6924 | 0.8516 | 0.3040 | 0.0000 | + +
+ + - **Mean TPError** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2759 | 0.3051 | 0.2927 | 0.4984 | 1.0000 | 0.2896 | 0.3123 | 0.2968 | 0.5135 | 1.0000 | 0.1903 | 0.2037 | 0.1876 | 0.4654 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 5,604 / 5,604 / 5,604 / 5,604 | 2,048 / 2,048 / 2,048 / 2,048 | 423 / 423 / 423 / 423 | 414 / 414 / 414 / 414 | 7,776 / 7,776 / 7,776 / 7,776 | 1,377 / 1,377 / 1,377 / 1,377 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 20,891 / 20,891 / 20,891 / 20,891 | 7,635 / 7,635 / 7,635 / 7,635 | 1,579 / 1,579 / 1,579 / 1,579 | 1,544 / 1,544 / 1,544 / 1,544 | 28,986 / 28,986 / 28,986 / 28,986 | 5,135 / 5,135 / 5,135 / 5,135 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 44,376 / 46,850 / 47,216 / 47,606 | 14,516 / 16,104 / 17,017 / 17,294 | 3,080 / 3,354 / 3,439 / 3,443 | 2,874 / 2,897 / 2,900 / 2,900 | 59,982 / 60,714 / 61,271 / 61,531 | 5,720 / 6,079 / 6,376 / 6,515 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + **Mean TP error summary** + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3424 | 0.2851 | 0.3099 | 0.5197 | 1.0000 | 0.3636 | 0.2926 | 0.3147 | 0.5610 | 1.0000 | 0.2700 | 0.1826 | 0.2042 | 0.5180 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 4,531 / 4,531 / 4,531 / 4,531 | 2,523 / 2,523 / 2,523 / 2,523 | 333 / 333 / 333 / 333 | 254 / 254 / 254 / 254 | 4,056 / 4,056 / 4,056 / 4,056 | 790 / 790 / 790 / 790 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 16,890 / 16,890 / 16,890 / 16,890 | 9,406 / 9,406 / 9,406 / 9,406 | 1,243 / 1,243 / 1,243 / 1,243 | 947 / 947 / 947 / 947 | 15,121 / 15,121 / 15,121 / 15,121 | 2,945 / 2,945 / 2,945 / 2,945 | 0 / 0 / 0 / 0 | + + **optimal** - | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 | - | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 30,777 / 33,945 / 34,775 / 34,956 | 12,711 / 15,140 / 17,099 / 18,052 | 1,191 / 1,652 / 1,886 / 1,924 | 1,384 / 1,483 / 1,484 / 1,496 | 27,185 / 28,060 / 28,214 / 28,437 | 3,298 / 3,377 / 3,633 / 3,756 | 0 / 0 / 0 / 0 |
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.4063 | 0.3864 | 0.3484 | 0.5995 | 1.0000 | 0.5077 | 0.3923 | 0.4395 | 0.7535 | 1.0000 | 0.3267 | 0.2998 | 0.2415 | 0.5890 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 1,926 / 1,926 / 1,926 / 1,926 | 1,617 / 1,617 / 1,617 / 1,617 | 329 / 329 / 329 / 329 | 62 / 62 / 62 / 62 | 1,823 / 1,823 / 1,823 / 1,823 | 84 / 84 / 84 / 84 | 0 / 0 / 0 / 0 | + + **recall 0.40** + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 7,179 / 7,179 / 7,179 / 7,179 | 6,029 / 6,029 / 6,029 / 6,029 | 1,228 / 1,228 / 1,228 / 1,228 | 232 / 232 / 232 / 232 | 6,797 / 6,797 / 6,797 / 6,797 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 10,800 / 12,644 / 13,151 / 13,260 | 5,665 / 8,075 / 9,757 / 10,776 | 1,062 / 1,422 / 1,542 / 1,587 | 221 / 245 / 257 / 261 | 11,283 / 11,409 / 11,480 / 11,627 | 167 / 177 / 127 / 133 | 0 / 0 / 0 / 0 | + +
+
Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.3080 | 0.2997 | 0.3015 | 0.5162 | 1.0000 | 0.3316 | 0.3074 | 0.3083 | 0.5404 | 1.0000 | 0.2360 | 0.2001 | 0.1988 | 0.4973 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 12,062 / 12,062 / 12,062 / 12,062 | 6,190 / 6,190 / 6,190 / 6,190 | 1,087 / 1,087 / 1,087 / 1,087 | 730 / 730 / 730 / 730 | 13,657 / 13,657 / 13,657 / 13,657 | 2,252 / 2,252 / 2,252 / 2,252 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 44,960 / 44,960 / 44,960 / 44,960 | 23,071 / 23,071 / 23,071 / 23,071 | 4,052 / 4,052 / 4,052 / 4,052 | 2,724 / 2,724 / 2,724 / 2,724 | 50,905 / 50,905 / 50,905 / 50,905 | 8,395 / 8,395 / 8,395 / 8,395 | 0 / 0 / 0 / 0 | - | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 | - | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 | + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR base/2.8.0 | 85,327 / 93,303 / 94,826 / 95,316 | 32,549 / 39,457 / 43,979 / 46,340 | 5,296 / 6,439 / 6,895 / 6,991 | 4,446 / 4,536 / 4,537 / 4,547 | 98,679 / 99,889 / 100,492 / 101,325 | 9,207 / 9,578 / 10,010 / 10,328 | 0 / 0 / 0 / 0 |
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md index fc9e2677d..597910f21 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -25,10 +25,10 @@
Eval Range: 0.0 - 50.0m - - | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.8862 | 0.8586 | 0.9397 | 0.8591 | 0.8839 | 0.8264 | 0.9218 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6765 | 0.6414 | 0.6054 | 0.6011 | 0.5878 | 0.5835 | 0.9267 | 0.8595 | 0.8713 | 0.7844 | 0.9097 | 0.3843 | 0.0000 |
@@ -59,6 +59,39 @@
+### Mean TPError - JPNTaxi_gen2 + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.2719 | 0.2964 | 0.2975 | 0.4627 | 1.0000 | 0.2842 | 0.3077 | 0.3000 | 0.4803 | 1.0000 | 0.1861 | 0.1991 | 0.1904 | 0.4301 | 1.0000 | + +Num match summary + +**recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 5,604 / 5,604 / 5,604 / 5,604 | 2,048 / 2,048 / 2,048 / 2,048 | 423 / 423 / 423 / 423 | 414 / 414 / 414 / 414 | 7,776 / 7,776 / 7,776 / 7,776 | 1,377 / 1,377 / 1,377 / 1,377 | 0 / 0 / 0 / 0 | + +**recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 20,891 / 20,891 / 20,891 / 20,891 | 7,635 / 7,635 / 7,635 / 7,635 | 1,579 / 1,579 / 1,579 / 1,579 | 1,544 / 1,544 / 1,544 / 1,544 | 28,986 / 28,986 / 28,986 / 28,986 | 5,135 / 5,135 / 5,135 / 5,135 | 0 / 0 / 0 / 0 | + +**optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 44,532 / 46,751 / 47,578 / 47,681 | 14,348 / 16,107 / 17,107 / 17,466 | 3,000 / 3,436 / 3,542 / 3,547 | 2,845 / 2,913 / 2,876 / 2,876 | 60,539 / 61,514 / 62,277 / 62,441 | 6,486 / 6,824 / 7,125 / 7,398 | 0 / 0 / 0 / 0 | + +
+ + + ## Release ### BEVFusion-LiDAR JPNTaxi_base/2.7.1 From c75dd8e5164743e2722f1c4a98292ab118c0b9bc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:03:33 +0000 Subject: [PATCH 155/162] ci(pre-commit): autofix --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 78 +++++++++---------- .../docs/BEVFusion-L/v2/jpntaxi_base.md | 4 +- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 7374d7a9b..1ad3cc2bb 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -53,16 +53,16 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(126,168) | truck
(26,897) | bus
(6,559) | bicycle
(5,865) | pedestrian
(93,520) | traffic_cone
(20,835) | barrier
(3,359) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.6763 | 0.6381 | 0.6507 | 0.6062 | 0.6316 | 0.5871 | 0.9065 | 0.8566 | 0.8705 | 0.8157 | 0.8913 | 0.3417 | 0.0521 | - +
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(105,914) | truck
(28,864) | bus
(5,290) | bicycle
(3,608) | pedestrian
(48,637) | traffic_cone
(9,819) | barrier
(2,469) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5447 | 0.5067 | 0.5590 | 0.5236 | 0.5400 | 0.5046 | 0.8132 | 0.6652 | 0.6404 | 0.6241 | 0.7502 | 0.3184 | 0.0017 | @@ -71,7 +71,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(39,577) | truck
(18,213) | bus
(3,541) | bicycle
(942) | pedestrian
(20,134) | traffic_cone
(1,231) | barrier
(711) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3779 | 0.3496 | 0.4428 | 0.3903 | 0.4287 | 0.3762 | 0.6979 | 0.5143 | 0.3860 | 0.3610 | 0.6588 | 0.0272 | 0.0002 | @@ -80,19 +80,19 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(271,659) | truck
(73,974) | bus
(15,390) | bicycle
(10,415) | pedestrian
(162,291) | traffic_cone
(31,885) | barrier
(6,539) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5936 | 0.5554 | 0.6017 | 0.5555 | 0.5826 | 0.5365 | 0.8534 | 0.7110 | 0.6992 | 0.7185 | 0.8315 | 0.3204 | 0.0209 |
-### Mean TPError - Base +### Mean TPError - Base - Recalls: `0.10`, `0.40`, `optimal`
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.1796 | 0.1993 | 0.2024 | 0.2937 | 1.0000 | 0.2857 | 0.2916 | 0.2957 | 0.4466 | 1.0000 | 0.2149 | 0.2196 | 0.2175 | 0.3260 | 1.0000 | @@ -120,7 +120,7 @@
- + Eval Range: 50.0 - 90.0m | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | @@ -150,7 +150,7 @@
- + Eval Range: 90.0 - 121.0m | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | @@ -180,7 +180,7 @@
- + Eval Range: 0.0 - 121.0m | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | @@ -233,25 +233,25 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(60,938) | truck
(7,081) | bus
(2,370) | bicycle
(1,357) | pedestrian
(18,202) | traffic_cone
(8,250) | barrier
(1,350) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.7246 | 0.6765 | 0.6874 | 0.6712 | 0.6633 | 0.6471 | 0.8849 | 0.8325 | 0.9034 | 0.9004 | 0.8381 | 0.4459 | 0.2671 |
- +
Eval Range: 50.0 - 90.0m | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(54,217) | truck
(4,913) | bus
(2,116) | bicycle
(838) | pedestrian
(8,336) | traffic_cone
(2,632) | barrier
(622) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5590 | 0.5053 | 0.5849 | 0.5656 | 0.5581 | 0.5387 | 0.7864 | 0.6212 | 0.7611 | 0.6674 | 0.6253 | 0.2711 | 0.1807 | - +
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(19,301) | truck
(2,906) | bus
(484) | bicycle
(291) | pedestrian
(2,564) | traffic_cone
(462) | barrier
(145) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.4021 | 0.3638 | 0.4870 | 0.4675 | 0.4679 | 0.4484 | 0.6848 | 0.4894 | 0.4972 | 0.4913 | 0.4232 | 0.1266 | 0.1024 | @@ -268,10 +268,10 @@
- **Mean TPError** - +
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.1731 | 0.1809 | 0.1966 | 0.1987 | 1.0000 | 0.2178 | 0.2153 | 0.2319 | 0.2464 | 1.0000 | 0.2080 | 0.2074 | 0.2153 | 0.2185 | 1.0000 | @@ -300,7 +300,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.2509 | 0.2223 | 0.2170 | 0.2557 | 1.0000 | 0.3081 | 0.2757 | 0.2529 | 0.3025 | 1.0000 | 0.2876 | 0.2507 | 0.2251 | 0.2699 | 1.0000 | @@ -329,7 +329,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3294 | 0.2250 | 0.2534 | 0.3325 | 1.0000 | 0.3858 | 0.2797 | 0.2836 | 0.3859 | 1.0000 | 0.3505 | 0.2499 | 0.2570 | 0.3449 | 1.0000 | @@ -358,7 +358,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.2028 | 0.1964 | 0.2072 | 0.2220 | 1.0000 | 0.2571 | 0.2377 | 0.2438 | 0.2713 | 1.0000 | 0.2468 | 0.2298 | 0.2238 | 0.2435 | 1.0000 | @@ -398,7 +398,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(14,872) | truck
(1,192) | bus
(336) | bicycle
(740) | pedestrian
(5,055) | traffic_cone
(60) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.6292 | 0.5987 | 0.5796 | 0.5491 | 0.5644 | 0.5339 | 0.9088 | 0.8625 | 0.9253 | 0.8660 | 0.8414 | 0.0000 | 0.0000 | @@ -416,7 +416,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(2,883) | truck
(600) | bus
(60) | bicycle
(85) | pedestrian
(1,092) | traffic_cone
(0) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3869 | 0.3535 | 0.4036 | 0.3922 | 0.3870 | 0.3755 | 0.7338 | 0.6045 | 0.5314 | 0.3490 | 0.4896 | 0.0000 | 0.0000 | @@ -425,15 +425,15 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(28,684) | truck
(2,801) | bus
(537) | bicycle
(1,285) | pedestrian
(9,868) | traffic_cone
(64) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5663 | 0.5318 | 0.5398 | 0.5103 | 0.5226 | 0.4931 | 0.8718 | 0.7543 | 0.8572 | 0.7306 | 0.7502 | 0.0000 | 0.0000 |
- + - **Mean TPError** - +
Eval Range: 0.0 - 50.0m | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | @@ -463,7 +463,7 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,178 / 13,676 / 13,748 / 13,798 | 925 / 1,041 / 1,064 / 1,073 | 254 / 330 / 333 / 333 | 612 / 628 / 640 / 643 | 4,247 / 4,294 / 4,313 / 4,330 | 19 / 20 / 20 / 21 | 0 / 0 / 0 / 0 |
- +
Eval Range: 50.0 - 90.0m @@ -496,7 +496,7 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,463 / 9,288 / 9,554 / 9,621 | 617 / 739 / 799 / 804 | 103 / 124 / 124 / 124 | 263 / 289 / 292 / 292 | 2,604 / 2,652 / 2,667 / 2,682 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 |
- +
Eval Range: 90.0 - 121.0m @@ -569,7 +569,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.6595 | 0.6258 | 0.5925 | 0.5885 | 0.5757 | 0.5717 | 0.9193 | 0.8663 | 0.8424 | 0.7784 | 0.9038 | 0.3064 | 0.0000 | @@ -578,7 +578,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(41,196) | truck
(22,942) | bus
(3,033) | bicycle
(2,310) | pedestrian
(36,881) | traffic_cone
(7,183) | barrier
(1,847) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5440 | 0.5111 | 0.5263 | 0.5188 | 0.5099 | 0.5024 | 0.8350 | 0.6741 | 0.5382 | 0.6234 | 0.7829 | 0.3548 | 0.0000 | @@ -587,7 +587,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(17,510) | truck
(14,707) | bus
(2,997) | bicycle
(566) | pedestrian
(16,580) | traffic_cone
(769) | barrier
(566) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3700 | 0.3469 | 0.4109 | 0.3757 | 0.3994 | 0.3641 | 0.7043 | 0.5157 | 0.3679 | 0.2959 | 0.7000 | 0.0063 | 0.0000 | @@ -596,18 +596,18 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(109,660) | truck
(56,273) | bus
(9,883) | bicycle
(6,644) | pedestrian
(124,160) | traffic_cone
(20,477) | barrier
(4,422) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5785 | 0.5444 | 0.5467 | 0.5405 | 0.5296 | 0.5234 | 0.8675 | 0.7091 | 0.6251 | 0.6924 | 0.8516 | 0.3040 | 0.0000 |
- + - **Mean TPError** - +
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.2759 | 0.3051 | 0.2927 | 0.4984 | 1.0000 | 0.2896 | 0.3123 | 0.2968 | 0.5135 | 1.0000 | 0.1903 | 0.2037 | 0.1876 | 0.4654 | 1.0000 | @@ -631,12 +631,12 @@ | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 44,376 / 46,850 / 47,216 / 47,606 | 14,516 / 16,104 / 17,017 / 17,294 | 3,080 / 3,354 / 3,439 / 3,443 | 2,874 / 2,897 / 2,900 / 2,900 | 59,982 / 60,714 / 61,271 / 61,531 | 5,720 / 6,079 / 6,376 / 6,515 | 0 / 0 / 0 / 0 | - +
- +
Eval Range: 50.0 - 90.0m - + **Mean TP error summary** | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | @@ -664,7 +664,7 @@ | BEVFusion-LiDAR base/2.8.0 | 30,777 / 33,945 / 34,775 / 34,956 | 12,711 / 15,140 / 17,099 / 18,052 | 1,191 / 1,652 / 1,886 / 1,924 | 1,384 / 1,483 / 1,484 / 1,496 | 27,185 / 28,060 / 28,214 / 28,437 | 3,298 / 3,377 / 3,633 / 3,756 | 0 / 0 / 0 / 0 |
- +
Eval Range: 90.0 - 121.0m @@ -693,10 +693,10 @@ | BEVFusion-LiDAR base/2.8.0 | 10,800 / 12,644 / 13,151 / 13,260 | 5,665 / 8,075 / 9,757 / 10,776 | 1,062 / 1,422 / 1,542 / 1,587 | 221 / 245 / 257 / 261 | 11,283 / 11,409 / 11,480 / 11,627 | 167 / 177 / 127 / 133 | 0 / 0 / 0 / 0 |
- +
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3080 | 0.2997 | 0.3015 | 0.5162 | 1.0000 | 0.3316 | 0.3074 | 0.3083 | 0.5404 | 1.0000 | 0.2360 | 0.2001 | 0.1988 | 0.4973 | 1.0000 | diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md index 597910f21..24454df13 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -25,7 +25,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6765 | 0.6414 | 0.6054 | 0.6011 | 0.5878 | 0.5835 | 0.9267 | 0.8595 | 0.8713 | 0.7844 | 0.9097 | 0.3843 | 0.0000 | @@ -63,7 +63,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.2719 | 0.2964 | 0.2975 | 0.4627 | 1.0000 | 0.2842 | 0.3077 | 0.3000 | 0.4803 | 1.0000 | 0.1861 | 0.1991 | 0.1904 | 0.4301 | 1.0000 | From f9e24ffd5f25f54d765d8b328994e0355f784f9c Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 17:10:25 +0900 Subject: [PATCH 156/162] Update evaluation metrics --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 1ad3cc2bb..6047b1d75 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -407,10 +407,10 @@
Eval Range: 50.0 - 90.0m + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(10,929) | truck
(1,009) | bus
(141) | bicycle
(460) | pedestrian
(3,721) | traffic_cone
(4) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5080 | 0.4699 | 0.4842 | 0.4820 | 0.4652 | 0.4630 | 0.8284 | 0.6953 | 0.8101 | 0.5551 | 0.6672 | 0.0000 | 0.0000 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5281 | 0.4877 | 0.4942 | 0.4916 | 0.4740 | 0.4714 | 0.8442 | 0.7108 | 0.8522 | 0.5764 | 0.7129 | 0.0000 | 0.0000 |
@@ -436,9 +436,10 @@
Eval Range: 0.0 - 50.0m - | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.8.0 | 0.2738 | 0.4304 | 0.3040 | 0.3416 | 1.0000 | 0.3927 | 0.3902 | 0.3987 | 0.4730 | 1.0000 | 0.1903 | 0.3709 | 0.2019 | 0.2298 | 1.0000 | + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.8.0 | 0.2738 | 0.4304 | 0.3040 | 0.3416 | 1.0000 | 0.3927 | 0.3902 | 0.3987 | 0.4730 | 1.0000 | 0.1903 | 0.3709 | 0.2019 | 0.2298 | 1.0000 | Num match summary @@ -453,14 +454,12 @@ | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 6,097 / 6,097 / 6,097 / 6,097 | 488 / 488 / 488 / 488 | 137 / 137 / 137 / 137 | 303 / 303 / 303 / 303 | 2,072 / 2,072 / 2,072 / 2,072 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 6,097 / 6,097 / 6,097 / 6,097 | 488 / 488 / 488 / 488 | 137 / 137 / 137 / 137 | 303 / 303 / 303 / 303 | 2,072 / 2,072 / 2,072 / 2,072 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | **optimal** | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 13,062 / 13,479 / 13,554 / 13,743 | 932 / 1,043 / 1,066 / 1,074 | 275 / 321 / 324 / 324 | 602 / 607 / 608 / 613 | 4,140 / 4,178 / 4,198 / 4,214 | 23 / 13 / 13 / 13 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,178 / 13,676 / 13,748 / 13,798 | 925 / 1,041 / 1,064 / 1,073 | 254 / 330 / 333 / 333 | 612 / 628 / 640 / 643 | 4,247 / 4,294 / 4,313 / 4,330 | 19 / 20 / 20 / 21 | 0 / 0 / 0 / 0 |
@@ -470,7 +469,6 @@ | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3215 | 0.6553 | 0.3435 | 0.3779 | 1.0000 | 0.3296 | 0.6554 | 0.3494 | 0.3854 | 1.0000 | 0.2287 | 0.6009 | 0.2438 | 0.2891 | 1.0000 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3091 | 0.6981 | 0.3081 | 0.3833 | 1.0000 | 0.3181 | 0.6966 | 0.3115 | 0.3980 | 1.0000 | 0.2197 | 0.6583 | 0.2015 | 0.3121 | 1.0000 | Num match summary @@ -479,21 +477,18 @@ | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 1,202 / 1,202 / 1,202 / 1,202 | 110 / 110 / 110 / 110 | 15 / 15 / 15 / 15 | 50 / 50 / 50 / 50 | 409 / 409 / 409 / 409 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,202 / 1,202 / 1,202 / 1,202 | 110 / 110 / 110 / 110 | 15 / 15 / 15 / 15 | 50 / 50 / 50 / 50 | 409 / 409 / 409 / 409 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | **recall 0.40** | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 4,480 / 4,480 / 4,480 / 4,480 | 413 / 413 / 413 / 413 | 57 / 57 / 57 / 57 | 188 / 188 / 188 / 188 | 1,525 / 1,525 / 1,525 / 1,525 | 1 / 1 / 1 / 1 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 4,480 / 4,480 / 4,480 / 4,480 | 413 / 413 / 413 / 413 | 57 / 57 / 57 / 57 | 188 / 188 / 188 / 188 | 1,525 / 1,525 / 1,525 / 1,525 | 1 / 1 / 1 / 1 | 0 / 0 / 0 / 0 | **optimal** | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 8,159 / 8,918 / 9,145 / 9,193 | 584 / 735 / 782 / 787 | 97 / 115 / 115 / 115 | 243 / 263 / 265 / 265 | 2,464 / 2,492 / 2,508 / 2,524 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,463 / 9,288 / 9,554 / 9,621 | 617 / 739 / 799 / 804 | 103 / 124 / 124 / 124 | 263 / 289 / 292 / 292 | 2,604 / 2,652 / 2,667 / 2,682 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 |
@@ -539,21 +534,18 @@ | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 3,155 / 3,155 / 3,155 / 3,155 | 308 / 308 / 308 / 308 | 59 / 59 / 59 / 59 | 141 / 141 / 141 / 141 | 1,085 / 1,085 / 1,085 / 1,085 | 7 / 7 / 7 / 7 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 3,155 / 3,155 / 3,155 / 3,155 | 308 / 308 / 308 / 308 | 59 / 59 / 59 / 59 | 141 / 141 / 141 / 141 | 1,085 / 1,085 / 1,085 / 1,085 | 7 / 7 / 7 / 7 | 0 / 0 / 0 / 0 | **recall 0.40** | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 11,760 / 11,760 / 11,760 / 11,760 | 1,148 / 1,148 / 1,148 / 1,148 | 220 / 220 / 220 / 220 | 526 / 526 / 526 / 526 | 4,045 / 4,045 / 4,045 / 4,045 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 11,760 / 11,760 / 11,760 / 11,760 | 1,148 / 1,148 / 1,148 / 1,148 | 220 / 220 / 220 / 220 | 526 / 526 / 526 / 526 | 4,045 / 4,045 / 4,045 / 4,045 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | **optimal** | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | | BEVFusion-LiDAR base/2.8.0 | 23,001 / 24,465 / 24,972 / 25,071 | 1,757 / 2,148 / 2,295 / 2,313 | 391 / 442 / 445 / 445 | 839 / 875 / 938 / 944 | 7,078 / 7,166 / 7,204 / 7,247 | 13 / 15 / 15 / 15 | 0 / 0 / 0 / 0 | - | BEVFusion-LiDAR j6gen2_base/2.8.1 | 23,528 / 24,950 / 25,596 / 25,735 | 1,866 / 2,196 / 2,328 / 2,367 | 379 / 486 / 490 / 490 | 874 / 918 / 941 / 944 | 7,461 / 7,553 / 7,587 / 7,622 | 19 / 22 / 22 / 23 | 0 / 0 / 0 / 0 |
@@ -602,9 +594,9 @@ | BEVFusion-LiDAR base/2.8.0 | 0.5785 | 0.5444 | 0.5467 | 0.5405 | 0.5296 | 0.5234 | 0.8675 | 0.7091 | 0.6251 | 0.6924 | 0.8516 | 0.3040 | 0.0000 |
- - - **Mean TPError** - + +- **Mean TPError** +
Eval Range: 0.0 - 50.0m @@ -636,9 +628,7 @@
Eval Range: 50.0 - 90.0m - - **Mean TP error summary** - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3424 | 0.2851 | 0.3099 | 0.5197 | 1.0000 | 0.3636 | 0.2926 | 0.3147 | 0.5610 | 1.0000 | 0.2700 | 0.1826 | 0.2042 | 0.5180 | 1.0000 | From 3d5e2fa3df7ad61d9ae773a3ea3f418f4916e05b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:12:19 +0000 Subject: [PATCH 157/162] ci(pre-commit): autofix --- projects/BEVFusion/docs/BEVFusion-L/v2/base.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 6047b1d75..519ff5d95 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -407,7 +407,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(10,929) | truck
(1,009) | bus
(141) | bicycle
(460) | pedestrian
(3,721) | traffic_cone
(4) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.5080 | 0.4699 | 0.4842 | 0.4820 | 0.4652 | 0.4630 | 0.8284 | 0.6953 | 0.8101 | 0.5551 | 0.6672 | 0.0000 | 0.0000 | @@ -436,7 +436,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.2738 | 0.4304 | 0.3040 | 0.3416 | 1.0000 | 0.3927 | 0.3902 | 0.3987 | 0.4730 | 1.0000 | 0.1903 | 0.3709 | 0.2019 | 0.2298 | 1.0000 | @@ -594,9 +594,9 @@ | BEVFusion-LiDAR base/2.8.0 | 0.5785 | 0.5444 | 0.5467 | 0.5405 | 0.5296 | 0.5234 | 0.8675 | 0.7091 | 0.6251 | 0.6924 | 0.8516 | 0.3040 | 0.0000 |
- + - **Mean TPError** - +
Eval Range: 0.0 - 50.0m @@ -628,7 +628,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR base/2.8.0 | 0.3424 | 0.2851 | 0.3099 | 0.5197 | 1.0000 | 0.3636 | 0.2926 | 0.3147 | 0.5610 | 1.0000 | 0.2700 | 0.1826 | 0.2042 | 0.5180 | 1.0000 | From 5f0d52bdb32b00221e949653d13e0cfe1f234b3a Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 18:26:08 +0900 Subject: [PATCH 158/162] Update evaluation metrics --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 1005 +++++++++++++++++ 1 file changed, 1005 insertions(+) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 519ff5d95..4fb185cdb 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -717,6 +717,1011 @@ ## Release +### BEVFusion-LiDAR base/2.8.0 + +
+ Changes +- Update training batch size from `8` to `16` per GPU. +- Update number of max points per voxel from `10` to `32`. +- Implement 1D-flatten sparse to dense to reduce ONNX ops (projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py). +- Purge custom LayerNorm in mmdeploy to support ONNX LayerNorm ops (purge_mmdeploy_symbolics). +- Add two classes: `traffic_cone` and `barrier` to the model. +- Do not apply max-pooling to `bicycle`. + +
+ +
+ Artifacts + +- Deployed ONNX and ROS parameter files (for internal use) + - [WebAuto]() + - [model-zoo]() + - [Google drive](https://drive.google.com/file/d/16dh2UQg4w46WQu0Dbmai9BtD43nz7hLv/view?usp=drive_link) +- Logs (for internal use) + - [model-zoo]() + - [Google drive](https://drive.google.com/file/d/1kQVufXiB_K9JYTL3DSUhGUIGW5fCbbwc/view?usp=drive_link) +- PyTorch best checkpoints: + - [model-zoo]() + - [Google drive](https://drive.google.com/file/d/1wJjdG1dCbOjfmTCaOFWw9-2xNvlfYDza/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/179ca256a165fd483801bec0a2a95c24866edf70/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py) +- Train time: NVIDIA H200 80GB * 8 * 50 epochs ~= 4 days +- Batch size: 8*16 = 128 +- Training Dataset (frames: 151,478): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - j6: db_gsm8_v1 + db_j6_v1 + db_j6_v2 + db_j6_v3 + db_j6_v5 (29,336 frames) + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (51,208 frames) + - largebus: db_largebus_v1 + db_largebus_v2 (12,605 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (30,168 frames) + +
+ +
+ Evaluation + +**Base Datasets (16,597 frames)**: + + - j6gen2 (4,682 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + - jpntaxi_gen2 (10,687 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.6763** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 126,168 | 112,018 / 117,488 / 119,486 / 120,557 | 0.852 / 0.913 / 0.925 / 0.936 | 0.900 / 0.932 / 0.939 / 0.942 | 0.249 / 0.213 / 0.168 / 0.148 | +| truck | 26,897 | 21,675 / 24,088 / 25,410 / 25,929 | 0.719 / 0.849 / 0.916 / 0.942 | 0.810 / 0.884 / 0.922 / 0.937 | 0.293 / 0.202 / 0.176 / 0.176 | +| bus | 6,559 | 5,520 / 5,973 / 6,143 / 6,163 | 0.789 / 0.876 / 0.908 / 0.909 | 0.828 / 0.887 / 0.909 / 0.911 | 0.047 / 0.044 / 0.044 / 0.044 | +| bicycle | 5,865 | 5,319 / 5,410 / 5,419 / 5,428 | 0.799 / 0.820 / 0.821 / 0.823 | 0.819 / 0.826 / 0.827 / 0.828 | 0.234 / 0.202 / 0.236 / 0.236 | +| pedestrian | 93,520 | 87,053 / 88,369 / 88,873 / 89,317 | 0.873 / 0.889 / 0.899 / 0.905 | 0.861 / 0.872 / 0.878 / 0.883 | 0.154 / 0.148 / 0.148 / 0.148 | +| traffic_cone | 20,835 | 13,926 / 14,948 / 15,514 / 15,991 | 0.293 / 0.330 / 0.358 / 0.386 | 0.484 / 0.509 / 0.527 / 0.548 | 0.153 / 0.143 / 0.143 / 0.140 | +| barrier | 3,359 | 518 / 711 / 772 / 804 | 0.017 / 0.055 / 0.066 / 0.071 | 0.199 / 0.263 / 0.279 / 0.285 | 0.151 / 0.107 / 0.107 / 0.083 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 13,878 / 13,878 / 13,878 / 13,878 | 0.106 / 0.112 / 0.114 / 0.119 | 0.029 / 0.031 / 0.032 / 0.033 | 0.108 / 0.110 / 0.110 / 0.110 | 0.210 / 0.214 / 0.215 / 0.216 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,958 / 2,958 / 2,958 / 2,958 | 0.137 / 0.163 / 0.183 / 0.198 | 0.019 / 0.019 / 0.020 / 0.020 | 0.117 / 0.123 / 0.127 / 0.129 | 0.381 / 0.390 / 0.398 / 0.401 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 721 / 721 / 721 / 721 | 0.137 / 0.148 / 0.159 / 0.161 | 0.045 / 0.046 / 0.046 / 0.047 | 0.096 / 0.098 / 0.101 / 0.101 | 0.220 / 0.226 / 0.226 / 0.226 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 645 / 645 / 645 / 645 | 0.127 / 0.131 / 0.132 / 0.134 | 0.089 / 0.090 / 0.090 / 0.091 | 0.187 / 0.188 / 0.188 / 0.188 | 0.758 / 0.757 / 0.758 / 0.758 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 10,287 / 10,287 / 10,287 / 10,287 | 0.106 / 0.109 / 0.114 / 0.126 | 0.282 / 0.284 / 0.286 / 0.288 | 0.228 / 0.228 / 0.228 / 0.228 | 0.380 / 0.380 / 0.380 / 0.382 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2,291 / 2,291 / 2,291 / 2,291 | 0.147 / 0.173 / 0.236 / 0.364 | 0.505 / 0.505 / 0.509 / 0.503 | 0.291 / 0.292 / 0.292 / 0.292 | 0.063 / 0.063 / 0.063 / 0.062 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 369 / 369 / 369 / 369 | 0.243 / 0.334 / 0.381 / 0.436 | 0.448 / 0.413 / 0.404 / 0.405 | 0.356 / 0.378 / 0.381 / 0.390 | 0.023 / 0.024 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 51,728 / 51,728 / 51,728 / 51,728 | 0.113 / 0.122 / 0.124 / 0.130 | 0.032 / 0.036 / 0.037 / 0.038 | 0.113 / 0.115 / 0.115 / 0.115 | 0.236 / 0.239 / 0.241 / 0.241 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 11,027 / 11,027 / 11,027 / 11,027 | 0.145 / 0.177 / 0.203 / 0.224 | 0.020 / 0.021 / 0.022 / 0.023 | 0.121 / 0.128 / 0.133 / 0.135 | 0.439 / 0.445 / 0.455 / 0.457 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,689 / 2,689 / 2,689 / 2,689 | 0.143 / 0.159 / 0.171 / 0.173 | 0.044 / 0.046 / 0.046 / 0.047 | 0.099 / 0.102 / 0.105 / 0.105 | 0.257 / 0.262 / 0.261 / 0.261 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,404 / 2,404 / 2,404 / 2,404 | 0.131 / 0.136 / 0.136 / 0.140 | 0.092 / 0.093 / 0.093 / 0.094 | 0.194 / 0.195 / 0.195 / 0.196 | 0.719 / 0.721 / 0.721 / 0.722 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 38,343 / 38,343 / 38,343 / 38,343 | 0.110 / 0.115 / 0.123 / 0.139 | 0.291 / 0.292 / 0.295 / 0.299 | 0.231 / 0.231 / 0.231 / 0.232 | 0.392 / 0.391 / 0.392 / 0.394 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 8,542 / 8,542 / 8,542 / 8,542 | 0.159 / 0.196 / 0.282 / 0.450 | 0.553 / 0.551 / 0.555 / 0.546 | 0.297 / 0.297 / 0.298 / 0.297 | 0.065 / 0.064 / 0.064 / 0.064 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 109,035 / 113,613 / 115,502 / 116,446 | 0.128 / 0.147 / 0.159 / 0.175 | 0.046 / 0.055 / 0.061 / 0.064 | 0.121 / 0.125 / 0.127 / 0.127 | 0.267 / 0.274 / 0.279 / 0.281 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 20,578 / 23,058 / 24,234 / 24,631 | 0.157 / 0.207 / 0.257 / 0.300 | 0.024 / 0.028 / 0.031 / 0.033 | 0.129 / 0.141 / 0.150 / 0.154 | 0.469 / 0.490 / 0.515 / 0.523 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 5,398 / 5,809 / 5,952 / 5,967 | 0.157 / 0.193 / 0.219 / 0.227 | 0.045 / 0.051 / 0.052 / 0.055 | 0.107 / 0.117 / 0.123 / 0.123 | 0.307 / 0.306 / 0.306 / 0.308 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 4,573 / 4,739 / 4,611 / 4,616 | 0.134 / 0.140 / 0.140 / 0.144 | 0.098 / 0.102 / 0.099 / 0.099 | 0.198 / 0.200 / 0.199 / 0.200 | 0.710 / 0.712 / 0.710 / 0.710 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 78,245 / 79,717 / 80,219 / 80,761 | 0.117 / 0.127 / 0.141 / 0.174 | 0.312 / 0.316 / 0.321 / 0.328 | 0.236 / 0.237 / 0.238 / 0.238 | 0.401 / 0.401 / 0.402 / 0.405 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 10,168 / 11,012 / 11,411 / 11,947 | 0.156 / 0.190 / 0.271 / 0.440 | 0.543 / 0.550 / 0.552 / 0.546 | 0.295 / 0.297 / 0.297 / 0.297 | 0.064 / 0.064 / 0.064 / 0.064 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 452 / 629 / 667 / 715 | 0.247 / 0.353 / 0.409 / 0.509 | 0.452 / 0.430 / 0.423 / 0.430 | 0.363 / 0.405 / 0.412 / 0.433 | 0.023 / 0.025 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5447** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 105,914 | 83,239 / 92,799 / 97,175 / 98,872 | 0.700 / 0.813 / 0.860 / 0.880 | 0.780 / 0.843 / 0.865 / 0.872 | 0.227 / 0.188 / 0.164 / 0.161 | +| truck | 28,864 | 17,414 / 21,372 / 24,178 / 25,356 | 0.467 / 0.634 / 0.757 / 0.803 | 0.624 / 0.729 / 0.800 / 0.823 | 0.223 / 0.182 / 0.149 / 0.140 | +| bus | 5,290 | 3,038 / 4,048 / 4,566 / 4,671 | 0.393 / 0.623 / 0.760 / 0.785 | 0.537 / 0.695 / 0.778 / 0.793 | 0.154 / 0.077 / 0.077 / 0.077 | +| bicycle | 3,608 | 2,849 / 3,030 / 3,047 / 3,063 | 0.575 / 0.635 / 0.640 / 0.647 | 0.662 / 0.691 / 0.693 / 0.698 | 0.147 / 0.142 / 0.142 / 0.142 | +| pedestrian | 48,637 | 43,259 / 43,978 / 44,223 / 44,525 | 0.729 / 0.748 / 0.756 / 0.768 | 0.750 / 0.759 / 0.764 / 0.770 | 0.153 / 0.146 / 0.148 / 0.148 | +| traffic_cone | 9,819 | 6,361 / 6,625 / 6,770 / 7,010 | 0.276 / 0.301 / 0.327 / 0.369 | 0.462 / 0.478 / 0.496 / 0.518 | 0.135 / 0.135 / 0.125 / 0.135 | +| barrier | 2,469 | 180 / 276 / 305 / 321 | 0.000 / 0.001 / 0.002 / 0.004 | 0.093 / 0.143 / 0.154 / 0.157 | 0.116 / 0.097 / 0.097 / 0.097 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 11,650 / 11,650 / 11,650 / 11,650 | 0.151 / 0.171 / 0.184 / 0.197 | 0.078 / 0.099 / 0.113 / 0.116 | 0.148 / 0.151 / 0.152 / 0.153 | 0.231 / 0.241 / 0.246 / 0.249 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 3,175 / 3,175 / 3,175 / 3,175 | 0.195 / 0.237 / 0.297 / 0.330 | 0.026 / 0.030 / 0.034 / 0.037 | 0.148 / 0.156 / 0.165 / 0.169 | 0.492 / 0.538 / 0.572 / 0.589 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 581 / 581 / 581 / 581 | 0.196 / 0.267 / 0.331 / 0.356 | 0.088 / 0.084 / 0.088 / 0.094 | 0.124 / 0.134 / 0.142 / 0.144 | 0.190 / 0.199 / 0.202 / 0.202 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 396 / 396 / 396 / 396 | 0.183 / 0.200 / 0.203 / 0.235 | 0.107 / 0.110 / 0.111 / 0.110 | 0.223 / 0.226 / 0.226 / 0.225 | 0.786 / 0.801 / 0.801 / 0.801 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 5,350 / 5,350 / 5,350 / 5,350 | 0.116 / 0.122 / 0.134 / 0.166 | 0.339 / 0.343 / 0.346 / 0.352 | 0.253 / 0.254 / 0.254 / 0.254 | 0.429 / 0.429 / 0.430 / 0.434 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,080 / 1,080 / 1,080 / 1,080 | 0.164 / 0.184 / 0.300 / 0.551 | 0.429 / 0.435 / 0.436 / 0.439 | 0.249 / 0.250 / 0.250 / 0.250 | 0.089 / 0.088 / 0.088 / 0.087 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 271 / 271 / 271 | 1.000 / 0.398 / 0.488 / 0.703 | 1.000 / 0.327 / 0.314 / 0.313 | 1.000 / 0.414 / 0.417 / 0.433 | 1.000 / 0.036 / 0.036 / 0.035 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 43,424 / 43,424 / 43,424 / 43,424 | 0.163 / 0.188 / 0.207 / 0.224 | 0.095 / 0.121 / 0.139 / 0.144 | 0.154 / 0.157 / 0.159 / 0.159 | 0.260 / 0.271 / 0.277 / 0.280 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 11,834 / 11,834 / 11,834 / 11,834 | 0.201 / 0.263 / 0.343 / 0.390 | 0.034 / 0.039 / 0.044 / 0.047 | 0.153 / 0.165 / 0.176 / 0.182 | 0.581 / 0.629 / 0.666 / 0.685 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,168 / 2,168 / 2,168 / 2,168 | 0.219 / 0.317 / 0.401 / 0.439 | 0.077 / 0.076 / 0.085 / 0.094 | 0.129 / 0.142 / 0.153 / 0.154 | 0.225 / 0.222 / 0.223 / 0.223 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,479 / 1,479 / 1,479 / 1,479 | 0.187 / 0.208 / 0.211 / 0.253 | 0.132 / 0.135 / 0.136 / 0.133 | 0.229 / 0.232 / 0.232 / 0.231 | 0.812 / 0.825 / 0.825 / 0.828 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 19,941 / 19,941 / 19,941 / 19,941 | 0.121 / 0.129 / 0.144 / 0.186 | 0.363 / 0.368 / 0.371 / 0.379 | 0.254 / 0.254 / 0.255 / 0.255 | 0.449 / 0.449 / 0.450 / 0.455 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,025 / 4,025 / 4,025 / 4,025 | 0.172 / 0.195 / 0.320 / 0.588 | 0.437 / 0.444 / 0.444 / 0.448 | 0.248 / 0.249 / 0.249 / 0.250 | 0.086 / 0.086 / 0.085 / 0.085 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 76,795 / 84,883 / 88,518 / 89,420 | 0.174 / 0.215 / 0.247 / 0.276 | 0.122 / 0.158 / 0.183 / 0.189 | 0.160 / 0.165 / 0.167 / 0.168 | 0.288 / 0.310 / 0.324 / 0.327 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 15,896 / 19,085 / 21,454 / 22,235 | 0.205 / 0.280 / 0.387 / 0.461 | 0.036 / 0.048 / 0.060 / 0.064 | 0.156 / 0.171 / 0.190 / 0.199 | 0.611 / 0.683 / 0.745 / 0.776 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,428 / 3,407 / 3,812 / 3,890 | 0.217 / 0.335 / 0.437 / 0.496 | 0.072 / 0.077 / 0.091 / 0.109 | 0.128 / 0.146 / 0.158 / 0.160 | 0.192 / 0.236 / 0.245 / 0.244 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,186 / 2,306 / 2,311 / 2,327 | 0.186 / 0.206 / 0.209 / 0.255 | 0.129 / 0.133 / 0.133 / 0.133 | 0.230 / 0.233 / 0.234 / 0.233 | 0.807 / 0.817 / 0.817 / 0.821 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 34,885 / 35,834 / 35,898 / 36,174 | 0.124 / 0.134 / 0.151 / 0.200 | 0.374 / 0.381 / 0.385 / 0.393 | 0.254 / 0.255 / 0.255 / 0.255 | 0.460 / 0.464 / 0.463 / 0.468 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,370 / 4,522 / 4,871 / 4,902 | 0.167 / 0.187 / 0.315 / 0.563 | 0.434 / 0.441 / 0.443 / 0.447 | 0.249 / 0.250 / 0.250 / 0.251 | 0.087 / 0.087 / 0.086 / 0.086 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 140 / 222 / 238 / 244 | 0.243 / 0.402 / 0.475 / 0.566 | 0.329 / 0.297 / 0.288 / 0.282 | 0.319 / 0.369 / 0.379 / 0.384 | 0.035 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.3779** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 39,577 | 28,566 / 33,503 / 35,851 / 36,643 | 0.545 / 0.697 / 0.765 / 0.785 | 0.653 / 0.737 / 0.769 / 0.778 | 0.197 / 0.165 / 0.142 / 0.129 | +| truck | 18,213 | 7,767 / 11,422 / 14,150 / 15,707 | 0.210 / 0.458 / 0.641 / 0.748 | 0.437 / 0.617 / 0.728 / 0.779 | 0.186 / 0.178 / 0.139 / 0.130 | +| bus | 3,541 | 1,576 / 2,234 / 2,516 / 2,627 | 0.174 / 0.385 / 0.475 / 0.511 | 0.408 / 0.544 / 0.593 / 0.611 | 0.050 / 0.050 / 0.050 / 0.046 | +| bicycle | 942 | 651 / 733 / 753 / 757 | 0.279 / 0.363 / 0.397 / 0.405 | 0.459 / 0.509 / 0.525 / 0.531 | 0.159 / 0.159 / 0.159 / 0.159 | +| pedestrian | 20,134 | 17,375 / 17,647 / 17,776 / 17,948 | 0.639 / 0.654 / 0.663 / 0.678 | 0.685 / 0.693 / 0.697 / 0.704 | 0.136 / 0.136 / 0.136 / 0.136 | +| traffic_cone | 1,231 | 454 / 474 / 484 / 524 | 0.022 / 0.024 / 0.028 / 0.036 | 0.194 / 0.197 / 0.206 / 0.220 | 0.143 / 0.143 / 0.144 / 0.143 | +| barrier | 711 | 38 / 65 / 83 / 88 | 0.000 / 0.000 / 0.000 / 0.001 | 0.061 / 0.106 / 0.132 / 0.137 | 0.082 / 0.059 / 0.059 / 0.059 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 4,353 / 4,353 / 4,353 / 4,353 | 0.196 / 0.233 / 0.264 / 0.291 | 0.138 / 0.178 / 0.201 / 0.209 | 0.181 / 0.185 / 0.187 / 0.188 | 0.432 / 0.437 / 0.441 / 0.442 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,003 / 2,003 / 2,003 / 2,003 | 0.236 / 0.343 / 0.443 / 0.588 | 0.032 / 0.035 / 0.040 / 0.047 | 0.178 / 0.193 / 0.203 / 0.222 | 0.578 / 0.584 / 0.604 / 0.616 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 389 / 389 / 389 / 389 | 0.255 / 0.363 / 0.444 / 0.496 | 0.034 / 0.039 / 0.041 / 0.046 | 0.138 / 0.147 / 0.160 / 0.167 | 0.215 / 0.244 / 0.272 / 0.278 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 103 / 103 / 103 / 103 | 0.210 / 0.255 / 0.293 / 0.338 | 0.192 / 0.184 / 0.180 / 0.190 | 0.238 / 0.250 / 0.262 / 0.260 | 0.831 / 0.866 / 0.881 / 0.878 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,214 / 2,214 / 2,214 / 2,214 | 0.132 / 0.139 / 0.151 / 0.205 | 0.274 / 0.275 / 0.280 / 0.289 | 0.276 / 0.276 / 0.276 / 0.276 | 0.538 / 0.538 / 0.539 / 0.544 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 135 / 135 / 135 / 135 | 0.180 / 0.203 / 0.304 / 0.671 | 0.686 / 0.693 / 0.690 / 0.687 | 0.343 / 0.343 / 0.343 / 0.339 | 0.110 / 0.110 / 0.112 / 0.114 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 78 / 78 | 1.000 / 1.000 / 0.652 / 0.972 | 1.000 / 1.000 / 0.257 / 0.243 | 1.000 / 1.000 / 0.479 / 0.489 | 1.000 / 1.000 / 0.050 / 0.051 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 16,226 / 16,226 / 16,226 / 16,226 | 0.206 / 0.253 / 0.292 / 0.326 | 0.175 / 0.216 / 0.241 / 0.250 | 0.186 / 0.191 / 0.192 / 0.193 | 0.460 / 0.466 / 0.472 / 0.473 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 7,467 / 7,467 / 7,467 / 7,467 | 0.243 / 0.374 / 0.510 / 0.666 | 0.070 / 0.050 / 0.054 / 0.063 | 0.190 / 0.202 / 0.215 / 0.234 | 0.696 / 0.686 / 0.693 / 0.706 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,451 / 1,451 / 1,451 / 1,451 | 0.271 / 0.376 / 0.474 / 0.535 | 0.039 / 0.044 / 0.048 / 0.055 | 0.144 / 0.154 / 0.170 / 0.177 | 0.229 / 0.283 / 0.316 / 0.322 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 386 / 386 / 386 / 386 | 0.206 / 0.257 / 0.290 / 0.341 | 0.225 / 0.206 / 0.202 / 0.212 | 0.232 / 0.243 / 0.253 / 0.251 | 0.866 / 0.932 / 0.946 / 0.944 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 8,254 / 8,254 / 8,254 / 8,254 | 0.138 / 0.148 / 0.163 / 0.231 | 0.317 / 0.319 / 0.324 / 0.336 | 0.272 / 0.272 / 0.273 / 0.273 | 0.551 / 0.551 / 0.552 / 0.559 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 504 | 1.000 / 1.000 / 1.000 / 0.742 | 1.000 / 1.000 / 1.000 / 0.658 | 1.000 / 1.000 / 1.000 / 0.328 | 1.000 / 1.000 / 1.000 / 0.124 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 23,637 / 27,796 / 30,068 / 31,051 | 0.208 / 0.265 / 0.316 / 0.362 | 0.187 / 0.242 / 0.276 / 0.289 | 0.187 / 0.193 / 0.195 / 0.196 | 0.468 / 0.484 / 0.494 / 0.497 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,996 / 9,963 / 12,146 / 13,138 | 0.240 / 0.374 / 0.533 / 0.705 | 0.046 / 0.049 / 0.062 / 0.073 | 0.184 / 0.204 / 0.222 / 0.242 | 0.662 / 0.697 / 0.744 / 0.767 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,277 / 1,704 / 1,857 / 1,963 | 0.257 / 0.364 / 0.451 / 0.512 | 0.039 / 0.043 / 0.046 / 0.050 | 0.136 / 0.148 / 0.162 / 0.170 | 0.221 / 0.269 / 0.302 / 0.305 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 379 / 421 / 434 / 439 | 0.202 / 0.248 / 0.279 / 0.329 | 0.175 / 0.169 / 0.165 / 0.180 | 0.237 / 0.248 / 0.257 / 0.256 | 0.864 / 0.904 / 0.922 / 0.916 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 13,121 / 13,270 / 13,350 / 13,493 | 0.139 / 0.149 / 0.163 / 0.233 | 0.313 / 0.316 / 0.320 / 0.333 | 0.272 / 0.272 / 0.272 / 0.273 | 0.551 / 0.551 / 0.552 / 0.560 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 247 / 251 / 262 / 280 | 0.180 / 0.195 / 0.301 / 0.651 | 0.723 / 0.731 / 0.722 / 0.717 | 0.347 / 0.346 / 0.345 / 0.341 | 0.110 / 0.111 / 0.115 / 0.117 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 28 / 54 / 67 / 70 | 0.268 / 0.452 / 0.633 / 0.979 | 0.376 / 0.247 / 0.242 / 0.235 | 0.374 / 0.427 / 0.450 / 0.454 | 0.048 / 0.046 / 0.048 / 0.048 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.5936** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 271,659 | 224,244 / 244,411 / 253,295 / 256,954 | 0.763 / 0.852 / 0.893 / 0.905 | 0.821 / 0.872 / 0.888 / 0.894 | 0.229 / 0.192 / 0.165 / 0.161 | +| truck | 73,974 | 46,967 / 57,070 / 64,081 / 67,424 | 0.504 / 0.682 / 0.801 / 0.856 | 0.652 / 0.765 / 0.833 / 0.860 | 0.246 / 0.182 / 0.166 / 0.139 | +| bus | 15,390 | 10,156 / 12,295 / 13,277 / 13,514 | 0.528 / 0.694 / 0.779 / 0.796 | 0.634 / 0.745 / 0.794 / 0.804 | 0.058 / 0.057 / 0.057 / 0.057 | +| bicycle | 10,415 | 8,835 / 9,191 / 9,237 / 9,265 | 0.685 / 0.726 / 0.730 / 0.733 | 0.738 / 0.755 / 0.757 / 0.759 | 0.202 / 0.198 / 0.198 / 0.198 | +| pedestrian | 162,291 | 147,838 / 150,126 / 151,000 / 151,937 | 0.812 / 0.828 / 0.839 / 0.846 | 0.808 / 0.818 / 0.823 / 0.829 | 0.153 / 0.148 / 0.148 / 0.148 | +| traffic_cone | 31,885 | 20,767 / 22,074 / 22,797 / 23,566 | 0.276 / 0.308 / 0.333 / 0.365 | 0.465 / 0.487 / 0.505 / 0.527 | 0.154 / 0.138 / 0.138 / 0.138 | +| barrier | 6,539 | 737 / 1,055 / 1,164 / 1,216 | 0.001 / 0.021 / 0.029 / 0.033 | 0.147 / 0.204 / 0.218 / 0.224 | 0.125 / 0.107 / 0.107 / 0.082 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 29,883 / 29,882 / 29,882 / 29,882 | 0.126 / 0.140 / 0.149 / 0.157 | 0.049 / 0.060 / 0.068 / 0.070 | 0.125 / 0.128 / 0.129 / 0.130 | 0.236 / 0.242 / 0.247 / 0.249 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 8,137 / 8,137 / 8,137 / 8,137 | 0.169 / 0.214 / 0.262 / 0.307 | 0.022 / 0.025 / 0.028 / 0.031 | 0.134 / 0.144 / 0.152 / 0.158 | 0.446 / 0.472 / 0.496 / 0.509 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,692 / 1,692 / 1,692 / 1,692 | 0.159 / 0.199 / 0.236 / 0.250 | 0.055 / 0.057 / 0.059 / 0.061 | 0.106 / 0.113 / 0.119 / 0.120 | 0.227 / 0.235 / 0.240 / 0.241 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,145 / 1,145 / 1,145 / 1,145 | 0.143 / 0.152 / 0.155 / 0.165 | 0.096 / 0.098 / 0.098 / 0.099 | 0.197 / 0.200 / 0.201 / 0.201 | 0.747 / 0.753 / 0.753 / 0.753 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 17,852 / 17,852 / 17,852 / 17,852 | 0.111 / 0.115 / 0.123 / 0.143 | 0.297 / 0.298 / 0.301 / 0.305 | 0.237 / 0.237 / 0.237 / 0.238 | 0.402 / 0.402 / 0.403 / 0.405 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 3,507 / 3,507 / 3,507 / 3,507 | 0.153 / 0.177 / 0.255 / 0.423 | 0.488 / 0.491 / 0.493 / 0.489 | 0.281 / 0.282 / 0.282 / 0.282 | 0.070 / 0.070 / 0.070 / 0.069 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 719 / 719 / 719 / 719 | 0.248 / 0.365 / 0.428 / 0.532 | 0.434 / 0.387 / 0.377 / 0.375 | 0.385 / 0.392 / 0.395 / 0.406 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 111,380 / 111,380 / 111,380 / 111,380 | 0.138 / 0.157 / 0.169 / 0.180 | 0.061 / 0.077 / 0.088 / 0.090 | 0.133 / 0.136 / 0.138 / 0.138 | 0.264 / 0.271 / 0.276 / 0.278 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 30,329 / 30,329 / 30,329 / 30,329 | 0.181 / 0.241 / 0.307 / 0.367 | 0.027 / 0.031 / 0.034 / 0.038 | 0.142 / 0.154 / 0.164 / 0.171 | 0.519 / 0.545 / 0.570 / 0.584 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 6,309 / 6,309 / 6,309 / 6,309 | 0.179 / 0.238 / 0.288 / 0.309 | 0.053 / 0.056 / 0.060 / 0.065 | 0.113 / 0.123 / 0.130 / 0.132 | 0.259 / 0.261 / 0.265 / 0.266 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 4,270 / 4,270 / 4,270 / 4,270 | 0.151 / 0.163 / 0.166 / 0.182 | 0.108 / 0.110 / 0.110 / 0.111 | 0.207 / 0.210 / 0.211 / 0.211 | 0.741 / 0.749 / 0.750 / 0.750 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 66,539 / 66,539 / 66,539 / 66,539 | 0.116 / 0.122 / 0.133 / 0.162 | 0.312 / 0.314 / 0.318 / 0.323 | 0.241 / 0.242 / 0.242 / 0.242 | 0.420 / 0.420 / 0.420 / 0.424 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 13,072 / 13,072 / 13,072 / 13,072 | 0.164 / 0.198 / 0.296 / 0.504 | 0.526 / 0.528 / 0.530 / 0.525 | 0.284 / 0.285 / 0.286 / 0.285 | 0.072 / 0.072 / 0.071 / 0.071 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 209,064 / 225,972 / 233,684 / 235,583 | 0.154 / 0.187 / 0.212 / 0.234 | 0.088 / 0.115 / 0.133 / 0.138 | 0.143 / 0.148 / 0.150 / 0.151 | 0.297 / 0.313 / 0.324 / 0.326 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 43,131 / 52,358 / 57,620 / 60,504 | 0.187 / 0.266 / 0.362 / 0.453 | 0.030 / 0.040 / 0.047 / 0.054 | 0.147 / 0.164 / 0.179 / 0.191 | 0.550 / 0.602 / 0.646 / 0.675 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 9,224 / 10,854 / 11,562 / 11,707 | 0.189 / 0.266 / 0.329 / 0.363 | 0.054 / 0.060 / 0.066 / 0.074 | 0.117 / 0.130 / 0.140 / 0.142 | 0.272 / 0.282 / 0.289 / 0.291 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 7,044 / 7,224 / 7,242 / 7,260 | 0.152 / 0.163 / 0.167 / 0.183 | 0.109 / 0.111 / 0.111 / 0.111 | 0.209 / 0.211 / 0.212 / 0.212 | 0.743 / 0.750 / 0.752 / 0.752 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 125,762 / 128,271 / 129,072 / 130,130 | 0.121 / 0.131 / 0.146 / 0.188 | 0.328 / 0.333 / 0.338 / 0.347 | 0.245 / 0.246 / 0.246 / 0.247 | 0.432 / 0.433 / 0.434 / 0.438 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 14,442 / 15,921 / 16,500 / 17,198 | 0.158 / 0.190 / 0.285 / 0.485 | 0.515 / 0.523 / 0.525 / 0.522 | 0.283 / 0.285 / 0.284 / 0.285 | 0.072 / 0.071 / 0.071 / 0.071 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 624 / 886 / 950 / 1,028 | 0.246 / 0.367 / 0.435 / 0.567 | 0.424 / 0.390 / 0.381 / 0.385 | 0.355 / 0.393 / 0.403 / 0.423 | 0.026 / 0.028 / 0.028 / 0.029 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.6292** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 14,872 | 13,343 / 13,829 / 14,017 / 14,145 | 0.867 / 0.906 / 0.926 / 0.937 | 0.914 / 0.938 / 0.943 / 0.946 | 0.228 / 0.194 / 0.193 / 0.145 | +| truck | 1,192 | 960 / 1,091 / 1,118 / 1,135 | 0.726 / 0.886 / 0.910 / 0.928 | 0.823 / 0.910 / 0.919 / 0.923 | 0.247 / 0.208 / 0.157 / 0.152 | +| bus | 336 | 283 / 329 / 332 / 332 | 0.793 / 0.962 / 0.973 / 0.973 | 0.859 / 0.950 / 0.959 / 0.959 | 0.286 / 0.065 / 0.065 / 0.065 | +| bicycle | 740 | 694 / 713 / 717 / 724 | 0.825 / 0.870 / 0.877 / 0.891 | 0.837 / 0.855 / 0.856 / 0.863 | 0.201 / 0.211 / 0.211 / 0.211 | +| pedestrian | 5,055 | 4,641 / 4,701 / 4,718 / 4,731 | 0.825 / 0.839 / 0.848 / 0.854 | 0.840 / 0.847 / 0.851 / 0.855 | 0.160 / 0.160 / 0.160 / 0.160 | +| traffic_cone | 60 | 24 / 24 / 24 / 24 | 0.000 / 0.000 / 0.000 / 0.000 | 0.028 / 0.029 / 0.029 / 0.029 | 0.077 / 0.153 / 0.153 / 0.153 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,635 / 1,635 / 1,635 / 1,635 | 0.110 / 0.114 / 0.116 / 0.119 | 0.040 / 0.042 / 0.044 / 0.045 | 0.112 / 0.113 / 0.113 / 0.113 | 0.141 / 0.142 / 0.143 / 0.143 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 131 / 131 / 131 / 131 | 0.140 / 0.171 / 0.175 / 0.180 | 0.035 / 0.039 / 0.040 / 0.043 | 0.128 / 0.132 / 0.134 / 0.134 | 0.229 / 0.223 / 0.223 / 0.222 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 36 / 36 / 36 / 36 | 0.144 / 0.164 / 0.166 / 0.166 | 0.176 / 0.173 / 0.173 / 0.173 | 0.097 / 0.099 / 0.099 / 0.099 | 0.188 / 0.183 / 0.182 / 0.182 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 81 / 81 / 81 / 81 | 0.131 / 0.142 / 0.145 / 0.156 | 0.092 / 0.092 / 0.093 / 0.093 | 0.206 / 0.211 / 0.211 / 0.214 | 0.517 / 0.511 / 0.511 / 0.510 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 556 / 556 / 556 / 556 | 0.096 / 0.099 / 0.105 / 0.123 | 0.290 / 0.291 / 0.294 / 0.299 | 0.204 / 0.204 / 0.205 / 0.205 | 0.240 / 0.240 / 0.240 / 0.244 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 6 / 6 / 6 / 6 | 0.154 / 0.251 / 0.251 / 0.251 | 1.295 / 1.395 / 1.395 / 1.395 | 0.369 / 0.370 / 0.370 / 0.370 | 0.087 / 0.088 / 0.088 / 0.088 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 6,097 / 6,097 / 6,097 / 6,097 | 0.118 / 0.124 / 0.126 / 0.130 | 0.045 / 0.048 / 0.050 / 0.051 | 0.117 / 0.118 / 0.119 / 0.119 | 0.155 / 0.156 / 0.157 / 0.157 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 488 / 488 / 488 / 488 | 0.151 / 0.188 / 0.195 / 0.201 | 0.041 / 0.047 / 0.048 / 0.052 | 0.140 / 0.144 / 0.146 / 0.147 | 0.231 / 0.224 / 0.224 / 0.223 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 137 / 137 / 137 / 137 | 0.157 / 0.184 / 0.186 / 0.186 | 0.206 / 0.196 / 0.195 / 0.195 | 0.096 / 0.099 / 0.099 / 0.099 | 0.192 / 0.184 / 0.183 / 0.183 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 303 / 303 / 303 / 303 | 0.135 / 0.146 / 0.151 / 0.167 | 0.107 / 0.107 / 0.108 / 0.108 | 0.214 / 0.218 / 0.219 / 0.222 | 0.499 / 0.496 / 0.495 / 0.494 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,072 / 2,072 / 2,072 / 2,072 | 0.100 / 0.104 / 0.112 / 0.134 | 0.326 / 0.327 / 0.330 / 0.336 | 0.211 / 0.211 / 0.212 / 0.212 | 0.246 / 0.246 / 0.247 / 0.252 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 13,062 / 13,479 / 13,554 / 13,743 | 0.134 / 0.148 / 0.155 / 0.171 | 0.065 / 0.081 / 0.083 / 0.090 | 0.128 / 0.130 / 0.131 / 0.132 | 0.156 / 0.158 / 0.159 / 0.160 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 932 / 1,043 / 1,066 / 1,074 | 0.169 / 0.218 / 0.232 / 0.248 | 0.057 / 0.063 / 0.067 / 0.081 | 0.151 / 0.158 / 0.161 / 0.162 | 0.223 / 0.215 / 0.218 / 0.217 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 275 / 321 / 324 / 324 | 0.176 / 0.238 / 0.247 / 0.247 | 0.192 / 0.169 / 0.167 / 0.167 | 0.097 / 0.105 / 0.105 / 0.105 | 0.184 / 0.184 / 0.182 / 0.182 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 602 / 607 / 608 / 613 | 0.141 / 0.152 / 0.154 / 0.174 | 0.122 / 0.120 / 0.120 / 0.120 | 0.222 / 0.226 / 0.227 / 0.231 | 0.485 / 0.483 / 0.483 / 0.482 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,140 / 4,178 / 4,198 / 4,214 | 0.108 / 0.116 / 0.127 / 0.158 | 0.362 / 0.364 / 0.367 / 0.377 | 0.217 / 0.218 / 0.219 / 0.219 | 0.248 / 0.248 / 0.248 / 0.254 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 23 / 13 / 13 / 13 | 0.147 / 0.302 / 0.302 / 0.302 | 1.599 / 1.355 / 1.355 / 1.355 | 0.378 / 0.374 / 0.374 / 0.374 | 0.085 / 0.087 / 0.087 / 0.087 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5080** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 10,929 | 8,721 / 9,616 / 10,038 / 10,208 | 0.724 / 0.828 / 0.872 / 0.891 | 0.802 / 0.862 / 0.880 / 0.884 | 0.216 / 0.185 / 0.177 / 0.175 | +| truck | 1,009 | 649 / 789 / 850 / 868 | 0.527 / 0.699 / 0.770 / 0.785 | 0.661 / 0.785 / 0.824 / 0.829 | 0.229 / 0.154 / 0.134 / 0.134 | +| bus | 141 | 112 / 130 / 131 / 134 | 0.632 / 0.861 / 0.864 / 0.883 | 0.752 / 0.881 / 0.881 / 0.881 | 0.483 / 0.431 / 0.431 / 0.431 | +| bicycle | 460 | 321 / 363 / 373 / 375 | 0.461 / 0.573 / 0.593 / 0.593 | 0.602 / 0.652 / 0.657 / 0.657 | 0.143 / 0.143 / 0.143 / 0.143 | +| pedestrian | 3,721 | 3,077 / 3,133 / 3,157 / 3,180 | 0.646 / 0.667 / 0.673 / 0.683 | 0.696 / 0.707 / 0.712 / 0.717 | 0.119 / 0.122 / 0.122 / 0.122 | +| traffic_cone | 4 | 2 / 2 / 2 / 2 | 0.000 / 0.000 / 0.000 / 0.000 | 0.043 / 0.043 / 0.043 / 0.043 | 0.227 / 0.227 / 0.227 / 0.227 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,202 / 1,202 / 1,202 / 1,202 | 0.157 / 0.173 / 0.183 / 0.193 | 0.091 / 0.111 / 0.122 / 0.126 | 0.147 / 0.150 / 0.152 / 0.153 | 0.224 / 0.228 / 0.232 / 0.233 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 110 / 110 / 110 / 110 | 0.187 / 0.225 / 0.251 / 0.259 | 0.038 / 0.045 / 0.048 / 0.051 | 0.164 / 0.175 / 0.180 / 0.181 | 0.241 / 0.245 / 0.259 / 0.263 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 15 / 15 / 15 / 15 | 0.225 / 0.269 / 0.271 / 0.278 | 0.493 / 0.444 / 0.443 / 0.439 | 0.100 / 0.114 / 0.115 / 0.115 | 0.155 / 0.185 / 0.188 / 0.191 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 50 / 50 / 50 / 50 | 0.191 / 0.235 / 0.247 / 0.251 | 0.178 / 0.184 / 0.189 / 0.188 | 0.237 / 0.254 / 0.258 / 0.258 | 0.611 / 0.693 / 0.689 / 0.689 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 409 / 409 / 409 / 409 | 0.114 / 0.124 / 0.132 / 0.161 | 0.415 / 0.417 / 0.418 / 0.426 | 0.203 / 0.204 / 0.205 / 0.205 | 0.280 / 0.279 / 0.279 / 0.282 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 0.219 / 0.219 / 0.219 / 0.219 | 2.371 / 2.371 / 2.371 / 2.371 | 0.513 / 0.513 / 0.513 / 0.513 | 0.034 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 4,480 / 4,480 / 4,480 / 4,480 | 0.167 / 0.188 / 0.202 / 0.215 | 0.114 / 0.138 / 0.153 / 0.157 | 0.154 / 0.158 / 0.160 / 0.161 | 0.240 / 0.245 / 0.249 / 0.250 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 413 / 413 / 413 / 413 | 0.197 / 0.253 / 0.289 / 0.300 | 0.045 / 0.054 / 0.058 / 0.064 | 0.173 / 0.187 / 0.193 / 0.194 | 0.248 / 0.252 / 0.266 / 0.272 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 57 / 57 / 57 / 57 | 0.230 / 0.274 / 0.276 / 0.288 | 0.405 / 0.359 / 0.358 / 0.356 | 0.104 / 0.117 / 0.118 / 0.118 | 0.172 / 0.201 / 0.207 / 0.210 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 188 / 188 / 188 / 188 | 0.192 / 0.236 / 0.251 / 0.257 | 0.219 / 0.227 / 0.232 / 0.232 | 0.246 / 0.261 / 0.265 / 0.265 | 0.610 / 0.685 / 0.681 / 0.681 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,525 / 1,525 / 1,525 / 1,525 | 0.123 / 0.136 / 0.149 / 0.189 | 0.463 / 0.464 / 0.465 / 0.475 | 0.209 / 0.210 / 0.211 / 0.212 | 0.297 / 0.295 / 0.295 / 0.298 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1 / 1 / 1 / 1 | 0.205 / 0.205 / 0.205 / 0.205 | 2.328 / 2.328 / 2.328 / 2.328 | 0.517 / 0.517 / 0.517 / 0.517 | 0.034 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 8,159 / 8,918 / 9,145 / 9,193 | 0.179 / 0.214 / 0.238 / 0.253 | 0.166 / 0.202 / 0.219 / 0.223 | 0.162 / 0.168 / 0.169 / 0.170 | 0.252 / 0.272 / 0.279 / 0.280 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 584 / 735 / 782 / 787 | 0.203 / 0.288 / 0.342 / 0.356 | 0.043 / 0.076 / 0.081 / 0.086 | 0.176 / 0.196 / 0.204 / 0.206 | 0.252 / 0.242 / 0.257 / 0.271 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 97 / 115 / 115 / 115 | 0.232 / 0.284 / 0.284 / 0.284 | 0.381 / 0.326 / 0.326 / 0.326 | 0.105 / 0.118 / 0.118 / 0.118 | 0.224 / 0.244 / 0.244 / 0.244 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 243 / 263 / 265 / 265 | 0.192 / 0.233 / 0.240 / 0.247 | 0.198 / 0.205 / 0.204 / 0.203 | 0.244 / 0.257 / 0.259 / 0.259 | 0.588 / 0.651 / 0.648 / 0.648 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,464 / 2,492 / 2,508 / 2,524 | 0.127 / 0.141 / 0.156 / 0.198 | 0.474 / 0.473 / 0.475 / 0.484 | 0.211 / 0.212 / 0.213 / 0.214 | 0.302 / 0.300 / 0.300 / 0.302 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2 / 2 / 2 / 2 | 0.199 / 0.199 / 0.199 / 0.199 | 2.313 / 2.313 / 2.313 / 2.313 | 0.518 / 0.518 / 0.518 / 0.518 | 0.034 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.3869** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 2,883 | 2,115 / 2,486 / 2,667 / 2,711 | 0.586 / 0.739 / 0.796 / 0.814 | 0.688 / 0.769 / 0.794 / 0.801 | 0.231 / 0.180 / 0.162 / 0.163 | +| truck | 600 | 314 / 447 / 515 / 531 | 0.307 / 0.600 / 0.741 / 0.769 | 0.497 / 0.688 / 0.776 / 0.790 | 0.257 / 0.234 / 0.120 / 0.120 | +| bus | 60 | 30 / 45 / 47 / 48 | 0.337 / 0.572 / 0.607 / 0.610 | 0.516 / 0.650 / 0.667 / 0.667 | 0.098 / 0.044 / 0.098 / 0.098 | +| bicycle | 85 | 54 / 62 / 65 / 65 | 0.246 / 0.365 / 0.392 / 0.392 | 0.452 / 0.537 / 0.561 / 0.561 | 0.120 / 0.122 / 0.122 / 0.122 | +| pedestrian | 1,092 | 881 / 895 / 900 / 918 | 0.474 / 0.485 / 0.492 / 0.507 | 0.584 / 0.590 / 0.594 / 0.597 | 0.125 / 0.125 / 0.125 / 0.125 | +| traffic_cone | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 317 / 317 / 317 / 317 | 0.195 / 0.229 / 0.248 / 0.272 | 0.108 / 0.142 / 0.165 / 0.172 | 0.179 / 0.186 / 0.187 / 0.187 | 0.521 / 0.529 / 0.535 / 0.536 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 66 / 66 / 66 / 66 | 0.222 / 0.321 / 0.382 / 0.402 | 0.045 / 0.050 / 0.057 / 0.062 | 0.180 / 0.191 / 0.200 / 0.203 | 0.177 / 0.168 / 0.166 / 0.173 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 6 / 6 / 6 / 6 | 0.273 / 0.344 / 0.364 / 0.371 | 0.168 / 0.213 / 0.210 / 0.211 | 0.140 / 0.162 / 0.165 / 0.166 | 0.184 / 0.307 / 0.440 / 0.450 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 9 / 9 / 9 / 9 | 0.183 / 0.260 / 0.290 / 0.290 | 0.091 / 0.084 / 0.086 / 0.086 | 0.236 / 0.248 / 0.253 / 0.253 | 0.721 / 0.705 / 0.729 / 0.729 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 120 / 120 / 120 / 120 | 0.131 / 0.137 / 0.152 / 0.207 | 0.398 / 0.398 / 0.400 / 0.407 | 0.172 / 0.172 / 0.172 / 0.173 | 0.354 / 0.355 / 0.355 / 0.364 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,182 / 1,182 / 1,182 / 1,182 | 0.206 / 0.249 / 0.276 / 0.309 | 0.146 / 0.189 / 0.219 / 0.227 | 0.186 / 0.193 / 0.194 / 0.195 | 0.579 / 0.582 / 0.587 / 0.585 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 246 / 246 / 246 / 246 | 0.231 / 0.345 / 0.430 / 0.459 | 0.072 / 0.066 / 0.075 / 0.081 | 0.185 / 0.196 / 0.209 / 0.212 | 0.215 / 0.199 / 0.190 / 0.199 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 24 / 24 / 24 / 24 | 0.291 / 0.384 / 0.416 / 0.425 | 0.269 / 0.312 / 0.301 / 0.298 | 0.146 / 0.175 / 0.178 / 0.179 | 0.315 / 0.475 / 0.689 / 0.695 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 34 / 34 / 34 / 34 | 0.183 / 0.261 / 0.315 / 0.315 | 0.110 / 0.109 / 0.110 / 0.110 | 0.257 / 0.269 / 0.275 / 0.275 | 0.757 / 0.750 / 0.789 / 0.789 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 447 / 447 / 447 / 447 | 0.136 / 0.145 / 0.162 / 0.231 | 0.472 / 0.470 / 0.472 / 0.483 | 0.176 / 0.176 / 0.176 / 0.177 | 0.387 / 0.389 / 0.389 / 0.401 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,763 / 2,103 / 2,227 / 2,243 | 0.207 / 0.261 / 0.299 / 0.338 | 0.155 / 0.220 / 0.266 / 0.271 | 0.188 / 0.196 / 0.197 / 0.198 | 0.571 / 0.603 / 0.613 / 0.613 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 254 / 358 / 448 / 456 | 0.225 / 0.356 / 0.481 / 0.514 | 0.051 / 0.061 / 0.095 / 0.097 | 0.182 / 0.199 / 0.221 / 0.224 | 0.192 / 0.187 / 0.206 / 0.217 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 24 / 38 / 31 / 31 | 0.304 / 0.380 / 0.407 / 0.407 | 0.305 / 0.282 / 0.341 / 0.341 | 0.155 / 0.172 / 0.176 / 0.176 | 0.329 / 0.487 / 0.633 / 0.633 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 38 / 44 / 46 / 46 | 0.178 / 0.253 / 0.304 / 0.304 | 0.113 / 0.102 / 0.102 / 0.102 | 0.252 / 0.263 / 0.267 / 0.267 | 0.725 / 0.699 / 0.717 / 0.717 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 613 / 619 / 623 / 626 | 0.134 / 0.141 / 0.157 / 0.208 | 0.439 / 0.441 / 0.439 / 0.442 | 0.173 / 0.172 / 0.173 / 0.174 | 0.383 / 0.385 / 0.385 / 0.389 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.5663** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 28,684 | 24,224 / 25,996 / 26,796 / 27,149 | 0.797 / 0.872 / 0.903 / 0.914 | 0.852 / 0.894 / 0.906 / 0.910 | 0.225 / 0.192 / 0.177 / 0.177 | +| truck | 2,801 | 1,924 / 2,330 / 2,488 / 2,543 | 0.570 / 0.767 / 0.831 / 0.850 | 0.700 / 0.818 / 0.856 / 0.863 | 0.250 / 0.177 / 0.138 / 0.138 | +| bus | 537 | 425 / 504 / 510 / 514 | 0.707 / 0.897 / 0.909 / 0.916 | 0.795 / 0.889 / 0.895 / 0.895 | 0.337 / 0.284 / 0.284 / 0.284 | +| bicycle | 1,285 | 1,072 / 1,141 / 1,158 / 1,167 | 0.666 / 0.740 / 0.755 / 0.762 | 0.727 / 0.759 / 0.764 / 0.769 | 0.200 / 0.200 / 0.150 / 0.150 | +| pedestrian | 9,868 | 8,613 / 8,743 / 8,786 / 8,842 | 0.733 / 0.748 / 0.757 / 0.764 | 0.759 / 0.769 / 0.773 / 0.777 | 0.146 / 0.146 / 0.146 / 0.146 | +| traffic_cone | 64 | 26 / 26 / 26 / 26 | 0.000 / 0.000 / 0.000 / 0.000 | 0.024 / 0.027 / 0.027 / 0.027 | 0.157 / 0.153 / 0.153 / 0.153 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 3,155 / 3,155 / 3,155 / 3,155 | 0.127 / 0.137 / 0.143 / 0.148 | 0.057 / 0.067 / 0.072 / 0.074 | 0.125 / 0.127 / 0.128 / 0.129 | 0.177 / 0.182 / 0.185 / 0.186 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 308 / 308 / 308 / 308 | 0.165 / 0.209 / 0.229 / 0.237 | 0.038 / 0.043 / 0.046 / 0.050 | 0.147 / 0.155 / 0.159 / 0.160 | 0.223 / 0.219 / 0.223 / 0.225 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 59 / 59 / 59 / 59 | 0.165 / 0.195 / 0.197 / 0.199 | 0.250 / 0.239 / 0.238 / 0.238 | 0.098 / 0.103 / 0.103 / 0.104 | 0.186 / 0.190 / 0.193 / 0.193 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 141 / 141 / 141 / 141 | 0.146 / 0.167 / 0.175 / 0.186 | 0.114 / 0.115 / 0.118 / 0.117 | 0.215 / 0.223 / 0.226 / 0.227 | 0.538 / 0.554 / 0.553 / 0.553 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,085 / 1,085 / 1,085 / 1,085 | 0.103 / 0.109 / 0.116 / 0.140 | 0.338 / 0.339 / 0.342 / 0.348 | 0.205 / 0.206 / 0.206 / 0.207 | 0.257 / 0.257 / 0.257 / 0.262 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 7 / 7 / 7 / 7 | 0.160 / 0.246 / 0.246 / 0.246 | 1.421 / 1.507 / 1.507 / 1.507 | 0.389 / 0.391 / 0.391 / 0.391 | 0.081 / 0.082 / 0.082 / 0.082 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 11,760 / 11,760 / 11,760 / 11,760 | 0.139 / 0.152 / 0.160 / 0.167 | 0.069 / 0.082 / 0.090 / 0.092 | 0.133 / 0.136 / 0.137 / 0.137 | 0.197 / 0.202 / 0.205 / 0.206 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,148 / 1,148 / 1,148 / 1,148 | 0.179 / 0.235 / 0.263 / 0.275 | 0.046 / 0.052 / 0.055 / 0.061 | 0.159 / 0.168 / 0.173 / 0.174 | 0.229 / 0.225 / 0.229 / 0.232 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 220 / 220 / 220 / 220 | 0.181 / 0.217 / 0.220 / 0.223 | 0.254 / 0.238 / 0.237 / 0.237 | 0.099 / 0.106 / 0.106 / 0.106 | 0.184 / 0.190 / 0.194 / 0.195 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 526 / 526 / 526 / 526 | 0.154 / 0.176 / 0.187 / 0.200 | 0.137 / 0.138 / 0.143 / 0.141 | 0.226 / 0.234 / 0.236 / 0.239 | 0.525 / 0.543 / 0.542 / 0.542 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,045 / 4,045 / 4,045 / 4,045 | 0.111 / 0.118 / 0.129 / 0.159 | 0.383 / 0.383 / 0.386 / 0.393 | 0.209 / 0.210 / 0.210 / 0.211 | 0.268 / 0.268 / 0.268 / 0.273 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 23,001 / 24,465 / 24,972 / 25,071 | 0.155 / 0.182 / 0.199 / 0.212 | 0.107 / 0.136 / 0.149 / 0.152 | 0.145 / 0.149 / 0.151 / 0.151 | 0.222 / 0.237 / 0.243 / 0.243 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,757 / 2,148 / 2,295 / 2,313 | 0.188 / 0.266 / 0.319 / 0.340 | 0.051 / 0.067 / 0.078 / 0.087 | 0.163 / 0.178 / 0.188 / 0.190 | 0.228 / 0.223 / 0.227 / 0.235 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 391 / 442 / 445 / 445 | 0.196 / 0.242 / 0.248 / 0.248 | 0.248 / 0.230 / 0.228 / 0.228 | 0.102 / 0.110 / 0.110 / 0.110 | 0.191 / 0.192 / 0.209 / 0.209 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 839 / 875 / 938 / 944 | 0.154 / 0.176 / 0.199 / 0.216 | 0.135 / 0.135 / 0.157 / 0.157 | 0.226 / 0.234 / 0.243 / 0.246 | 0.519 / 0.542 / 0.536 / 0.535 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 7,078 / 7,166 / 7,204 / 7,247 | 0.115 / 0.125 / 0.138 / 0.175 | 0.398 / 0.400 / 0.402 / 0.413 | 0.210 / 0.211 / 0.212 / 0.213 | 0.272 / 0.272 / 0.272 / 0.278 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 13 / 15 / 15 / 15 | 0.174 / 0.289 / 0.289 / 0.289 | 1.258 / 1.483 / 1.483 / 1.483 | 0.389 / 0.393 / 0.393 / 0.393 | 0.079 / 0.080 / 0.080 / 0.080 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (4,682 frames) + + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.7246** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 134,456 | 107,307 / 117,848 / 123,019 / 125,254 | 0.730 / 0.828 / 0.873 / 0.893 | 0.804 / 0.858 / 0.879 / 0.887 | 0.240 / 0.200 / 0.165 / 0.162 | +| truck | 14,900 | 9,814 / 11,683 / 12,829 / 13,364 | 0.527 / 0.690 / 0.786 / 0.828 | 0.667 / 0.766 / 0.817 / 0.841 | 0.225 / 0.182 / 0.170 / 0.145 | +| bus | 4,970 | 3,654 / 4,321 / 4,639 / 4,711 | 0.643 / 0.809 / 0.902 / 0.916 | 0.740 / 0.842 / 0.892 / 0.901 | 0.318 / 0.140 / 0.103 / 0.102 | +| bicycle | 2,486 | 2,153 / 2,228 / 2,239 / 2,239 | 0.755 / 0.796 / 0.801 / 0.802 | 0.789 / 0.809 / 0.809 / 0.812 | 0.186 / 0.164 / 0.164 / 0.164 | +| pedestrian | 29,102 | 25,516 / 26,078 / 26,291 / 26,512 | 0.728 / 0.752 / 0.765 / 0.778 | 0.748 / 0.761 / 0.766 / 0.773 | 0.153 / 0.153 / 0.153 / 0.153 | +| traffic_cone | 11,344 | 7,980 / 8,516 / 8,749 / 9,099 | 0.342 / 0.388 / 0.410 / 0.448 | 0.509 / 0.538 / 0.551 / 0.575 | 0.177 / 0.156 / 0.153 / 0.157 | +| barrier | 2,117 | 737 / 1,055 / 1,164 / 1,216 | 0.104 / 0.236 / 0.275 / 0.290 | 0.323 / 0.439 / 0.467 / 0.479 | 0.151 / 0.155 / 0.115 / 0.115 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 14,790 / 14,790 / 14,790 / 14,790 | 0.129 / 0.145 / 0.156 / 0.167 | 0.064 / 0.082 / 0.095 / 0.099 | 0.129 / 0.133 / 0.134 / 0.135 | 0.151 / 0.153 / 0.154 / 0.155 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,639 / 1,639 / 1,639 / 1,639 | 0.161 / 0.202 / 0.244 / 0.276 | 0.029 / 0.033 / 0.037 / 0.039 | 0.134 / 0.144 / 0.150 / 0.154 | 0.360 / 0.375 / 0.382 / 0.385 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 546 / 546 / 546 / 546 | 0.132 / 0.163 / 0.196 / 0.203 | 0.057 / 0.059 / 0.061 / 0.064 | 0.094 / 0.100 / 0.107 / 0.107 | 0.126 / 0.130 / 0.131 / 0.131 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 273 / 273 / 273 / 273 | 0.146 / 0.154 / 0.155 / 0.158 | 0.079 / 0.081 / 0.082 / 0.083 | 0.195 / 0.197 / 0.197 / 0.197 | 0.593 / 0.594 / 0.595 / 0.596 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 3,201 / 3,201 / 3,201 / 3,201 | 0.110 / 0.118 / 0.133 / 0.160 | 0.455 / 0.459 / 0.464 / 0.469 | 0.238 / 0.239 / 0.239 / 0.240 | 0.245 / 0.245 / 0.246 / 0.248 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,247 / 1,247 / 1,247 / 1,247 | 0.152 / 0.181 / 0.225 / 0.389 | 0.292 / 0.291 / 0.290 / 0.293 | 0.283 / 0.283 / 0.283 / 0.284 | 0.027 / 0.027 / 0.027 / 0.027 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 232 / 232 / 232 / 232 | 0.235 / 0.339 / 0.389 / 0.459 | 0.390 / 0.353 / 0.349 / 0.349 | 0.319 / 0.353 / 0.362 / 0.370 | 0.026 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 55,126 / 55,126 / 55,126 / 55,126 | 0.143 / 0.165 / 0.181 / 0.196 | 0.084 / 0.109 / 0.126 / 0.131 | 0.139 / 0.143 / 0.144 / 0.145 | 0.164 / 0.165 / 0.166 / 0.166 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,109 / 6,109 / 6,109 / 6,109 | 0.177 / 0.231 / 0.287 / 0.332 | 0.037 / 0.042 / 0.047 / 0.049 | 0.144 / 0.155 / 0.164 / 0.169 | 0.386 / 0.403 / 0.410 / 0.414 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,037 / 2,037 / 2,037 / 2,037 | 0.149 / 0.192 / 0.229 / 0.239 | 0.056 / 0.060 / 0.064 / 0.069 | 0.102 / 0.108 / 0.115 / 0.115 | 0.149 / 0.150 / 0.149 / 0.150 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,019 / 1,019 / 1,019 / 1,019 | 0.151 / 0.161 / 0.163 / 0.168 | 0.090 / 0.093 / 0.094 / 0.096 | 0.201 / 0.204 / 0.204 / 0.204 | 0.618 / 0.619 / 0.621 / 0.622 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 11,931 / 11,931 / 11,931 / 11,931 | 0.115 / 0.126 / 0.145 / 0.183 | 0.487 / 0.491 / 0.496 / 0.503 | 0.243 / 0.244 / 0.245 / 0.245 | 0.261 / 0.260 / 0.261 / 0.264 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,651 / 4,651 / 4,651 / 4,651 | 0.161 / 0.197 / 0.257 / 0.463 | 0.318 / 0.316 / 0.315 / 0.318 | 0.288 / 0.287 / 0.287 / 0.288 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 867 / 867 / 867 | 1.000 / 0.374 / 0.445 / 0.567 | 1.000 / 0.395 / 0.385 / 0.383 | 1.000 / 0.407 / 0.414 / 0.425 | 1.000 / 0.028 / 0.029 / 0.029 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 100,294 / 109,159 / 113,989 / 115,141 | 0.159 / 0.194 / 0.227 / 0.255 | 0.120 / 0.160 / 0.190 / 0.196 | 0.148 / 0.154 / 0.156 / 0.157 | 0.172 / 0.175 / 0.178 / 0.178 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 8,931 / 10,558 / 11,357 / 11,896 | 0.184 / 0.254 / 0.327 / 0.403 | 0.044 / 0.054 / 0.063 / 0.072 | 0.150 / 0.167 / 0.177 / 0.186 | 0.393 / 0.411 / 0.422 / 0.428 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,256 / 3,944 / 4,259 / 4,307 | 0.162 / 0.238 / 0.301 / 0.332 | 0.056 / 0.067 / 0.080 / 0.096 | 0.106 / 0.120 / 0.130 / 0.131 | 0.153 / 0.154 / 0.158 / 0.159 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,785 / 1,879 / 1,880 / 1,887 | 0.153 / 0.168 / 0.168 / 0.179 | 0.096 / 0.105 / 0.105 / 0.108 | 0.202 / 0.207 / 0.207 / 0.207 | 0.628 / 0.634 / 0.635 / 0.637 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 20,949 / 21,293 / 21,452 / 21,637 | 0.119 / 0.132 / 0.153 / 0.200 | 0.497 / 0.502 / 0.508 / 0.516 | 0.245 / 0.246 / 0.247 / 0.247 | 0.270 / 0.269 / 0.270 / 0.274 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,511 / 6,135 / 6,340 / 6,540 | 0.156 / 0.191 / 0.247 / 0.443 | 0.303 / 0.308 / 0.309 / 0.312 | 0.284 / 0.286 / 0.286 / 0.286 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 605 / 817 / 935 / 960 | 0.247 / 0.359 / 0.432 / 0.527 | 0.422 / 0.386 / 0.380 / 0.378 | 0.351 / 0.379 / 0.397 / 0.407 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5590** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 54,217 | 40,773 / 46,131 / 48,671 / 49,739 | 0.657 / 0.787 / 0.840 / 0.863 | 0.755 / 0.828 / 0.855 / 0.866 | 0.227 / 0.189 / 0.163 / 0.155 | +| truck | 4,913 | 2,938 / 3,603 / 3,991 / 4,182 | 0.433 / 0.605 / 0.700 / 0.747 | 0.606 / 0.710 / 0.757 / 0.781 | 0.223 / 0.182 / 0.165 / 0.141 | +| bus | 2,116 | 1,374 / 1,777 / 1,950 / 1,994 | 0.516 / 0.753 / 0.876 / 0.900 | 0.645 / 0.798 / 0.865 / 0.881 | 0.329 / 0.140 / 0.140 / 0.110 | +| bicycle | 838 | 666 / 708 / 711 / 711 | 0.617 / 0.681 / 0.684 / 0.688 | 0.693 / 0.729 / 0.730 / 0.737 | 0.164 / 0.164 / 0.164 / 0.164 | +| pedestrian | 8,336 | 6,988 / 7,142 / 7,202 / 7,290 | 0.599 / 0.621 / 0.634 / 0.647 | 0.667 / 0.677 / 0.683 / 0.690 | 0.153 / 0.145 / 0.144 / 0.144 | +| traffic_cone | 2,632 | 1,574 / 1,688 / 1,748 / 1,850 | 0.215 / 0.254 / 0.277 / 0.338 | 0.405 / 0.430 / 0.449 / 0.484 | 0.153 / 0.153 / 0.126 / 0.134 | +| barrier | 622 | 180 / 276 / 305 / 321 | 0.057 / 0.195 / 0.227 / 0.244 | 0.268 / 0.401 / 0.429 / 0.440 | 0.140 / 0.121 / 0.116 / 0.116 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 5,963 / 5,963 / 5,963 / 5,963 | 0.159 / 0.185 / 0.203 / 0.219 | 0.112 / 0.145 / 0.167 / 0.171 | 0.157 / 0.161 / 0.162 / 0.162 | 0.158 / 0.158 / 0.158 / 0.158 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 540 / 540 / 540 / 540 | 0.191 / 0.240 / 0.292 / 0.338 | 0.034 / 0.040 / 0.046 / 0.048 | 0.152 / 0.163 / 0.170 / 0.175 | 0.438 / 0.449 / 0.453 / 0.458 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 232 / 232 / 232 / 232 | 0.168 / 0.223 / 0.268 / 0.281 | 0.117 / 0.109 / 0.113 / 0.119 | 0.119 / 0.126 / 0.133 / 0.133 | 0.131 / 0.131 / 0.130 / 0.130 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 92 / 92 / 92 / 92 | 0.172 / 0.187 / 0.188 / 0.200 | 0.114 / 0.121 / 0.120 / 0.123 | 0.199 / 0.201 / 0.201 / 0.201 | 0.691 / 0.685 / 0.686 / 0.689 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 916 / 916 / 916 / 916 | 0.119 / 0.129 / 0.148 / 0.192 | 0.569 / 0.574 / 0.579 / 0.587 | 0.230 / 0.231 / 0.231 / 0.231 | 0.291 / 0.290 / 0.292 / 0.297 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 289 / 289 / 289 / 289 | 0.165 / 0.201 / 0.289 / 0.670 | 0.269 / 0.283 / 0.286 / 0.308 | 0.290 / 0.290 / 0.291 / 0.292 | 0.038 / 0.038 / 0.038 / 0.039 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 68 / 68 / 68 / 68 | 0.240 / 0.385 / 0.440 / 0.537 | 0.297 / 0.260 / 0.257 / 0.255 | 0.309 / 0.350 / 0.357 / 0.361 | 0.034 / 0.034 / 0.033 / 0.033 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 22,228 / 22,228 / 22,228 / 22,228 | 0.171 / 0.204 / 0.228 / 0.250 | 0.138 / 0.177 / 0.205 / 0.210 | 0.163 / 0.167 / 0.168 / 0.169 | 0.164 / 0.164 / 0.163 / 0.163 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,014 / 2,014 / 2,014 / 2,014 | 0.204 / 0.269 / 0.334 / 0.393 | 0.046 / 0.054 / 0.061 / 0.064 | 0.163 / 0.174 / 0.182 / 0.189 | 0.465 / 0.481 / 0.484 / 0.492 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 867 / 867 / 867 / 867 | 0.187 / 0.263 / 0.322 / 0.341 | 0.089 / 0.087 / 0.097 / 0.108 | 0.125 / 0.134 / 0.142 / 0.143 | 0.141 / 0.137 / 0.135 / 0.135 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 343 / 343 / 343 / 343 | 0.180 / 0.202 / 0.204 / 0.223 | 0.142 / 0.149 / 0.148 / 0.153 | 0.199 / 0.202 / 0.202 / 0.201 | 0.711 / 0.704 / 0.704 / 0.709 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 3,417 / 3,417 / 3,417 / 3,417 | 0.124 / 0.136 / 0.159 / 0.215 | 0.590 / 0.596 / 0.602 / 0.612 | 0.230 / 0.231 / 0.231 / 0.232 | 0.315 / 0.313 / 0.314 / 0.320 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,079 / 1,079 / 1,079 / 1,079 | 0.175 / 0.218 / 0.328 / 0.718 | 0.348 / 0.355 / 0.359 / 0.379 | 0.297 / 0.297 / 0.296 / 0.297 | 0.038 / 0.038 / 0.038 / 0.038 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 255 / 255 / 255 | 1.000 / 0.398 / 0.489 / 0.692 | 1.000 / 0.332 / 0.312 / 0.307 | 1.000 / 0.409 / 0.415 / 0.424 | 1.000 / 0.036 / 0.035 / 0.035 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 37,866 / 42,472 / 44,630 / 45,417 | 0.182 / 0.229 / 0.269 / 0.309 | 0.167 / 0.219 / 0.256 / 0.266 | 0.168 / 0.173 / 0.175 / 0.176 | 0.167 / 0.169 / 0.170 / 0.170 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,598 / 3,145 / 3,407 / 3,593 | 0.206 / 0.281 / 0.355 / 0.432 | 0.047 / 0.061 / 0.072 / 0.081 | 0.165 / 0.181 / 0.190 / 0.200 | 0.477 / 0.495 / 0.499 / 0.513 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,175 / 1,604 / 1,740 / 1,815 | 0.193 / 0.302 / 0.379 / 0.440 | 0.087 / 0.090 / 0.118 / 0.151 | 0.126 / 0.142 / 0.151 / 0.153 | 0.136 / 0.136 / 0.133 / 0.139 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 524 / 551 / 552 / 557 | 0.179 / 0.201 / 0.203 / 0.230 | 0.141 / 0.147 / 0.147 / 0.154 | 0.197 / 0.202 / 0.202 / 0.202 | 0.701 / 0.690 / 0.692 / 0.696 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 5,196 / 5,378 / 5,448 / 5,510 | 0.124 / 0.136 / 0.159 / 0.216 | 0.574 / 0.584 / 0.590 / 0.601 | 0.230 / 0.230 / 0.231 / 0.231 | 0.316 / 0.319 / 0.321 / 0.327 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,014 / 1,077 / 1,237 / 1,290 | 0.166 / 0.203 / 0.318 / 0.694 | 0.293 / 0.300 / 0.335 / 0.360 | 0.291 / 0.291 / 0.293 / 0.294 | 0.038 / 0.038 / 0.038 / 0.039 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 136 / 209 / 227 / 233 | 0.241 / 0.390 / 0.465 / 0.548 | 0.322 / 0.295 / 0.284 / 0.278 | 0.319 / 0.355 / 0.365 / 0.371 | 0.035 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4021** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 19,301 | 13,277 / 15,990 / 17,394 / 17,838 | 0.500 / 0.678 / 0.767 / 0.794 | 0.640 / 0.736 / 0.776 / 0.789 | 0.200 / 0.169 / 0.153 / 0.153 | +| truck | 2,906 | 1,247 / 1,799 / 2,243 / 2,429 | 0.212 / 0.427 / 0.622 / 0.697 | 0.430 / 0.589 / 0.701 / 0.743 | 0.191 / 0.144 / 0.144 / 0.107 | +| bus | 484 | 215 / 322 / 390 / 403 | 0.225 / 0.483 / 0.628 / 0.652 | 0.407 / 0.579 / 0.671 / 0.683 | 0.098 / 0.098 / 0.073 / 0.073 | +| bicycle | 291 | 211 / 238 / 242 / 242 | 0.395 / 0.517 / 0.525 / 0.529 | 0.575 / 0.641 / 0.641 / 0.641 | 0.182 / 0.182 / 0.182 / 0.182 | +| pedestrian | 2,564 | 1,978 / 2,019 / 2,045 / 2,066 | 0.407 / 0.420 / 0.429 / 0.437 | 0.540 / 0.547 / 0.549 / 0.552 | 0.121 / 0.129 / 0.123 / 0.129 | +| traffic_cone | 462 | 218 / 223 / 228 / 251 | 0.114 / 0.117 / 0.124 / 0.151 | 0.322 / 0.322 / 0.332 / 0.353 | 0.133 / 0.133 / 0.144 / 0.133 | +| barrier | 145 | 38 / 65 / 83 / 88 | 0.014 / 0.084 / 0.147 / 0.165 | 0.198 / 0.304 / 0.375 / 0.397 | 0.112 / 0.112 / 0.110 / 0.110 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 2,123 / 2,123 / 2,123 / 2,123 | 0.202 / 0.250 / 0.292 / 0.326 | 0.216 / 0.280 / 0.318 / 0.330 | 0.180 / 0.184 / 0.186 / 0.186 | 0.295 / 0.284 / 0.280 / 0.279 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 319 / 319 / 319 / 319 | 0.225 / 0.328 / 0.457 / 0.534 | 0.043 / 0.051 / 0.066 / 0.073 | 0.172 / 0.193 / 0.210 / 0.219 | 0.442 / 0.467 / 0.477 / 0.484 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 53 / 53 / 53 / 53 | 0.265 / 0.372 / 0.446 / 0.471 | 0.028 / 0.030 / 0.034 / 0.036 | 0.147 / 0.167 / 0.179 / 0.180 | 0.345 / 0.342 / 0.343 / 0.342 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 32 / 32 / 32 / 32 | 0.252 / 0.288 / 0.293 / 0.303 | 0.091 / 0.096 / 0.098 / 0.099 | 0.246 / 0.259 / 0.259 / 0.259 | 0.754 / 0.761 / 0.765 / 0.765 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 282 / 282 / 282 / 282 | 0.123 / 0.134 / 0.153 / 0.197 | 0.505 / 0.506 / 0.511 / 0.518 | 0.250 / 0.250 / 0.251 / 0.252 | 0.389 / 0.390 / 0.390 / 0.393 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 50 / 50 / 50 / 50 | 0.146 / 0.153 / 0.222 / 0.594 | 0.289 / 0.290 / 0.290 / 0.310 | 0.316 / 0.315 / 0.315 / 0.314 | 0.035 / 0.035 / 0.035 / 0.035 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 15 / 15 / 15 / 15 | 0.270 / 0.426 / 0.570 / 0.932 | 0.370 / 0.281 / 0.272 / 0.268 | 0.363 / 0.395 / 0.423 / 0.426 | 0.046 / 0.045 / 0.047 / 0.047 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 7,913 / 7,913 / 7,913 / 7,913 | 0.211 / 0.267 / 0.318 / 0.361 | 0.274 / 0.331 / 0.371 / 0.383 | 0.185 / 0.189 / 0.191 / 0.191 | 0.284 / 0.276 / 0.273 / 0.272 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,191 / 1,191 / 1,191 / 1,191 | 0.243 / 0.362 / 0.521 / 0.621 | 0.091 / 0.076 / 0.093 / 0.099 | 0.189 / 0.205 / 0.222 / 0.232 | 0.501 / 0.524 / 0.522 / 0.529 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 198 / 198 / 198 / 198 | 0.278 / 0.417 / 0.521 / 0.558 | 0.050 / 0.043 / 0.046 / 0.048 | 0.160 / 0.179 / 0.195 / 0.195 | 0.394 / 0.358 / 0.356 / 0.354 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 119 / 119 / 119 / 119 | 0.234 / 0.274 / 0.282 / 0.298 | 0.122 / 0.127 / 0.129 / 0.131 | 0.237 / 0.250 / 0.250 / 0.250 | 0.763 / 0.799 / 0.804 / 0.805 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,051 / 1,051 / 1,051 / 1,051 | 0.131 / 0.145 / 0.169 / 0.219 | 0.569 / 0.570 / 0.576 / 0.584 | 0.247 / 0.248 / 0.248 / 0.249 | 0.421 / 0.420 / 0.421 / 0.425 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 189 / 189 / 189 / 189 | 0.160 / 0.178 / 0.270 / 0.709 | 0.332 / 0.328 / 0.332 / 0.362 | 0.317 / 0.316 / 0.315 / 0.313 | 0.040 / 0.039 / 0.039 / 0.039 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 59 / 59 / 59 | 1.000 / 0.450 / 0.638 / 0.970 | 1.000 / 0.271 / 0.250 / 0.246 | 1.000 / 0.444 / 0.461 / 0.465 | 1.000 / 0.048 / 0.050 / 0.050 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 11,411 / 13,677 / 14,775 / 15,007 | 0.213 / 0.279 / 0.340 / 0.390 | 0.299 / 0.372 / 0.416 / 0.429 | 0.186 / 0.192 / 0.194 / 0.194 | 0.278 / 0.273 / 0.273 / 0.272 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,065 / 1,541 / 1,834 / 2,047 | 0.234 / 0.364 / 0.529 / 0.666 | 0.057 / 0.073 / 0.097 / 0.114 | 0.180 / 0.206 / 0.224 / 0.240 | 0.494 / 0.516 / 0.515 / 0.550 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 164 / 233 / 291 / 296 | 0.279 / 0.412 / 0.546 / 0.582 | 0.044 / 0.044 / 0.050 / 0.052 | 0.155 / 0.178 / 0.198 / 0.198 | 0.340 / 0.321 / 0.363 / 0.358 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 147 / 164 / 164 / 164 | 0.233 / 0.270 / 0.270 / 0.270 | 0.110 / 0.119 / 0.119 / 0.119 | 0.239 / 0.252 / 0.252 / 0.252 | 0.756 / 0.786 / 0.786 / 0.786 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,358 / 1,329 / 1,371 / 1,342 | 0.128 / 0.139 / 0.157 / 0.196 | 0.544 / 0.532 / 0.546 / 0.545 | 0.249 / 0.249 / 0.248 / 0.249 | 0.417 / 0.409 / 0.417 / 0.417 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 135 / 135 / 133 / 148 | 0.142 / 0.142 / 0.216 / 0.596 | 0.300 / 0.300 / 0.303 / 0.308 | 0.313 / 0.313 / 0.312 / 0.313 | 0.035 / 0.035 / 0.035 / 0.035 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 26 / 40 / 50 / 53 | 0.274 / 0.409 / 0.604 / 0.937 | 0.312 / 0.258 / 0.259 / 0.276 | 0.370 / 0.387 / 0.427 / 0.429 | 0.046 / 0.045 / 0.048 / 0.048 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.6463** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 134,456 | 107,307 / 117,848 / 123,019 / 125,254 | 0.730 / 0.828 / 0.873 / 0.893 | 0.804 / 0.858 / 0.879 / 0.887 | 0.240 / 0.200 / 0.165 / 0.162 | +| truck | 14,900 | 9,814 / 11,683 / 12,829 / 13,364 | 0.527 / 0.690 / 0.786 / 0.828 | 0.667 / 0.766 / 0.817 / 0.841 | 0.225 / 0.182 / 0.170 / 0.145 | +| bus | 4,970 | 3,654 / 4,321 / 4,639 / 4,711 | 0.643 / 0.809 / 0.902 / 0.916 | 0.740 / 0.842 / 0.892 / 0.901 | 0.318 / 0.140 / 0.103 / 0.102 | +| bicycle | 2,486 | 2,153 / 2,228 / 2,239 / 2,239 | 0.755 / 0.796 / 0.801 / 0.802 | 0.789 / 0.809 / 0.809 / 0.812 | 0.186 / 0.164 / 0.164 / 0.164 | +| pedestrian | 29,102 | 25,516 / 26,078 / 26,291 / 26,512 | 0.728 / 0.752 / 0.765 / 0.778 | 0.748 / 0.761 / 0.766 / 0.773 | 0.153 / 0.153 / 0.153 / 0.153 | +| traffic_cone | 11,344 | 7,980 / 8,516 / 8,749 / 9,099 | 0.342 / 0.388 / 0.410 / 0.448 | 0.509 / 0.538 / 0.551 / 0.575 | 0.177 / 0.156 / 0.153 / 0.157 | +| barrier | 2,117 | 737 / 1,055 / 1,164 / 1,216 | 0.104 / 0.236 / 0.275 / 0.290 | 0.323 / 0.439 / 0.467 / 0.479 | 0.151 / 0.155 / 0.115 / 0.115 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 14,790 / 14,790 / 14,790 / 14,790 | 0.129 / 0.145 / 0.156 / 0.167 | 0.064 / 0.082 / 0.095 / 0.099 | 0.129 / 0.133 / 0.134 / 0.135 | 0.151 / 0.153 / 0.154 / 0.155 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,639 / 1,639 / 1,639 / 1,639 | 0.161 / 0.202 / 0.244 / 0.276 | 0.029 / 0.033 / 0.037 / 0.039 | 0.134 / 0.144 / 0.150 / 0.154 | 0.360 / 0.375 / 0.382 / 0.385 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 546 / 546 / 546 / 546 | 0.132 / 0.163 / 0.196 / 0.203 | 0.057 / 0.059 / 0.061 / 0.064 | 0.094 / 0.100 / 0.107 / 0.107 | 0.126 / 0.130 / 0.131 / 0.131 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 273 / 273 / 273 / 273 | 0.146 / 0.154 / 0.155 / 0.158 | 0.079 / 0.081 / 0.082 / 0.083 | 0.195 / 0.197 / 0.197 / 0.197 | 0.593 / 0.594 / 0.595 / 0.596 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 3,201 / 3,201 / 3,201 / 3,201 | 0.110 / 0.118 / 0.133 / 0.160 | 0.455 / 0.459 / 0.464 / 0.469 | 0.238 / 0.239 / 0.239 / 0.240 | 0.245 / 0.245 / 0.246 / 0.248 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,247 / 1,247 / 1,247 / 1,247 | 0.152 / 0.181 / 0.225 / 0.389 | 0.292 / 0.291 / 0.290 / 0.293 | 0.283 / 0.283 / 0.283 / 0.284 | 0.027 / 0.027 / 0.027 / 0.027 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 232 / 232 / 232 / 232 | 0.235 / 0.339 / 0.389 / 0.459 | 0.390 / 0.353 / 0.349 / 0.349 | 0.319 / 0.353 / 0.362 / 0.370 | 0.026 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 55,126 / 55,126 / 55,126 / 55,126 | 0.143 / 0.165 / 0.181 / 0.196 | 0.084 / 0.109 / 0.126 / 0.131 | 0.139 / 0.143 / 0.144 / 0.145 | 0.164 / 0.165 / 0.166 / 0.166 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,109 / 6,109 / 6,109 / 6,109 | 0.177 / 0.231 / 0.287 / 0.332 | 0.037 / 0.042 / 0.047 / 0.049 | 0.144 / 0.155 / 0.164 / 0.169 | 0.386 / 0.403 / 0.410 / 0.414 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,037 / 2,037 / 2,037 / 2,037 | 0.149 / 0.192 / 0.229 / 0.239 | 0.056 / 0.060 / 0.064 / 0.069 | 0.102 / 0.108 / 0.115 / 0.115 | 0.149 / 0.150 / 0.149 / 0.150 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,019 / 1,019 / 1,019 / 1,019 | 0.151 / 0.161 / 0.163 / 0.168 | 0.090 / 0.093 / 0.094 / 0.096 | 0.201 / 0.204 / 0.204 / 0.204 | 0.618 / 0.619 / 0.621 / 0.622 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 11,931 / 11,931 / 11,931 / 11,931 | 0.115 / 0.126 / 0.145 / 0.183 | 0.487 / 0.491 / 0.496 / 0.503 | 0.243 / 0.244 / 0.245 / 0.245 | 0.261 / 0.260 / 0.261 / 0.264 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,651 / 4,651 / 4,651 / 4,651 | 0.161 / 0.197 / 0.257 / 0.463 | 0.318 / 0.316 / 0.315 / 0.318 | 0.288 / 0.287 / 0.287 / 0.288 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 867 / 867 / 867 | 1.000 / 0.374 / 0.445 / 0.567 | 1.000 / 0.395 / 0.385 / 0.383 | 1.000 / 0.407 / 0.414 / 0.425 | 1.000 / 0.028 / 0.029 / 0.029 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 100,294 / 109,159 / 113,989 / 115,141 | 0.159 / 0.194 / 0.227 / 0.255 | 0.120 / 0.160 / 0.190 / 0.196 | 0.148 / 0.154 / 0.156 / 0.157 | 0.172 / 0.175 / 0.178 / 0.178 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 8,931 / 10,558 / 11,357 / 11,896 | 0.184 / 0.254 / 0.327 / 0.403 | 0.044 / 0.054 / 0.063 / 0.072 | 0.150 / 0.167 / 0.177 / 0.186 | 0.393 / 0.411 / 0.422 / 0.428 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,256 / 3,944 / 4,259 / 4,307 | 0.162 / 0.238 / 0.301 / 0.332 | 0.056 / 0.067 / 0.080 / 0.096 | 0.106 / 0.120 / 0.130 / 0.131 | 0.153 / 0.154 / 0.158 / 0.159 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,785 / 1,879 / 1,880 / 1,887 | 0.153 / 0.168 / 0.168 / 0.179 | 0.096 / 0.105 / 0.105 / 0.108 | 0.202 / 0.207 / 0.207 / 0.207 | 0.628 / 0.634 / 0.635 / 0.637 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 20,949 / 21,293 / 21,452 / 21,637 | 0.119 / 0.132 / 0.153 / 0.200 | 0.497 / 0.502 / 0.508 / 0.516 | 0.245 / 0.246 / 0.247 / 0.247 | 0.270 / 0.269 / 0.270 / 0.274 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,511 / 6,135 / 6,340 / 6,540 | 0.156 / 0.191 / 0.247 / 0.443 | 0.303 / 0.308 / 0.309 / 0.312 | 0.284 / 0.286 / 0.286 / 0.286 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 605 / 817 / 935 / 960 | 0.247 / 0.359 / 0.432 / 0.527 | 0.422 / 0.386 / 0.380 / 0.378 | 0.351 / 0.379 / 0.397 / 0.407 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**JPNTaxi_Gen2**: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (10,687 frames) + + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.6595** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 50,954 | 45,644 / 48,317 / 49,034 / 49,324 | 0.858 / 0.925 / 0.946 / 0.948 | 0.900 / 0.942 / 0.948 / 0.950 | 0.249 / 0.199 / 0.191 / 0.161 | +| truck | 18,624 | 15,109 / 16,753 / 17,761 / 18,114 | 0.729 / 0.852 / 0.930 / 0.955 | 0.818 / 0.891 / 0.934 / 0.948 | 0.299 / 0.215 / 0.181 / 0.180 | +| bus | 3,853 | 3,182 / 3,437 / 3,532 / 3,538 | 0.769 / 0.850 / 0.875 / 0.875 | 0.811 / 0.863 / 0.882 / 0.883 | 0.047 / 0.038 / 0.036 / 0.036 | +| bicycle | 3,768 | 3,351 / 3,417 / 3,418 / 3,420 | 0.764 / 0.783 / 0.783 / 0.784 | 0.796 / 0.802 / 0.803 / 0.803 | 0.243 / 0.243 / 0.243 / 0.243 | +| pedestrian | 70,699 | 66,162 / 67,089 / 67,449 / 67,816 | 0.886 / 0.901 / 0.911 / 0.917 | 0.875 / 0.886 / 0.891 / 0.897 | 0.148 / 0.148 / 0.146 / 0.148 | +| traffic_cone | 12,525 | 7,726 / 8,331 / 8,729 / 8,980 | 0.258 / 0.290 / 0.323 / 0.354 | 0.457 / 0.480 / 0.503 / 0.524 | 0.138 / 0.135 / 0.135 / 0.140 | +| barrier | 2,009 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 5,604 / 5,604 / 5,604 / 5,604 | 0.104 / 0.112 / 0.114 / 0.119 | 0.021 / 0.021 / 0.022 / 0.022 | 0.103 / 0.105 / 0.106 / 0.106 | 0.349 / 0.353 / 0.356 / 0.356 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,048 / 2,048 / 2,048 / 2,048 | 0.135 / 0.162 / 0.183 / 0.197 | 0.016 / 0.016 / 0.017 / 0.017 | 0.115 / 0.121 / 0.126 / 0.127 | 0.406 / 0.412 / 0.423 / 0.427 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 423 / 423 / 423 / 423 | 0.154 / 0.163 / 0.168 / 0.169 | 0.042 / 0.041 / 0.041 / 0.041 | 0.105 / 0.109 / 0.110 / 0.110 | 0.303 / 0.315 / 0.317 / 0.318 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 414 / 414 / 414 / 414 | 0.126 / 0.129 / 0.129 / 0.131 | 0.096 / 0.098 / 0.098 / 0.099 | 0.183 / 0.184 / 0.184 / 0.184 | 0.878 / 0.878 / 0.879 / 0.880 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 7,776 / 7,776 / 7,776 / 7,776 | 0.106 / 0.109 / 0.113 / 0.122 | 0.249 / 0.250 / 0.251 / 0.254 | 0.227 / 0.228 / 0.228 / 0.228 | 0.426 / 0.427 / 0.427 / 0.428 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,377 / 1,377 / 1,377 / 1,377 | 0.145 / 0.170 / 0.260 / 0.408 | 0.702 / 0.711 / 0.715 / 0.702 | 0.300 / 0.302 / 0.302 / 0.301 | 0.100 / 0.100 / 0.099 / 0.098 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 20,891 / 20,891 / 20,891 / 20,891 | 0.110 / 0.120 / 0.123 / 0.130 | 0.022 / 0.024 / 0.025 / 0.025 | 0.106 / 0.109 / 0.109 / 0.109 | 0.380 / 0.384 / 0.388 / 0.387 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 7,635 / 7,635 / 7,635 / 7,635 | 0.143 / 0.174 / 0.202 / 0.223 | 0.017 / 0.017 / 0.018 / 0.018 | 0.118 / 0.125 / 0.130 / 0.133 | 0.474 / 0.478 / 0.490 / 0.494 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,579 / 1,579 / 1,579 / 1,579 | 0.157 / 0.171 / 0.180 / 0.181 | 0.034 / 0.034 / 0.034 / 0.034 | 0.108 / 0.113 / 0.116 / 0.116 | 0.359 / 0.369 / 0.371 / 0.372 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,544 / 1,544 / 1,544 / 1,544 | 0.131 / 0.135 / 0.135 / 0.138 | 0.097 / 0.099 / 0.099 / 0.100 | 0.190 / 0.191 / 0.191 / 0.191 | 0.819 / 0.822 / 0.824 / 0.824 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 28,986 / 28,986 / 28,986 / 28,986 | 0.111 / 0.115 / 0.121 / 0.135 | 0.252 / 0.253 / 0.255 / 0.259 | 0.230 / 0.230 / 0.230 / 0.230 | 0.439 / 0.439 / 0.439 / 0.441 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,135 / 5,135 / 5,135 / 5,135 | 0.161 / 0.200 / 0.316 / 0.500 | 0.758 / 0.759 / 0.763 / 0.751 | 0.307 / 0.309 / 0.310 / 0.309 | 0.098 / 0.097 / 0.097 / 0.097 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 44,376 / 46,850 / 47,216 / 47,606 | 0.123 / 0.149 / 0.156 / 0.170 | 0.031 / 0.036 / 0.037 / 0.040 | 0.114 / 0.120 / 0.121 / 0.121 | 0.432 / 0.444 / 0.452 / 0.457 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 14,516 / 16,104 / 17,017 / 17,294 | 0.154 / 0.204 / 0.260 / 0.301 | 0.018 / 0.020 / 0.022 / 0.023 | 0.125 / 0.136 / 0.146 / 0.151 | 0.532 / 0.559 / 0.590 / 0.602 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,080 / 3,354 / 3,439 / 3,443 | 0.166 / 0.201 / 0.224 / 0.228 | 0.033 / 0.034 / 0.034 / 0.034 | 0.116 / 0.132 / 0.139 / 0.139 | 0.414 / 0.419 / 0.419 / 0.422 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,874 / 2,897 / 2,900 / 2,900 | 0.134 / 0.138 / 0.139 / 0.141 | 0.105 / 0.105 / 0.105 / 0.106 | 0.193 / 0.194 / 0.194 / 0.194 | 0.806 / 0.809 / 0.810 / 0.810 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 59,982 / 60,714 / 61,271 / 61,531 | 0.118 / 0.127 / 0.140 / 0.172 | 0.271 / 0.274 / 0.279 / 0.286 | 0.235 / 0.235 / 0.236 / 0.236 | 0.451 / 0.451 / 0.451 / 0.454 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,720 / 6,079 / 6,376 / 6,515 | 0.156 / 0.190 / 0.300 / 0.475 | 0.748 / 0.753 / 0.755 / 0.743 | 0.305 / 0.308 / 0.307 / 0.306 | 0.097 / 0.097 / 0.096 / 0.096 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5440** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 41,196 | 33,758 / 37,076 / 38,510 / 38,977 | 0.744 / 0.836 / 0.874 / 0.885 | 0.802 / 0.853 / 0.869 / 0.873 | 0.229 / 0.170 / 0.161 / 0.161 | +| truck | 22,942 | 13,827 / 16,980 / 19,337 / 20,306 | 0.471 / 0.638 / 0.771 / 0.817 | 0.626 / 0.731 / 0.809 / 0.833 | 0.224 / 0.192 / 0.160 / 0.129 | +| bus | 3,033 | 1,552 / 2,141 / 2,485 / 2,543 | 0.286 / 0.506 / 0.667 / 0.693 | 0.455 / 0.616 / 0.712 / 0.727 | 0.078 / 0.069 / 0.074 / 0.074 | +| bicycle | 2,310 | 1,862 / 1,959 / 1,963 / 1,977 | 0.584 / 0.633 / 0.633 / 0.643 | 0.665 / 0.689 / 0.689 / 0.695 | 0.160 / 0.142 / 0.142 / 0.142 | +| pedestrian | 36,881 | 33,364 / 33,924 / 34,094 / 34,294 | 0.764 / 0.779 / 0.790 / 0.799 | 0.772 / 0.781 / 0.785 / 0.791 | 0.159 / 0.148 / 0.148 / 0.148 | +| traffic_cone | 7,183 | 4,785 / 4,935 / 5,020 / 5,158 | 0.317 / 0.337 / 0.366 / 0.399 | 0.492 / 0.504 / 0.523 / 0.541 | 0.135 / 0.135 / 0.125 / 0.125 | +| barrier | 1,847 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 4,531 / 4,531 / 4,531 / 4,531 | 0.141 / 0.154 / 0.162 / 0.171 | 0.034 / 0.040 / 0.043 / 0.045 | 0.137 / 0.139 / 0.140 / 0.141 | 0.319 / 0.346 / 0.360 / 0.366 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,523 / 2,523 / 2,523 / 2,523 | 0.197 / 0.237 / 0.300 / 0.332 | 0.024 / 0.028 / 0.032 / 0.035 | 0.146 / 0.154 / 0.164 / 0.168 | 0.509 / 0.562 / 0.605 / 0.624 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 333 / 333 / 333 / 333 | 0.224 / 0.314 / 0.405 / 0.440 | 0.030 / 0.036 / 0.039 / 0.040 | 0.130 / 0.143 / 0.155 / 0.156 | 0.272 / 0.289 / 0.289 / 0.290 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 254 / 254 / 254 / 254 | 0.186 / 0.198 / 0.199 / 0.245 | 0.094 / 0.094 / 0.094 / 0.091 | 0.230 / 0.231 / 0.231 / 0.229 | 0.847 / 0.861 / 0.861 / 0.861 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,056 / 4,056 / 4,056 / 4,056 | 0.116 / 0.121 / 0.133 / 0.162 | 0.296 / 0.299 / 0.303 / 0.307 | 0.261 / 0.262 / 0.262 / 0.262 | 0.464 / 0.464 / 0.465 / 0.468 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 790 / 790 / 790 / 790 | 0.164 / 0.177 / 0.305 / 0.502 | 0.490 / 0.497 / 0.497 / 0.493 | 0.234 / 0.235 / 0.234 / 0.233 | 0.108 / 0.108 / 0.107 / 0.107 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 16,890 / 16,890 / 16,890 / 16,890 | 0.151 / 0.169 / 0.181 / 0.193 | 0.041 / 0.049 / 0.053 / 0.056 | 0.142 / 0.145 / 0.146 / 0.147 | 0.379 / 0.410 / 0.429 / 0.435 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 9,406 / 9,406 / 9,406 / 9,406 | 0.201 / 0.261 / 0.346 / 0.394 | 0.031 / 0.036 / 0.041 / 0.044 | 0.151 / 0.162 / 0.175 / 0.181 | 0.612 / 0.669 / 0.712 / 0.735 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,243 / 1,243 / 1,243 / 1,243 | 0.244 / 0.365 / 0.483 / 0.537 | 0.055 / 0.052 / 0.052 / 0.054 | 0.137 / 0.153 / 0.166 / 0.168 | 0.375 / 0.347 / 0.337 / 0.337 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 947 / 947 / 947 / 947 | 0.188 / 0.205 / 0.207 / 0.264 | 0.116 / 0.115 / 0.115 / 0.109 | 0.238 / 0.238 / 0.238 / 0.237 | 0.884 / 0.895 / 0.896 / 0.899 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 15,121 / 15,121 / 15,121 / 15,121 | 0.121 / 0.128 / 0.142 / 0.182 | 0.314 / 0.318 / 0.323 / 0.328 | 0.262 / 0.263 / 0.263 / 0.263 | 0.486 / 0.485 / 0.487 / 0.491 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2,945 / 2,945 / 2,945 / 2,945 | 0.171 / 0.188 / 0.318 / 0.540 | 0.468 / 0.477 / 0.474 / 0.473 | 0.232 / 0.234 / 0.234 / 0.233 | 0.102 / 0.102 / 0.102 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 30,777 / 33,945 / 34,775 / 34,956 | 0.164 / 0.200 / 0.222 / 0.240 | 0.054 / 0.071 / 0.078 / 0.080 | 0.149 / 0.155 / 0.156 / 0.157 | 0.447 / 0.505 / 0.535 / 0.541 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 12,711 / 15,140 / 17,099 / 18,052 | 0.205 / 0.278 / 0.393 / 0.478 | 0.033 / 0.043 / 0.054 / 0.063 | 0.154 / 0.168 / 0.188 / 0.200 | 0.655 / 0.740 / 0.812 / 0.867 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,191 / 1,652 / 1,886 / 1,924 | 0.238 / 0.367 / 0.492 / 0.552 | 0.036 / 0.052 / 0.052 / 0.055 | 0.132 / 0.153 / 0.167 / 0.168 | 0.297 / 0.341 / 0.345 / 0.344 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,384 / 1,483 / 1,484 / 1,496 | 0.188 / 0.203 / 0.205 / 0.265 | 0.111 / 0.114 / 0.114 / 0.111 | 0.239 / 0.241 / 0.241 / 0.239 | 0.885 / 0.894 / 0.895 / 0.899 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 27,185 / 28,060 / 28,214 / 28,437 | 0.123 / 0.134 / 0.150 / 0.199 | 0.327 / 0.335 / 0.340 / 0.348 | 0.263 / 0.264 / 0.264 / 0.265 | 0.500 / 0.504 / 0.505 / 0.510 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 3,298 / 3,377 / 3,633 / 3,756 | 0.166 / 0.180 / 0.314 / 0.523 | 0.475 / 0.481 / 0.478 / 0.475 | 0.234 / 0.236 / 0.235 / 0.235 | 0.103 / 0.103 / 0.102 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.3700** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 17,510 | 13,179 / 15,034 / 15,801 / 16,115 | 0.586 / 0.706 / 0.755 / 0.770 | 0.663 / 0.735 / 0.759 / 0.764 | 0.164 / 0.129 / 0.125 / 0.125 | +| truck | 14,707 | 6,206 / 9,176 / 11,392 / 12,747 | 0.207 / 0.459 / 0.642 / 0.755 | 0.436 / 0.622 / 0.732 / 0.786 | 0.185 / 0.185 / 0.152 / 0.124 | +| bus | 2,997 | 1,331 / 1,867 / 2,079 / 2,176 | 0.168 / 0.369 / 0.448 / 0.487 | 0.412 / 0.538 / 0.581 / 0.601 | 0.049 / 0.046 / 0.046 / 0.046 | +| bicycle | 566 | 386 / 433 / 446 / 450 | 0.227 / 0.284 / 0.330 / 0.342 | 0.417 / 0.461 / 0.484 / 0.491 | 0.132 / 0.132 / 0.132 / 0.132 | +| pedestrian | 16,580 | 14,554 / 14,780 / 14,883 / 15,028 | 0.679 / 0.697 / 0.704 / 0.720 | 0.712 / 0.720 / 0.725 / 0.734 | 0.136 / 0.136 / 0.136 / 0.136 | +| traffic_cone | 769 | 236 / 251 / 256 / 273 | 0.003 / 0.005 / 0.007 / 0.010 | 0.142 / 0.150 / 0.155 / 0.168 | 0.102 / 0.102 / 0.148 / 0.154 | +| barrier | 566 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,926 / 1,926 / 1,926 / 1,926 | 0.189 / 0.215 / 0.237 / 0.254 | 0.053 / 0.066 / 0.071 / 0.073 | 0.183 / 0.187 / 0.189 / 0.189 | 0.568 / 0.597 / 0.617 / 0.622 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,617 / 1,617 / 1,617 / 1,617 | 0.239 / 0.347 / 0.444 / 0.602 | 0.030 / 0.032 / 0.036 / 0.042 | 0.179 / 0.193 / 0.203 / 0.223 | 0.621 / 0.620 / 0.643 / 0.653 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 329 / 329 / 329 / 329 | 0.252 / 0.359 / 0.439 / 0.498 | 0.027 / 0.033 / 0.036 / 0.041 | 0.135 / 0.142 / 0.155 / 0.162 | 0.188 / 0.219 / 0.248 / 0.256 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 62 / 62 / 62 / 62 | 0.188 / 0.233 / 0.295 / 0.376 | 0.273 / 0.256 / 0.247 / 0.266 | 0.236 / 0.245 / 0.266 / 0.264 | 0.925 / 0.985 / 1.007 / 0.996 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,823 / 1,823 / 1,823 / 1,823 | 0.133 / 0.140 / 0.150 / 0.206 | 0.245 / 0.248 / 0.252 / 0.261 | 0.283 / 0.283 / 0.283 / 0.284 | 0.561 / 0.562 / 0.563 / 0.568 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 84 / 84 / 84 / 84 | 0.214 / 0.252 / 0.380 / 0.734 | 1.068 / 1.069 / 1.057 / 1.038 | 0.369 / 0.369 / 0.368 / 0.363 | 0.191 / 0.190 / 0.192 / 0.196 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 7,179 / 7,179 / 7,179 / 7,179 | 0.202 / 0.238 / 0.266 / 0.290 | 0.066 / 0.085 / 0.090 / 0.093 | 0.187 / 0.192 / 0.194 / 0.194 | 0.633 / 0.667 / 0.691 / 0.698 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,029 / 6,029 / 6,029 / 6,029 | 0.244 / 0.378 / 0.512 / 0.679 | 0.068 / 0.045 / 0.048 / 0.055 | 0.191 / 0.202 / 0.214 / 0.235 | 0.768 / 0.733 / 0.741 / 0.752 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,228 / 1,228 / 1,228 / 1,228 | 0.270 / 0.368 / 0.460 / 0.525 | 0.030 / 0.037 / 0.041 / 0.048 | 0.141 / 0.148 / 0.163 / 0.172 | 0.198 / 0.254 / 0.284 / 0.293 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 232 / 232 / 232 / 232 | 0.194 / 0.249 / 0.291 / 0.366 | 0.306 / 0.275 / 0.267 / 0.279 | 0.224 / 0.234 / 0.248 / 0.246 | 0.939 / 1.037 / 1.056 / 1.050 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 6,797 / 6,797 / 6,797 / 6,797 | 0.139 / 0.148 / 0.162 / 0.233 | 0.281 / 0.284 / 0.288 / 0.300 | 0.280 / 0.280 / 0.280 / 0.281 | 0.574 / 0.574 / 0.576 / 0.582 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 10,800 / 12,644 / 13,151 / 13,260 | 0.205 / 0.254 / 0.291 / 0.316 | 0.068 / 0.095 / 0.102 / 0.105 | 0.187 / 0.194 / 0.196 / 0.196 | 0.661 / 0.706 / 0.737 / 0.745 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 5,665 / 8,075 / 9,757 / 10,776 | 0.241 / 0.377 / 0.531 / 0.726 | 0.043 / 0.045 / 0.053 / 0.068 | 0.185 / 0.204 / 0.220 / 0.244 | 0.716 / 0.746 / 0.803 / 0.839 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,062 / 1,422 / 1,542 / 1,587 | 0.253 / 0.354 / 0.439 / 0.498 | 0.032 / 0.035 / 0.039 / 0.044 | 0.132 / 0.142 / 0.156 / 0.163 | 0.194 / 0.240 / 0.273 / 0.278 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 221 / 245 / 257 / 261 | 0.185 / 0.238 / 0.286 / 0.362 | 0.251 / 0.234 / 0.225 / 0.256 | 0.229 / 0.237 / 0.254 / 0.252 | 0.957 / 1.037 / 1.058 / 1.044 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 11,283 / 11,409 / 11,480 / 11,627 | 0.141 / 0.150 / 0.164 / 0.243 | 0.284 / 0.287 / 0.291 / 0.307 | 0.280 / 0.280 / 0.280 / 0.281 | 0.578 / 0.577 / 0.578 / 0.586 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 167 / 177 / 127 / 133 | 0.210 / 0.249 / 0.392 / 0.736 | 1.018 / 1.021 / 1.158 / 1.135 | 0.365 / 0.365 / 0.381 / 0.374 | 0.190 / 0.191 / 0.198 / 0.204 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.5785** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 109,660 | 92,745 / 100,651 / 103,627 / 104,735 | 0.783 / 0.872 / 0.902 / 0.913 | 0.829 / 0.878 / 0.890 / 0.894 | 0.229 / 0.164 / 0.161 / 0.159 | +| truck | 56,273 | 35,229 / 43,057 / 48,764 / 51,517 | 0.495 / 0.675 / 0.803 / 0.862 | 0.647 / 0.762 / 0.837 / 0.865 | 0.249 / 0.192 / 0.166 / 0.136 | +| bus | 9,883 | 6,077 / 7,470 / 8,128 / 8,289 | 0.457 / 0.619 / 0.703 / 0.721 | 0.586 / 0.693 / 0.741 / 0.752 | 0.056 / 0.049 / 0.049 / 0.049 | +| bicycle | 6,644 | 5,610 / 5,822 / 5,840 / 5,859 | 0.666 / 0.697 / 0.701 / 0.705 | 0.723 / 0.736 / 0.738 / 0.740 | 0.202 / 0.198 / 0.202 / 0.202 | +| pedestrian | 124,160 | 114,192 / 115,885 / 116,521 / 117,246 | 0.831 / 0.851 / 0.856 / 0.868 | 0.824 / 0.833 / 0.839 / 0.845 | 0.148 / 0.148 / 0.148 / 0.148 | +| traffic_cone | 20,477 | 12,760 / 13,531 / 14,021 / 14,440 | 0.261 / 0.288 / 0.318 / 0.349 | 0.455 / 0.473 / 0.494 / 0.514 | 0.135 / 0.135 / 0.135 / 0.138 | +| barrier | 4,422 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 12,062 / 12,062 / 12,062 / 12,062 | 0.122 / 0.134 / 0.140 / 0.148 | 0.027 / 0.031 / 0.033 / 0.034 | 0.119 / 0.122 / 0.123 / 0.124 | 0.361 / 0.374 / 0.383 / 0.386 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,190 / 6,190 / 6,190 / 6,190 | 0.171 / 0.216 / 0.267 / 0.315 | 0.020 / 0.022 / 0.025 / 0.027 | 0.134 / 0.144 / 0.152 / 0.159 | 0.476 / 0.504 / 0.534 / 0.549 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,087 / 1,087 / 1,087 / 1,087 | 0.178 / 0.225 / 0.268 / 0.286 | 0.034 / 0.036 / 0.038 / 0.038 | 0.115 / 0.123 / 0.130 / 0.131 | 0.313 / 0.324 / 0.330 / 0.331 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 730 / 730 / 730 / 730 | 0.142 / 0.149 / 0.151 / 0.165 | 0.100 / 0.101 / 0.101 / 0.102 | 0.195 / 0.197 / 0.197 / 0.197 | 0.841 / 0.848 / 0.849 / 0.850 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 13,657 / 13,657 / 13,657 / 13,657 | 0.111 / 0.115 / 0.121 / 0.141 | 0.260 / 0.262 / 0.264 / 0.269 | 0.239 / 0.239 / 0.239 / 0.240 | 0.445 / 0.446 / 0.446 / 0.449 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2,252 / 2,252 / 2,252 / 2,252 | 0.153 / 0.175 / 0.279 / 0.453 | 0.637 / 0.645 / 0.648 / 0.638 | 0.279 / 0.281 / 0.281 / 0.280 | 0.103 / 0.103 / 0.102 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 44,960 / 44,960 / 44,960 / 44,960 | 0.132 / 0.149 / 0.157 / 0.168 | 0.032 / 0.037 / 0.039 / 0.041 | 0.126 / 0.129 / 0.131 / 0.131 | 0.401 / 0.418 / 0.429 / 0.433 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 23,071 / 23,071 / 23,071 / 23,071 | 0.182 / 0.243 / 0.313 / 0.378 | 0.024 / 0.027 / 0.030 / 0.033 | 0.141 / 0.153 / 0.163 / 0.172 | 0.564 / 0.592 / 0.622 / 0.638 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 4,052 / 4,052 / 4,052 / 4,052 | 0.198 / 0.269 / 0.331 / 0.358 | 0.033 / 0.037 / 0.039 / 0.041 | 0.123 / 0.134 / 0.144 / 0.145 | 0.350 / 0.354 / 0.360 / 0.362 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,724 / 2,724 / 2,724 / 2,724 | 0.151 / 0.161 / 0.164 / 0.185 | 0.110 / 0.111 / 0.111 / 0.112 | 0.207 / 0.208 / 0.209 / 0.209 | 0.828 / 0.838 / 0.840 / 0.840 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 50,905 / 50,905 / 50,905 / 50,905 | 0.117 / 0.122 / 0.131 / 0.160 | 0.269 / 0.273 / 0.275 / 0.282 | 0.244 / 0.244 / 0.244 / 0.245 | 0.464 / 0.464 / 0.465 / 0.469 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 8,395 / 8,395 / 8,395 / 8,395 | 0.166 / 0.198 / 0.321 / 0.530 | 0.659 / 0.664 / 0.669 / 0.661 | 0.281 / 0.283 / 0.284 / 0.284 | 0.101 / 0.101 / 0.100 / 0.100 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 85,327 / 93,303 / 94,826 / 95,316 | 0.148 / 0.182 / 0.198 / 0.214 | 0.044 / 0.057 / 0.061 / 0.063 | 0.136 / 0.142 / 0.144 / 0.144 | 0.464 / 0.503 / 0.523 / 0.527 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 32,549 / 39,457 / 43,979 / 46,340 | 0.188 / 0.269 / 0.373 / 0.472 | 0.026 / 0.034 / 0.041 / 0.048 | 0.145 / 0.162 / 0.179 / 0.192 | 0.610 / 0.671 / 0.725 / 0.764 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 5,296 / 6,439 / 6,895 / 6,991 | 0.201 / 0.280 / 0.349 / 0.382 | 0.036 / 0.042 / 0.044 / 0.046 | 0.123 / 0.139 / 0.150 / 0.152 | 0.350 / 0.367 / 0.378 / 0.381 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 4,446 / 4,536 / 4,537 / 4,547 | 0.152 / 0.161 / 0.164 / 0.186 | 0.110 / 0.110 / 0.110 / 0.109 | 0.209 / 0.210 / 0.211 / 0.210 | 0.830 / 0.839 / 0.841 / 0.840 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 98,679 / 99,889 / 100,492 / 101,325 | 0.122 / 0.131 / 0.145 / 0.189 | 0.288 / 0.292 / 0.296 / 0.306 | 0.248 / 0.248 / 0.249 / 0.249 | 0.480 / 0.479 / 0.480 / 0.485 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 9,207 / 9,578 / 10,010 / 10,328 | 0.161 / 0.187 / 0.306 / 0.503 | 0.656 / 0.663 / 0.664 / 0.656 | 0.281 / 0.283 / 0.283 / 0.282 | 0.100 / 0.100 / 0.099 / 0.100 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +
+ +--- + ### BEVFusion-LiDAR base/2.7.0
From 6f001d12ba33f3751e232461e5eb1b10fc5a30db Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 22:19:37 +0900 Subject: [PATCH 159/162] Update base, j6gen2, jpntaxi docstring --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 13 +- .../docs/BEVFusion-L/v2/j6gen2_base.md | 1199 ++++++++++++++++- 2 files changed, 1152 insertions(+), 60 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 4fb185cdb..fdd038874 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -721,6 +721,7 @@
Changes +- Add datasets: `db_j6gen2_v10`, `db_j6gen2_v11`, and `db_j6gen2_v12`. - Update training batch size from `8` to `16` per gpu. - Update number of max points per voxel from `10` to `32`. - Implement 1D-flatten sparse to dense to reduce ONNX ops (projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py). @@ -734,14 +735,14 @@ Artifacts - Deployed onnx and ROS parameter files (for internal) - - [WebAuto]() - - [model-zoo]() + - [WebAuto](https://evaluation.ci.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/6c8bc393-9cff-413c-bfc7-52c1fcd8ba8a?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.8.0/deployment.zip) - [Google drive](https://drive.google.com/file/d/16dh2UQg4w46WQu0Dbmai9BtD43nz7hLv/view?usp=drive_link) - Logs (for internal) - - [model-zoo]() + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.8.0/logs.zip) - [Google drive](https://drive.google.com/file/d/1kQVufXiB_K9JYTL3DSUhGUIGW5fCbbwc/view?usp=drive_link) - Pytorch Best checkpoints: - - [model-zoo]() + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.8.0/best_epoch_47.zip) - [Google drive](https://drive.google.com/file/d/1wJjdG1dCbOjfmTCaOFWw9-2xNvlfYDza/view?usp=drive_link)
@@ -750,13 +751,13 @@ Training configs - [Config file path](https://github.com/KSeangTan/AWML/blob/179ca256a165fd483801bec0a2a95c24866edf70/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py) -- Train time: NVIDIA H200 80GB * 8 * 50 epochs ~= 4 days +- Train time: NVIDIA H200 140GB * 8 * 50 epochs ~= 4 days - Batch size: 8*16 = 128 - Training Dataset (frames: 151,478): - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) - j6: db_gsm8_v1 + db_j6_v1 + db_j6_v2 + db_j6_v3 + db_j6_v5 (29,336 frames) - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (51,208 frames) - - largebus: db_largebus_v1 + db_largebus_v2 (12,605 frames) + - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames) - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (30,168 frames)
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md index 54e994313..eb477b9b9 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md @@ -13,7 +13,7 @@ - **Total Frames: 5,179**
- j6gen2 (3,951 frames) + j6gen2 (4,682 frames) - `db_j6gen2_v1` - `db_j6gen2_v2` @@ -24,6 +24,9 @@ - `db_j6gen2_v7` - `db_j6gen2_v8` - `db_j6gen2_v9` + - `db_j6gen2_v10` + - `db_j6gen2_v11` + - `db_j6gen2_v12`
@@ -37,7 +40,7 @@
- j6gen2_base (5,179 frames) + j6gen2_base (5,910 frames) - `db_j6gen2_v1` - `db_j6gen2_v2` @@ -48,6 +51,9 @@ - `db_j6gen2_v7` - `db_j6gen2_v8` - `db_j6gen2_v9` + - `db_j6gen2_v10` + - `db_j6gen2_v11` + - `db_j6gen2_v12` - `db_largebus_v1` - `db_largebus_v2` - `db_largebus_v3` @@ -61,41 +67,159 @@
Eval Range: 0.0 - 50.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(75,589) | truck
(8,273) | bus
(2,706) | bicycle
(2,097) | pedestrian
(23,254) | traffic_cone
(8,310) | barrier
(1,350) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.7289 | 0.6808 | 0.6820 | 0.6757 | 0.6579 | 0.6516 | 0.9000 | 0.8398 | 0.9130 | 0.8907 | 0.8535 | 0.4465 | 0.2590 | - | Model version | mAP | mAPH | car
(64,520) | truck
(6,947) | bus
(2,275) | bicycle
(1,379) | pedestrian
(19,421) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8828 | 0.8387 | 0.9022 | 0.8627 | 0.9440 | 0.8483 | 0.8569 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 | +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(64,960) | truck
(5,922) | bus
(2,257) | bicycle
(1,298) | pedestrian
(12,052) | traffic_cone
(2,636) | barrier
(622) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5802 | 0.5217 | 0.5876 | 0.5690 | 0.5584 | 0.5398 | 0.8127 | 0.6518 | 0.7926 | 0.6527 | 0.6690 | 0.2760 | 0.2064 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(22,141) | truck
(3,506) | bus
(544) | bicycle
(376) | pedestrian
(3,656) | traffic_cone
(462) | barrier
(145) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4396 | 0.3969 | 0.5002 | 0.4696 | 0.4789 | 0.4483 | 0.7147 | 0.5324 | 0.5445 | 0.4977 | 0.4993 | 0.1329 | 0.1559 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(162,690) | truck
(17,701) | bus
(5,507) | bicycle
(3,771) | pedestrian
(38,962) | traffic_cone
(11,408) | barrier
(2,117) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6590 | 0.6053 | 0.6391 | 0.6214 | 0.6122 | 0.5946 | 0.8547 | 0.7285 | 0.8389 | 0.7843 | 0.7789 | 0.3955 | 0.2321 |
+### Mean TPError - J6Gen2_base + +- Recalls: `0.10`, `0.40`, `optimal` + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1699 | 0.1847 | 0.2714 | 0.1985 | 1.0000 | 0.1878 | 0.2040 | 0.2866 | 0.2091 | 1.0000 | 0.2039 | 0.2176 | 0.2883 | 0.2138 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 75,589) | truck
0.5/1.0/2.0/4.0
(GTs: 8,273) | bus
0.5/1.0/2.0/4.0
(GTs: 2,706) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,097) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 23,254) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,310) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,314 / 8,314 / 8,314 / 8,314 | 910 / 910 / 910 / 910 | 297 / 297 / 297 / 297 | 230 / 230 / 230 / 230 | 2,557 / 2,557 / 2,557 / 2,557 | 914 / 914 / 914 / 914 | 148 / 148 / 148 / 148 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 75,589) | truck
0.5/1.0/2.0/4.0
(GTs: 8,273) | bus
0.5/1.0/2.0/4.0
(GTs: 2,706) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,097) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 23,254) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,310) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 30,991 / 30,991 / 30,991 / 30,991 | 3,391 / 3,391 / 3,391 / 3,391 | 1,109 / 1,109 / 1,109 / 1,109 | 859 / 859 / 859 / 859 | 9,534 / 9,534 / 9,534 / 9,534 | 3,407 / 3,407 / 3,407 / 3,407 | 553 / 553 / 553 / 553 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 75,589) | truck
0.5/1.0/2.0/4.0
(GTs: 8,273) | bus
0.5/1.0/2.0/4.0
(GTs: 2,706) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,097) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 23,254) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,310) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 65,297 / 67,749 / 68,945 / 69,400 | 6,298 / 7,033 / 7,266 / 7,405 | 2,282 / 2,494 / 2,565 / 2,589 | 1,789 / 1,815 / 1,822 / 1,825 | 18,701 / 19,099 / 19,305 / 19,357 | 4,529 / 4,841 / 5,046 / 5,561 | 453 / 529 / 550 / 562 | + +
+
Eval Range: 50.0 - 90.0m - | Model version | mAP | mAPH | car
(58,562) | truck
(5,101) | bus
(2,078) | bicycle
(758) | pedestrian
(10,283) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7193 | 0.6620 | 0.8197 | 0.6856 | 0.8249 | 0.5862 | 0.6801 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 | + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.2380 | 0.2382 | 0.2946 | 0.2537 | 1.0000 | 0.2943 | 0.2886 | 0.3273 | 0.3009 | 1.0000 | 0.2809 | 0.2785 | 0.3078 | 0.2721 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 64,960) | truck
0.5/1.0/2.0/4.0
(GTs: 5,922) | bus
0.5/1.0/2.0/4.0
(GTs: 2,257) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,298) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 12,052) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,636) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 7,145 / 7,145 / 7,145 / 7,145 | 651 / 651 / 651 / 651 | 248 / 248 / 248 / 248 | 142 / 142 / 142 / 142 | 1,325 / 1,325 / 1,325 / 1,325 | 289 / 289 / 289 / 289 | 68 / 68 / 68 / 68 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 64,960) | truck
0.5/1.0/2.0/4.0
(GTs: 5,922) | bus
0.5/1.0/2.0/4.0
(GTs: 2,257) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,298) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 12,052) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,636) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 26,633 / 26,633 / 26,633 / 26,633 | 2,428 / 2,428 / 2,428 / 2,428 | 925 / 925 / 925 / 925 | 532 / 532 / 532 / 532 | 4,941 / 4,941 / 4,941 / 4,941 | 1,080 / 1,080 / 1,080 / 1,080 | 0 / 255 / 255 / 255 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 64,960) | truck
0.5/1.0/2.0/4.0
(GTs: 5,922) | bus
0.5/1.0/2.0/4.0
(GTs: 2,257) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,298) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 12,052) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,636) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 47,451 / 52,827 / 55,318 / 55,889 | 3,253 / 4,045 / 4,354 / 4,471 | 1,367 / 1,826 / 1,951 / 1,981 | 839 / 846 / 901 / 902 | 8,085 / 8,202 / 8,275 / 8,325 | 1,120 / 1,225 / 1,270 / 1,347 | 156 / 231 / 237 / 268 |
Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3252 | 0.2112 | 0.3207 | 0.3389 | 1.0000 | 0.4093 | 0.3020 | 0.3601 | 0.4304 | 1.0000 | 0.3625 | 0.2467 | 0.3279 | 0.3624 | 1.0000 | - | Model version | mAP | mAPH | car
(20,371) | truck
(3,172) | bus
(376) | bicycle
(155) | pedestrian
(2,794) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5223 | 0.4757 | 0.6814 | 0.5181 | 0.5381 | 0.4165 | 0.4573 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 | + Num match summary -
+ **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 22,141) | truck
0.5/1.0/2.0/4.0
(GTs: 3,506) | bus
0.5/1.0/2.0/4.0
(GTs: 544) | bicycle
0.5/1.0/2.0/4.0
(GTs: 376) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,656) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 2,435 / 2,435 / 2,435 / 2,435 | 385 / 385 / 385 / 385 | 59 / 59 / 59 / 59 | 41 / 41 / 41 / 41 | 402 / 402 / 402 / 402 | 50 / 50 / 50 / 50 | 15 / 15 / 15 / 15 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 22,141) | truck
0.5/1.0/2.0/4.0
(GTs: 3,506) | bus
0.5/1.0/2.0/4.0
(GTs: 544) | bicycle
0.5/1.0/2.0/4.0
(GTs: 376) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,656) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 9,077 / 9,077 / 9,077 / 9,077 | 1,437 / 1,437 / 1,437 / 1,437 | 223 / 223 / 223 / 223 | 154 / 154 / 154 / 154 | 1,498 / 1,498 / 1,498 / 1,498 | 0 / 189 / 189 / 189 | 0 / 59 / 59 / 59 | + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 22,141) | truck
0.5/1.0/2.0/4.0
(GTs: 3,506) | bus
0.5/1.0/2.0/4.0
(GTs: 544) | bicycle
0.5/1.0/2.0/4.0
(GTs: 376) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,656) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,838 / 16,236 / 17,440 / 17,774 | 1,483 / 1,981 / 2,484 / 2,599 | 192 / 312 / 394 / 405 | 187 / 218 / 223 / 223 | 2,141 / 2,167 / 2,181 / 2,199 | 132 / 147 / 178 / 186 | 33 / 52 / 72 / 73 | + +
+
Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1972 | 0.2036 | 0.2826 | 0.2208 | 1.0000 | 0.2503 | 0.2473 | 0.3146 | 0.2684 | 1.0000 | 0.2405 | 0.2402 | 0.2976 | 0.2433 | 1.0000 | + + Num match summary + + **recall 0.10** - | Model version | mAP | mAPH | car
(143,453) | truck
(15,220) | bus
(4,729) | bicycle
(2,292) | pedestrian
(32,498) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7990 | 0.7487 | 0.8508 | 0.7435 | 0.8711 | 0.7487 | 0.7809 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7903 | 0.7413 | 0.8266 | 0.7409 | 0.8510 | 0.7541 | 0.7790 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 162,690) | truck
0.5/1.0/2.0/4.0
(GTs: 17,701) | bus
0.5/1.0/2.0/4.0
(GTs: 5,507) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,771) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 38,962) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,408) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 17,895 / 17,895 / 17,895 / 17,895 | 1,947 / 1,947 / 1,947 / 1,947 | 605 / 605 / 605 / 605 | 414 / 414 / 414 / 414 | 4,285 / 4,285 / 4,285 / 4,285 | 1,254 / 1,254 / 1,254 / 1,254 | 232 / 232 / 232 / 232 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 162,690) | truck
0.5/1.0/2.0/4.0
(GTs: 17,701) | bus
0.5/1.0/2.0/4.0
(GTs: 5,507) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,771) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 38,962) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,408) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 66,702 / 66,702 / 66,702 / 66,702 | 7,257 / 7,257 / 7,257 / 7,257 | 2,257 / 2,257 / 2,257 / 2,257 | 1,546 / 1,546 / 1,546 / 1,546 | 15,974 / 15,974 / 15,974 / 15,974 | 4,677 / 4,677 / 4,677 / 4,677 | 0 / 867 / 867 / 867 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 162,690) | truck
0.5/1.0/2.0/4.0
(GTs: 17,701) | bus
0.5/1.0/2.0/4.0
(GTs: 5,507) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,771) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 38,962) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,408) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 126,514 / 137,022 / 141,450 / 143,300 | 10,859 / 13,045 / 14,027 / 14,439 | 3,844 / 4,623 / 4,831 / 4,974 | 2,732 / 2,840 / 2,856 / 2,859 | 28,970 / 29,422 / 29,647 / 29,660 | 5,644 / 6,197 / 6,356 / 7,069 | 602 / 827 / 850 / 871 | + +
@@ -113,41 +237,155 @@
Eval Range: 0.0 - 50.0m - - | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8947 | 0.8393 | 0.9231 | 0.8893 | 0.9564 | 0.8264 | 0.8782 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8985 | 0.8484 | 0.9087 | 0.8974 | 0.9636 | 0.8447 | 0.8780 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(14,872) | truck
(1,192) | bus
(336) | bicycle
(740) | pedestrian
(5,055) | traffic_cone
(60) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6313 | 0.6015 | 0.5746 | 0.5493 | 0.5597 | 0.5344 | 0.9156 | 0.8702 | 0.9160 | 0.8586 | 0.8588 | 0.0000 | 0.0000 |
Eval Range: 50.0 - 90.0m - - | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7679 | 0.7089 | 0.8567 | 0.7666 | 0.8723 | 0.5955 | 0.7485 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(10,929) | truck
(1,009) | bus
(141) | bicycle
(460) | pedestrian
(3,721) | traffic_cone
(4) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5281 | 0.4877 | 0.4942 | 0.4916 | 0.4740 | 0.4714 | 0.8442 | 0.7108 | 0.8522 | 0.5764 | 0.7129 | 0.0000 | 0.0000 |
Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(2,883) | truck
(600) | bus
(60) | bicycle
(85) | pedestrian
(1,092) | traffic_cone
(0) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4172 | 0.3831 | 0.4189 | 0.4104 | 0.4018 | 0.3934 | 0.7548 | 0.6586 | 0.5716 | 0.3759 | 0.5594 | 0.0000 | 0.0000 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(28,684) | truck
(2,801) | bus
(537) | bicycle
(1,285) | pedestrian
(9,868) | traffic_cone
(64) | barrier
(0) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5779 | 0.5432 | 0.5404 | 0.5154 | 0.5230 | 0.4980 | 0.8813 | 0.7754 | 0.8642 | 0.7410 | 0.7836 | 0.0000 | 0.0000 | + +
+ +- **Mean TPError - LargeBus** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.2732 | 0.4747 | 0.3197 | 0.3430 | 1.0000 | 0.3937 | 0.3922 | 0.3996 | 0.4786 | 1.0000 | 0.1883 | 0.3943 | 0.2227 | 0.2329 | 1.0000 | + + Num match summary + + **recall 0.10** - | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5924 | 0.5370 | 0.7238 | 0.6616 | 0.6305 | 0.3964 | 0.5497 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,635 / 1,635 / 1,635 / 1,635 | 131 / 131 / 131 / 131 | 36 / 36 / 36 / 36 | 81 / 81 / 81 / 81 | 556 / 556 / 556 / 556 | 6 / 6 / 6 / 6 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 6,097 / 6,097 / 6,097 / 6,097 | 488 / 488 / 488 / 488 | 137 / 137 / 137 / 137 | 303 / 303 / 303 / 303 | 2,072 / 2,072 / 2,072 / 2,072 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 14,872) | truck
0.5/1.0/2.0/4.0
(GTs: 1,192) | bus
0.5/1.0/2.0/4.0
(GTs: 336) | bicycle
0.5/1.0/2.0/4.0
(GTs: 740) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 5,055) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 60) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,178 / 13,676 / 13,748 / 13,798 | 925 / 1,041 / 1,064 / 1,073 | 254 / 330 / 333 / 333 | 612 / 628 / 640 / 643 | 4,247 / 4,294 / 4,313 / 4,330 | 19 / 20 / 20 / 21 | 0 / 0 / 0 / 0 |
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3091 | 0.6981 | 0.3081 | 0.3833 | 1.0000 | 0.3181 | 0.6966 | 0.3115 | 0.3980 | 1.0000 | 0.2197 | 0.6583 | 0.2015 | 0.3121 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,202 / 1,202 / 1,202 / 1,202 | 110 / 110 / 110 / 110 | 15 / 15 / 15 / 15 | 50 / 50 / 50 / 50 | 409 / 409 / 409 / 409 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 4,480 / 4,480 / 4,480 / 4,480 | 413 / 413 / 413 / 413 | 57 / 57 / 57 / 57 | 188 / 188 / 188 / 188 | 1,525 / 1,525 / 1,525 / 1,525 | 1 / 1 / 1 / 1 | 0 / 0 / 0 / 0 | + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 10,929) | truck
0.5/1.0/2.0/4.0
(GTs: 1,009) | bus
0.5/1.0/2.0/4.0
(GTs: 141) | bicycle
0.5/1.0/2.0/4.0
(GTs: 460) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 3,721) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 4) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,463 / 9,288 / 9,554 / 9,621 | 617 / 739 / 799 / 804 | 103 / 124 / 124 / 124 | 263 / 289 / 292 / 292 | 2,604 / 2,652 / 2,667 / 2,682 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4683 | 0.4107 | 0.4227 | 0.5956 | 1.0000 | 0.4839 | 0.4292 | 0.4259 | 0.6425 | 1.0000 | 0.2920 | 0.2122 | 0.1980 | 0.5124 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 317 / 317 / 317 / 317 | 66 / 66 / 66 / 66 | 6 / 6 / 6 / 6 | 9 / 9 / 9 / 9 | 120 / 120 / 120 / 120 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,182 / 1,182 / 1,182 / 1,182 | 246 / 246 / 246 / 246 | 24 / 24 / 24 / 24 | 34 / 34 / 34 / 34 | 447 / 447 / 447 / 447 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 2,883) | truck
0.5/1.0/2.0/4.0
(GTs: 600) | bus
0.5/1.0/2.0/4.0
(GTs: 60) | bicycle
0.5/1.0/2.0/4.0
(GTs: 85) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 1,092) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 0) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,929 / 2,144 / 2,236 / 2,337 | 301 / 408 / 466 / 475 | 26 / 32 / 41 / 41 | 39 / 42 / 46 / 46 | 691 / 697 / 700 / 730 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + +
+
Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.2907 | 0.5134 | 0.3242 | 0.3572 | 1.0000 | 0.4150 | 0.4172 | 0.4068 | 0.4972 | 1.0000 | 0.2100 | 0.4414 | 0.2253 | 0.2736 | 1.0000 | + + Num match summary + + **recall 0.10** - | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8267 | 0.7675 | 0.8888 | 0.8055 | 0.9009 | 0.7334 | 0.8051 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 3,155 / 3,155 / 3,155 / 3,155 | 308 / 308 / 308 / 308 | 59 / 59 / 59 / 59 | 141 / 141 / 141 / 141 | 1,085 / 1,085 / 1,085 / 1,085 | 7 / 7 / 7 / 7 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 11,760 / 11,760 / 11,760 / 11,760 | 1,148 / 1,148 / 1,148 / 1,148 | 220 / 220 / 220 / 220 | 526 / 526 / 526 / 526 | 4,045 / 4,045 / 4,045 / 4,045 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 28,684) | truck
0.5/1.0/2.0/4.0
(GTs: 2,801) | bus
0.5/1.0/2.0/4.0
(GTs: 537) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,285) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 9,868) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 64) | barrier
0.5/1.0/2.0/4.0
(GTs: 0) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 23,528 / 24,950 / 25,596 / 25,735 | 1,866 / 2,196 / 2,328 / 2,367 | 379 / 486 / 490 / 490 | 874 / 918 / 941 / 944 | 7,461 / 7,553 / 7,587 / 7,622 | 19 / 22 / 22 / 23 | 0 / 0 / 0 / 0 |
@@ -156,7 +394,7 @@
J6Gen2 -- Datasets (3,951 Testing Frames): +- Datasets (4,682 Testing Frames): - `db_j6gen2_v1` - `db_j6gen2_v2` - `db_j6gen2_v3` @@ -166,46 +404,163 @@ - `db_j6gen2_v7` - `db_j6gen2_v8` - `db_j6gen2_v9` + - `db_j6gen2_v10` + - `db_j6gen2_v11` + - `db_j6gen2_v12` - **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m**
Eval Range: 0.0 - 50.0m - | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8836 | 0.8431 | 0.8942 | 0.8569 | 0.9393 | 0.8780 | 0.8494 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 | + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(60,938) | truck
(7,081) | bus
(2,370) | bicycle
(1,357) | pedestrian
(18,202) | traffic_cone
(8,250) | barrier
(1,350) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.7371 | 0.6887 | 0.6863 | 0.6799 | 0.6621 | 0.6558 | 0.8940 | 0.8368 | 0.9124 | 0.9072 | 0.8537 | 0.4940 | 0.2617 |
Eval Range: 50.0 - 90.0m - | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7040 | 0.6488 | 0.8118 | 0.6662 | 0.8221 | 0.5781 | 0.6417 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 | + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(54,217) | truck
(4,913) | bus
(2,116) | bicycle
(838) | pedestrian
(8,336) | traffic_cone
(2,632) | barrier
(622) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5833 | 0.5245 | 0.5890 | 0.5704 | 0.5596 | 0.5410 | 0.8044 | 0.6387 | 0.7893 | 0.6949 | 0.6496 | 0.2967 | 0.2096 |
Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(19,301) | truck
(2,906) | bus
(484) | bicycle
(291) | pedestrian
(2,564) | traffic_cone
(462) | barrier
(145) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4384 | 0.3949 | 0.4973 | 0.4673 | 0.4756 | 0.4456 | 0.7075 | 0.5046 | 0.5412 | 0.5343 | 0.4732 | 0.1509 | 0.1571 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(134,456) | truck
(14,900) | bus
(4,970) | bicycle
(2,486) | pedestrian
(29,102) | traffic_cone
(11,344) | barrier
(2,117) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6650 | 0.6111 | 0.6420 | 0.6241 | 0.6150 | 0.5972 | 0.8448 | 0.7186 | 0.8363 | 0.8063 | 0.7779 | 0.4361 | 0.2350 | + +
+ +- **Mean TPError - J6Gen2** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1692 | 0.1845 | 0.2711 | 0.1983 | 1.0000 | 0.1868 | 0.2021 | 0.2864 | 0.2108 | 1.0000 | 0.2026 | 0.2155 | 0.2887 | 0.2184 | 1.0000 | + +
+ Num match summary - | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5030 | 0.4572 | 0.6739 | 0.4847 | 0.5186 | 0.4430 | 0.3948 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 | + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 6,703 / 6,703 / 6,703 / 6,703 | 778 / 778 / 778 / 778 | 260 / 260 / 260 / 260 | 149 / 149 / 149 / 149 | 2,002 / 2,002 / 2,002 / 2,002 | 907 / 907 / 907 / 907 | 148 / 148 / 148 / 148 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 24,984 / 24,984 / 24,984 / 24,984 | 2,903 / 2,903 / 2,903 / 2,903 | 971 / 971 / 971 / 971 | 556 / 556 / 556 / 556 | 7,462 / 7,462 / 7,462 / 7,462 | 3,382 / 3,382 / 3,382 / 3,382 | 553 / 553 / 553 / 553 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 60,938) | truck
0.5/1.0/2.0/4.0
(GTs: 7,081) | bus
0.5/1.0/2.0/4.0
(GTs: 2,370) | bicycle
0.5/1.0/2.0/4.0
(GTs: 1,357) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 18,202) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 8,250) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,350) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 52,338 / 54,149 / 55,189 / 55,565 | 5,350 / 5,996 / 6,202 / 6,337 | 2,027 / 2,173 / 2,227 / 2,232 | 1,170 / 1,181 / 1,182 / 1,182 | 14,547 / 14,883 / 15,058 / 15,106 | 4,519 / 4,942 / 5,249 / 5,546 | 453 / 538 / 550 / 562 |
+ Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.2376 | 0.2394 | 0.2946 | 0.2552 | 1.0000 | 0.2952 | 0.2891 | 0.3267 | 0.3020 | 1.0000 | 0.2820 | 0.2779 | 0.3072 | 0.2730 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 5,963 / 5,963 / 5,963 / 5,963 | 540 / 540 / 540 / 540 | 232 / 232 / 232 / 232 | 92 / 92 / 92 / 92 | 916 / 916 / 916 / 916 | 289 / 289 / 289 / 289 | 68 / 68 / 68 / 68 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 22,228 / 22,228 / 22,228 / 22,228 | 2,014 / 2,014 / 2,014 / 2,014 | 867 / 867 / 867 / 867 | 343 / 343 / 343 / 343 | 3,417 / 3,417 / 3,417 / 3,417 | 1,079 / 1,079 / 1,079 / 1,079 | 0 / 255 / 255 / 255 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 54,217) | truck
0.5/1.0/2.0/4.0
(GTs: 4,913) | bus
0.5/1.0/2.0/4.0
(GTs: 2,116) | bicycle
0.5/1.0/2.0/4.0
(GTs: 838) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 8,336) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 2,632) | barrier
0.5/1.0/2.0/4.0
(GTs: 622) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 39,059 / 43,688 / 45,797 / 46,308 | 2,628 / 3,307 / 3,561 / 3,660 | 1,261 / 1,700 / 1,825 / 1,855 | 584 / 579 / 581 / 611 | 5,589 / 5,531 / 5,588 / 5,745 | 1,142 / 1,223 / 1,314 / 1,394 | 156 / 231 / 237 / 268 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3293 | 0.2166 | 0.3253 | 0.3476 | 1.0000 | 0.4132 | 0.3083 | 0.3639 | 0.4336 | 1.0000 | 0.3657 | 0.2567 | 0.3324 | 0.3634 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 2,123 / 2,123 / 2,123 / 2,123 | 319 / 319 / 319 / 319 | 53 / 53 / 53 / 53 | 32 / 32 / 32 / 32 | 282 / 282 / 282 / 282 | 50 / 50 / 50 / 50 | 15 / 15 / 15 / 15 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 7,913 / 7,913 / 7,913 / 7,913 | 1,191 / 1,191 / 1,191 / 1,191 | 198 / 198 / 198 / 198 | 119 / 119 / 119 / 119 | 1,051 / 1,051 / 1,051 / 1,051 | 0 / 189 / 189 / 189 | 0 / 59 / 59 / 59 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 19,301) | truck
0.5/1.0/2.0/4.0
(GTs: 2,906) | bus
0.5/1.0/2.0/4.0
(GTs: 484) | bicycle
0.5/1.0/2.0/4.0
(GTs: 291) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 2,564) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 462) | barrier
0.5/1.0/2.0/4.0
(GTs: 145) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 12,130 / 14,043 / 15,222 / 15,426 | 1,162 / 1,568 / 2,007 / 2,112 | 169 / 279 / 355 / 366 | 153 / 180 / 181 / 181 | 1,439 / 1,486 / 1,498 / 1,509 | 155 / 172 / 178 / 186 | 33 / 52 / 72 / 73 | + +
+ +
Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1971 | 0.2029 | 0.2829 | 0.2224 | 1.0000 | 0.2506 | 0.2464 | 0.3150 | 0.2717 | 1.0000 | 0.2419 | 0.2401 | 0.2986 | 0.2465 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 14,790 / 14,790 / 14,790 / 14,790 | 1,639 / 1,639 / 1,639 / 1,639 | 546 / 546 / 546 / 546 | 273 / 273 / 273 / 273 | 3,201 / 3,201 / 3,201 / 3,201 | 1,247 / 1,247 / 1,247 / 1,247 | 232 / 232 / 232 / 232 | + + **recall 0.40** - | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7958 | 0.7472 | 0.8408 | 0.7294 | 0.8673 | 0.7710 | 0.7706 | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 | + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 55,126 / 55,126 / 55,126 / 55,126 | 6,109 / 6,109 / 6,109 / 6,109 | 2,037 / 2,037 / 2,037 / 2,037 | 1,019 / 1,019 / 1,019 / 1,019 | 11,931 / 11,931 / 11,931 / 11,931 | 4,651 / 4,651 / 4,651 / 4,651 | 0 / 867 / 867 / 867 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 134,456) | truck
0.5/1.0/2.0/4.0
(GTs: 14,900) | bus
0.5/1.0/2.0/4.0
(GTs: 4,970) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,486) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 29,102) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 11,344) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,117) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR j6gen2_base/2.8.1 | 102,297 / 112,278 / 116,527 / 117,516 | 9,001 / 10,847 / 11,695 / 12,090 | 3,495 / 4,146 / 4,350 / 4,483 | 1,859 / 1,919 / 1,923 / 1,923 | 21,639 / 21,986 / 21,863 / 22,136 | 5,782 / 6,632 / 6,752 / 7,077 | 602 / 827 / 850 / 872 |
@@ -213,6 +568,742 @@ ## Release +### BEVFusion-LiDAR J6Gen2_base/2.8.1 + Changes + +- Finetune from `BEVFusion-LiDAR base/2.8.0` with j6gen2 base dataset and intensity. +
+ +
+ Artifacts + +- Deployed ONNX and ROS parameter files (for internal) + - [WebAuto](https://evaluation.ci.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/fcf081e7-b3a9-4085-82f8-60023df3e854?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.8.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1VwFa3BZnDI7WV1i3aq6VYsK3pII2axMb/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.8.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1n1EZUOMF6PKi9SciRQXzoMvCBkMnQaYL/view?usp=drive_link) +- PyTorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.8.1/best_epoch_25.zip) + - [Google drive](https://drive.google.com/file/d/1mOVIs7rUGPumjl3dosuNZqJGZlNOdV-e/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/3d5e2fa3df7ad61d9ae773a3ea3f418f4916e05b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py) +- Train time: NVIDIA H200 140GB * 8 * 30 epochs = 1 day +- Batch size: 8*8 = 64 +- Training Dataset (frames: 63,813): + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (51,208 frames) + - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames) + +
+ +
+ Evaluation + +**J6Gen2_base Datasets (5,910 frames)**: + + - j6gen2 (4,682 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.7289** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 75,589 | 67,025 / 69,724 / 70,866 / 71,572 | 0.855 / 0.903 / 0.915 / 0.927 | 0.904 / 0.929 / 0.937 / 0.942 | 0.279 / 0.218 / 0.158 / 0.147 | +| truck | 8,273 | 6,615 / 7,329 / 7,646 / 7,835 | 0.715 / 0.838 / 0.889 / 0.917 | 0.802 / 0.875 / 0.903 / 0.920 | 0.263 / 0.191 / 0.186 / 0.184 | +| bus | 2,706 | 2,339 / 2,562 / 2,628 / 2,640 | 0.810 / 0.916 / 0.962 / 0.963 | 0.874 / 0.943 / 0.962 / 0.965 | 0.260 / 0.167 / 0.130 / 0.096 | +| bicycle | 2,097 | 1,950 / 1,979 / 1,992 / 1,996 | 0.866 / 0.894 / 0.898 / 0.905 | 0.877 / 0.889 / 0.893 / 0.894 | 0.158 / 0.157 / 0.157 / 0.157 | +| pedestrian | 23,254 | 21,368 / 21,777 / 21,940 / 22,071 | 0.828 / 0.852 / 0.864 / 0.870 | 0.833 / 0.846 / 0.852 / 0.857 | 0.171 / 0.166 / 0.163 / 0.166 | +| traffic_cone | 8,310 | 5,479 / 5,915 / 6,096 / 6,331 | 0.385 / 0.444 / 0.463 / 0.494 | 0.559 / 0.594 / 0.607 / 0.624 | 0.123 / 0.121 / 0.111 / 0.086 | +| barrier | 1,350 | 572 / 754 / 803 / 843 | 0.174 / 0.267 / 0.289 / 0.306 | 0.409 / 0.462 / 0.472 / 0.483 | 0.283 / 0.260 / 0.248 / 0.248 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 8,314 / 8,314 / 8,314 / 8,314 | 0.107 / 0.112 / 0.113 / 0.117 | 0.033 / 0.036 / 0.036 / 0.037 | 0.116 / 0.117 / 0.118 / 0.118 | 0.126 / 0.127 / 0.128 / 0.128 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 910 / 910 / 910 / 910 | 0.144 / 0.165 / 0.181 / 0.200 | 0.028 / 0.030 / 0.031 / 0.031 | 0.127 / 0.132 / 0.135 / 0.137 | 0.297 / 0.306 / 0.311 / 0.310 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 297 / 297 / 297 / 297 | 0.108 / 0.121 / 0.142 / 0.143 | 0.044 / 0.045 / 0.045 / 0.045 | 0.083 / 0.085 / 0.091 / 0.091 | 0.128 / 0.130 / 0.129 / 0.129 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 230 / 230 / 230 / 230 | 0.131 / 0.137 / 0.138 / 0.140 | 0.080 / 0.080 / 0.080 / 0.081 | 0.202 / 0.204 / 0.204 / 0.205 | 0.537 / 0.536 / 0.535 / 0.536 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,557 / 2,557 / 2,557 / 2,557 | 0.102 / 0.108 / 0.117 / 0.133 | 0.395 / 0.397 / 0.401 / 0.404 | 0.232 / 0.233 / 0.234 / 0.234 | 0.240 / 0.239 / 0.239 / 0.241 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 914 / 914 / 914 / 914 | 0.176 / 0.198 / 0.219 / 0.297 | 0.328 / 0.325 / 0.327 / 0.329 | 0.644 / 0.648 / 0.649 / 0.650 | 0.026 / 0.026 / 0.026 / 0.026 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 148 / 148 / 148 / 148 | 0.232 / 0.293 / 0.318 / 0.363 | 0.374 / 0.376 / 0.378 / 0.375 | 0.458 / 0.477 / 0.484 / 0.492 | 0.024 / 0.025 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 30,991 / 30,991 / 30,991 / 30,991 | 0.115 / 0.121 / 0.124 / 0.128 | 0.037 / 0.041 / 0.043 / 0.044 | 0.121 / 0.123 / 0.123 / 0.123 | 0.139 / 0.140 / 0.141 / 0.141 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 3,391 / 3,391 / 3,391 / 3,391 | 0.153 / 0.181 / 0.202 / 0.230 | 0.032 / 0.034 / 0.035 / 0.036 | 0.133 / 0.138 / 0.142 / 0.145 | 0.317 / 0.325 / 0.331 / 0.330 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,109 / 1,109 / 1,109 / 1,109 | 0.118 / 0.136 / 0.152 / 0.154 | 0.051 / 0.052 / 0.052 / 0.052 | 0.086 / 0.089 / 0.094 / 0.094 | 0.147 / 0.147 / 0.147 / 0.147 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 859 / 859 / 859 / 859 | 0.132 / 0.138 / 0.139 / 0.142 | 0.087 / 0.087 / 0.087 / 0.087 | 0.207 / 0.209 / 0.209 / 0.210 | 0.555 / 0.554 / 0.553 / 0.553 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 9,534 / 9,534 / 9,534 / 9,534 | 0.107 / 0.114 / 0.127 / 0.149 | 0.427 / 0.429 / 0.433 / 0.437 | 0.239 / 0.240 / 0.240 / 0.241 | 0.244 / 0.244 / 0.244 / 0.245 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 3,407 / 3,407 / 3,407 / 3,407 | 0.186 / 0.215 / 0.249 / 0.353 | 0.376 / 0.370 / 0.370 / 0.371 | 0.654 / 0.659 / 0.660 / 0.661 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 553 / 553 / 553 / 553 | 0.251 / 0.333 / 0.368 / 0.443 | 0.422 / 0.406 / 0.410 / 0.403 | 0.542 / 0.541 / 0.547 / 0.554 | 0.025 / 0.025 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 65,297 / 67,749 / 68,945 / 69,400 | 0.130 / 0.146 / 0.159 / 0.175 | 0.052 / 0.063 / 0.072 / 0.076 | 0.131 / 0.133 / 0.135 / 0.135 | 0.149 / 0.152 / 0.154 / 0.154 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,298 / 7,033 / 7,266 / 7,405 | 0.164 / 0.211 / 0.246 / 0.296 | 0.041 / 0.053 / 0.054 / 0.058 | 0.142 / 0.153 / 0.158 / 0.163 | 0.313 / 0.324 / 0.331 / 0.331 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,282 / 2,494 / 2,565 / 2,589 | 0.138 / 0.181 / 0.205 / 0.217 | 0.065 / 0.069 / 0.073 / 0.077 | 0.095 / 0.102 / 0.106 / 0.107 | 0.173 / 0.166 / 0.166 / 0.170 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,789 / 1,815 / 1,822 / 1,825 | 0.136 / 0.144 / 0.149 / 0.153 | 0.097 / 0.097 / 0.097 / 0.097 | 0.214 / 0.216 / 0.217 / 0.218 | 0.550 / 0.548 / 0.547 / 0.547 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 18,701 / 19,099 / 19,305 / 19,357 | 0.113 / 0.125 / 0.144 / 0.177 | 0.453 / 0.457 / 0.462 / 0.467 | 0.244 / 0.246 / 0.247 / 0.247 | 0.250 / 0.249 / 0.250 / 0.252 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,529 / 4,841 / 5,046 / 5,561 | 0.187 / 0.217 / 0.256 / 0.403 | 0.385 / 0.383 / 0.382 / 0.387 | 0.654 / 0.659 / 0.662 / 0.669 | 0.028 / 0.028 / 0.028 / 0.029 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 453 / 529 / 550 / 562 | 0.240 / 0.298 / 0.323 / 0.375 | 0.394 / 0.397 / 0.395 / 0.389 | 0.491 / 0.504 / 0.510 / 0.516 | 0.023 / 0.024 / 0.024 / 0.024 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5802** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 64,960 | 50,716 / 56,494 / 59,237 / 60,392 | 0.695 / 0.809 / 0.866 / 0.881 | 0.781 / 0.848 / 0.874 / 0.883 | 0.240 / 0.190 / 0.157 / 0.157 | +| truck | 5,922 | 3,638 / 4,443 / 4,919 / 5,132 | 0.459 / 0.634 / 0.737 / 0.777 | 0.625 / 0.731 / 0.786 / 0.805 | 0.249 / 0.165 / 0.164 / 0.159 | +| bus | 2,257 | 1,543 / 1,947 / 2,104 / 2,161 | 0.565 / 0.797 / 0.891 / 0.917 | 0.681 / 0.830 / 0.883 / 0.900 | 0.415 / 0.184 / 0.171 / 0.181 | +| bicycle | 1,298 | 986 / 1,068 / 1,079 / 1,080 | 0.576 / 0.670 / 0.682 / 0.683 | 0.683 / 0.722 / 0.726 / 0.727 | 0.110 / 0.135 / 0.106 / 0.106 | +| pedestrian | 12,052 | 10,341 / 10,570 / 10,667 / 10,768 | 0.642 / 0.664 / 0.679 / 0.692 | 0.694 / 0.705 / 0.711 / 0.716 | 0.145 / 0.145 / 0.145 / 0.146 | +| traffic_cone | 2,636 | 1,308 / 1,442 / 1,510 / 1,614 | 0.214 / 0.260 / 0.291 / 0.339 | 0.436 / 0.477 / 0.493 / 0.523 | 0.085 / 0.085 / 0.084 / 0.084 | +| barrier | 622 | 216 / 296 / 314 / 328 | 0.117 / 0.222 / 0.239 / 0.248 | 0.333 / 0.423 / 0.434 / 0.439 | 0.183 / 0.106 / 0.106 / 0.082 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 7,145 / 7,145 / 7,145 / 7,145 | 0.158 / 0.178 / 0.194 / 0.207 | 0.108 / 0.135 / 0.153 / 0.156 | 0.158 / 0.161 / 0.162 / 0.162 | 0.158 / 0.159 / 0.160 / 0.161 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 651 / 651 / 651 / 651 | 0.191 / 0.240 / 0.292 / 0.328 | 0.035 / 0.039 / 0.042 / 0.044 | 0.155 / 0.167 / 0.174 / 0.178 | 0.425 / 0.429 / 0.434 / 0.439 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 248 / 248 / 248 / 248 | 0.156 / 0.206 / 0.236 / 0.248 | 0.149 / 0.137 / 0.139 / 0.146 | 0.114 / 0.123 / 0.128 / 0.129 | 0.146 / 0.153 / 0.153 / 0.153 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 142 / 142 / 142 / 142 | 0.174 / 0.203 / 0.209 / 0.213 | 0.140 / 0.144 / 0.144 / 0.145 | 0.217 / 0.226 / 0.227 / 0.227 | 0.642 / 0.675 / 0.673 / 0.672 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,325 / 1,325 / 1,325 / 1,325 | 0.116 / 0.125 / 0.144 / 0.184 | 0.544 / 0.550 / 0.554 / 0.563 | 0.220 / 0.221 / 0.221 / 0.222 | 0.290 / 0.289 / 0.290 / 0.294 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 289 / 289 / 289 / 289 | 0.190 / 0.223 / 0.313 / 0.573 | 0.272 / 0.284 / 0.286 / 0.309 | 0.685 / 0.691 / 0.692 / 0.692 | 0.043 / 0.044 / 0.044 / 0.044 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 68 / 68 / 68 / 68 | 0.247 / 0.336 / 0.361 / 0.416 | 0.375 / 0.360 / 0.359 / 0.359 | 0.456 / 0.473 / 0.481 / 0.485 | 0.032 / 0.033 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 26,633 / 26,633 / 26,633 / 26,633 | 0.169 / 0.196 / 0.217 / 0.235 | 0.129 / 0.164 / 0.187 / 0.190 | 0.164 / 0.167 / 0.169 / 0.169 | 0.167 / 0.168 / 0.169 / 0.170 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,428 / 2,428 / 2,428 / 2,428 | 0.202 / 0.264 / 0.329 / 0.375 | 0.045 / 0.048 / 0.052 / 0.055 | 0.165 / 0.178 / 0.186 / 0.191 | 0.441 / 0.450 / 0.455 / 0.462 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 925 / 925 / 925 / 925 | 0.173 / 0.240 / 0.281 / 0.298 | 0.107 / 0.104 / 0.112 / 0.124 | 0.123 / 0.132 / 0.139 / 0.140 | 0.163 / 0.169 / 0.167 / 0.167 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 532 / 532 / 532 / 532 | 0.181 / 0.209 / 0.218 / 0.224 | 0.169 / 0.174 / 0.175 / 0.175 | 0.223 / 0.230 / 0.231 / 0.231 | 0.661 / 0.683 / 0.680 / 0.680 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,941 / 4,941 / 4,941 / 4,941 | 0.122 / 0.134 / 0.156 / 0.204 | 0.580 / 0.585 / 0.590 / 0.600 | 0.223 / 0.224 / 0.224 / 0.225 | 0.314 / 0.313 / 0.313 / 0.318 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,080 / 1,080 / 1,080 / 1,080 | 0.205 / 0.249 / 0.352 / 0.633 | 0.359 / 0.368 / 0.365 / 0.385 | 0.690 / 0.695 / 0.697 / 0.697 | 0.053 / 0.051 / 0.051 / 0.052 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 255 / 255 / 255 | 1.000 / 0.380 / 0.432 / 0.563 | 1.000 / 0.422 / 0.413 / 0.407 | 1.000 / 0.546 / 0.552 / 0.554 | 1.000 / 0.036 / 0.036 / 0.036 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 47,451 / 52,827 / 55,318 / 55,889 | 0.179 / 0.222 / 0.259 / 0.293 | 0.160 / 0.208 / 0.240 / 0.245 | 0.169 / 0.175 / 0.176 / 0.177 | 0.177 / 0.183 / 0.186 / 0.186 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 3,253 / 4,045 / 4,354 / 4,471 | 0.205 / 0.284 / 0.360 / 0.418 | 0.048 / 0.057 / 0.060 / 0.064 | 0.168 / 0.185 / 0.195 / 0.200 | 0.438 / 0.459 / 0.468 / 0.475 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,367 / 1,826 / 1,951 / 1,981 | 0.183 / 0.290 / 0.348 / 0.405 | 0.101 / 0.104 / 0.124 / 0.166 | 0.126 / 0.140 / 0.148 / 0.151 | 0.174 / 0.172 / 0.172 / 0.171 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 839 / 846 / 901 / 902 | 0.183 / 0.209 / 0.219 / 0.225 | 0.190 / 0.180 / 0.192 / 0.194 | 0.225 / 0.232 / 0.233 / 0.233 | 0.663 / 0.692 / 0.686 / 0.686 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 8,085 / 8,202 / 8,275 / 8,325 | 0.123 / 0.135 / 0.157 / 0.205 | 0.588 / 0.593 / 0.598 / 0.607 | 0.223 / 0.224 / 0.224 / 0.225 | 0.321 / 0.320 / 0.322 / 0.326 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,120 / 1,225 / 1,270 / 1,347 | 0.202 / 0.247 / 0.346 / 0.630 | 0.354 / 0.373 / 0.372 / 0.396 | 0.687 / 0.695 / 0.697 / 0.699 | 0.049 / 0.050 / 0.050 / 0.051 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 156 / 231 / 237 / 268 | 0.255 / 0.361 / 0.392 / 0.529 | 0.380 / 0.406 / 0.399 / 0.397 | 0.461 / 0.502 / 0.509 / 0.539 | 0.033 / 0.036 / 0.036 / 0.036 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4396** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 22,141 | 15,775 / 18,697 / 20,145 / 20,550 | 0.541 / 0.714 / 0.790 / 0.814 | 0.666 / 0.762 / 0.800 / 0.811 | 0.204 / 0.181 / 0.160 / 0.156 | +| truck | 3,506 | 1,627 / 2,289 / 2,818 / 3,016 | 0.257 / 0.472 / 0.668 / 0.733 | 0.464 / 0.620 / 0.736 / 0.770 | 0.159 / 0.159 / 0.111 / 0.111 | +| bus | 544 | 257 / 368 / 432 / 448 | 0.273 / 0.540 / 0.667 / 0.698 | 0.467 / 0.639 / 0.717 / 0.737 | 0.349 / 0.126 / 0.066 / 0.066 | +| bicycle | 376 | 269 / 307 / 317 / 318 | 0.354 / 0.532 / 0.552 / 0.554 | 0.509 / 0.605 / 0.619 / 0.619 | 0.136 / 0.143 / 0.143 / 0.143 | +| pedestrian | 3,656 | 3,001 / 3,053 / 3,081 / 3,122 | 0.482 / 0.496 / 0.505 / 0.515 | 0.591 / 0.598 / 0.602 / 0.607 | 0.135 / 0.135 / 0.135 / 0.135 | +| traffic_cone | 462 | 183 / 207 / 225 / 235 | 0.100 / 0.129 / 0.141 / 0.162 | 0.304 / 0.339 / 0.349 / 0.365 | 0.127 / 0.127 / 0.088 / 0.088 | +| barrier | 145 | 49 / 72 / 90 / 96 | 0.041 / 0.139 / 0.203 / 0.240 | 0.237 / 0.362 / 0.425 / 0.449 | 0.139 / 0.119 / 0.085 / 0.095 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 2,435 / 2,435 / 2,435 / 2,435 | 0.199 / 0.241 / 0.275 / 0.305 | 0.198 / 0.246 / 0.274 / 0.284 | 0.180 / 0.184 / 0.185 / 0.186 | 0.299 / 0.294 / 0.293 / 0.294 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 385 / 385 / 385 / 385 | 0.226 / 0.312 / 0.434 / 0.493 | 0.043 / 0.048 / 0.054 / 0.060 | 0.175 / 0.191 / 0.209 / 0.215 | 0.387 / 0.414 / 0.428 / 0.437 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 59 / 59 / 59 / 59 | 0.234 / 0.326 / 0.385 / 0.409 | 0.037 / 0.054 / 0.058 / 0.059 | 0.141 / 0.156 / 0.164 / 0.167 | 0.378 / 0.406 / 0.428 / 0.431 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 41 / 41 / 41 / 41 | 0.237 / 0.297 / 0.311 / 0.314 | 0.101 / 0.094 / 0.095 / 0.095 | 0.249 / 0.264 / 0.266 / 0.266 | 0.777 / 0.771 / 0.776 / 0.779 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 402 / 402 / 402 / 402 | 0.126 / 0.137 / 0.155 / 0.197 | 0.496 / 0.502 / 0.508 / 0.514 | 0.229 / 0.230 / 0.230 / 0.230 | 0.382 / 0.382 / 0.383 / 0.386 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 50 / 50 / 50 / 50 | 0.193 / 0.234 / 0.272 / 0.526 | 0.288 / 0.286 / 0.309 / 0.312 | 0.702 / 0.701 / 0.703 / 0.699 | 0.044 / 0.046 / 0.046 / 0.046 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 15 / 15 / 15 / 15 | 0.301 / 0.435 / 0.535 / 0.997 | 0.250 / 0.220 / 0.216 / 0.212 | 0.487 / 0.511 / 0.530 / 0.530 | 0.045 / 0.045 / 0.046 / 0.046 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 9,077 / 9,077 / 9,077 / 9,077 | 0.208 / 0.259 / 0.301 / 0.339 | 0.248 / 0.296 / 0.327 / 0.336 | 0.186 / 0.190 / 0.191 / 0.191 | 0.306 / 0.302 / 0.301 / 0.302 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,437 / 1,437 / 1,437 / 1,437 | 0.240 / 0.346 / 0.494 / 0.573 | 0.067 / 0.062 / 0.068 / 0.075 | 0.187 / 0.201 / 0.220 / 0.229 | 0.457 / 0.479 / 0.483 / 0.490 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 223 / 223 / 223 / 223 | 0.249 / 0.363 / 0.448 / 0.486 | 0.061 / 0.072 / 0.073 / 0.074 | 0.149 / 0.162 / 0.175 / 0.178 | 0.429 / 0.434 / 0.460 / 0.464 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 154 / 154 / 154 / 154 | 0.222 / 0.284 / 0.307 / 0.313 | 0.131 / 0.120 / 0.120 / 0.120 | 0.246 / 0.259 / 0.263 / 0.263 | 0.766 / 0.795 / 0.801 / 0.806 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,498 / 1,498 / 1,498 / 1,498 | 0.132 / 0.145 / 0.167 / 0.220 | 0.550 / 0.554 / 0.563 / 0.572 | 0.228 / 0.228 / 0.229 / 0.229 | 0.415 / 0.414 / 0.415 / 0.419 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 189 / 189 / 189 | 1.000 / 0.274 / 0.365 / 0.629 | 1.000 / 0.405 / 0.430 / 0.444 | 1.000 / 0.718 / 0.719 / 0.714 | 1.000 / 0.060 / 0.059 / 0.055 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 59 / 59 / 59 | 1.000 / 0.438 / 0.604 / 1.056 | 1.000 / 0.237 / 0.228 / 0.223 | 1.000 / 0.567 / 0.578 / 0.581 | 1.000 / 0.045 / 0.047 / 0.047 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 13,838 / 16,236 / 17,440 / 17,774 | 0.211 / 0.272 / 0.327 / 0.375 | 0.275 / 0.339 / 0.383 / 0.394 | 0.188 / 0.193 / 0.195 / 0.196 | 0.306 / 0.304 / 0.306 / 0.308 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,483 / 1,981 / 2,484 / 2,599 | 0.238 / 0.351 / 0.530 / 0.622 | 0.055 / 0.061 / 0.083 / 0.093 | 0.186 / 0.205 / 0.230 / 0.238 | 0.465 / 0.493 / 0.490 / 0.496 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 192 / 312 / 394 / 405 | 0.240 / 0.373 / 0.509 / 0.561 | 0.044 / 0.085 / 0.084 / 0.084 | 0.141 / 0.164 / 0.184 / 0.189 | 0.369 / 0.450 / 0.478 / 0.483 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 187 / 218 / 223 / 223 | 0.223 / 0.285 / 0.310 / 0.310 | 0.090 / 0.095 / 0.095 / 0.095 | 0.244 / 0.257 / 0.261 / 0.261 | 0.760 / 0.794 / 0.798 / 0.798 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,141 / 2,167 / 2,181 / 2,199 | 0.130 / 0.142 / 0.162 / 0.211 | 0.536 / 0.542 / 0.547 / 0.555 | 0.226 / 0.227 / 0.228 / 0.228 | 0.413 / 0.413 / 0.412 / 0.417 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 132 / 147 / 178 / 186 | 0.201 / 0.255 / 0.313 / 0.600 | 0.338 / 0.322 / 0.401 / 0.422 | 0.704 / 0.702 / 0.710 / 0.705 | 0.047 / 0.048 / 0.055 / 0.054 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 33 / 52 / 72 / 73 | 0.295 / 0.437 / 0.603 / 1.064 | 0.239 / 0.211 / 0.222 / 0.219 | 0.469 / 0.512 / 0.568 / 0.572 | 0.048 / 0.047 / 0.047 / 0.047 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.6590** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 162,690 | 133,765 / 145,295 / 150,720 / 153,032 | 0.765 / 0.854 / 0.890 / 0.910 | 0.825 / 0.877 / 0.895 / 0.902 | 0.242 / 0.194 / 0.169 / 0.157 | +| truck | 17,701 | 11,903 / 14,102 / 15,459 / 16,076 | 0.550 / 0.709 / 0.807 / 0.848 | 0.683 / 0.781 / 0.835 / 0.856 | 0.266 / 0.176 / 0.165 / 0.157 | +| bus | 5,507 | 4,147 / 4,894 / 5,184 / 5,269 | 0.668 / 0.840 / 0.916 / 0.930 | 0.761 / 0.872 / 0.909 / 0.919 | 0.343 / 0.182 / 0.172 / 0.125 | +| bicycle | 3,771 | 3,210 / 3,360 / 3,393 / 3,399 | 0.732 / 0.795 / 0.802 / 0.808 | 0.775 / 0.805 / 0.809 / 0.810 | 0.158 / 0.156 / 0.156 / 0.156 | +| pedestrian | 38,962 | 34,759 / 35,452 / 35,735 / 36,011 | 0.755 / 0.774 / 0.787 / 0.799 | 0.768 / 0.780 / 0.786 / 0.792 | 0.154 / 0.154 / 0.154 / 0.158 | +| traffic_cone | 11,408 | 6,982 / 7,578 / 7,846 / 8,197 | 0.338 / 0.390 / 0.411 / 0.444 | 0.521 / 0.557 / 0.571 / 0.592 | 0.123 / 0.111 / 0.111 / 0.087 | +| barrier | 2,117 | 839 / 1,125 / 1,212 / 1,272 | 0.144 / 0.241 / 0.263 / 0.280 | 0.367 / 0.438 / 0.450 / 0.459 | 0.274 / 0.185 / 0.185 / 0.183 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 17,895 / 17,895 / 17,895 / 17,895 | 0.129 / 0.142 / 0.151 / 0.160 | 0.062 / 0.077 / 0.086 / 0.090 | 0.133 / 0.136 / 0.137 / 0.137 | 0.148 / 0.151 / 0.152 / 0.153 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,947 / 1,947 / 1,947 / 1,947 | 0.165 / 0.202 / 0.242 / 0.273 | 0.032 / 0.035 / 0.037 / 0.039 | 0.140 / 0.148 / 0.155 / 0.158 | 0.339 / 0.351 / 0.359 / 0.362 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 605 / 605 / 605 / 605 | 0.127 / 0.157 / 0.182 / 0.188 | 0.079 / 0.078 / 0.080 / 0.083 | 0.094 / 0.100 / 0.106 / 0.106 | 0.144 / 0.150 / 0.151 / 0.152 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 414 / 414 / 414 / 414 | 0.147 / 0.162 / 0.165 / 0.168 | 0.095 / 0.096 / 0.095 / 0.096 | 0.210 / 0.215 / 0.215 / 0.216 | 0.573 / 0.583 / 0.582 / 0.583 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,285 / 4,285 / 4,285 / 4,285 | 0.107 / 0.114 / 0.127 / 0.151 | 0.437 / 0.439 / 0.443 / 0.449 | 0.232 / 0.232 / 0.233 / 0.233 | 0.256 / 0.255 / 0.255 / 0.258 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,254 / 1,254 / 1,254 / 1,254 | 0.179 / 0.203 / 0.234 / 0.347 | 0.322 / 0.321 / 0.322 / 0.327 | 0.652 / 0.656 / 0.657 / 0.659 | 0.029 / 0.029 / 0.029 / 0.030 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 232 / 232 / 232 / 232 | 0.239 / 0.311 / 0.339 / 0.410 | 0.374 / 0.369 / 0.369 / 0.366 | 0.469 / 0.488 / 0.494 / 0.502 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 66,702 / 66,702 / 66,702 / 66,702 | 0.142 / 0.160 / 0.172 / 0.185 | 0.080 / 0.101 / 0.113 / 0.117 | 0.142 / 0.145 / 0.146 / 0.147 | 0.162 / 0.164 / 0.165 / 0.166 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 7,257 / 7,257 / 7,257 / 7,257 | 0.178 / 0.226 / 0.280 / 0.323 | 0.039 / 0.042 / 0.045 / 0.047 | 0.149 / 0.159 / 0.167 / 0.171 | 0.361 / 0.374 / 0.383 / 0.385 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,257 / 2,257 / 2,257 / 2,257 | 0.143 / 0.184 / 0.212 / 0.220 | 0.076 / 0.076 / 0.079 / 0.083 | 0.101 / 0.109 / 0.114 / 0.115 | 0.165 / 0.169 / 0.170 / 0.171 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,546 / 1,546 / 1,546 / 1,546 | 0.151 / 0.167 / 0.172 / 0.176 | 0.108 / 0.109 / 0.109 / 0.110 | 0.215 / 0.219 / 0.220 / 0.221 | 0.592 / 0.600 / 0.599 / 0.599 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 15,974 / 15,974 / 15,974 / 15,974 | 0.113 / 0.122 / 0.139 / 0.172 | 0.476 / 0.478 / 0.483 / 0.489 | 0.236 / 0.237 / 0.238 / 0.238 | 0.269 / 0.268 / 0.268 / 0.271 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,677 / 4,677 / 4,677 / 4,677 | 0.191 / 0.223 / 0.269 / 0.413 | 0.371 / 0.368 / 0.368 / 0.372 | 0.663 / 0.668 / 0.669 / 0.670 | 0.032 / 0.032 / 0.032 / 0.032 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 867 / 867 / 867 | 1.000 / 0.355 / 0.402 / 0.517 | 1.000 / 0.398 / 0.397 / 0.391 | 1.000 / 0.544 / 0.550 / 0.557 | 1.000 / 0.029 / 0.029 / 0.030 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 126,514 / 137,022 / 141,450 / 143,300 | 0.157 / 0.191 / 0.218 / 0.246 | 0.115 / 0.151 / 0.174 / 0.182 | 0.151 / 0.156 / 0.158 / 0.159 | 0.176 / 0.182 / 0.185 / 0.186 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 10,859 / 13,045 / 14,027 / 14,439 | 0.185 / 0.255 / 0.328 / 0.389 | 0.044 / 0.055 / 0.059 / 0.064 | 0.154 / 0.170 / 0.181 / 0.188 | 0.368 / 0.391 / 0.402 / 0.406 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,844 / 4,623 / 4,831 / 4,974 | 0.159 / 0.236 / 0.278 / 0.319 | 0.075 / 0.083 / 0.094 / 0.114 | 0.109 / 0.121 / 0.127 / 0.131 | 0.178 / 0.185 / 0.188 / 0.191 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,732 / 2,840 / 2,856 / 2,859 | 0.154 / 0.173 / 0.180 / 0.184 | 0.117 / 0.120 / 0.121 / 0.121 | 0.218 / 0.223 / 0.224 / 0.225 | 0.598 / 0.606 / 0.605 / 0.605 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 28,970 / 29,422 / 29,647 / 29,660 | 0.117 / 0.130 / 0.148 / 0.187 | 0.496 / 0.500 / 0.505 / 0.510 | 0.238 / 0.239 / 0.240 / 0.240 | 0.280 / 0.279 / 0.280 / 0.283 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,644 / 6,197 / 6,356 / 7,069 | 0.189 / 0.223 / 0.271 / 0.452 | 0.370 / 0.375 / 0.375 / 0.388 | 0.660 / 0.667 / 0.669 / 0.676 | 0.032 / 0.032 / 0.032 / 0.033 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 602 / 827 / 850 / 871 | 0.245 / 0.329 / 0.356 / 0.433 | 0.378 / 0.383 / 0.380 / 0.377 | 0.478 / 0.506 / 0.509 / 0.515 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + +**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.6313** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 14,872 | 13,494 / 13,924 / 14,079 / 14,195 | 0.879 / 0.917 / 0.928 / 0.939 | 0.921 / 0.942 / 0.947 / 0.951 | 0.278 / 0.169 / 0.169 / 0.169 | +| truck | 1,192 | 981 / 1,080 / 1,113 / 1,136 | 0.760 / 0.876 / 0.913 / 0.932 | 0.837 / 0.907 / 0.922 / 0.925 | 0.357 / 0.208 / 0.187 / 0.157 | +| bus | 336 | 261 / 332 / 335 / 335 | 0.715 / 0.975 / 0.987 / 0.987 | 0.808 / 0.973 / 0.982 / 0.982 | 0.469 / 0.099 / 0.099 / 0.099 | +| bicycle | 740 | 676 / 694 / 706 / 710 | 0.817 / 0.857 / 0.878 / 0.883 | 0.846 / 0.864 / 0.871 / 0.875 | 0.174 / 0.166 / 0.157 / 0.157 | +| pedestrian | 5,055 | 4,706 / 4,761 / 4,785 / 4,800 | 0.844 / 0.859 / 0.864 / 0.868 | 0.851 / 0.860 / 0.864 / 0.868 | 0.151 / 0.151 / 0.151 / 0.151 | +| traffic_cone | 60 | 20 / 21 / 21 / 22 | 0.000 / 0.000 / 0.000 / 0.000 | 0.038 / 0.040 / 0.040 / 0.042 | 0.065 / 0.065 / 0.065 / 0.065 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,635 / 1,635 / 1,635 / 1,635 | 0.110 / 0.113 / 0.114 / 0.116 | 0.040 / 0.041 / 0.042 / 0.043 | 0.117 / 0.117 / 0.118 / 0.118 | 0.138 / 0.140 / 0.140 / 0.140 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 131 / 131 / 131 / 131 | 0.146 / 0.166 / 0.175 / 0.181 | 0.030 / 0.031 / 0.032 / 0.034 | 0.127 / 0.131 / 0.132 / 0.133 | 0.205 / 0.208 / 0.207 / 0.207 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 36 / 36 / 36 / 36 | 0.141 / 0.172 / 0.174 / 0.174 | 0.205 / 0.198 / 0.198 / 0.198 | 0.084 / 0.090 / 0.090 / 0.090 | 0.185 / 0.180 / 0.181 / 0.181 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 81 / 81 / 81 / 81 | 0.136 / 0.148 / 0.152 / 0.156 | 0.109 / 0.108 / 0.109 / 0.109 | 0.218 / 0.223 / 0.224 / 0.225 | 0.547 / 0.542 / 0.541 / 0.541 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 556 / 556 / 556 / 556 | 0.096 / 0.099 / 0.104 / 0.120 | 0.295 / 0.296 / 0.296 / 0.302 | 0.210 / 0.210 / 0.210 / 0.210 | 0.249 / 0.248 / 0.248 / 0.251 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 6 / 6 / 6 / 6 | 0.155 / 0.194 / 0.194 / 0.312 | 1.696 / 1.646 / 1.646 / 1.590 | 0.453 / 0.468 / 0.468 / 0.485 | 0.077 / 0.082 / 0.082 / 0.083 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 6,097 / 6,097 / 6,097 / 6,097 | 0.118 / 0.122 / 0.124 / 0.127 | 0.044 / 0.047 / 0.048 / 0.049 | 0.121 / 0.122 / 0.122 / 0.123 | 0.150 / 0.151 / 0.152 / 0.152 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 488 / 488 / 488 / 488 | 0.155 / 0.179 / 0.193 / 0.202 | 0.034 / 0.035 / 0.036 / 0.039 | 0.136 / 0.140 / 0.142 / 0.143 | 0.213 / 0.215 / 0.214 / 0.213 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 137 / 137 / 137 / 137 | 0.154 / 0.196 / 0.199 / 0.199 | 0.216 / 0.201 / 0.201 / 0.201 | 0.088 / 0.095 / 0.096 / 0.096 | 0.194 / 0.183 / 0.184 / 0.184 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 303 / 303 / 303 / 303 | 0.140 / 0.150 / 0.156 / 0.162 | 0.122 / 0.121 / 0.122 / 0.121 | 0.221 / 0.225 / 0.227 / 0.228 | 0.547 / 0.544 / 0.541 / 0.542 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,072 / 2,072 / 2,072 / 2,072 | 0.100 / 0.104 / 0.110 / 0.130 | 0.334 / 0.335 / 0.335 / 0.342 | 0.216 / 0.216 / 0.217 / 0.217 | 0.255 / 0.255 / 0.255 / 0.258 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 13,178 / 13,676 / 13,748 / 13,798 | 0.133 / 0.147 / 0.154 / 0.166 | 0.062 / 0.074 / 0.078 / 0.080 | 0.132 / 0.134 / 0.135 / 0.135 | 0.155 / 0.157 / 0.158 / 0.158 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 925 / 1,041 / 1,064 / 1,073 | 0.167 / 0.206 / 0.227 / 0.247 | 0.040 / 0.046 / 0.051 / 0.062 | 0.148 / 0.155 / 0.159 / 0.160 | 0.204 / 0.203 / 0.202 / 0.201 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 254 / 330 / 333 / 333 | 0.167 / 0.272 / 0.283 / 0.283 | 0.213 / 0.172 / 0.171 / 0.171 | 0.092 / 0.108 / 0.110 / 0.110 | 0.192 / 0.163 / 0.166 / 0.166 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 612 / 628 / 640 / 643 | 0.145 / 0.156 / 0.171 / 0.182 | 0.136 / 0.135 / 0.136 / 0.135 | 0.223 / 0.228 / 0.232 / 0.234 | 0.529 / 0.525 / 0.517 / 0.516 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,247 / 4,294 / 4,313 / 4,330 | 0.108 / 0.117 / 0.126 / 0.152 | 0.374 / 0.377 / 0.379 / 0.388 | 0.222 / 0.222 / 0.223 / 0.223 | 0.257 / 0.255 / 0.256 / 0.260 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 19 / 20 / 20 / 21 | 0.158 / 0.207 / 0.207 / 0.339 | 1.613 / 1.543 / 1.543 / 1.485 | 0.469 / 0.492 / 0.492 / 0.508 | 0.086 / 0.087 / 0.087 / 0.087 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5281** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 10,929 | 8,919 / 9,721 / 10,089 / 10,240 | 0.747 / 0.843 / 0.887 / 0.899 | 0.816 / 0.875 / 0.895 / 0.901 | 0.216 / 0.166 / 0.149 / 0.149 | +| truck | 1,009 | 664 / 792 / 859 / 874 | 0.548 / 0.705 / 0.789 / 0.802 | 0.701 / 0.790 / 0.844 / 0.849 | 0.283 / 0.180 / 0.155 / 0.155 | +| bus | 141 | 114 / 134 / 135 / 137 | 0.650 / 0.913 / 0.916 / 0.929 | 0.769 / 0.905 / 0.905 / 0.905 | 0.486 / 0.444 / 0.444 / 0.444 | +| bicycle | 460 | 320 / 362 / 371 / 371 | 0.468 / 0.598 / 0.619 / 0.619 | 0.610 / 0.670 / 0.677 / 0.677 | 0.098 / 0.098 / 0.098 / 0.098 | +| pedestrian | 3,721 | 3,190 / 3,254 / 3,271 / 3,297 | 0.691 / 0.713 / 0.718 / 0.729 | 0.728 / 0.740 / 0.744 / 0.749 | 0.125 / 0.124 / 0.124 / 0.124 | +| traffic_cone | 4 | 2 / 2 / 2 / 2 | 0.000 / 0.000 / 0.000 / 0.000 | 0.027 / 0.027 / 0.027 / 0.027 | 0.099 / 0.099 / 0.099 / 0.099 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,202 / 1,202 / 1,202 / 1,202 | 0.157 / 0.172 / 0.180 / 0.188 | 0.079 / 0.094 / 0.103 / 0.104 | 0.147 / 0.151 / 0.152 / 0.152 | 0.204 / 0.209 / 0.213 / 0.213 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 110 / 110 / 110 / 110 | 0.188 / 0.222 / 0.253 / 0.258 | 0.038 / 0.043 / 0.046 / 0.048 | 0.163 / 0.173 / 0.178 / 0.178 | 0.237 / 0.240 / 0.257 / 0.258 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 15 / 15 / 15 / 15 | 0.208 / 0.258 / 0.258 / 0.261 | 0.597 / 0.536 / 0.536 / 0.532 | 0.082 / 0.089 / 0.090 / 0.090 | 0.211 / 0.208 / 0.208 / 0.210 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 50 / 50 / 50 / 50 | 0.185 / 0.236 / 0.248 / 0.249 | 0.182 / 0.184 / 0.185 / 0.185 | 0.243 / 0.258 / 0.262 / 0.262 | 0.621 / 0.730 / 0.726 / 0.726 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 409 / 409 / 409 / 409 | 0.113 / 0.122 / 0.129 / 0.155 | 0.446 / 0.450 / 0.452 / 0.459 | 0.203 / 0.204 / 0.205 / 0.205 | 0.279 / 0.279 / 0.279 / 0.282 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 0.154 / 0.154 / 0.154 / 0.154 | 2.562 / 2.562 / 2.562 / 2.562 | 0.285 / 0.285 / 0.285 / 0.285 | 0.035 / 0.035 / 0.035 / 0.035 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 4,480 / 4,480 / 4,480 / 4,480 | 0.166 / 0.186 / 0.198 / 0.208 | 0.099 / 0.118 / 0.130 / 0.131 | 0.154 / 0.158 / 0.159 / 0.160 | 0.223 / 0.228 / 0.233 / 0.233 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 413 / 413 / 413 / 413 | 0.199 / 0.241 / 0.285 / 0.293 | 0.045 / 0.051 / 0.054 / 0.057 | 0.169 / 0.180 / 0.187 / 0.187 | 0.271 / 0.270 / 0.281 / 0.283 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 57 / 57 / 57 / 57 | 0.216 / 0.269 / 0.269 / 0.272 | 0.432 / 0.394 / 0.394 / 0.393 | 0.091 / 0.099 / 0.099 / 0.099 | 0.241 / 0.238 / 0.239 / 0.241 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 188 / 188 / 188 / 188 | 0.184 / 0.230 / 0.244 / 0.245 | 0.225 / 0.227 / 0.228 / 0.227 | 0.255 / 0.266 / 0.269 / 0.269 | 0.652 / 0.730 / 0.723 / 0.723 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,525 / 1,525 / 1,525 / 1,525 | 0.120 / 0.132 / 0.142 / 0.178 | 0.490 / 0.495 / 0.497 / 0.504 | 0.208 / 0.209 / 0.210 / 0.211 | 0.299 / 0.297 / 0.298 / 0.302 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1 / 1 / 1 / 1 | 0.157 / 0.157 / 0.157 / 0.157 | 2.579 / 2.579 / 2.579 / 2.579 | 0.270 / 0.270 / 0.270 / 0.270 | 0.034 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 8,463 / 9,288 / 9,554 / 9,621 | 0.178 / 0.214 / 0.240 / 0.263 | 0.140 / 0.176 / 0.194 / 0.196 | 0.162 / 0.168 / 0.170 / 0.171 | 0.241 / 0.261 / 0.269 / 0.269 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 617 / 739 / 799 / 804 | 0.205 / 0.274 / 0.347 / 0.361 | 0.046 / 0.062 / 0.070 / 0.079 | 0.173 / 0.184 / 0.195 / 0.197 | 0.262 / 0.270 / 0.275 / 0.283 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 103 / 124 / 124 / 124 | 0.222 / 0.283 / 0.283 / 0.283 | 0.374 / 0.341 / 0.341 / 0.341 | 0.097 / 0.107 / 0.107 / 0.107 | 0.296 / 0.295 / 0.295 / 0.295 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 263 / 289 / 292 / 292 | 0.183 / 0.227 / 0.237 / 0.237 | 0.249 / 0.257 / 0.255 / 0.255 | 0.256 / 0.268 / 0.271 / 0.271 | 0.631 / 0.726 / 0.721 / 0.721 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,604 / 2,652 / 2,667 / 2,682 | 0.124 / 0.139 / 0.152 / 0.191 | 0.521 / 0.526 / 0.528 / 0.535 | 0.211 / 0.212 / 0.213 / 0.214 | 0.310 / 0.308 / 0.310 / 0.313 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2 / 2 / 2 / 2 | 0.157 / 0.157 / 0.157 / 0.157 | 2.579 / 2.579 / 2.579 / 2.579 | 0.270 / 0.270 / 0.270 / 0.270 | 0.034 / 0.034 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4172** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 2,883 | 2,173 / 2,512 / 2,669 / 2,713 | 0.616 / 0.762 / 0.812 / 0.829 | 0.705 / 0.783 / 0.806 / 0.814 | 0.203 / 0.202 / 0.192 / 0.162 | +| truck | 600 | 343 / 461 / 519 / 534 | 0.393 / 0.654 / 0.778 / 0.809 | 0.572 / 0.727 / 0.808 / 0.824 | 0.279 / 0.178 / 0.145 / 0.145 | +| bus | 60 | 32 / 44 / 47 / 47 | 0.379 / 0.599 / 0.655 / 0.655 | 0.536 / 0.681 / 0.707 / 0.707 | 0.134 / 0.176 / 0.049 / 0.049 | +| bicycle | 85 | 54 / 61 / 66 / 66 | 0.262 / 0.373 / 0.434 / 0.434 | 0.433 / 0.528 / 0.579 / 0.579 | 0.102 / 0.144 / 0.144 / 0.144 | +| pedestrian | 1,092 | 945 / 960 / 963 / 979 | 0.545 / 0.554 / 0.562 / 0.576 | 0.638 / 0.644 / 0.646 / 0.651 | 0.145 / 0.145 / 0.145 / 0.135 | +| traffic_cone | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 317 / 317 / 317 / 317 | 0.195 / 0.225 / 0.242 / 0.263 | 0.082 / 0.107 / 0.125 / 0.131 | 0.178 / 0.184 / 0.185 / 0.185 | 0.449 / 0.461 / 0.470 / 0.476 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 66 / 66 / 66 / 66 | 0.210 / 0.284 / 0.333 / 0.350 | 0.040 / 0.041 / 0.045 / 0.049 | 0.169 / 0.177 / 0.185 / 0.188 | 0.152 / 0.154 / 0.157 / 0.161 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 6 / 6 / 6 / 6 | 0.264 / 0.325 / 0.348 / 0.348 | 0.039 / 0.208 / 0.205 / 0.205 | 0.135 / 0.141 / 0.145 / 0.145 | 0.137 / 0.266 / 0.428 / 0.428 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 9 / 9 / 9 / 9 | 0.203 / 0.257 / 0.320 / 0.320 | 0.120 / 0.110 / 0.108 / 0.108 | 0.270 / 0.272 / 0.282 / 0.282 | 0.796 / 0.879 / 0.888 / 0.888 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 120 / 120 / 120 / 120 | 0.131 / 0.135 / 0.155 / 0.204 | 0.441 / 0.441 / 0.444 / 0.453 | 0.178 / 0.178 / 0.179 / 0.178 | 0.369 / 0.370 / 0.371 / 0.376 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,182 / 1,182 / 1,182 / 1,182 | 0.204 / 0.243 / 0.266 / 0.294 | 0.107 / 0.142 / 0.167 / 0.175 | 0.186 / 0.191 / 0.192 / 0.192 | 0.512 / 0.522 / 0.531 / 0.538 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 246 / 246 / 246 / 246 | 0.217 / 0.307 / 0.381 / 0.406 | 0.058 / 0.052 / 0.056 / 0.061 | 0.176 / 0.185 / 0.197 / 0.200 | 0.196 / 0.193 / 0.189 / 0.194 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 24 / 24 / 24 / 24 | 0.236 / 0.355 / 0.393 / 0.393 | 0.039 / 0.269 / 0.255 / 0.255 | 0.134 / 0.146 / 0.152 / 0.152 | 0.234 / 0.402 / 0.674 / 0.674 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 34 / 34 / 34 / 34 | 0.202 / 0.269 / 0.361 / 0.361 | 0.120 / 0.104 / 0.102 / 0.102 | 0.269 / 0.271 / 0.284 / 0.284 | 0.792 / 0.904 / 0.924 / 0.924 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 447 / 447 / 447 / 447 | 0.135 / 0.142 / 0.162 / 0.225 | 0.483 / 0.483 / 0.486 / 0.500 | 0.179 / 0.179 / 0.179 / 0.179 | 0.394 / 0.393 / 0.396 / 0.403 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,929 / 2,144 / 2,236 / 2,337 | 0.208 / 0.254 / 0.285 / 0.333 | 0.124 / 0.166 / 0.200 / 0.232 | 0.189 / 0.196 / 0.196 / 0.197 | 0.524 / 0.543 / 0.557 / 0.563 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 301 / 408 / 466 / 475 | 0.217 / 0.320 / 0.431 / 0.466 | 0.048 / 0.062 / 0.071 / 0.076 | 0.177 / 0.193 / 0.209 / 0.213 | 0.195 / 0.228 / 0.220 / 0.222 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 26 / 32 / 41 / 41 | 0.236 / 0.360 / 0.411 / 0.411 | 0.037 / 0.331 / 0.268 / 0.268 | 0.129 / 0.140 / 0.162 / 0.162 | 0.191 / 0.325 / 0.784 / 0.784 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 39 / 42 / 46 / 46 | 0.206 / 0.281 / 0.376 / 0.376 | 0.105 / 0.093 / 0.092 / 0.092 | 0.266 / 0.265 / 0.280 / 0.280 | 0.783 / 0.918 / 0.916 / 0.916 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 691 / 697 / 700 / 730 | 0.136 / 0.142 / 0.161 / 0.230 | 0.492 / 0.492 / 0.490 / 0.504 | 0.175 / 0.176 / 0.176 / 0.178 | 0.393 / 0.393 / 0.393 / 0.402 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.5779** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 28,684 | 24,624 / 26,214 / 26,901 / 27,216 | 0.811 / 0.885 / 0.909 / 0.920 | 0.862 / 0.903 / 0.915 / 0.920 | 0.236 / 0.200 / 0.166 / 0.166 | +| truck | 2,801 | 1,991 / 2,336 / 2,497 / 2,554 | 0.611 / 0.775 / 0.848 / 0.868 | 0.735 / 0.827 / 0.870 / 0.878 | 0.281 / 0.178 / 0.157 / 0.144 | +| bus | 537 | 407 / 510 / 517 / 519 | 0.662 / 0.919 / 0.938 / 0.938 | 0.771 / 0.907 / 0.914 / 0.914 | 0.486 / 0.125 / 0.125 / 0.125 | +| bicycle | 1,285 | 1,053 / 1,121 / 1,146 / 1,150 | 0.667 / 0.750 / 0.772 / 0.775 | 0.739 / 0.775 / 0.784 / 0.787 | 0.169 / 0.166 / 0.156 / 0.156 | +| pedestrian | 9,868 | 8,856 / 8,991 / 9,035 / 9,095 | 0.764 / 0.783 / 0.788 / 0.799 | 0.783 / 0.792 / 0.796 / 0.799 | 0.146 / 0.146 / 0.146 / 0.146 | +| traffic_cone | 64 | 22 / 23 / 23 / 24 | 0.000 / 0.000 / 0.000 / 0.000 | 0.033 / 0.034 / 0.034 / 0.036 | 0.079 / 0.065 / 0.065 / 0.065 | +| barrier | 0 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 3,155 / 3,155 / 3,155 / 3,155 | 0.127 / 0.136 / 0.141 / 0.145 | 0.053 / 0.060 / 0.064 / 0.065 | 0.128 / 0.130 / 0.131 / 0.131 | 0.168 / 0.173 / 0.175 / 0.176 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 308 / 308 / 308 / 308 | 0.169 / 0.202 / 0.224 / 0.232 | 0.034 / 0.036 / 0.038 / 0.041 | 0.145 / 0.152 / 0.156 / 0.157 | 0.204 / 0.206 / 0.211 / 0.212 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 59 / 59 / 59 / 59 | 0.163 / 0.201 / 0.205 / 0.205 | 0.310 / 0.286 / 0.285 / 0.285 | 0.085 / 0.092 / 0.093 / 0.093 | 0.190 / 0.188 / 0.194 / 0.194 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 141 / 141 / 141 / 141 | 0.149 / 0.173 / 0.182 / 0.187 | 0.128 / 0.128 / 0.129 / 0.128 | 0.224 / 0.233 / 0.235 / 0.236 | 0.561 / 0.594 / 0.593 / 0.593 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,085 / 1,085 / 1,085 / 1,085 | 0.103 / 0.108 / 0.115 / 0.137 | 0.352 / 0.355 / 0.356 / 0.364 | 0.209 / 0.209 / 0.210 / 0.210 | 0.265 / 0.264 / 0.265 / 0.268 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 7 / 7 / 7 / 7 | 0.155 / 0.190 / 0.190 / 0.300 | 1.777 / 1.717 / 1.717 / 1.666 | 0.439 / 0.454 / 0.454 / 0.471 | 0.073 / 0.078 / 0.078 / 0.079 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 11,760 / 11,760 / 11,760 / 11,760 | 0.139 / 0.151 / 0.157 / 0.163 | 0.063 / 0.073 / 0.078 / 0.080 | 0.135 / 0.138 / 0.139 / 0.139 | 0.186 / 0.191 / 0.194 / 0.195 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,148 / 1,148 / 1,148 / 1,148 | 0.179 / 0.221 / 0.253 / 0.263 | 0.040 / 0.042 / 0.045 / 0.049 | 0.154 / 0.161 / 0.167 / 0.168 | 0.222 / 0.222 / 0.226 / 0.227 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 220 / 220 / 220 / 220 | 0.177 / 0.227 / 0.231 / 0.232 | 0.270 / 0.250 / 0.250 / 0.250 | 0.091 / 0.099 / 0.100 / 0.100 | 0.201 / 0.196 / 0.204 / 0.204 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 526 / 526 / 526 / 526 | 0.155 / 0.177 / 0.191 / 0.197 | 0.145 / 0.145 / 0.146 / 0.145 | 0.230 / 0.237 / 0.240 / 0.241 | 0.570 / 0.595 / 0.592 / 0.592 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,045 / 4,045 / 4,045 / 4,045 | 0.110 / 0.117 / 0.126 / 0.154 | 0.397 / 0.401 / 0.402 / 0.410 | 0.212 / 0.213 / 0.213 / 0.213 | 0.276 / 0.275 / 0.276 / 0.279 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 23,528 / 24,950 / 25,596 / 25,735 | 0.155 / 0.180 / 0.198 / 0.216 | 0.093 / 0.117 / 0.133 / 0.137 | 0.147 / 0.152 / 0.153 / 0.154 | 0.214 / 0.228 / 0.234 / 0.235 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,866 / 2,196 / 2,328 / 2,367 | 0.189 / 0.251 / 0.308 / 0.333 | 0.043 / 0.054 / 0.061 / 0.073 | 0.161 / 0.172 / 0.181 / 0.184 | 0.222 / 0.230 / 0.230 / 0.234 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 379 / 486 / 490 / 490 | 0.187 / 0.278 / 0.288 / 0.288 | 0.246 / 0.230 / 0.229 / 0.229 | 0.095 / 0.110 / 0.112 / 0.112 | 0.215 / 0.212 / 0.234 / 0.234 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 874 / 918 / 941 / 944 | 0.156 / 0.179 / 0.199 / 0.206 | 0.151 / 0.153 / 0.162 / 0.161 | 0.231 / 0.238 / 0.243 / 0.244 | 0.577 / 0.602 / 0.594 / 0.593 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 7,461 / 7,553 / 7,587 / 7,622 | 0.116 / 0.125 / 0.136 / 0.167 | 0.429 / 0.432 / 0.433 / 0.442 | 0.213 / 0.214 / 0.215 / 0.215 | 0.287 / 0.286 / 0.287 / 0.291 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 19 / 22 / 22 / 23 | 0.156 / 0.203 / 0.203 / 0.324 | 1.731 / 1.638 / 1.638 / 1.580 | 0.430 / 0.472 / 0.472 / 0.487 | 0.079 / 0.082 / 0.082 / 0.083 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (4,682 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.7371** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 60,938 | 53,537 / 55,808 / 56,797 / 57,392 | 0.844 / 0.892 / 0.914 / 0.925 | 0.898 / 0.925 / 0.933 / 0.938 | 0.251 / 0.220 / 0.157 / 0.147 | +| truck | 7,081 | 5,634 / 6,249 / 6,533 / 6,699 | 0.712 / 0.833 / 0.886 / 0.916 | 0.797 / 0.870 / 0.900 / 0.919 | 0.263 / 0.186 / 0.186 / 0.184 | +| bus | 2,370 | 2,078 / 2,230 / 2,293 / 2,305 | 0.822 / 0.913 / 0.952 / 0.963 | 0.885 / 0.940 / 0.961 / 0.963 | 0.243 / 0.168 / 0.153 / 0.153 | +| bicycle | 1,357 | 1,274 / 1,285 / 1,286 / 1,286 | 0.895 / 0.911 / 0.912 / 0.912 | 0.896 / 0.904 / 0.905 / 0.905 | 0.158 / 0.158 / 0.158 / 0.158 | +| pedestrian | 18,202 | 16,664 / 17,018 / 17,157 / 17,273 | 0.827 / 0.852 / 0.865 / 0.872 | 0.828 / 0.842 / 0.849 / 0.855 | 0.171 / 0.166 / 0.163 / 0.166 | +| traffic_cone | 8,250 | 5,459 / 5,894 / 6,075 / 6,309 | 0.430 / 0.490 / 0.511 / 0.545 | 0.582 / 0.619 / 0.634 / 0.654 | 0.123 / 0.110 / 0.095 / 0.086 | +| barrier | 1,350 | 572 / 754 / 803 / 843 | 0.175 / 0.270 / 0.292 / 0.310 | 0.410 / 0.464 / 0.474 / 0.485 | 0.283 / 0.248 / 0.248 / 0.248 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 6,703 / 6,703 / 6,703 / 6,703 | 0.107 / 0.111 / 0.113 / 0.117 | 0.031 / 0.034 / 0.036 / 0.036 | 0.116 / 0.117 / 0.118 / 0.118 | 0.123 / 0.124 / 0.125 / 0.126 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 778 / 778 / 778 / 778 | 0.144 / 0.166 / 0.182 / 0.204 | 0.028 / 0.030 / 0.031 / 0.031 | 0.128 / 0.132 / 0.135 / 0.137 | 0.312 / 0.322 / 0.328 / 0.327 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 260 / 260 / 260 / 260 | 0.105 / 0.115 / 0.137 / 0.139 | 0.026 / 0.027 / 0.028 / 0.028 | 0.083 / 0.085 / 0.091 / 0.091 | 0.122 / 0.125 / 0.123 / 0.124 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 149 / 149 / 149 / 149 | 0.129 / 0.133 / 0.133 / 0.133 | 0.067 / 0.068 / 0.068 / 0.068 | 0.194 / 0.194 / 0.194 / 0.194 | 0.530 / 0.530 / 0.530 / 0.530 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 2,002 / 2,002 / 2,002 / 2,002 | 0.105 / 0.111 / 0.121 / 0.138 | 0.427 / 0.429 / 0.433 / 0.435 | 0.240 / 0.241 / 0.241 / 0.241 | 0.236 / 0.236 / 0.236 / 0.237 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 907 / 907 / 907 / 907 | 0.176 / 0.198 / 0.219 / 0.296 | 0.327 / 0.324 / 0.326 / 0.327 | 0.645 / 0.648 / 0.649 / 0.650 | 0.026 / 0.026 / 0.026 / 0.026 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 148 / 148 / 148 / 148 | 0.232 / 0.293 / 0.318 / 0.363 | 0.374 / 0.376 / 0.378 / 0.375 | 0.458 / 0.477 / 0.484 / 0.492 | 0.024 / 0.025 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 24,984 / 24,984 / 24,984 / 24,984 | 0.115 / 0.121 / 0.124 / 0.130 | 0.036 / 0.040 / 0.042 / 0.043 | 0.122 / 0.123 / 0.123 / 0.124 | 0.136 / 0.137 / 0.138 / 0.139 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,903 / 2,903 / 2,903 / 2,903 | 0.153 / 0.181 / 0.204 / 0.235 | 0.031 / 0.034 / 0.035 / 0.036 | 0.132 / 0.138 / 0.142 / 0.145 | 0.336 / 0.345 / 0.352 / 0.350 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 971 / 971 / 971 / 971 | 0.113 / 0.129 / 0.146 / 0.148 | 0.030 / 0.032 / 0.032 / 0.033 | 0.086 / 0.089 / 0.093 / 0.094 | 0.140 / 0.142 / 0.141 / 0.141 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 556 / 556 / 556 / 556 | 0.128 / 0.131 / 0.131 / 0.132 | 0.070 / 0.070 / 0.070 / 0.070 | 0.200 / 0.200 / 0.201 / 0.201 | 0.558 / 0.558 / 0.558 / 0.558 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 7,462 / 7,462 / 7,462 / 7,462 | 0.109 / 0.117 / 0.132 / 0.154 | 0.455 / 0.457 / 0.462 / 0.464 | 0.245 / 0.247 / 0.247 / 0.248 | 0.241 / 0.240 / 0.241 / 0.242 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 3,382 / 3,382 / 3,382 / 3,382 | 0.187 / 0.215 / 0.248 / 0.352 | 0.373 / 0.367 / 0.367 / 0.369 | 0.655 / 0.659 / 0.660 / 0.661 | 0.028 / 0.028 / 0.027 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 553 / 553 / 553 / 553 | 0.251 / 0.333 / 0.368 / 0.443 | 0.422 / 0.406 / 0.410 / 0.403 | 0.542 / 0.541 / 0.547 / 0.554 | 0.025 / 0.025 / 0.025 / 0.025 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 52,338 / 54,149 / 55,189 / 55,565 | 0.130 / 0.146 / 0.161 / 0.177 | 0.050 / 0.061 / 0.071 / 0.074 | 0.131 / 0.133 / 0.134 / 0.135 | 0.148 / 0.151 / 0.153 / 0.153 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 5,350 / 5,996 / 6,202 / 6,337 | 0.164 / 0.212 / 0.249 / 0.305 | 0.041 / 0.055 / 0.055 / 0.058 | 0.141 / 0.153 / 0.158 / 0.164 | 0.333 / 0.345 / 0.353 / 0.353 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,027 / 2,173 / 2,227 / 2,232 | 0.134 / 0.168 / 0.193 / 0.202 | 0.049 / 0.054 / 0.059 / 0.061 | 0.095 / 0.101 / 0.106 / 0.106 | 0.171 / 0.166 / 0.166 / 0.168 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,170 / 1,181 / 1,182 / 1,182 | 0.131 / 0.137 / 0.137 / 0.137 | 0.076 / 0.077 / 0.077 / 0.077 | 0.209 / 0.210 / 0.210 / 0.210 | 0.563 / 0.562 / 0.563 / 0.563 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 14,547 / 14,883 / 15,058 / 15,106 | 0.115 / 0.128 / 0.149 / 0.185 | 0.478 / 0.482 / 0.488 / 0.491 | 0.251 / 0.253 / 0.254 / 0.254 | 0.248 / 0.248 / 0.249 / 0.250 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,519 / 4,942 / 5,249 / 5,546 | 0.187 / 0.219 / 0.265 / 0.403 | 0.382 / 0.379 / 0.382 / 0.383 | 0.654 / 0.661 / 0.666 / 0.670 | 0.028 / 0.028 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 453 / 538 / 550 / 562 | 0.240 / 0.301 / 0.323 / 0.375 | 0.394 / 0.397 / 0.395 / 0.389 | 0.491 / 0.508 / 0.510 / 0.516 | 0.023 / 0.024 / 0.024 / 0.024 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5833** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 54,217 | 41,798 / 46,774 / 49,152 / 50,157 | 0.682 / 0.803 / 0.855 / 0.877 | 0.772 / 0.841 / 0.868 / 0.877 | 0.242 / 0.189 / 0.157 / 0.157 | +| truck | 4,913 | 2,974 / 3,651 / 4,060 / 4,258 | 0.440 / 0.619 / 0.724 / 0.772 | 0.610 / 0.720 / 0.775 / 0.797 | 0.249 / 0.164 / 0.164 / 0.164 | +| bus | 2,116 | 1,429 / 1,813 / 1,969 / 2,024 | 0.559 / 0.788 / 0.892 / 0.918 | 0.675 / 0.828 / 0.885 / 0.904 | 0.415 / 0.184 / 0.171 / 0.181 | +| bicycle | 838 | 666 / 706 / 708 / 709 | 0.642 / 0.708 / 0.713 / 0.716 | 0.723 / 0.752 / 0.755 / 0.756 | 0.110 / 0.136 / 0.136 / 0.110 | +| pedestrian | 8,336 | 7,155 / 7,320 / 7,400 / 7,475 | 0.621 / 0.643 / 0.660 / 0.674 | 0.681 / 0.691 / 0.698 / 0.704 | 0.145 / 0.155 / 0.155 / 0.148 | +| traffic_cone | 2,632 | 1,306 / 1,440 / 1,508 / 1,612 | 0.231 / 0.280 / 0.312 / 0.364 | 0.450 / 0.492 / 0.509 / 0.540 | 0.079 / 0.085 / 0.075 / 0.075 | +| barrier | 622 | 216 / 296 / 314 / 328 | 0.118 / 0.226 / 0.242 / 0.252 | 0.335 / 0.427 / 0.438 / 0.445 | 0.183 / 0.106 / 0.106 / 0.082 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 5,963 / 5,963 / 5,963 / 5,963 | 0.158 / 0.180 / 0.197 / 0.212 | 0.113 / 0.144 / 0.163 / 0.167 | 0.160 / 0.163 / 0.164 / 0.164 | 0.148 / 0.149 / 0.149 / 0.150 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 540 / 540 / 540 / 540 | 0.192 / 0.244 / 0.299 / 0.344 | 0.034 / 0.037 / 0.041 / 0.043 | 0.153 / 0.166 / 0.173 / 0.178 | 0.472 / 0.474 / 0.475 / 0.481 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 232 / 232 / 232 / 232 | 0.151 / 0.201 / 0.235 / 0.248 | 0.115 / 0.107 / 0.111 / 0.119 | 0.117 / 0.125 / 0.131 / 0.132 | 0.140 / 0.147 / 0.147 / 0.147 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 92 / 92 / 92 / 92 | 0.169 / 0.186 / 0.190 / 0.195 | 0.123 / 0.127 / 0.127 / 0.127 | 0.208 / 0.212 / 0.212 / 0.212 | 0.647 / 0.643 / 0.642 / 0.641 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 916 / 916 / 916 / 916 | 0.117 / 0.127 / 0.152 / 0.197 | 0.592 / 0.598 / 0.603 / 0.612 | 0.229 / 0.230 / 0.230 / 0.230 | 0.295 / 0.295 / 0.296 / 0.299 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 289 / 289 / 289 / 289 | 0.190 / 0.223 / 0.313 / 0.573 | 0.271 / 0.283 / 0.285 / 0.308 | 0.686 / 0.691 / 0.692 / 0.692 | 0.043 / 0.044 / 0.044 / 0.044 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 68 / 68 / 68 / 68 | 0.247 / 0.336 / 0.361 / 0.416 | 0.375 / 0.360 / 0.359 / 0.359 | 0.456 / 0.473 / 0.481 / 0.485 | 0.032 / 0.033 / 0.034 / 0.034 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 22,228 / 22,228 / 22,228 / 22,228 | 0.169 / 0.199 / 0.221 / 0.242 | 0.135 / 0.174 / 0.198 / 0.203 | 0.166 / 0.169 / 0.170 / 0.171 | 0.155 / 0.156 / 0.156 / 0.157 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,014 / 2,014 / 2,014 / 2,014 | 0.202 / 0.268 / 0.337 / 0.393 | 0.045 / 0.047 / 0.051 / 0.054 | 0.164 / 0.177 / 0.186 / 0.192 | 0.482 / 0.489 / 0.493 / 0.501 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 867 / 867 / 867 / 867 | 0.170 / 0.238 / 0.284 / 0.303 | 0.082 / 0.083 / 0.092 / 0.105 | 0.126 / 0.135 / 0.143 / 0.144 | 0.156 / 0.161 / 0.160 / 0.160 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 343 / 343 / 343 / 343 | 0.179 / 0.198 / 0.204 / 0.212 | 0.147 / 0.150 / 0.150 / 0.150 | 0.209 / 0.213 / 0.213 / 0.213 | 0.663 / 0.657 / 0.656 / 0.655 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 3,417 / 3,417 / 3,417 / 3,417 | 0.123 / 0.135 / 0.161 / 0.214 | 0.621 / 0.626 / 0.632 / 0.642 | 0.230 / 0.231 / 0.231 / 0.232 | 0.321 / 0.319 / 0.320 / 0.324 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,079 / 1,079 / 1,079 / 1,079 | 0.205 / 0.249 / 0.352 / 0.633 | 0.355 / 0.365 / 0.362 / 0.382 | 0.690 / 0.696 / 0.697 / 0.698 | 0.053 / 0.051 / 0.051 / 0.052 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 255 / 255 / 255 | 1.000 / 0.380 / 0.432 / 0.563 | 1.000 / 0.422 / 0.413 / 0.407 | 1.000 / 0.546 / 0.552 / 0.554 | 1.000 / 0.036 / 0.036 / 0.036 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 39,059 / 43,688 / 45,797 / 46,308 | 0.180 / 0.225 / 0.264 / 0.299 | 0.165 / 0.216 / 0.249 / 0.256 | 0.171 / 0.176 / 0.177 / 0.178 | 0.164 / 0.167 / 0.169 / 0.169 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,628 / 3,307 / 3,561 / 3,660 | 0.205 / 0.286 / 0.364 / 0.427 | 0.047 / 0.056 / 0.059 / 0.060 | 0.167 / 0.185 / 0.195 / 0.201 | 0.480 / 0.500 / 0.512 / 0.518 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,261 / 1,700 / 1,825 / 1,855 | 0.179 / 0.290 / 0.353 / 0.413 | 0.078 / 0.086 / 0.109 / 0.153 | 0.129 / 0.143 / 0.151 / 0.154 | 0.164 / 0.163 / 0.163 / 0.163 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 584 / 579 / 581 / 611 | 0.182 / 0.202 / 0.207 / 0.216 | 0.167 / 0.158 / 0.157 / 0.168 | 0.213 / 0.217 / 0.216 / 0.216 | 0.672 / 0.661 / 0.659 / 0.664 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 5,589 / 5,531 / 5,588 / 5,745 | 0.123 / 0.135 / 0.159 / 0.216 | 0.625 / 0.626 / 0.632 / 0.647 | 0.229 / 0.230 / 0.230 / 0.231 | 0.326 / 0.323 / 0.325 / 0.332 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,142 / 1,223 / 1,314 / 1,394 | 0.203 / 0.248 / 0.349 / 0.635 | 0.349 / 0.369 / 0.370 / 0.395 | 0.689 / 0.696 / 0.697 / 0.699 | 0.051 / 0.050 / 0.053 / 0.054 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 156 / 231 / 237 / 268 | 0.255 / 0.361 / 0.392 / 0.529 | 0.380 / 0.406 / 0.399 / 0.397 | 0.461 / 0.502 / 0.509 / 0.539 | 0.033 / 0.036 / 0.036 / 0.036 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4384** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 19,301 | 13,605 / 16,188 / 17,481 / 17,847 | 0.528 / 0.703 / 0.787 / 0.811 | 0.660 / 0.759 / 0.799 / 0.810 | 0.188 / 0.181 / 0.156 / 0.156 | +| truck | 2,906 | 1,284 / 1,828 / 2,299 / 2,482 | 0.229 / 0.431 / 0.643 / 0.716 | 0.442 / 0.598 / 0.722 / 0.760 | 0.158 / 0.159 / 0.111 / 0.111 | +| bus | 484 | 225 / 324 / 385 / 401 | 0.261 / 0.534 / 0.668 / 0.702 | 0.460 / 0.635 / 0.719 / 0.742 | 0.349 / 0.126 / 0.066 / 0.066 | +| bicycle | 291 | 215 / 246 / 251 / 252 | 0.381 / 0.577 / 0.588 / 0.590 | 0.535 / 0.629 / 0.633 / 0.633 | 0.136 / 0.136 / 0.136 / 0.136 | +| pedestrian | 2,564 | 2,056 / 2,093 / 2,118 / 2,143 | 0.455 / 0.470 / 0.480 / 0.488 | 0.572 / 0.580 / 0.585 / 0.589 | 0.133 / 0.128 / 0.128 / 0.128 | +| traffic_cone | 462 | 183 / 207 / 225 / 235 | 0.114 / 0.146 / 0.161 / 0.183 | 0.324 / 0.359 / 0.372 / 0.388 | 0.088 / 0.088 / 0.088 / 0.088 | +| barrier | 145 | 49 / 72 / 90 / 96 | 0.042 / 0.140 / 0.204 / 0.242 | 0.237 / 0.362 / 0.427 / 0.452 | 0.139 / 0.119 / 0.085 / 0.095 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 2,123 / 2,123 / 2,123 / 2,123 | 0.199 / 0.243 / 0.281 / 0.312 | 0.217 / 0.268 / 0.299 / 0.309 | 0.180 / 0.184 / 0.186 / 0.186 | 0.274 / 0.267 / 0.265 / 0.265 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 319 / 319 / 319 / 319 | 0.232 / 0.319 / 0.462 / 0.532 | 0.044 / 0.048 / 0.057 / 0.062 | 0.178 / 0.194 / 0.215 / 0.222 | 0.461 / 0.495 / 0.507 / 0.513 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 53 / 53 / 53 / 53 | 0.230 / 0.326 / 0.388 / 0.414 | 0.036 / 0.034 / 0.039 / 0.040 | 0.143 / 0.158 / 0.167 / 0.170 | 0.416 / 0.429 / 0.428 / 0.430 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 32 / 32 / 32 / 32 | 0.245 / 0.306 / 0.312 / 0.316 | 0.096 / 0.091 / 0.093 / 0.092 | 0.247 / 0.264 / 0.265 / 0.264 | 0.779 / 0.757 / 0.761 / 0.763 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 282 / 282 / 282 / 282 | 0.124 / 0.137 / 0.155 / 0.193 | 0.525 / 0.533 / 0.542 / 0.546 | 0.255 / 0.256 / 0.256 / 0.257 | 0.389 / 0.389 / 0.389 / 0.391 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 50 / 50 / 50 / 50 | 0.193 / 0.234 / 0.272 / 0.526 | 0.288 / 0.286 / 0.309 / 0.312 | 0.702 / 0.701 / 0.703 / 0.699 | 0.044 / 0.046 / 0.046 / 0.046 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 15 / 15 / 15 / 15 | 0.301 / 0.435 / 0.535 / 0.997 | 0.250 / 0.220 / 0.216 / 0.212 | 0.487 / 0.511 / 0.530 / 0.530 | 0.045 / 0.045 / 0.046 / 0.046 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 7,913 / 7,913 / 7,913 / 7,913 | 0.208 / 0.262 / 0.308 / 0.347 | 0.271 / 0.321 / 0.353 / 0.363 | 0.186 / 0.189 / 0.191 / 0.191 | 0.272 / 0.266 / 0.265 / 0.265 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,191 / 1,191 / 1,191 / 1,191 | 0.247 / 0.356 / 0.521 / 0.613 | 0.072 / 0.063 / 0.071 / 0.078 | 0.191 / 0.206 / 0.226 / 0.235 | 0.513 / 0.557 / 0.558 / 0.562 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 198 / 198 / 198 / 198 | 0.250 / 0.363 / 0.453 / 0.493 | 0.063 / 0.047 / 0.051 / 0.053 | 0.151 / 0.164 / 0.177 / 0.181 | 0.456 / 0.444 / 0.437 / 0.441 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 119 / 119 / 119 / 119 | 0.227 / 0.289 / 0.299 / 0.306 | 0.129 / 0.119 / 0.120 / 0.119 | 0.241 / 0.257 / 0.258 / 0.258 | 0.760 / 0.771 / 0.776 / 0.781 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,051 / 1,051 / 1,051 / 1,051 | 0.131 / 0.147 / 0.171 / 0.217 | 0.581 / 0.588 / 0.599 / 0.605 | 0.252 / 0.252 / 0.253 / 0.253 | 0.427 / 0.425 / 0.425 / 0.427 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 189 / 189 / 189 | 1.000 / 0.274 / 0.365 / 0.629 | 1.000 / 0.405 / 0.430 / 0.444 | 1.000 / 0.718 / 0.719 / 0.714 | 1.000 / 0.060 / 0.058 / 0.055 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 59 / 59 / 59 | 1.000 / 0.438 / 0.604 / 1.056 | 1.000 / 0.237 / 0.228 / 0.223 | 1.000 / 0.567 / 0.578 / 0.581 | 1.000 / 0.045 / 0.047 / 0.047 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 12,130 / 14,043 / 15,222 / 15,426 | 0.212 / 0.274 / 0.334 / 0.381 | 0.306 / 0.364 / 0.410 / 0.419 | 0.188 / 0.193 / 0.195 / 0.195 | 0.272 / 0.267 / 0.270 / 0.270 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,162 / 1,568 / 2,007 / 2,112 | 0.243 / 0.360 / 0.552 / 0.656 | 0.052 / 0.061 / 0.085 / 0.094 | 0.188 / 0.209 / 0.235 / 0.244 | 0.533 / 0.561 / 0.554 / 0.558 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 169 / 279 / 355 / 366 | 0.240 / 0.375 / 0.521 / 0.578 | 0.045 / 0.057 / 0.062 / 0.063 | 0.143 / 0.167 / 0.187 / 0.193 | 0.407 / 0.458 / 0.445 / 0.451 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 153 / 180 / 181 / 181 | 0.228 / 0.284 / 0.290 / 0.290 | 0.086 / 0.094 / 0.094 / 0.094 | 0.240 / 0.255 / 0.256 / 0.256 | 0.747 / 0.757 / 0.760 / 0.760 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,439 / 1,486 / 1,498 / 1,509 | 0.127 / 0.142 / 0.164 / 0.206 | 0.558 / 0.566 / 0.576 / 0.581 | 0.250 / 0.251 / 0.252 / 0.253 | 0.425 / 0.424 / 0.423 / 0.425 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 155 / 172 / 178 / 186 | 0.209 / 0.261 / 0.313 / 0.600 | 0.413 / 0.390 / 0.401 / 0.422 | 0.714 / 0.709 / 0.710 / 0.705 | 0.056 / 0.055 / 0.055 / 0.054 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 33 / 52 / 72 / 73 | 0.295 / 0.437 / 0.603 / 1.064 | 0.239 / 0.211 / 0.222 / 0.219 | 0.469 / 0.512 / 0.568 / 0.572 | 0.048 / 0.047 / 0.047 / 0.047 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.6650** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 134,456 | 109,151 / 119,093 / 123,838 / 125,849 | 0.751 / 0.842 / 0.886 / 0.900 | 0.816 / 0.870 / 0.890 / 0.897 | 0.260 / 0.189 / 0.157 / 0.157 | +| truck | 14,900 | 9,912 / 11,766 / 12,962 / 13,522 | 0.538 / 0.695 / 0.797 / 0.845 | 0.673 / 0.772 / 0.828 / 0.851 | 0.264 / 0.176 / 0.166 / 0.157 | +| bus | 4,970 | 3,740 / 4,384 / 4,667 / 4,750 | 0.669 / 0.836 / 0.909 / 0.931 | 0.761 / 0.869 / 0.909 / 0.919 | 0.314 / 0.182 / 0.172 / 0.126 | +| bicycle | 2,486 | 2,157 / 2,239 / 2,247 / 2,249 | 0.763 / 0.818 / 0.821 / 0.822 | 0.795 / 0.821 / 0.823 / 0.823 | 0.152 / 0.152 / 0.152 / 0.152 | +| pedestrian | 29,102 | 25,909 / 26,467 / 26,706 / 26,922 | 0.753 / 0.773 / 0.787 / 0.799 | 0.764 / 0.777 / 0.784 / 0.790 | 0.154 / 0.154 / 0.162 / 0.159 | +| traffic_cone | 11,344 | 6,960 / 7,555 / 7,823 / 8,173 | 0.372 / 0.429 / 0.451 / 0.493 | 0.542 / 0.580 / 0.595 / 0.619 | 0.112 / 0.086 / 0.089 / 0.086 | +| barrier | 2,117 | 839 / 1,125 / 1,212 / 1,272 | 0.145 / 0.244 / 0.267 / 0.284 | 0.367 / 0.441 / 0.453 / 0.462 | 0.274 / 0.185 / 0.185 / 0.182 | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 14,790 / 14,790 / 14,790 / 14,790 | 0.129 / 0.143 / 0.154 / 0.163 | 0.065 / 0.081 / 0.092 / 0.094 | 0.134 / 0.137 / 0.138 / 0.139 | 0.143 / 0.145 / 0.147 / 0.147 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,639 / 1,639 / 1,639 / 1,639 | 0.164 / 0.202 / 0.244 / 0.282 | 0.031 / 0.034 / 0.037 / 0.039 | 0.138 / 0.147 / 0.154 / 0.159 | 0.368 / 0.381 / 0.390 / 0.392 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 546 / 546 / 546 / 546 | 0.123 / 0.153 / 0.179 / 0.187 | 0.055 / 0.055 / 0.058 / 0.061 | 0.095 / 0.101 / 0.107 / 0.108 | 0.139 / 0.146 / 0.146 / 0.147 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 273 / 273 / 273 / 273 | 0.145 / 0.157 / 0.158 / 0.160 | 0.080 / 0.082 / 0.082 / 0.082 | 0.203 / 0.206 / 0.206 / 0.206 | 0.575 / 0.575 / 0.575 / 0.575 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 3,201 / 3,201 / 3,201 / 3,201 | 0.109 / 0.116 / 0.131 / 0.156 | 0.468 / 0.470 / 0.474 / 0.479 | 0.240 / 0.241 / 0.241 / 0.242 | 0.252 / 0.252 / 0.252 / 0.254 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,247 / 1,247 / 1,247 / 1,247 | 0.179 / 0.203 / 0.234 / 0.349 | 0.321 / 0.319 / 0.321 / 0.326 | 0.652 / 0.656 / 0.657 / 0.659 | 0.029 / 0.029 / 0.029 / 0.030 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 232 / 232 / 232 / 232 | 0.239 / 0.311 / 0.339 / 0.410 | 0.374 / 0.369 / 0.369 / 0.366 | 0.469 / 0.488 / 0.494 / 0.502 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 55,126 / 55,126 / 55,126 / 55,126 | 0.143 / 0.162 / 0.177 / 0.189 | 0.084 / 0.107 / 0.122 / 0.124 | 0.143 / 0.146 / 0.148 / 0.148 | 0.156 / 0.157 / 0.158 / 0.159 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,109 / 6,109 / 6,109 / 6,109 | 0.178 / 0.227 / 0.284 / 0.336 | 0.038 / 0.041 / 0.044 / 0.047 | 0.148 / 0.158 / 0.167 / 0.172 | 0.391 / 0.406 / 0.414 / 0.417 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 2,037 / 2,037 / 2,037 / 2,037 | 0.139 / 0.180 / 0.208 / 0.220 | 0.054 / 0.056 / 0.059 / 0.065 | 0.103 / 0.110 / 0.115 / 0.117 | 0.162 / 0.167 / 0.166 / 0.167 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,019 / 1,019 / 1,019 / 1,019 | 0.148 / 0.162 / 0.164 / 0.166 | 0.091 / 0.092 / 0.092 / 0.093 | 0.208 / 0.211 / 0.211 / 0.211 | 0.602 / 0.602 / 0.602 / 0.602 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 11,931 / 11,931 / 11,931 / 11,931 | 0.114 / 0.124 / 0.144 / 0.178 | 0.503 / 0.505 / 0.510 / 0.516 | 0.245 / 0.245 / 0.246 / 0.247 | 0.266 / 0.265 / 0.265 / 0.268 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 4,651 / 4,651 / 4,651 / 4,651 | 0.191 / 0.223 / 0.268 / 0.415 | 0.368 / 0.365 / 0.365 / 0.370 | 0.664 / 0.668 / 0.669 / 0.671 | 0.032 / 0.032 / 0.032 / 0.032 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 867 / 867 / 867 | 1.000 / 0.355 / 0.402 / 0.517 | 1.000 / 0.398 / 0.397 / 0.391 | 1.000 / 0.544 / 0.550 / 0.557 | 1.000 / 0.029 / 0.029 / 0.030 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 102,297 / 112,278 / 116,527 / 117,516 | 0.157 / 0.194 / 0.225 / 0.252 | 0.117 / 0.160 / 0.186 / 0.191 | 0.152 / 0.157 / 0.159 / 0.160 | 0.168 / 0.172 / 0.175 / 0.175 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 9,001 / 10,847 / 11,695 / 12,090 | 0.184 / 0.255 / 0.332 / 0.401 | 0.043 / 0.055 / 0.059 / 0.063 | 0.153 / 0.170 / 0.182 / 0.189 | 0.399 / 0.423 / 0.437 / 0.439 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,495 / 4,146 / 4,350 / 4,483 | 0.158 / 0.232 / 0.278 / 0.322 | 0.056 / 0.066 / 0.079 / 0.102 | 0.110 / 0.122 / 0.129 / 0.133 | 0.174 / 0.182 / 0.183 / 0.187 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,859 / 1,919 / 1,923 / 1,923 | 0.153 / 0.169 / 0.172 / 0.173 | 0.100 / 0.101 / 0.101 / 0.101 | 0.212 / 0.215 / 0.215 / 0.215 | 0.608 / 0.609 / 0.610 / 0.610 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 21,639 / 21,986 / 21,863 / 22,136 | 0.118 / 0.131 / 0.152 / 0.194 | 0.521 / 0.525 / 0.528 / 0.535 | 0.246 / 0.248 / 0.248 / 0.249 | 0.278 / 0.277 / 0.277 / 0.280 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,782 / 6,632 / 6,752 / 7,077 | 0.190 / 0.231 / 0.284 / 0.453 | 0.370 / 0.381 / 0.379 / 0.385 | 0.662 / 0.674 / 0.674 / 0.676 | 0.032 / 0.033 / 0.033 / 0.033 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 602 / 827 / 850 / 872 | 0.245 / 0.329 / 0.356 / 0.433 | 0.378 / 0.383 / 0.380 / 0.377 | 0.478 / 0.506 / 0.509 / 0.515 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +--- + ### BEVFusion-LiDAR J6Gen2_base/2.7.1
From 00a0422d09f4a6ff8d5180c2df700883cb212d9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 13:20:12 +0000 Subject: [PATCH 160/162] ci(pre-commit): autofix --- .../docs/BEVFusion-L/v2/j6gen2_base.md | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md index eb477b9b9..044ab291f 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md @@ -67,7 +67,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(75,589) | truck
(8,273) | bus
(2,706) | bicycle
(2,097) | pedestrian
(23,254) | traffic_cone
(8,310) | barrier
(1,350) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.7289 | 0.6808 | 0.6820 | 0.6757 | 0.6579 | 0.6516 | 0.9000 | 0.8398 | 0.9130 | 0.8907 | 0.8535 | 0.4465 | 0.2590 | @@ -76,7 +76,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(64,960) | truck
(5,922) | bus
(2,257) | bicycle
(1,298) | pedestrian
(12,052) | traffic_cone
(2,636) | barrier
(622) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5802 | 0.5217 | 0.5876 | 0.5690 | 0.5584 | 0.5398 | 0.8127 | 0.6518 | 0.7926 | 0.6527 | 0.6690 | 0.2760 | 0.2064 | @@ -94,7 +94,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(162,690) | truck
(17,701) | bus
(5,507) | bicycle
(3,771) | pedestrian
(38,962) | traffic_cone
(11,408) | barrier
(2,117) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6590 | 0.6053 | 0.6391 | 0.6214 | 0.6122 | 0.5946 | 0.8547 | 0.7285 | 0.8389 | 0.7843 | 0.7789 | 0.3955 | 0.2321 | @@ -107,7 +107,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1699 | 0.1847 | 0.2714 | 0.1985 | 1.0000 | 0.1878 | 0.2040 | 0.2866 | 0.2091 | 1.0000 | 0.2039 | 0.2176 | 0.2883 | 0.2138 | 1.0000 | @@ -133,7 +133,7 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 65,297 / 67,749 / 68,945 / 69,400 | 6,298 / 7,033 / 7,266 / 7,405 | 2,282 / 2,494 / 2,565 / 2,589 | 1,789 / 1,815 / 1,822 / 1,825 | 18,701 / 19,099 / 19,305 / 19,357 | 4,529 / 4,841 / 5,046 / 5,561 | 453 / 529 / 550 / 562 |
- +
Eval Range: 50.0 - 90.0m @@ -165,7 +165,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3252 | 0.2112 | 0.3207 | 0.3389 | 1.0000 | 0.4093 | 0.3020 | 0.3601 | 0.4304 | 1.0000 | 0.3625 | 0.2467 | 0.3279 | 0.3624 | 1.0000 | @@ -191,10 +191,10 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,838 / 16,236 / 17,440 / 17,774 | 1,483 / 1,981 / 2,484 / 2,599 | 192 / 312 / 394 / 405 | 187 / 218 / 223 / 223 | 2,141 / 2,167 / 2,181 / 2,199 | 132 / 147 / 178 / 186 | 33 / 52 / 72 / 73 |
- +
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1972 | 0.2036 | 0.2826 | 0.2208 | 1.0000 | 0.2503 | 0.2473 | 0.3146 | 0.2684 | 1.0000 | 0.2405 | 0.2402 | 0.2976 | 0.2433 | 1.0000 | @@ -237,7 +237,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(14,872) | truck
(1,192) | bus
(336) | bicycle
(740) | pedestrian
(5,055) | traffic_cone
(60) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6313 | 0.6015 | 0.5746 | 0.5493 | 0.5597 | 0.5344 | 0.9156 | 0.8702 | 0.9160 | 0.8586 | 0.8588 | 0.0000 | 0.0000 | @@ -246,7 +246,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(10,929) | truck
(1,009) | bus
(141) | bicycle
(460) | pedestrian
(3,721) | traffic_cone
(4) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5281 | 0.4877 | 0.4942 | 0.4916 | 0.4740 | 0.4714 | 0.8442 | 0.7108 | 0.8522 | 0.5764 | 0.7129 | 0.0000 | 0.0000 | @@ -255,7 +255,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(2,883) | truck
(600) | bus
(60) | bicycle
(85) | pedestrian
(1,092) | traffic_cone
(0) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4172 | 0.3831 | 0.4189 | 0.4104 | 0.4018 | 0.3934 | 0.7548 | 0.6586 | 0.5716 | 0.3759 | 0.5594 | 0.0000 | 0.0000 | @@ -264,7 +264,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(28,684) | truck
(2,801) | bus
(537) | bicycle
(1,285) | pedestrian
(9,868) | traffic_cone
(64) | barrier
(0) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.5779 | 0.5432 | 0.5404 | 0.5154 | 0.5230 | 0.4980 | 0.8813 | 0.7754 | 0.8642 | 0.7410 | 0.7836 | 0.0000 | 0.0000 | @@ -301,10 +301,10 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 13,178 / 13,676 / 13,748 / 13,798 | 925 / 1,041 / 1,064 / 1,073 | 254 / 330 / 333 / 333 | 612 / 628 / 640 / 643 | 4,247 / 4,294 / 4,313 / 4,330 | 19 / 20 / 20 / 21 | 0 / 0 / 0 / 0 |
- +
Eval Range: 50.0 - 90.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3091 | 0.6981 | 0.3081 | 0.3833 | 1.0000 | 0.3181 | 0.6966 | 0.3115 | 0.3980 | 1.0000 | 0.2197 | 0.6583 | 0.2015 | 0.3121 | 1.0000 | @@ -330,7 +330,7 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 8,463 / 9,288 / 9,554 / 9,621 | 617 / 739 / 799 / 804 | 103 / 124 / 124 / 124 | 263 / 289 / 292 / 292 | 2,604 / 2,652 / 2,667 / 2,682 | 2 / 2 / 2 / 2 | 0 / 0 / 0 / 0 |
- +
Eval Range: 90.0 - 121.0m @@ -359,10 +359,10 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 1,929 / 2,144 / 2,236 / 2,337 | 301 / 408 / 466 / 475 | 26 / 32 / 41 / 41 | 39 / 42 / 46 / 46 | 691 / 697 / 700 / 730 | 0 / 0 / 0 / 0 | 0 / 0 / 0 / 0 |
- +
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.2907 | 0.5134 | 0.3242 | 0.3572 | 1.0000 | 0.4150 | 0.4172 | 0.4068 | 0.4972 | 1.0000 | 0.2100 | 0.4414 | 0.2253 | 0.2736 | 1.0000 | @@ -430,7 +430,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(19,301) | truck
(2,906) | bus
(484) | bicycle
(291) | pedestrian
(2,564) | traffic_cone
(462) | barrier
(145) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.4384 | 0.3949 | 0.4973 | 0.4673 | 0.4756 | 0.4456 | 0.7075 | 0.5046 | 0.5412 | 0.5343 | 0.4732 | 0.1509 | 0.1571 | @@ -439,18 +439,18 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(134,456) | truck
(14,900) | bus
(4,970) | bicycle
(2,486) | pedestrian
(29,102) | traffic_cone
(11,344) | barrier
(2,117) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.6650 | 0.6111 | 0.6420 | 0.6241 | 0.6150 | 0.5972 | 0.8448 | 0.7186 | 0.8363 | 0.8063 | 0.7779 | 0.4361 | 0.2350 |
- + - **Mean TPError - J6Gen2**
Eval Range: 0.0 - 50.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1692 | 0.1845 | 0.2711 | 0.1983 | 1.0000 | 0.1868 | 0.2021 | 0.2864 | 0.2108 | 1.0000 | 0.2026 | 0.2155 | 0.2887 | 0.2184 | 1.0000 | @@ -506,10 +506,10 @@ | BEVFusion-LiDAR j6gen2_base/2.8.1 | 39,059 / 43,688 / 45,797 / 46,308 | 2,628 / 3,307 / 3,561 / 3,660 | 1,261 / 1,700 / 1,825 / 1,855 | 584 / 579 / 581 / 611 | 5,589 / 5,531 / 5,588 / 5,745 | 1,142 / 1,223 / 1,314 / 1,394 | 156 / 231 / 237 / 268 |
- +
Eval Range: 90.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.3293 | 0.2166 | 0.3253 | 0.3476 | 1.0000 | 0.4132 | 0.3083 | 0.3639 | 0.4336 | 1.0000 | 0.3657 | 0.2567 | 0.3324 | 0.3634 | 1.0000 | @@ -538,7 +538,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | BEVFusion-LiDAR j6gen2_base/2.8.1 | 0.1971 | 0.2029 | 0.2829 | 0.2224 | 1.0000 | 0.2506 | 0.2464 | 0.3150 | 0.2717 | 1.0000 | 0.2419 | 0.2401 | 0.2986 | 0.2465 | 1.0000 | From f0af17409d5ba845ec0eded84fab31b72c4f1a76 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Mon, 22 Jun 2026 23:07:32 +0900 Subject: [PATCH 161/162] Update base, j6gen2, jpntaxi docstring --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 3 + .../docs/BEVFusion-L/v2/j6gen2_base.md | 9 +- .../docs/BEVFusion-L/v2/jpntaxi_base.md | 427 ++++++++++++++++-- 3 files changed, 404 insertions(+), 35 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index fdd038874..124c3041c 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -728,6 +728,9 @@ - Purse custom LayerNorm in mmdeploy to support ONNX LayerNorm ops (purge_mmdeploy_symbolics). - Add two classes: `traffic_cone` and `barrier` to the model. - Do not max-pooling of `bicycle`. +- Filter 3D bboxes in the train set by number of points per distance range: >= 3 points for x < 60m, and >= 2 points for 60m <= x < 130m. +- Filter 3D bboxes in the test set by number of points (>= 2). +
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md index 044ab291f..36812924c 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md @@ -221,7 +221,7 @@
-
+
## Datasets @@ -569,6 +569,8 @@ ## Release ### BEVFusion-LiDAR J6Gen2_base/2.8.1 + +
Changes - Finetune from `BEVFusion-LiDAR base/2.8.0` with j6gen2 base dataset and intensity. @@ -594,8 +596,8 @@ Training configs - [Config file path](https://github.com/KSeangTan/AWML/blob/3d5e2fa3df7ad61d9ae773a3ea3f418f4916e05b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py) -- Train time: NVIDIA H200 140GB * 8 * 30 epochs = 1 day -- Batch size: 8*8 = 64 +- Train time: NVIDIA H200 140GB * 8 * 30 epochs = 20 hours +- Batch size: 8*16 = 128 - Training Dataset (frames: 63,813): - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + db_j6gen2_v10 + db_j6gen2_v11 + db_j6gen2_v12 (51,208 frames) - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames) @@ -1300,6 +1302,7 @@ | traffic_cone | 5,782 / 6,632 / 6,752 / 7,077 | 0.190 / 0.231 / 0.284 / 0.453 | 0.370 / 0.381 / 0.379 / 0.385 | 0.662 / 0.674 / 0.674 / 0.676 | 0.032 / 0.033 / 0.033 / 0.033 | 1.000 / 1.000 / 1.000 / 1.000 | | barrier | 602 / 827 / 850 / 872 | 0.245 / 0.329 / 0.356 / 0.433 | 0.378 / 0.383 / 0.380 / 0.377 | 0.478 / 0.506 / 0.509 / 0.515 | 0.026 / 0.027 / 0.028 / 0.028 | 1.000 / 1.000 / 1.000 / 1.000 | +
--- diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md index 24454df13..e316cf3f0 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -13,7 +13,7 @@ - **Total Frames: 5,179**
- jpntaxi_gen2 (9,975 frames) + jpntaxi_gen2 (10,687 frames) - `db_jpntaxigen2_v1` - `db_jpntaxigen2_v2` @@ -25,7 +25,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6765 | 0.6414 | 0.6054 | 0.6011 | 0.5878 | 0.5835 | 0.9267 | 0.8595 | 0.8713 | 0.7844 | 0.9097 | 0.3843 | 0.0000 | @@ -34,65 +34,428 @@
Eval Range: 50.0 - 90.0m - - | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7125 | 0.6854 | 0.8453 | 0.6838 | 0.5362 | 0.6969 | 0.8003 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(41,196) | truck
(22,942) | bus
(3,033) | bicycle
(2,310) | pedestrian
(36,881) | traffic_cone
(7,183) | barrier
(1,847) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.5673 | 0.5339 | 0.5415 | 0.5344 | 0.5248 | 0.5177 | 0.8485 | 0.7006 | 0.5352 | 0.6574 | 0.8105 | 0.4191 | 0.0000 |
Eval Range: 90.0 - 121.0m - - | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.6030 | 0.5762 | 0.6947 | 0.5260 | 0.5030 | 0.5321 | 0.7591 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(17,510) | truck
(14,707) | bus
(2,997) | bicycle
(566) | pedestrian
(16,580) | traffic_cone
(769) | barrier
(566) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.4071 | 0.3837 | 0.4400 | 0.4130 | 0.4284 | 0.4014 | 0.7505 | 0.5566 | 0.4693 | 0.3213 | 0.7219 | 0.0302 | 0.0000 |
Eval Range: 0.0 - 121.0m - - | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7805 | 0.7527 | 0.8730 | 0.7118 | 0.6785 | 0.7655 | 0.8739 | + + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall 0.40) | car
(109,660) | truck
(56,273) | bus
(9,883) | bicycle
(6,644) | pedestrian
(124,160) | traffic_cone
(20,477) | barrier
(4,422) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6019 | 0.5676 | 0.5626 | 0.5563 | 0.5455 | 0.5392 | 0.8804 | 0.7288 | 0.6567 | 0.7104 | 0.8656 | 0.3711 | 0.0000 |
### Mean TPError - JPNTaxi_gen2 +- Recalls: `0.10`, `0.40`, `optimal` + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.2719 | 0.2964 | 0.2975 | 0.4627 | 1.0000 | 0.2842 | 0.3077 | 0.3000 | 0.4803 | 1.0000 | 0.1861 | 0.1991 | 0.1904 | 0.4301 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 5,604 / 5,604 / 5,604 / 5,604 | 2,048 / 2,048 / 2,048 / 2,048 | 423 / 423 / 423 / 423 | 414 / 414 / 414 / 414 | 7,776 / 7,776 / 7,776 / 7,776 | 1,377 / 1,377 / 1,377 / 1,377 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 20,891 / 20,891 / 20,891 / 20,891 | 7,635 / 7,635 / 7,635 / 7,635 | 1,579 / 1,579 / 1,579 / 1,579 | 1,544 / 1,544 / 1,544 / 1,544 | 28,986 / 28,986 / 28,986 / 28,986 | 5,135 / 5,135 / 5,135 / 5,135 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 44,532 / 46,751 / 47,578 / 47,681 | 14,348 / 16,107 / 17,107 / 17,466 | 3,000 / 3,436 / 3,542 / 3,547 | 2,845 / 2,913 / 2,876 / 2,876 | 60,539 / 61,514 / 62,277 / 62,441 | 6,486 / 6,824 / 7,125 / 7,398 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.3372 | 0.2750 | 0.3096 | 0.4999 | 1.0000 | 0.3578 | 0.2816 | 0.3131 | 0.5396 | 1.0000 | 0.2676 | 0.1735 | 0.2026 | 0.5014 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 4,531 / 4,531 / 4,531 / 4,531 | 2,523 / 2,523 / 2,523 / 2,523 | 333 / 333 / 333 / 333 | 254 / 254 / 254 / 254 | 4,056 / 4,056 / 4,056 / 4,056 | 790 / 790 / 790 / 790 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 16,890 / 16,890 / 16,890 / 16,890 | 9,406 / 9,406 / 9,406 / 9,406 | 1,243 / 1,243 / 1,243 / 1,243 | 947 / 947 / 947 / 947 | 15,121 / 15,121 / 15,121 / 15,121 | 2,945 / 2,945 / 2,945 / 2,945 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 41,196) | truck
0.5/1.0/2.0/4.0
(GTs: 22,942) | bus
0.5/1.0/2.0/4.0
(GTs: 3,033) | bicycle
0.5/1.0/2.0/4.0
(GTs: 2,310) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 36,881) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 7,183) | barrier
0.5/1.0/2.0/4.0
(GTs: 1,847) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 31,946 / 34,752 / 35,481 / 35,761 | 13,234 / 15,760 / 17,748 / 18,656 | 1,143 / 1,712 / 1,912 / 1,948 | 1,403 / 1,491 / 1,492 / 1,475 | 28,761 / 29,331 / 29,372 / 29,481 | 3,696 / 3,847 / 3,872 / 4,000 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.3797 | 0.3240 | 0.3484 | 0.5829 | 1.0000 | 0.4651 | 0.3644 | 0.3942 | 0.6814 | 1.0000 | 0.3149 | 0.2340 | 0.2477 | 0.5670 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 1,926 / 1,926 / 1,926 / 1,926 | 1,617 / 1,617 / 1,617 / 1,617 | 329 / 329 / 329 / 329 | 62 / 62 / 62 / 62 | 1,823 / 1,823 / 1,823 / 1,823 | 84 / 84 / 84 / 84 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 7,179 / 7,179 / 7,179 / 7,179 | 6,029 / 6,029 / 6,029 / 6,029 | 1,228 / 1,228 / 1,228 / 1,228 | 232 / 232 / 232 / 232 | 6,797 / 6,797 / 6,797 / 6,797 | 0 / 0 / 315 / 315 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 17,510) | truck
0.5/1.0/2.0/4.0
(GTs: 14,707) | bus
0.5/1.0/2.0/4.0
(GTs: 2,997) | bicycle
0.5/1.0/2.0/4.0
(GTs: 566) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 16,580) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 769) | barrier
0.5/1.0/2.0/4.0
(GTs: 566) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 11,780 / 13,293 / 14,056 / 14,133 | 5,970 / 8,371 / 10,098 / 11,087 | 1,279 / 1,474 / 1,872 / 1,945 | 222 / 261 / 241 / 246 | 11,842 / 12,158 / 12,274 / 12,415 | 148 / 150 / 155 / 167 | 0 / 0 / 0 / 0 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.3015 | 0.2921 | 0.3048 | 0.4845 | 1.0000 | 0.3235 | 0.3009 | 0.3096 | 0.5122 | 1.0000 | 0.2333 | 0.1943 | 0.2011 | 0.4683 | 1.0000 | + + Num match summary + + **recall 0.10** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 12,062 / 12,062 / 12,062 / 12,062 | 6,190 / 6,190 / 6,190 / 6,190 | 1,087 / 1,087 / 1,087 / 1,087 | 730 / 730 / 730 / 730 | 13,657 / 13,657 / 13,657 / 13,657 | 2,252 / 2,252 / 2,252 / 2,252 | 0 / 0 / 0 / 0 | + + **recall 0.40** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 44,960 / 44,960 / 44,960 / 44,960 | 23,071 / 23,071 / 23,071 / 23,071 | 4,052 / 4,052 / 4,052 / 4,052 | 2,724 / 2,724 / 2,724 / 2,724 | 50,905 / 50,905 / 50,905 / 50,905 | 8,395 / 8,395 / 8,395 / 8,395 | 0 / 0 / 0 / 0 | + + **optimal** + + | Model version | car
0.5/1.0/2.0/4.0
(GTs: 109,660) | truck
0.5/1.0/2.0/4.0
(GTs: 56,273) | bus
0.5/1.0/2.0/4.0
(GTs: 9,883) | bicycle
0.5/1.0/2.0/4.0
(GTs: 6,644) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 124,160) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 20,477) | barrier
0.5/1.0/2.0/4.0
(GTs: 4,422) | + | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | + | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 87,589 / 94,880 / 97,103 / 97,882 | 33,139 / 40,397 / 44,749 / 47,200 | 4,572 / 6,029 / 7,207 / 7,319 | 4,479 / 4,726 / 4,742 / 4,763 | 100,710 / 102,804 / 103,519 / 104,226 | 10,025 / 10,672 / 11,043 / 11,437 | 0 / 0 / 0 / 0 | + +
+ + +## Release + +### BEVFusion-LiDAR JPNTaxi_base/2.8.1 +
- Eval Range: 0.0 - 50.0m + Changes - | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.2719 | 0.2964 | 0.2975 | 0.4627 | 1.0000 | 0.2842 | 0.3077 | 0.3000 | 0.4803 | 1.0000 | 0.1861 | 0.1991 | 0.1904 | 0.4301 | 1.0000 | +- Finetune from `BEVFusion-LiDAR base/2.7.0` with JPNTaxi_base dataset and intensity. +
-Num match summary +
+ Artifacts -**recall 0.10** +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.ci.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/73d62897-9beb-400f-a2bf-5af234da909d?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.8.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1UaTEqJlKMzLh2e4h5C3wl0ZoGdn9gIn-/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.8.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1J27_CDEokWZD6s3aPzxuYS7mcdI-HqtW/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.8.1/best_epoch_30.zip) + - [Google drive](https://drive.google.com/file/d/1Oe6aZq4N_fL7ejhploDm2sm440JDOLvw/view?usp=drive_link) - | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | - | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | - | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 5,604 / 5,604 / 5,604 / 5,604 | 2,048 / 2,048 / 2,048 / 2,048 | 423 / 423 / 423 / 423 | 414 / 414 / 414 / 414 | 7,776 / 7,776 / 7,776 / 7,776 | 1,377 / 1,377 / 1,377 / 1,377 | 0 / 0 / 0 / 0 | +
-**recall 0.40** +
+ Training configs - | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | - | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | - | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 20,891 / 20,891 / 20,891 / 20,891 | 7,635 / 7,635 / 7,635 / 7,635 | 1,579 / 1,579 / 1,579 / 1,579 | 1,544 / 1,544 / 1,544 / 1,544 | 28,986 / 28,986 / 28,986 / 28,986 | 5,135 / 5,135 / 5,135 / 5,135 | 0 / 0 / 0 / 0 | +- [Config file path](https://github.com/KSeangTan/AWML/blob/00a0422d09f4a6ff8d5180c2df700883cb212d9f/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py) +- Train time: NVIDIA H200 140GB * 8 * 30 epochs = 20 hours +- Batch size: 16*8 = 128 +- Training Dataset (frames: 58,329): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (30,168 frames) -**optimal** +
+ +
+ Evaluation - | Model version | car
0.5/1.0/2.0/4.0
(GTs: 50,954) | truck
0.5/1.0/2.0/4.0
(GTs: 18,624) | bus
0.5/1.0/2.0/4.0
(GTs: 3,853) | bicycle
0.5/1.0/2.0/4.0
(GTs: 3,768) | pedestrian
0.5/1.0/2.0/4.0
(GTs: 70,699) | traffic_cone
0.5/1.0/2.0/4.0
(GTs: 12,525) | barrier
0.5/1.0/2.0/4.0
(GTs: 2,009) | - | :---- | :---- | :---- | :---- | :---- | :---- | :---- | :---- | - | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 44,532 / 46,751 / 47,578 / 47,681 | 14,348 / 16,107 / 17,107 / 17,466 | 3,000 / 3,436 / 3,542 / 3,547 | 2,845 / 2,913 / 2,876 / 2,876 | 60,539 / 61,514 / 62,277 / 62,441 | 6,486 / 6,824 / 7,125 / 7,398 | 0 / 0 / 0 / 0 | +**JPNTaxi_gen2 Datasets (10,687 frames)**: + + - jpntaxi_gen2 (10,687 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.6765** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 50,954 | 46,144 / 48,579 / 49,273 / 49,538 | 0.868 / 0.934 / 0.947 / 0.957 | 0.909 / 0.941 / 0.947 / 0.950 | 0.315 / 0.210 / 0.157 / 0.157 | +| truck | 18,624 | 14,858 / 16,616 / 17,746 / 18,084 | 0.697 / 0.851 / 0.933 / 0.957 | 0.808 / 0.893 / 0.939 / 0.954 | 0.316 / 0.228 / 0.185 / 0.167 | +| bus | 3,853 | 3,238 / 3,521 / 3,636 / 3,647 | 0.787 / 0.875 / 0.911 / 0.912 | 0.808 / 0.872 / 0.897 / 0.899 | 0.056 / 0.033 / 0.033 / 0.033 | +| bicycle | 3,768 | 3,284 / 3,339 / 3,343 / 3,343 | 0.771 / 0.788 / 0.789 / 0.789 | 0.808 / 0.815 / 0.816 / 0.816 | 0.253 / 0.229 / 0.251 / 0.251 | +| pedestrian | 70,699 | 66,395 / 67,135 / 67,375 / 67,695 | 0.893 / 0.907 / 0.918 / 0.921 | 0.884 / 0.893 / 0.900 / 0.905 | 0.159 / 0.153 / 0.147 / 0.151 | +| traffic_cone | 12,525 | 8,566 / 9,087 / 9,426 / 9,627 | 0.338 / 0.367 / 0.402 / 0.430 | 0.517 / 0.534 / 0.558 / 0.579 | 0.133 / 0.127 / 0.127 / 0.127 | +| barrier | 2,009 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 5,604 / 5,604 / 5,604 / 5,604 | 0.104 / 0.112 / 0.114 / 0.120 | 0.023 / 0.024 / 0.025 / 0.025 | 0.104 / 0.105 / 0.106 / 0.106 | 0.326 / 0.331 / 0.333 / 0.334 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,048 / 2,048 / 2,048 / 2,048 | 0.133 / 0.170 / 0.193 / 0.205 | 0.015 / 0.016 / 0.016 / 0.016 | 0.112 / 0.121 / 0.125 / 0.127 | 0.366 / 0.367 / 0.377 / 0.381 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 423 / 423 / 423 / 423 | 0.152 / 0.162 / 0.169 / 0.170 | 0.016 / 0.016 / 0.017 / 0.017 | 0.107 / 0.109 / 0.110 / 0.110 | 0.253 / 0.264 / 0.267 / 0.267 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 414 / 414 / 414 / 414 | 0.125 / 0.128 / 0.128 / 0.129 | 0.089 / 0.089 / 0.089 / 0.090 | 0.194 / 0.195 / 0.195 / 0.195 | 0.740 / 0.744 / 0.745 / 0.746 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 7,776 / 7,776 / 7,776 / 7,776 | 0.106 / 0.108 / 0.113 / 0.120 | 0.239 / 0.240 / 0.242 / 0.244 | 0.231 / 0.231 / 0.231 / 0.231 | 0.425 / 0.425 / 0.426 / 0.427 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 1,377 / 1,377 / 1,377 / 1,377 | 0.133 / 0.148 / 0.227 / 0.344 | 0.684 / 0.693 / 0.693 / 0.679 | 0.322 / 0.322 / 0.321 / 0.319 | 0.103 / 0.103 / 0.102 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 |
+
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 20,891 / 20,891 / 20,891 / 20,891 | 0.111 / 0.122 / 0.125 / 0.131 | 0.025 / 0.027 / 0.028 / 0.028 | 0.107 / 0.109 / 0.109 / 0.109 | 0.353 / 0.358 / 0.360 / 0.361 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 7,635 / 7,635 / 7,635 / 7,635 | 0.142 / 0.181 / 0.213 / 0.230 | 0.016 / 0.017 / 0.017 / 0.018 | 0.116 / 0.125 / 0.131 / 0.133 | 0.423 / 0.423 / 0.434 / 0.438 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,579 / 1,579 / 1,579 / 1,579 | 0.154 / 0.169 / 0.180 / 0.182 | 0.017 / 0.017 / 0.018 / 0.018 | 0.110 / 0.113 / 0.115 / 0.115 | 0.305 / 0.313 / 0.315 / 0.316 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,544 / 1,544 / 1,544 / 1,544 | 0.131 / 0.135 / 0.136 / 0.137 | 0.097 / 0.098 / 0.098 / 0.099 | 0.198 / 0.199 / 0.199 / 0.199 | 0.719 / 0.724 / 0.726 / 0.726 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 28,986 / 28,986 / 28,986 / 28,986 | 0.111 / 0.114 / 0.121 / 0.132 | 0.246 / 0.247 / 0.250 / 0.253 | 0.233 / 0.234 / 0.234 / 0.234 | 0.438 / 0.438 / 0.438 / 0.440 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 5,135 / 5,135 / 5,135 / 5,135 | 0.147 / 0.170 / 0.268 / 0.415 | 0.746 / 0.755 / 0.749 / 0.732 | 0.320 / 0.321 / 0.319 / 0.318 | 0.100 / 0.100 / 0.099 / 0.099 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | +
-## Release +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 44,532 / 46,751 / 47,578 / 47,681 | 0.125 / 0.145 / 0.157 / 0.168 | 0.032 / 0.040 / 0.047 / 0.047 | 0.113 / 0.118 / 0.119 / 0.119 | 0.398 / 0.415 / 0.427 / 0.427 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 14,348 / 16,107 / 17,107 / 17,466 | 0.153 / 0.208 / 0.267 / 0.314 | 0.019 / 0.022 / 0.023 / 0.024 | 0.123 / 0.135 / 0.145 / 0.151 | 0.489 / 0.513 / 0.550 / 0.568 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 3,000 / 3,436 / 3,542 / 3,547 | 0.159 / 0.199 / 0.229 / 0.234 | 0.018 / 0.020 / 0.022 / 0.022 | 0.116 / 0.127 / 0.131 / 0.131 | 0.358 / 0.367 / 0.367 / 0.368 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,845 / 2,913 / 2,876 / 2,876 | 0.136 / 0.141 / 0.141 / 0.142 | 0.107 / 0.108 / 0.108 / 0.109 | 0.203 / 0.204 / 0.204 / 0.204 | 0.712 / 0.716 / 0.719 / 0.720 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 60,539 / 61,514 / 62,277 / 62,441 | 0.118 / 0.127 / 0.142 / 0.169 | 0.262 / 0.267 / 0.271 / 0.277 | 0.237 / 0.238 / 0.238 / 0.239 | 0.452 / 0.451 / 0.452 / 0.454 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 6,486 / 6,824 / 7,125 / 7,398 | 0.144 / 0.165 / 0.266 / 0.419 | 0.732 / 0.740 / 0.735 / 0.725 | 0.320 / 0.321 / 0.318 / 0.316 | 0.100 / 0.100 / 0.099 / 0.099 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.5673** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 41,196 | 34,194 / 37,651 / 39,026 / 39,360 | 0.757 / 0.853 / 0.887 / 0.898 | 0.807 / 0.862 / 0.880 / 0.884 | 0.201 / 0.166 / 0.166 / 0.159 | +| truck | 22,942 | 14,460 / 17,423 / 19,681 / 20,483 | 0.506 / 0.667 / 0.794 / 0.835 | 0.651 / 0.753 / 0.828 / 0.849 | 0.255 / 0.200 / 0.160 / 0.127 | +| bus | 3,033 | 1,394 / 2,043 / 2,373 / 2,438 | 0.234 / 0.531 / 0.676 / 0.700 | 0.434 / 0.642 / 0.726 / 0.740 | 0.094 / 0.086 / 0.094 / 0.094 | +| bicycle | 2,310 | 1,834 / 1,933 / 1,939 / 1,942 | 0.612 / 0.668 / 0.669 / 0.680 | 0.683 / 0.712 / 0.713 / 0.719 | 0.168 / 0.155 / 0.155 / 0.170 | +| pedestrian | 36,881 | 33,944 / 34,428 / 34,559 / 34,739 | 0.793 / 0.808 / 0.815 / 0.826 | 0.794 / 0.803 / 0.808 / 0.814 | 0.162 / 0.156 / 0.159 / 0.161 | +| traffic_cone | 7,183 | 5,066 / 5,255 / 5,320 / 5,413 | 0.375 / 0.405 / 0.434 / 0.463 | 0.522 / 0.542 / 0.561 / 0.582 | 0.127 / 0.126 / 0.134 / 0.135 | +| barrier | 1,847 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 4,531 / 4,531 / 4,531 / 4,531 | 0.139 / 0.152 / 0.160 / 0.167 | 0.035 / 0.042 / 0.045 / 0.047 | 0.136 / 0.139 / 0.140 / 0.140 | 0.298 / 0.322 / 0.334 / 0.339 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 2,523 / 2,523 / 2,523 / 2,523 | 0.185 / 0.224 / 0.277 / 0.302 | 0.023 / 0.026 / 0.028 / 0.031 | 0.150 / 0.157 / 0.165 / 0.169 | 0.434 / 0.474 / 0.511 / 0.529 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 333 / 333 / 333 / 333 | 0.216 / 0.346 / 0.420 / 0.450 | 0.026 / 0.031 / 0.034 / 0.035 | 0.121 / 0.137 / 0.146 / 0.147 | 0.269 / 0.274 / 0.279 / 0.281 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 254 / 254 / 254 / 254 | 0.169 / 0.184 / 0.185 / 0.222 | 0.084 / 0.084 / 0.084 / 0.085 | 0.237 / 0.237 / 0.237 / 0.236 | 0.830 / 0.849 / 0.850 / 0.846 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 4,056 / 4,056 / 4,056 / 4,056 | 0.116 / 0.121 / 0.131 / 0.159 | 0.301 / 0.304 / 0.308 / 0.314 | 0.259 / 0.260 / 0.260 / 0.261 | 0.456 / 0.456 / 0.457 / 0.460 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 790 / 790 / 790 / 790 | 0.169 / 0.185 / 0.298 / 0.468 | 0.433 / 0.436 / 0.436 / 0.431 | 0.233 / 0.233 / 0.234 / 0.234 | 0.113 / 0.112 / 0.112 / 0.112 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 16,890 / 16,890 / 16,890 / 16,890 | 0.150 / 0.168 / 0.180 / 0.190 | 0.043 / 0.053 / 0.057 / 0.059 | 0.141 / 0.144 / 0.145 / 0.145 | 0.353 / 0.381 / 0.398 / 0.403 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 9,406 / 9,406 / 9,406 / 9,406 | 0.191 / 0.244 / 0.319 / 0.355 | 0.027 / 0.030 / 0.033 / 0.037 | 0.156 / 0.164 / 0.175 / 0.180 | 0.529 / 0.572 / 0.610 / 0.632 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,243 / 1,243 / 1,243 / 1,243 | 0.237 / 0.382 / 0.487 / 0.535 | 0.055 / 0.045 / 0.046 / 0.048 | 0.129 / 0.144 / 0.156 / 0.158 | 0.356 / 0.325 / 0.323 / 0.324 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 947 / 947 / 947 / 947 | 0.177 / 0.195 / 0.197 / 0.243 | 0.102 / 0.100 / 0.100 / 0.101 | 0.246 / 0.247 / 0.247 / 0.246 | 0.881 / 0.896 / 0.897 / 0.894 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 15,121 / 15,121 / 15,121 / 15,121 | 0.120 / 0.126 / 0.139 / 0.176 | 0.325 / 0.328 / 0.332 / 0.339 | 0.261 / 0.261 / 0.262 / 0.262 | 0.478 / 0.478 / 0.479 / 0.483 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2,945 / 2,945 / 2,945 / 2,945 | 0.172 / 0.193 / 0.318 / 0.527 | 0.404 / 0.409 / 0.407 / 0.403 | 0.224 / 0.225 / 0.225 / 0.226 | 0.104 / 0.104 / 0.103 / 0.103 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 31,946 / 34,752 / 35,481 / 35,761 | 0.165 / 0.200 / 0.223 / 0.241 | 0.067 / 0.087 / 0.094 / 0.096 | 0.149 / 0.154 / 0.155 / 0.155 | 0.433 / 0.483 / 0.513 / 0.522 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 13,234 / 15,760 / 17,748 / 18,656 | 0.197 / 0.269 / 0.377 / 0.455 | 0.028 / 0.036 / 0.045 / 0.052 | 0.160 / 0.174 / 0.190 / 0.201 | 0.576 / 0.664 / 0.728 / 0.774 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,143 / 1,712 / 1,912 / 1,948 | 0.237 / 0.393 / 0.503 / 0.559 | 0.042 / 0.055 / 0.055 / 0.057 | 0.127 / 0.147 / 0.159 / 0.161 | 0.311 / 0.346 / 0.328 / 0.326 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 1,403 / 1,491 / 1,492 / 1,475 | 0.176 / 0.196 / 0.197 / 0.242 | 0.101 / 0.106 / 0.106 / 0.105 | 0.247 / 0.247 / 0.247 / 0.246 | 0.895 / 0.915 / 0.915 / 0.902 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 28,761 / 29,331 / 29,372 / 29,481 | 0.124 / 0.134 / 0.150 / 0.193 | 0.345 / 0.350 / 0.355 / 0.362 | 0.262 / 0.263 / 0.263 / 0.263 | 0.495 / 0.496 / 0.497 / 0.501 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 3,696 / 3,847 / 3,872 / 4,000 | 0.169 / 0.190 / 0.317 / 0.517 | 0.405 / 0.408 / 0.405 / 0.399 | 0.224 / 0.224 / 0.223 / 0.223 | 0.105 / 0.104 / 0.103 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4071** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 17,510 | 13,505 / 15,174 / 15,882 / 16,111 | 0.639 / 0.753 / 0.798 / 0.812 | 0.718 / 0.784 / 0.807 / 0.812 | 0.219 / 0.182 / 0.156 / 0.156 | +| truck | 14,707 | 6,697 / 9,720 / 11,675 / 13,042 | 0.250 / 0.516 / 0.671 / 0.789 | 0.473 / 0.653 / 0.747 / 0.812 | 0.262 / 0.239 / 0.163 / 0.150 | +| bus | 2,997 | 1,441 / 1,779 / 2,237 / 2,331 | 0.261 / 0.392 / 0.591 / 0.634 | 0.415 / 0.515 / 0.655 / 0.680 | 0.031 / 0.040 / 0.040 / 0.040 | +| bicycle | 566 | 373 / 415 / 430 / 431 | 0.245 / 0.307 / 0.356 / 0.377 | 0.451 / 0.482 / 0.507 / 0.518 | 0.144 / 0.114 / 0.161 / 0.161 | +| pedestrian | 16,580 | 14,776 / 14,991 / 15,087 / 15,224 | 0.704 / 0.718 / 0.725 / 0.741 | 0.731 / 0.738 / 0.744 / 0.752 | 0.146 / 0.138 / 0.137 / 0.137 | +| traffic_cone | 769 | 310 / 315 / 325 / 336 | 0.024 / 0.027 / 0.030 / 0.039 | 0.205 / 0.208 / 0.215 / 0.231 | 0.212 / 0.212 / 0.212 / 0.212 | +| barrier | 566 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 1,926 / 1,926 / 1,926 / 1,926 | 0.188 / 0.209 / 0.225 / 0.236 | 0.051 / 0.054 / 0.058 / 0.060 | 0.178 / 0.181 / 0.184 / 0.184 | 0.513 / 0.543 / 0.555 / 0.561 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 1,617 / 1,617 / 1,617 / 1,617 | 0.232 / 0.330 / 0.406 / 0.540 | 0.025 / 0.028 / 0.031 / 0.036 | 0.171 / 0.188 / 0.197 / 0.215 | 0.564 / 0.574 / 0.600 / 0.604 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 329 / 329 / 329 / 329 | 0.165 / 0.241 / 0.405 / 0.451 | 0.034 / 0.038 / 0.107 / 0.114 | 0.130 / 0.135 / 0.161 / 0.168 | 0.147 / 0.176 / 0.208 / 0.220 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 62 / 62 / 62 / 62 | 0.177 / 0.223 / 0.295 / 0.412 | 0.139 / 0.134 / 0.127 / 0.136 | 0.233 / 0.244 / 0.266 / 0.263 | 0.917 / 1.004 / 1.090 / 1.068 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 1,823 / 1,823 / 1,823 / 1,823 | 0.132 / 0.138 / 0.149 / 0.202 | 0.255 / 0.256 / 0.261 / 0.268 | 0.280 / 0.280 / 0.280 / 0.280 | 0.561 / 0.561 / 0.562 / 0.565 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 84 / 84 / 84 / 84 | 0.176 / 0.200 / 0.317 / 0.583 | 0.707 / 0.719 / 0.715 / 0.718 | 0.384 / 0.386 / 0.385 / 0.383 | 0.181 / 0.181 / 0.182 / 0.185 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 7,179 / 7,179 / 7,179 / 7,179 | 0.201 / 0.229 / 0.251 / 0.267 | 0.066 / 0.069 / 0.074 / 0.077 | 0.183 / 0.187 / 0.189 / 0.189 | 0.566 / 0.601 / 0.616 / 0.623 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,029 / 6,029 / 6,029 / 6,029 | 0.247 / 0.361 / 0.470 / 0.627 | 0.041 / 0.036 / 0.039 / 0.046 | 0.181 / 0.196 / 0.207 / 0.228 | 0.699 / 0.685 / 0.699 / 0.696 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,228 / 1,228 / 1,228 / 1,228 | 0.200 / 0.302 / 0.536 / 0.596 | 0.036 / 0.048 / 0.168 / 0.178 | 0.143 / 0.151 / 0.191 / 0.198 | 0.209 / 0.265 / 0.296 / 0.309 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 232 / 232 / 232 / 232 | 0.189 / 0.238 / 0.279 / 0.376 | 0.211 / 0.191 / 0.181 / 0.181 | 0.223 / 0.234 / 0.248 / 0.248 | 0.956 / 1.039 / 1.085 / 1.071 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 6,797 / 6,797 / 6,797 / 6,797 | 0.137 / 0.145 / 0.161 / 0.226 | 0.289 / 0.290 / 0.295 / 0.306 | 0.276 / 0.276 / 0.276 / 0.277 | 0.570 / 0.570 / 0.571 / 0.575 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 0 / 0 / 315 / 315 | 1.000 / 1.000 / 0.359 / 0.628 | 1.000 / 1.000 / 0.690 / 0.692 | 1.000 / 1.000 / 0.370 / 0.367 | 1.000 / 1.000 / 0.188 / 0.190 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 11,780 / 13,293 / 14,056 / 14,133 | 0.206 / 0.247 / 0.284 / 0.304 | 0.069 / 0.076 / 0.085 / 0.088 | 0.185 / 0.190 / 0.193 / 0.193 | 0.592 / 0.650 / 0.688 / 0.695 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 5,970 / 8,371 / 10,098 / 11,087 | 0.244 / 0.366 / 0.506 / 0.702 | 0.033 / 0.035 / 0.044 / 0.053 | 0.178 / 0.198 / 0.215 / 0.242 | 0.662 / 0.702 / 0.764 / 0.766 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,279 / 1,474 / 1,872 / 1,945 | 0.199 / 0.304 / 0.558 / 0.628 | 0.036 / 0.049 / 0.187 / 0.200 | 0.142 / 0.151 / 0.198 / 0.207 | 0.213 / 0.270 / 0.309 / 0.322 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 222 / 261 / 241 / 246 | 0.178 / 0.228 / 0.268 / 0.379 | 0.122 / 0.135 / 0.114 / 0.124 | 0.230 / 0.236 / 0.256 / 0.252 | 0.899 / 0.973 / 1.038 / 1.020 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 11,842 / 12,158 / 12,274 / 12,415 | 0.139 / 0.149 / 0.169 / 0.244 | 0.302 / 0.307 / 0.315 / 0.326 | 0.275 / 0.275 / 0.275 / 0.275 | 0.575 / 0.577 / 0.578 / 0.582 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 148 / 150 / 155 / 167 | 0.176 / 0.192 / 0.302 / 0.587 | 0.724 / 0.734 / 0.733 / 0.725 | 0.393 / 0.396 / 0.397 / 0.394 | 0.180 / 0.181 / 0.184 / 0.188 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.6019** + +| class_name | GTs | num_match@0.5/1.0/2.0/4.0 | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | :---- | :---- | :---- | :---- | +| car | 109,660 | 94,016 / 101,637 / 104,473 / 105,345 | 0.797 / 0.884 / 0.915 / 0.925 | 0.842 / 0.888 / 0.902 / 0.905 | 0.250 / 0.182 / 0.166 / 0.157 | +| truck | 56,273 | 36,118 / 43,936 / 49,381 / 51,968 | 0.513 / 0.701 / 0.822 / 0.879 | 0.664 / 0.779 / 0.850 / 0.880 | 0.305 / 0.215 / 0.184 / 0.150 | +| bus | 9,883 | 6,087 / 7,377 / 8,290 / 8,461 | 0.467 / 0.636 / 0.751 / 0.773 | 0.582 / 0.694 / 0.770 / 0.782 | 0.256 / 0.083 / 0.053 / 0.053 | +| bicycle | 6,644 | 5,499 / 5,696 / 5,721 / 5,725 | 0.680 / 0.715 / 0.721 / 0.725 | 0.736 / 0.753 / 0.756 / 0.759 | 0.199 / 0.170 / 0.170 / 0.170 | +| pedestrian | 124,160 | 115,223 / 116,646 / 117,116 / 117,764 | 0.848 / 0.862 / 0.873 / 0.879 | 0.837 / 0.846 / 0.852 / 0.858 | 0.161 / 0.153 / 0.153 / 0.153 | +| traffic_cone | 20,477 | 13,955 / 14,674 / 15,090 / 15,403 | 0.329 / 0.355 / 0.385 / 0.416 | 0.499 / 0.515 / 0.536 / 0.558 | 0.142 / 0.132 / 0.134 / 0.135 | +| barrier | 4,422 | 0 / 0 / 0 / 0 | 0.000 / 0.000 / 0.000 / 0.000 | nan / nan / nan / nan | nan / nan / nan / nan | + +
+TP error — default (recall @0.10) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 12,062 / 12,062 / 12,062 / 12,062 | 0.122 / 0.134 / 0.140 / 0.147 | 0.029 / 0.032 / 0.035 / 0.036 | 0.119 / 0.122 / 0.123 / 0.123 | 0.334 / 0.347 / 0.356 / 0.359 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 6,190 / 6,190 / 6,190 / 6,190 | 0.166 / 0.214 / 0.259 / 0.299 | 0.019 / 0.021 / 0.023 / 0.025 | 0.134 / 0.145 / 0.152 / 0.157 | 0.424 / 0.446 / 0.471 / 0.484 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 1,087 / 1,087 / 1,087 / 1,087 | 0.165 / 0.215 / 0.267 / 0.283 | 0.021 / 0.023 / 0.036 / 0.038 | 0.114 / 0.120 / 0.128 / 0.130 | 0.264 / 0.273 / 0.281 / 0.284 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 730 / 730 / 730 / 730 | 0.137 / 0.145 / 0.148 / 0.161 | 0.091 / 0.091 / 0.092 / 0.092 | 0.204 / 0.206 / 0.207 / 0.206 | 0.749 / 0.761 / 0.765 / 0.764 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 13,657 / 13,657 / 13,657 / 13,657 | 0.111 / 0.114 / 0.121 / 0.137 | 0.256 / 0.257 / 0.260 / 0.264 | 0.241 / 0.241 / 0.242 / 0.242 | 0.444 / 0.444 / 0.445 / 0.446 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 2,252 / 2,252 / 2,252 / 2,252 | 0.146 / 0.161 / 0.253 / 0.398 | 0.609 / 0.614 / 0.613 / 0.603 | 0.295 / 0.296 / 0.295 / 0.294 | 0.107 / 0.107 / 0.107 / 0.106 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — medium (recall @0.40) + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 44,960 / 44,960 / 44,960 / 44,960 | 0.132 / 0.148 / 0.157 / 0.166 | 0.033 / 0.039 / 0.042 / 0.043 | 0.125 / 0.128 / 0.129 / 0.130 | 0.371 / 0.388 / 0.399 / 0.402 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 23,071 / 23,071 / 23,071 / 23,071 | 0.178 / 0.237 / 0.299 / 0.356 | 0.022 / 0.025 / 0.027 / 0.030 | 0.141 / 0.153 / 0.162 / 0.170 | 0.505 / 0.528 / 0.552 / 0.566 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 4,052 / 4,052 / 4,052 / 4,052 | 0.179 / 0.252 / 0.332 / 0.355 | 0.027 / 0.029 / 0.049 / 0.052 | 0.120 / 0.129 / 0.140 / 0.143 | 0.309 / 0.312 / 0.316 / 0.320 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 2,724 / 2,724 / 2,724 / 2,724 | 0.147 / 0.158 / 0.163 / 0.182 | 0.101 / 0.101 / 0.101 / 0.102 | 0.215 / 0.216 / 0.217 / 0.217 | 0.766 / 0.778 / 0.785 / 0.783 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 50,905 / 50,905 / 50,905 / 50,905 | 0.116 / 0.121 / 0.131 / 0.155 | 0.269 / 0.271 / 0.275 / 0.280 | 0.245 / 0.246 / 0.246 / 0.246 | 0.462 / 0.462 / 0.463 / 0.465 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 8,395 / 8,395 / 8,395 / 8,395 | 0.157 / 0.180 / 0.289 / 0.470 | 0.626 / 0.633 / 0.630 / 0.620 | 0.288 / 0.289 / 0.288 / 0.287 | 0.103 / 0.103 / 0.103 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | 1.000 / 1.000 / 1.000 / 1.000 | + +
+ +
+TP error — optimal + +| class_name | num_match@0.5/1.0/2.0/4.0 | ATE@0.5/1.0/2.0/4.0 | AOE@0.5/1.0/2.0/4.0 | ASE@0.5/1.0/2.0/4.0 | AVE@0.5/1.0/2.0/4.0 | AEE@0.5/1.0/2.0/4.0 | +| :---- | :---- | :---- | :---- | :---- | :---- | :---- | +| car | 87,589 / 94,880 / 97,103 / 97,882 | 0.149 / 0.180 / 0.199 / 0.215 | 0.048 / 0.062 / 0.069 / 0.071 | 0.135 / 0.141 / 0.143 / 0.143 | 0.434 / 0.474 / 0.496 / 0.503 | 1.000 / 1.000 / 1.000 / 1.000 | +| truck | 33,139 / 40,397 / 44,749 / 47,200 | 0.185 / 0.266 / 0.362 / 0.461 | 0.024 / 0.030 / 0.035 / 0.041 | 0.146 / 0.163 / 0.178 / 0.192 | 0.548 / 0.617 / 0.665 / 0.693 | 1.000 / 1.000 / 1.000 / 1.000 | +| bus | 4,572 / 6,029 / 7,207 / 7,319 | 0.174 / 0.272 / 0.389 / 0.422 | 0.021 / 0.035 / 0.075 / 0.080 | 0.116 / 0.133 / 0.154 / 0.157 | 0.290 / 0.334 / 0.357 / 0.362 | 1.000 / 1.000 / 1.000 / 1.000 | +| bicycle | 4,479 / 4,726 / 4,742 / 4,763 | 0.149 / 0.163 / 0.167 / 0.189 | 0.103 / 0.108 / 0.108 / 0.110 | 0.218 / 0.220 / 0.221 / 0.220 | 0.771 / 0.786 / 0.791 / 0.789 | 1.000 / 1.000 / 1.000 / 1.000 | +| pedestrian | 100,710 / 102,804 / 103,519 / 104,226 | 0.122 / 0.131 / 0.147 / 0.186 | 0.290 / 0.295 / 0.299 / 0.307 | 0.249 / 0.249 / 0.250 / 0.250 | 0.478 / 0.478 / 0.479 / 0.483 | 1.000 / 1.000 / 1.000 / 1.000 | +| traffic_cone | 10,025 / 10,672 / 11,043 / 11,437 | 0.152 / 0.173 / 0.284 / 0.461 | 0.614 / 0.619 / 0.614 / 0.604 | 0.288 / 0.288 / 0.286 / 0.285 | 0.104 / 0.104 / 0.103 / 0.102 | 1.000 / 1.000 / 1.000 / 1.000 | +| barrier | 0 / 0 / 0 / 0 | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | nan / nan / nan / nan | + +
+ +
+ +--- ### BEVFusion-LiDAR JPNTaxi_base/2.7.1 From 15f2c9965bfdd7ee1ed9edc4c086db19ff2bd3aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 14:08:59 +0000 Subject: [PATCH 162/162] ci(pre-commit): autofix --- projects/BEVFusion/docs/BEVFusion-L/v2/base.md | 2 +- projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 124c3041c..c814f93a6 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -728,7 +728,7 @@ - Purge custom LayerNorm in mmdeploy to support ONNX LayerNorm ops (purge_mmdeploy_symbolics). - Add two classes: `traffic_cone` and `barrier` to the model. - Do not max-pooling of `bicycle`. -- Filter 3D bboxes in the train set by distance (< 60m, >= 3) and (60m <= x < 130m, >= 2). +- Filter 3D bboxes in the train set by distance (< 60m, >= 3) and (60m <= x < 130m, >= 2). - Filter 3D bboxes in the test set by number of points (>= 2). diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md index e316cf3f0..45c5827d1 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -25,7 +25,7 @@
Eval Range: 0.0 - 50.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(50,954) | truck
(18,624) | bus
(3,853) | bicycle
(3,768) | pedestrian
(70,699) | traffic_cone
(12,525) | barrier
(2,009) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6765 | 0.6414 | 0.6054 | 0.6011 | 0.5878 | 0.5835 | 0.9267 | 0.8595 | 0.8713 | 0.7844 | 0.9097 | 0.3843 | 0.0000 | @@ -34,7 +34,7 @@
Eval Range: 50.0 - 90.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(41,196) | truck
(22,942) | bus
(3,033) | bicycle
(2,310) | pedestrian
(36,881) | traffic_cone
(7,183) | barrier
(1,847) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.5673 | 0.5339 | 0.5415 | 0.5344 | 0.5248 | 0.5177 | 0.8485 | 0.7006 | 0.5352 | 0.6574 | 0.8105 | 0.4191 | 0.0000 | @@ -43,7 +43,7 @@
Eval Range: 90.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(17,510) | truck
(14,707) | bus
(2,997) | bicycle
(566) | pedestrian
(16,580) | traffic_cone
(769) | barrier
(566) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.4071 | 0.3837 | 0.4400 | 0.4130 | 0.4284 | 0.4014 | 0.7505 | 0.5566 | 0.4693 | 0.3213 | 0.7219 | 0.0302 | 0.0000 | @@ -52,7 +52,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mAP | mAPH | map_based_nds (recall @ 0.10) | map_based_nds (recall @ 0.40) | maph_based_nds (recall @ 0.10) | maph_based_nds (recall @ 0.40) | car
(109,660) | truck
(56,273) | bus
(9,883) | bicycle
(6,644) | pedestrian
(124,160) | traffic_cone
(20,477) | barrier
(4,422) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.6019 | 0.5676 | 0.5626 | 0.5563 | 0.5455 | 0.5392 | 0.8804 | 0.7288 | 0.6567 | 0.7104 | 0.8656 | 0.3711 | 0.0000 | @@ -152,7 +152,7 @@
Eval Range: 0.0 - 121.0m - + | Model version | mATE (recall @ 0.10) | mAOE (recall @ 0.10) | mASE (recall @ 0.10) | mAVE (recall @ 0.10) | mAAE (recall @ 0.10) | mATE (recall @ 0.40) | mAOE (recall @ 0.40) | mASE (recall @ 0.40) | mAVE (recall @ 0.40) | mAAE (recall @ 0.40) | mATE (optimal) | mAOE (optimal) | mASE (optimal) | mAVE (optimal) | mAAE (optimal) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | | BEVFusion-LiDAR jpntaxi_base/2.8.1 | 0.3015 | 0.2921 | 0.3048 | 0.4845 | 1.0000 | 0.3235 | 0.3009 | 0.3096 | 0.5122 | 1.0000 | 0.2333 | 0.1943 | 0.2011 | 0.4683 | 1.0000 |