Skip to content

Commit 40bedbe

Browse files
committed
Removed YOLO node, removed _gem utils file
1 parent cd915f6 commit 40bedbe

6 files changed

Lines changed: 269 additions & 49 deletions

File tree

GEMstack/onboard/perception/README.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,11 @@ docker compose -f setup/docker-compose.yaml up
5454
```
5555
roscore
5656
```
57-
6. Start up YOLO node (make sure you source first):
58-
```
59-
python3 GEMstack/onboard/perception/yolo_node.py
60-
```
61-
7. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first):
57+
6. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first):
6258
```
6359
python3 main.py --variant=detector_only launch/combined_detection.yaml
6460
```
65-
8. Run a rosbag on a loop (make sure you source first):
61+
7. Run a rosbag on a loop (make sure you source first):
6662
```
6763
rosbag play -l yourRosbagNameGoesHere.bag
6864
```

GEMstack/onboard/perception/combined_detection.py

Lines changed: 232 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
22
from ..interface.gem import GEMInterface
33
from ..component import Component
4-
from .perception_utils import pose_to_matrix, calculate_3d_iou
5-
from .perception_utils_gem import *
4+
from .perception_utils import *
65
from typing import Dict, List, Optional, Tuple
76
import rospy
87
from message_filters import Subscriber, ApproximateTimeSynchronizer
@@ -15,6 +14,12 @@
1514
from geometry_msgs.msg import Quaternion
1615

1716
import copy
17+
from cv_bridge import CvBridge
18+
from sensor_msgs.msg import PointCloud2, Image
19+
from scipy.spatial.transform import Rotation as R
20+
from ultralytics import YOLO
21+
import cv2
22+
from .combined_detection_utils import add_bounding_box
1823

1924

2025
def average_yaw(yaw1, yaw2):
@@ -223,10 +228,12 @@ class CombinedDetector3D(Component):
223228
def __init__(
224229
self,
225230
vehicle_interface: GEMInterface,
226-
enable_tracking: bool = True,
227-
use_start_frame: bool = True,
231+
camera_name,
232+
camera_calib_file,
228233
iou_threshold: float = 0.1,
229234
merge_mode: str = "Average",
235+
enable_tracking: bool = True,
236+
use_start_frame: bool = True,
230237
**kwargs
231238
):
232239
self.vehicle_interface = vehicle_interface
@@ -245,11 +252,39 @@ def __init__(
245252

246253
self.yolo_topic = '/yolo_boxes'
247254
self.pp_topic = '/pointpillars_boxes'
248-
self.debug = False
249255

250256
rospy.loginfo(f"CombinedDetector3D Initialized. Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")
251257
rospy.loginfo(f"Using merge mode: {self.merge_mode}")
252258

259+
self.latest_image = None
260+
self.latest_lidar = None
261+
self.bridge = CvBridge()
262+
self.camera_name = camera_name
263+
self.camera_front = (self.camera_name == 'front')
264+
self.score_threshold = 0.4
265+
self.debug = True
266+
267+
# Load camera intrinsics/extrinsics from YAML
268+
with open(camera_calib_file, 'r') as f:
269+
calib = yaml.safe_load(f)
270+
271+
# Expect structure:
272+
# cameras:
273+
# front:
274+
# K: [[...], [...], [...]]
275+
# D: [...]
276+
# T_l2c: [[...], ..., [...]]
277+
cam_cfg = calib['cameras'][self.camera_name]
278+
self.K = np.array(cam_cfg['K'])
279+
self.D = np.array(cam_cfg['D'])
280+
self.T_l2c = np.array(cam_cfg['T_l2c'])
281+
self.T_l2v = np.array(cam_cfg['T_l2v'])
282+
283+
self.undistort_map1 = None
284+
self.undistort_map2 = None
285+
self.camera_front = (self.camera_name == 'front')
286+
287+
253288
def rate(self) -> float:
    """Execution frequency of this component, in Hz."""
    return 8.0
255290

@@ -261,6 +296,33 @@ def state_outputs(self) -> list:
261296

262297
def initialize(self):
263298
"""Initialize subscribers and publishers."""
299+
# # --- Determine the correct RGB topic for this camera ---
300+
rgb_topic_map = {
301+
'front': '/oak/rgb/image_raw',
302+
'front_right': '/camera_fr/arena_camera_node/image_raw',
303+
# add additional camera mappings here if needed
304+
}
305+
rgb_topic = rgb_topic_map.get(
306+
self.camera_name,
307+
f'/{self.camera_name}/rgb/image_raw'
308+
)
309+
310+
# Create bounding box publisher
311+
self.pub_yolo = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
312+
rospy.loginfo("YOLO node initialized and waiting for messages.")
313+
314+
# Initialize the YOLO detector and move to GPU if available
315+
self.detector = YOLO('yolov8n.pt')
316+
self.detector.to('cuda')
317+
318+
# Subscribe to the RGB and LiDAR streams
319+
self.rgb_sub = Subscriber(rgb_topic, Image)
320+
self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
321+
self.sync = ApproximateTimeSynchronizer([
322+
self.rgb_sub, self.lidar_sub
323+
], queue_size=10, slop=0.1)
324+
self.sync.registerCallback(self.synchronized_yolo_callback)
325+
264326
self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
265327
self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)
266328
self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1)
@@ -276,6 +338,171 @@ def initialize(self):
276338
self.sync.registerCallback(self.synchronized_callback)
277339
rospy.loginfo("CombinedDetector3D Subscribers Initialized.")
278340

341+
def synchronized_yolo_callback(self, image_msg, lidar_msg):
    """Fuse a synchronized RGB image and LiDAR scan into 3D pedestrian boxes.

    Pipeline:
      1. Convert the ROS image and point cloud to OpenCV / numpy form.
      2. Undistort the image for non-front cameras.
      3. Run YOLO person detection (class 0) on the image.
      4. Project LiDAR points into the image and gather the points that
         fall inside each 2D detection.
      5. Filter/cluster those points, fit an oriented 3D box, transform it
         into the vehicle frame, and publish all boxes on ``self.pub_yolo``.

    Args:
        image_msg: sensor_msgs/Image from the synchronized RGB stream.
        lidar_msg: sensor_msgs/PointCloud2 from the synchronized LiDAR.
    """
    rospy.loginfo("Received synchronized RGB and LiDAR messages")

    # Convert image message to OpenCV BGR format.
    try:
        self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
    except Exception as e:
        rospy.logerr(f"Failed to convert image: {e}")
        self.latest_image = None

    # Convert LiDAR message to a numpy array (XYZ only).
    self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)

    # Skip this frame unless both sensors produced usable data.
    if self.latest_image is None or self.latest_lidar is None:
        return  # callback return value is unused by ROS

    latest_image = self.latest_image.copy()

    # Optionally downsample LiDAR points (disabled by default).
    downsample = False
    if downsample:
        lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
    else:
        lidar_down = self.latest_lidar.copy()

    if not self.camera_front:
        # Non-front cameras need undistortion before projection.
        # BUG FIX: original referenced `lastest_image` (typo), raising a
        # NameError on every non-front-camera frame.
        undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D)
        self.current_K = current_K
        orig_H, orig_W = undistorted_img.shape[:2]
        img_normal = undistorted_img
    else:
        img_normal = latest_image.copy()
        undistorted_img = latest_image.copy()
        orig_H, orig_W = latest_image.shape[:2]
        self.current_K = self.K

    # Run YOLO person detection (class 0). Using self.score_threshold keeps
    # the detector cutoff consistent with the re-filtering below.
    results_normal = self.detector(img_normal, conf=self.score_threshold, classes=[0])
    # Extract boxes and confidences once (the original computed boxes twice).
    boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
    conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else []

    # Project LiDAR points into the image plane:
    #   projected_pts[:, 0]   u (horizontal pixel position)
    #   projected_pts[:, 1]   v (vertical pixel position)
    #   projected_pts[:, 2:5] original X, Y, Z in the LiDAR frame
    pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
    projected_pts = project_points(pts_cam, self.current_K, lidar_down)

    # Accumulate detected boxes for a single publish at the end.
    boxes = BoundingBoxArray()
    boxes.header.frame_id = 'currentVehicleFrame'
    boxes.header.stamp = lidar_msg.header.stamp

    for i, box in enumerate(boxes_normal):
        # Skip low-confidence detections.
        if conf_scores[i] < self.score_threshold:
            continue

        # 2D bounding box extent in pixel coordinates.
        cx, cy, w, h = box
        left = int(cx - w / 2)
        right = int(cx + w / 2)
        top = int(cy - h / 2)
        bottom = int(cy + h / 2)

        # LiDAR points whose projection falls inside this 2D box.
        mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) &
                (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom))
        roi_pts = projected_pts[mask]

        # Too few points cannot form a reliable cluster.
        if roi_pts.shape[0] < 5:
            continue

        # Recover the 3D (LiDAR-frame) points and refine the cluster.
        points_3d = roi_pts[:, 2:5]
        points_3d = filter_depth_points(points_3d, max_depth_diff=0.8)
        refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
        refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1)

        if refined_cluster.shape[0] < 5:
            continue

        # Fit an oriented bounding box to the refined cluster.
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(refined_cluster)
        obb = pcd.get_oriented_bounding_box()
        refined_center = obb.center
        # Assumed extent ordering: dims[0]=height, dims[2]=length — TODO confirm.
        dims = tuple(obb.extent)
        R_lidar = obb.R.copy()

        # Transform the box center from the LiDAR frame to the vehicle frame.
        refined_center_hom = np.append(refined_center, 1)
        refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
        refined_center_vehicle = refined_center_vehicle_hom[:3]

        # Yaw of the box in the vehicle frame.
        R_vehicle = self.T_l2v[:3, :3] @ R_lidar
        yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0])

        boxes = add_bounding_box(
            boxes=boxes,
            frame_id='currentVehicleFrame',
            stamp=lidar_msg.header.stamp,
            x=refined_center_vehicle[0],
            y=refined_center_vehicle[1],
            z=refined_center_vehicle[2],
            l=dims[2],  # length
            w=dims[1],  # width
            h=dims[0],  # height
            yaw=yaw,
            conf_score=float(conf_scores[i]),
            label=0  # person/pedestrian class
        )

        rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, "
                      f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
                      f"with score {conf_scores[i]:.2f}")

    # Publish all boxes gathered for this frame.
    rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
    self.pub_yolo.publish(boxes)
def undistort_image(self, image, K, D):
    """Undistort `image` using intrinsics K and distortion coefficients D.

    Returns:
        (undistorted_image, newK) where newK is the optimal new camera
        matrix corresponding to the undistorted image.

    The rectification maps and newK depend only on K, D, and the image
    size, so they are computed once and cached on the instance. The
    original recomputed cv2.getOptimalNewCameraMatrix on every frame,
    wasting work in the per-frame callback path.
    """
    h, w = image.shape[:2]

    # Initialize undistortion maps (and the matching newK) only once.
    if self.undistort_map1 is None or self.undistort_map2 is None:
        # alpha=1 retains all source pixels in the undistorted view.
        newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
        self.undistort_newK = newK
        self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(
            K, D, R=None, newCameraMatrix=newK, size=(w, h),
            m1type=cv2.CV_32FC1)

    # INTER_NEAREST trades a little quality for speed in the hot path.
    undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2,
                            interpolation=cv2.INTER_NEAREST)
    return undistorted, self.undistort_newK
279506
def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
280507
"""Callback for synchronized YOLO and PointPillars messages."""
281508
self.latest_yolo_bbxs = yolo_bbxs_msg

GEMstack/onboard/perception/cone_detection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from ..interface.gem import GEMInterface
44
from ..component import Component
55
from .perception_utils import *
6-
from .perception_utils_gem import *
76
from ultralytics import YOLO
87
import cv2
98
from typing import Dict

GEMstack/onboard/perception/pedestrian_detection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from ..interface.gem import GEMInterface
33
from ..component import Component
44
from .perception_utils import * # If you want to alias functions for clarity, do so in perception_utils
5-
from .perception_utils_gem import *
65
from ultralytics import YOLO
76
from typing import Dict
87
import open3d as o3d

GEMstack/onboard/perception/perception_utils.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,44 @@
77
import sensor_msgs.point_cloud2 as pc2
88
import ros_numpy
99
import math
10-
10+
from ...state import ObjectPose, AgentState
11+
from typing import Dict
12+
import numpy as np
1113

1214
# ----- Helper Functions -----
1315

16+
17+
def match_existing_cone(
    new_center: np.ndarray,
    new_dims: tuple,
    existing_agents: Dict[str, AgentState],
    distance_threshold: float = 1.0
) -> str:
    """Return the id of the closest existing Cone agent within
    `distance_threshold` of `new_center`, or None if no agent is that
    close. `new_dims` is accepted for interface symmetry but does not
    affect the matching decision.
    """
    closest_id = None
    # Seeding the running minimum with the threshold folds the two
    # comparisons (within threshold AND closer than best) into one.
    closest_dist = distance_threshold
    for cone_id, cone in existing_agents.items():
        center = np.array([cone.pose.x, cone.pose.y, cone.pose.z])
        separation = np.linalg.norm(new_center - center)
        if separation < closest_dist:
            closest_dist = separation
            closest_id = cone_id
    return closest_id
36+
def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple:
    """Return the (vx, vy, vz) average velocity implied by moving from
    `old_pose` to `new_pose` over `dt` seconds.

    A non-positive `dt` yields (0, 0, 0) rather than dividing by zero.
    """
    if dt <= 0:
        return (0, 0, 0)
    deltas = (
        new_pose.x - old_pose.x,
        new_pose.y - old_pose.y,
        new_pose.z - old_pose.z,
    )
    return tuple(delta / dt for delta in deltas)
1448
def cylindrical_roi(points, center, radius, height):
1549
horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1)
1650
vertical_diff = np.abs(points[:, 2] - center[2])

0 commit comments

Comments
 (0)