from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
from ..interface.gem import GEMInterface
from ..component import Component
- from .perception_utils import pose_to_matrix, calculate_3d_iou
- from .perception_utils_gem import *
+ from .perception_utils import *
from typing import Dict, List, Optional, Tuple
import rospy
from message_filters import Subscriber, ApproximateTimeSynchronizer
from geometry_msgs.msg import Quaternion

import copy
+ from cv_bridge import CvBridge
+ from sensor_msgs.msg import PointCloud2, Image
+ from scipy.spatial.transform import Rotation as R
+ from ultralytics import YOLO
+ import cv2
+ import yaml            # needed for the calibration file loaded in __init__ below
+ import open3d as o3d   # needed for the oriented bounding boxes fitted in the callback below
+ from .combined_detection_utils import add_bounding_box


def average_yaw(yaw1, yaw2):
@@ -223,10 +228,12 @@ class CombinedDetector3D(Component):
    def __init__(
        self,
        vehicle_interface: GEMInterface,
-         enable_tracking: bool = True,
-         use_start_frame: bool = True,
+         camera_name,
+         camera_calib_file,
        iou_threshold: float = 0.1,
        merge_mode: str = "Average",
+         enable_tracking: bool = True,
+         use_start_frame: bool = True,
        **kwargs
    ):
        self.vehicle_interface = vehicle_interface
@@ -245,11 +252,39 @@ def __init__(

        self.yolo_topic = '/yolo_boxes'
        self.pp_topic = '/pointpillars_boxes'
-         self.debug = False

        rospy.loginfo(f"CombinedDetector3D Initialized. Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")
        rospy.loginfo(f"Using merge mode: {self.merge_mode}")

+         self.latest_image = None
+         self.latest_lidar = None
+         self.bridge = CvBridge()
+         self.camera_name = camera_name
+         self.camera_front = (self.camera_name == 'front')
+         self.score_threshold = 0.4
+         self.debug = True
+
+         # Load camera intrinsics/extrinsics from YAML
+         with open(camera_calib_file, 'r') as f:
+             calib = yaml.safe_load(f)
+
+         # Expected structure:
+         # cameras:
+         #   front:
+         #     K: [[...], [...], [...]]
+         #     D: [...]
+         #     T_l2c: [[...], ..., [...]]
+         #     T_l2v: [[...], ..., [...]]
+         cam_cfg = calib['cameras'][self.camera_name]
+         self.K = np.array(cam_cfg['K'])
+         self.D = np.array(cam_cfg['D'])
+         self.T_l2c = np.array(cam_cfg['T_l2c'])
+         self.T_l2v = np.array(cam_cfg['T_l2v'])
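+         # Shape notes: K is the 3x3 intrinsic matrix and D the distortion coefficients,
+         # as consumed by cv2.getOptimalNewCameraMatrix below; T_l2v is a 4x4 homogeneous
+         # LiDAR->vehicle transform, and T_l2c is assumed to be the matching 4x4
+         # LiDAR->camera transform expected by transform_points_l2c.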
+
+         self.undistort_map1 = None
+         self.undistort_map2 = None
+
+
    def rate(self) -> float:
        return 8.0

@@ -261,6 +296,33 @@ def state_outputs(self) -> list:

    def initialize(self):
        """Initialize subscribers and publishers."""
+         # --- Determine the correct RGB topic for this camera ---
+         rgb_topic_map = {
+             'front': '/oak/rgb/image_raw',
+             'front_right': '/camera_fr/arena_camera_node/image_raw',
+             # add additional camera mappings here if needed
+         }
+         rgb_topic = rgb_topic_map.get(
+             self.camera_name,
+             f'/{self.camera_name}/rgb/image_raw'
+         )
+
+         # Create the bounding box publisher for this camera's detections
+         self.pub_yolo = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
+         rospy.loginfo("YOLO node initialized and waiting for messages.")
+
+         # Initialize the YOLO detector and move it to the GPU
+         self.detector = YOLO('yolov8n.pt')
+         self.detector.to('cuda')
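+         # Note: .to('cuda') assumes a CUDA-capable GPU; on a CPU-only machine this call
+         # fails, so guard it with torch.cuda.is_available() if that can happen.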
+
+         # Subscribe to the RGB and LiDAR streams and synchronize them
+         self.rgb_sub = Subscriber(rgb_topic, Image)
+         self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
+         # Use a dedicated handle so the fusion synchronizer created further down in
+         # initialize() does not shadow this one.
+         self.yolo_sync = ApproximateTimeSynchronizer([
+             self.rgb_sub, self.lidar_sub
+         ], queue_size=10, slop=0.1)
+         self.yolo_sync.registerCallback(self.synchronized_yolo_callback)
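+         # With slop=0.1 the synchronizer only pairs an image and a LiDAR scan whose header
+         # stamps differ by at most 0.1 s; queue_size=10 bounds the per-topic buffer of
+         # unmatched messages.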
+
        self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
        self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)
        self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1)
@@ -276,6 +338,171 @@ def initialize(self):
        self.sync.registerCallback(self.synchronized_callback)
        rospy.loginfo("CombinedDetector3D Subscribers Initialized.")

+     def synchronized_yolo_callback(self, image_msg, lidar_msg):
+         """Process synchronized RGB and LiDAR messages to detect pedestrians."""
+         rospy.loginfo("Received synchronized RGB and LiDAR messages")
+
+         # Convert the image message to OpenCV BGR format
+         try:
+             self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
+         except Exception as e:
+             rospy.logerr(f"Failed to convert image: {e}")
+             self.latest_image = None
+
+         # Convert the LiDAR message to a numpy array
+         self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)
+
+         # Skip this frame unless both sensors produced data
+         if self.latest_image is None or self.latest_lidar is None:
+             return
+
+         latest_image = self.latest_image.copy()
+
+         # Optionally downsample the LiDAR points
+         downsample = False
+         if downsample:
+             lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
+         else:
+             lidar_down = self.latest_lidar.copy()
+
+         if not self.camera_front:
+             # Non-front cameras get undistorted before detection (the front stream is used as-is)
+             start = time.time()
+             undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D)
+             end = time.time()
+             # print('-------processing time undistort_image---', end - start)
+             self.current_K = current_K
+             orig_H, orig_W = undistorted_img.shape[:2]
+
+             # --- Begin modifications for three-angle detection ---
+             img_normal = undistorted_img
+         else:
+             img_normal = latest_image.copy()
+             undistorted_img = latest_image.copy()
+             orig_H, orig_W = latest_image.shape[:2]
+             self.current_K = self.K
+
+         # Run YOLO detection on the image (class 0 = person)
+         results_normal = self.detector(img_normal, conf=0.4, classes=[0])
+         combined_boxes = []
+
+         boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
+         # for box in boxes_normal:
+         #     cx, cy, w, h = box
+         #     combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING))
+
+         start = time.time()
+         # Transform the LiDAR points from the LiDAR frame into the camera frame, then
+         # project them into the image to "paint" them, i.e. determine which points are
+         # associated with the detected objects.
+         pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
+         projected_pts = project_points(pts_cam, self.current_K, lidar_down)
+         # What project_points returns:
+         #   projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position)
+         #   projected_pts[:, 1]: v-coordinate in the image (vertical pixel position)
+         #   projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame
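+         # A minimal sketch of the assumed pinhole projection behind project_points
+         # (the real helper lives in perception_utils and may also clip to the image bounds):
+         #   front = pts_cam[:, 2] > 0                                   # points in front of the camera
+         #   u = K[0, 0] * pts_cam[front, 0] / pts_cam[front, 2] + K[0, 2]
+         #   v = K[1, 1] * pts_cam[front, 1] / pts_cam[front, 2] + K[1, 2]
+         #   projected_pts = np.column_stack([u, v, lidar_down[front]])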
+
+
+         # Create an empty bounding box array to fill and publish later
+         boxes = BoundingBoxArray()
+         boxes.header.frame_id = 'currentVehicleFrame'
+         boxes.header.stamp = lidar_msg.header.stamp
+
+         # Process YOLO detections
+         boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
+         conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else []
+
+         for i, box in enumerate(boxes_normal):
+             # Skip low-confidence detections
+             if conf_scores[i] < self.score_threshold:
+                 continue
+
+             # Calculate the 2D bounding box corners in the image
+             cx, cy, w, h = box
+             left = int(cx - w / 2)
+             right = int(cx + w / 2)
+             top = int(cy - h / 2)
+             bottom = int(cy + h / 2)
+
+             # Find LiDAR points that project into this box
+             mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) &
+                     (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom))
+             roi_pts = projected_pts[mask]
+
+             # Ignore regions with too few points
+             if roi_pts.shape[0] < 5:
+                 continue
+
+             # Get the 3D points corresponding to the box and clean up the cluster
+             points_3d = roi_pts[:, 2:5]
+             points_3d = filter_depth_points(points_3d, max_depth_diff=0.8)
+             refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
+             refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1)
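+             # These helpers come from perception_utils; presumably they (1) drop points whose
+             # depth is far from the dominant depth inside the box (max_depth_diff=0.8 m),
+             # (2) keep the DBSCAN-style cluster around the centroid (eps=0.15 m, >=10 points),
+             # and (3) strip near-ground returns within z_range of the lowest point.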
+
+             if refined_cluster.shape[0] < 5:
+                 continue
+
+             # Create an Open3D point cloud from the filtered points
+             pcd = o3d.geometry.PointCloud()
+             pcd.points = o3d.utility.Vector3dVector(refined_cluster)
+
+             # Fit an oriented bounding box to the cluster
+             obb = pcd.get_oriented_bounding_box()
+             refined_center = obb.center
+             dims = tuple(obb.extent)
+             R_lidar = obb.R.copy()
+
+             # We assume dims[0] is the height and dims[2] the length of obb.extent
+
+             # Transform the box center from LiDAR to vehicle coordinates
+             refined_center_hom = np.append(refined_center, 1)
+             refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
+             refined_center_vehicle = refined_center_vehicle_hom[:3]
+
+             # Calculate the box rotation in the vehicle frame
+             R_vehicle = self.T_l2v[:3, :3] @ R_lidar
+             # yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False)
+             yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0])
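+             # arctan2(R_vehicle[1, 0], R_vehicle[0, 0]) extracts the heading about the vehicle
+             # z-axis; it matches the first angle of the 'zyx' Euler decomposition commented out
+             # above as long as the pitch stays away from +/-90 degrees.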
+
+             # Add the bounding box to the output array
+             boxes = add_bounding_box(
+                 boxes=boxes,
+                 frame_id='currentVehicleFrame',
+                 stamp=lidar_msg.header.stamp,
+                 x=refined_center_vehicle[0],
+                 y=refined_center_vehicle[1],
+                 z=refined_center_vehicle[2],
+                 l=dims[2],  # length
+                 w=dims[1],  # width
+                 h=dims[0],  # height
+                 yaw=yaw,
+                 conf_score=float(conf_scores[i]),
+                 label=0     # person/pedestrian class
+             )
+
+             rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, "
+                           f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
+                           f"with score {conf_scores[i]:.2f}")
+
+         # Publish the bounding boxes
+         rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
+         self.pub_yolo.publish(boxes)
+
+     def undistort_image(self, image, K, D):
+         """Undistort an image using the camera calibration parameters."""
+         h, w = image.shape[:2]
+         newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+
+         # Initialize the undistortion maps once and reuse them on later frames
+         if self.undistort_map1 is None or self.undistort_map2 is None:
+             self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(
+                 K, D, R=None, newCameraMatrix=newK, size=(w, h), m1type=cv2.CV_32FC1)
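+         # Note: the maps are computed from the first frame only, so this assumes the
+         # incoming image resolution and calibration stay fixed while the node runs.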
+
+         start = time.time()
+         undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST)
+         end = time.time()
+         # print('--------undistort', end - start)
+         return undistorted, newK
+
    def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
        """Callback for synchronized YOLO and PointPillars messages."""
        self.latest_yolo_bbxs = yolo_bbxs_msg