Skip to content

Commit 40bedbe

Browse files
committed
Removed YOLO node, removed _gem utils file
1 parent cd915f6 commit 40bedbe

6 files changed

Lines changed: 269 additions & 49 deletions

File tree

GEMstack/onboard/perception/README.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,11 @@ docker compose -f setup/docker-compose.yaml up
5454
```
5555
roscore
5656
```
57-
6. Start up YOLO node (make sure you source first):
58-
```
59-
python3 GEMstack/onboard/perception/yolo_node.py
60-
```
61-
7. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first):
57+
6. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first):
6258
```
6359
python3 main.py --variant=detector_only launch/combined_detection.yaml
6460
```
65-
8. Run a rosbag on a loop (make sure you source first):
61+
7. Run a rosbag on a loop (make sure you source first):
6662
```
6763
rosbag play -l yourRosbagNameGoesHere.bag
6864
```

GEMstack/onboard/perception/combined_detection.py

Lines changed: 232 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
22
from ..interface.gem import GEMInterface
33
from ..component import Component
4-
from .perception_utils import pose_to_matrix, calculate_3d_iou
5-
from .perception_utils_gem import *
4+
from .perception_utils import *
65
from typing import Dict, List, Optional, Tuple
76
import rospy
87
from message_filters import Subscriber, ApproximateTimeSynchronizer
@@ -15,6 +14,12 @@
1514
from geometry_msgs.msg import Quaternion
1615

1716
import copy
17+
from cv_bridge import CvBridge
18+
from sensor_msgs.msg import PointCloud2, Image
19+
from scipy.spatial.transform import Rotation as R
20+
from ultralytics import YOLO
21+
import cv2
22+
from .combined_detection_utils import add_bounding_box
1823

1924

2025
def average_yaw(yaw1, yaw2):
@@ -223,10 +228,12 @@ class CombinedDetector3D(Component):
223228
def __init__(
224229
self,
225230
vehicle_interface: GEMInterface,
226-
enable_tracking: bool = True,
227-
use_start_frame: bool = True,
231+
camera_name,
232+
camera_calib_file,
228233
iou_threshold: float = 0.1,
229234
merge_mode: str = "Average",
235+
enable_tracking: bool = True,
236+
use_start_frame: bool = True,
230237
**kwargs
231238
):
232239
self.vehicle_interface = vehicle_interface
@@ -245,11 +252,39 @@ def __init__(
245252

246253
self.yolo_topic = '/yolo_boxes'
247254
self.pp_topic = '/pointpillars_boxes'
248-
self.debug = False
249255

250256
rospy.loginfo(f"CombinedDetector3D Initialized. Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")
251257
rospy.loginfo(f"Using merge mode: {self.merge_mode}")
252258

259+
self.latest_image = None
260+
self.latest_lidar = None
261+
self.bridge = CvBridge()
262+
self.camera_name = camera_name
263+
self.camera_front = (self.camera_name == 'front')
264+
self.score_threshold = 0.4
265+
self.debug = True
266+
267+
# Load camera intrinsics/extrinsics from YAML
268+
with open(camera_calib_file, 'r') as f:
269+
calib = yaml.safe_load(f)
270+
271+
# Expect structure:
272+
# cameras:
273+
# front:
274+
# K: [[...], [...], [...]]
275+
# D: [...]
276+
# T_l2c: [[...], ..., [...]]
277+
cam_cfg = calib['cameras'][self.camera_name]
278+
self.K = np.array(cam_cfg['K'])
279+
self.D = np.array(cam_cfg['D'])
280+
self.T_l2c = np.array(cam_cfg['T_l2c'])
281+
self.T_l2v = np.array(cam_cfg['T_l2v'])
282+
283+
self.undistort_map1 = None
284+
self.undistort_map2 = None
285+
self.camera_front = (self.camera_name == 'front')
286+
287+
253288
def rate(self) -> float:
    """Execution frequency of this component, in Hz."""
    return 8.0
255290

@@ -261,6 +296,33 @@ def state_outputs(self) -> list:
261296

262297
def initialize(self):
263298
"""Initialize subscribers and publishers."""
299+
# # --- Determine the correct RGB topic for this camera ---
300+
rgb_topic_map = {
301+
'front': '/oak/rgb/image_raw',
302+
'front_right': '/camera_fr/arena_camera_node/image_raw',
303+
# add additional camera mappings here if needed
304+
}
305+
rgb_topic = rgb_topic_map.get(
306+
self.camera_name,
307+
f'/{self.camera_name}/rgb/image_raw'
308+
)
309+
310+
# Create bounding box publisher
311+
self.pub_yolo = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
312+
rospy.loginfo("YOLO node initialized and waiting for messages.")
313+
314+
# Initialize the YOLO detector and move to GPU if available
315+
self.detector = YOLO('yolov8n.pt')
316+
self.detector.to('cuda')
317+
318+
# Subscribe to the RGB and LiDAR streams
319+
self.rgb_sub = Subscriber(rgb_topic, Image)
320+
self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
321+
self.sync = ApproximateTimeSynchronizer([
322+
self.rgb_sub, self.lidar_sub
323+
], queue_size=10, slop=0.1)
324+
self.sync.registerCallback(self.synchronized_yolo_callback)
325+
264326
self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
265327
self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)
266328
self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1)
@@ -276,6 +338,171 @@ def initialize(self):
276338
self.sync.registerCallback(self.synchronized_callback)
277339
rospy.loginfo("CombinedDetector3D Subscribers Initialized.")
278340

341+
def synchronized_yolo_callback(self, image_msg, lidar_msg):
    """Fuse a synchronized RGB image and LiDAR scan into 3D pedestrian boxes.

    Pipeline:
      1. Convert the ROS image and point cloud to OpenCV / numpy form.
      2. Undistort the image for non-front cameras.
      3. Run YOLO person detection (class 0) on the image.
      4. Project LiDAR points into the image and gather the points that
         fall inside each 2D detection.
      5. Filter/cluster those points, fit an oriented 3D box, transform it
         into the vehicle frame, and publish all boxes on ``self.pub_yolo``.

    Args:
        image_msg: sensor_msgs/Image from the synchronized RGB stream.
        lidar_msg: sensor_msgs/PointCloud2 from the synchronized LiDAR.
    """
    rospy.loginfo("Received synchronized RGB and LiDAR messages")

    # Convert image message to OpenCV BGR format.
    try:
        self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
    except Exception as e:
        rospy.logerr(f"Failed to convert image: {e}")
        self.latest_image = None

    # Convert LiDAR message to a numpy array (XYZ only).
    self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)

    # Skip this frame unless both sensors produced usable data.
    if self.latest_image is None or self.latest_lidar is None:
        return  # callback return value is unused by ROS

    latest_image = self.latest_image.copy()

    # Optionally downsample LiDAR points (disabled by default).
    downsample = False
    if downsample:
        lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
    else:
        lidar_down = self.latest_lidar.copy()

    if not self.camera_front:
        # Non-front cameras need undistortion before projection.
        # BUG FIX: original referenced `lastest_image` (typo), raising a
        # NameError on every non-front-camera frame.
        undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D)
        self.current_K = current_K
        orig_H, orig_W = undistorted_img.shape[:2]
        img_normal = undistorted_img
    else:
        img_normal = latest_image.copy()
        undistorted_img = latest_image.copy()
        orig_H, orig_W = latest_image.shape[:2]
        self.current_K = self.K

    # Run YOLO person detection (class 0). Using self.score_threshold keeps
    # the detector cutoff consistent with the re-filtering below.
    results_normal = self.detector(img_normal, conf=self.score_threshold, classes=[0])
    # Extract boxes and confidences once (the original computed boxes twice).
    boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
    conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else []

    # Project LiDAR points into the image plane:
    #   projected_pts[:, 0]   u (horizontal pixel position)
    #   projected_pts[:, 1]   v (vertical pixel position)
    #   projected_pts[:, 2:5] original X, Y, Z in the LiDAR frame
    pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
    projected_pts = project_points(pts_cam, self.current_K, lidar_down)

    # Accumulate detected boxes for a single publish at the end.
    boxes = BoundingBoxArray()
    boxes.header.frame_id = 'currentVehicleFrame'
    boxes.header.stamp = lidar_msg.header.stamp

    for i, box in enumerate(boxes_normal):
        # Skip low-confidence detections.
        if conf_scores[i] < self.score_threshold:
            continue

        # 2D bounding box extent in pixel coordinates.
        cx, cy, w, h = box
        left = int(cx - w / 2)
        right = int(cx + w / 2)
        top = int(cy - h / 2)
        bottom = int(cy + h / 2)

        # LiDAR points whose projection falls inside this 2D box.
        mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) &
                (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom))
        roi_pts = projected_pts[mask]

        # Too few points cannot form a reliable cluster.
        if roi_pts.shape[0] < 5:
            continue

        # Recover the 3D (LiDAR-frame) points and refine the cluster.
        points_3d = roi_pts[:, 2:5]
        points_3d = filter_depth_points(points_3d, max_depth_diff=0.8)
        refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
        refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1)

        if refined_cluster.shape[0] < 5:
            continue

        # Fit an oriented bounding box to the refined cluster.
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(refined_cluster)
        obb = pcd.get_oriented_bounding_box()
        refined_center = obb.center
        # Assumed extent ordering: dims[0]=height, dims[2]=length — TODO confirm.
        dims = tuple(obb.extent)
        R_lidar = obb.R.copy()

        # Transform the box center from the LiDAR frame to the vehicle frame.
        refined_center_hom = np.append(refined_center, 1)
        refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
        refined_center_vehicle = refined_center_vehicle_hom[:3]

        # Yaw of the box in the vehicle frame.
        R_vehicle = self.T_l2v[:3, :3] @ R_lidar
        yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0])

        boxes = add_bounding_box(
            boxes=boxes,
            frame_id='currentVehicleFrame',
            stamp=lidar_msg.header.stamp,
            x=refined_center_vehicle[0],
            y=refined_center_vehicle[1],
            z=refined_center_vehicle[2],
            l=dims[2],  # length
            w=dims[1],  # width
            h=dims[0],  # height
            yaw=yaw,
            conf_score=float(conf_scores[i]),
            label=0  # person/pedestrian class
        )

        rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, "
                      f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
                      f"with score {conf_scores[i]:.2f}")

    # Publish all boxes gathered for this frame.
    rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
    self.pub_yolo.publish(boxes)
def undistort_image(self, image, K, D):
    """Undistort `image` using intrinsics K and distortion coefficients D.

    Returns:
        (undistorted_image, newK) where newK is the optimal new camera
        matrix corresponding to the undistorted image.

    The rectification maps and newK depend only on K, D, and the image
    size, so they are computed once and cached on the instance. The
    original recomputed cv2.getOptimalNewCameraMatrix on every frame,
    wasting work in the per-frame callback path.
    """
    h, w = image.shape[:2]

    # Initialize undistortion maps (and the matching newK) only once.
    if self.undistort_map1 is None or self.undistort_map2 is None:
        # alpha=1 retains all source pixels in the undistorted view.
        newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
        self.undistort_newK = newK
        self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(
            K, D, R=None, newCameraMatrix=newK, size=(w, h),
            m1type=cv2.CV_32FC1)

    # INTER_NEAREST trades a little quality for speed in the hot path.
    undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2,
                            interpolation=cv2.INTER_NEAREST)
    return undistorted, self.undistort_newK
279506
def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
280507
"""Callback for synchronized YOLO and PointPillars messages."""
281508
self.latest_yolo_bbxs = yolo_bbxs_msg

GEMstack/onboard/perception/cone_detection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from ..interface.gem import GEMInterface
44
from ..component import Component
55
from .perception_utils import *
6-
from .perception_utils_gem import *
76
from ultralytics import YOLO
87
import cv2
98
from typing import Dict

GEMstack/onboard/perception/pedestrian_detection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from ..interface.gem import GEMInterface
33
from ..component import Component
44
from .perception_utils import * # If you want to alias functions for clarity, do so in perception_utils
5-
from .perception_utils_gem import *
65
from ultralytics import YOLO
76
from typing import Dict
87
import open3d as o3d

GEMstack/onboard/perception/perception_utils.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,44 @@
77
import sensor_msgs.point_cloud2 as pc2
88
import ros_numpy
99
import math
10-
10+
from ...state import ObjectPose, AgentState
11+
from typing import Dict
12+
import numpy as np
1113

1214
# ----- Helper Functions -----
1315

16+
17+
def match_existing_cone(
    new_center: np.ndarray,
    new_dims: tuple,
    existing_agents: Dict[str, AgentState],
    distance_threshold: float = 1.0
) -> str:
    """Return the id of the closest existing Cone agent within
    `distance_threshold` of `new_center`, or None if no agent is that
    close. `new_dims` is accepted for interface symmetry but does not
    affect the matching decision.
    """
    closest_id = None
    # Seeding the running minimum with the threshold folds the two
    # comparisons (within threshold AND closer than best) into one.
    closest_dist = distance_threshold
    for cone_id, cone in existing_agents.items():
        center = np.array([cone.pose.x, cone.pose.y, cone.pose.z])
        separation = np.linalg.norm(new_center - center)
        if separation < closest_dist:
            closest_dist = separation
            closest_id = cone_id
    return closest_id
36+
def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple:
    """Return the (vx, vy, vz) average velocity implied by moving from
    `old_pose` to `new_pose` over `dt` seconds.

    A non-positive `dt` yields (0, 0, 0) rather than dividing by zero.
    """
    if dt <= 0:
        return (0, 0, 0)
    deltas = (
        new_pose.x - old_pose.x,
        new_pose.y - old_pose.y,
        new_pose.z - old_pose.z,
    )
    return tuple(delta / dt for delta in deltas)
1448
def cylindrical_roi(points, center, radius, height):
1549
horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1)
1650
vertical_diff = np.abs(points[:, 2] - center[2])

0 commit comments

Comments
 (0)