Skip to content

Commit 517e491

Browse files
authored
Merge pull request #1181 from Shared-Reality-Lab/update-multistage
Update multistage-diagram-segmentation preprocessor
2 parents a9c3562 + 514e0ab commit 517e491

2 files changed

Lines changed: 21 additions & 2 deletions

File tree

utils/llm/prompts.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@
138138
Output only a JSON list of bounding boxes where each entry contains
139139
the 2D bounding box in the key "box_2d",
140140
and the stage name in the key "label".
141+
Include in the bounding boxes only the illustrations of the objects themselves,
142+
not any surrounding text or arrows.
141143
142144
"""
143145

utils/segmentation/sam_processor.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,25 @@ def segment_with_boxes(
136136
)
137137
continue
138138

139-
logging.pii(f"Processing bounding box for label: '{label}'")
140-
bboxes.append(bbox)
139+
logging.pii(
140+
f"Processing bounding box for label: '{label}' "
141+
f"(normalized coords: {bbox})"
142+
)
143+
144+
# Convert normalized coordinates (0-1000) received from Qwen 3
145+
# to pixel coordinates
146+
bbox_pixels = [
147+
(bbox[0] / 1000.0) * width,
148+
(bbox[1] / 1000.0) * height,
149+
(bbox[2] / 1000.0) * width,
150+
(bbox[3] / 1000.0) * height
151+
]
152+
153+
logging.pii(
154+
f"Converted to pixel coords: {bbox_pixels}"
155+
)
156+
157+
bboxes.append(bbox_pixels)
141158
labels.append(label)
142159

143160
if not bboxes:

0 commit comments

Comments
 (0)