Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
video/*
!video/counting_video.mp4
__pycache__
56 changes: 40 additions & 16 deletions create_detector_submit_images.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,48 @@
from groundlight import Groundlight, Detector, ImageQuery
from groundlight import ExperimentalApi, Detector, ImageQuery
from fetch_frames import FRAMES_DIR
from glob import glob
import cv2
import argparse

SAMPLE_INTERVAL = 10 # sample every 10th frame

def create_detector_submit_images(detector_name, detector_query, class_name):
    """Get or create a counting detector and submit sampled frames to it.

    Reads extracted JPEG frames from FRAMES_DIR, resizes each to 640x480,
    and submits every SAMPLE_INTERVAL-th frame from the first half of the
    video to the detector, printing the returned count and confidence.

    Args:
        detector_name: Name of the Groundlight detector to look up or create.
        detector_query: Natural-language query used when creating the detector.
        class_name: Name of the object class the detector should count.
    """
    gl = ExperimentalApi()

    # TODO: update to use get_or_create_detector once mode can be specified
    try:
        detector: Detector = gl.get_detector_by_name(detector_name)
    except Exception:
        # Lookup raised (detector presumably does not exist yet) — create a
        # counting-mode detector instead. Narrowed from a bare `except:` so
        # KeyboardInterrupt/SystemExit are not swallowed.
        detector = gl.create_counting_detector(
            name=detector_name,
            query=detector_query,
            class_name=class_name,
        )

    frame_files = sorted(glob(f"{FRAMES_DIR}/*.jpg"))

    # submit every SAMPLE_INTERVAL frame to the detector in the first half of the video
    for frame_file in frame_files[:len(frame_files) // 2:SAMPLE_INTERVAL]:
        # load frame; cv2.imread returns None for unreadable files, so skip
        # those instead of crashing inside cv2.resize
        frame = cv2.imread(frame_file)
        if frame is None:
            continue
        # resize frame to 640x480 for faster processing
        frame = cv2.resize(frame, (640, 480))
        iq: ImageQuery = gl.submit_image_query(detector=detector, image=frame)
        print("count: ", iq.result.count)
        print("confidence: ", iq.result.confidence)

if __name__ == "__main__":
    # CLI entry point: collect detector settings, then run the pipeline.
    arg_parser = argparse.ArgumentParser(
        description="Create detector and submit images for analysis",
    )
    arg_parser.add_argument(
        "--name",
        "-n",
        default="employees_with_groundlight_tshirt",
        help="Name of the detector (default: employees_with_groundlight_tshirt)",
    )
    arg_parser.add_argument(
        "--query",
        "-q",
        default="Label each employee with Groundlight T-shirt in the image",
        help="Query for the detector (default: Label each employee with Groundlight T-shirt in the image)",
    )
    arg_parser.add_argument(
        "--class-name",
        "-c",
        default="employee",
        help="Name of the class to count (default: employee)",
    )

    cli_args = arg_parser.parse_args()
    create_detector_submit_images(cli_args.name, cli_args.query, cli_args.class_name)
113 changes: 67 additions & 46 deletions enter_exit_counts.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,76 @@
import cv2
import argparse

from tracker import IOUTracker
from fetch_frames import VIDEO_PATH, FRAMES_DIR
from glob import glob
from groundlight import Groundlight, Detector

def process_frames(detector_name, output_video):
    """Detect, track, and count people crossing a line; render an annotated video.

    Submits every 3rd frame from FRAMES_DIR to the named Groundlight detector,
    tracks the returned ROIs across frames with an IOU tracker, counts objects
    crossing a virtual line, and writes an annotated 10-fps MP4 under VIDEO_PATH.

    Args:
        detector_name: Name of an existing Groundlight detector.
        output_video: File name of the rendered video (written under VIDEO_PATH).
    """
    # get the detector
    gl = Groundlight()
    detector: Detector = gl.get_detector_by_name(
        name=detector_name,
    )
    # load frames and submit to detector to get rois
    frame_files = sorted(glob(f"{FRAMES_DIR}/*.jpg"))[::3]  # every 3rd frame to reduce load
    frames_rois = []
    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        # cv2.imread returns None for unreadable files; skip instead of crashing
        if frame is None:
            continue
        # Resize frame to 640x480 for faster processing
        frame = cv2.resize(frame, (640, 480))
        # Submit image to detector (ask_ml: ML-only answer, no human escalation)
        iq = gl.ask_ml(detector=detector, image=frame)
        frames_rois.append(iq.rois)

    # initialize the tracker
    tracker = IOUTracker()
    # specify a virtual line in the video in normalized coordinates
    line_start = (0.54, 0.99)
    line_end = (0.35, 0.70)

    # write a video with the detections
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_out = cv2.VideoWriter(f"{VIDEO_PATH}/{output_video}", fourcc, 10, (640, 480))
    for frame_file, frame_rois in zip(frame_files, frames_rois):
        detections = [(roi.geometry.left, roi.geometry.top, roi.geometry.right, roi.geometry.bottom) for roi in frame_rois]
        tracker.update(detections)
        tracker.count_crossing(line_start, line_end)

        frame = cv2.imread(frame_file)
        frame = cv2.resize(frame, (640, 480))
        h, w = frame.shape[:2]
        for roi in frame_rois:
            # ROI geometry appears to be normalized [0, 1]; scale to pixels
            left, top, right, bottom = roi.geometry.left, roi.geometry.top, roi.geometry.right, roi.geometry.bottom
            left = int(left * w)
            right = int(right * w)
            top = int(top * h)
            bottom = int(bottom * h)
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            cv2.putText(frame, f"{roi.score:.2f}", (left, top+15), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 128), 2)
        # draw virtual line
        cv2.line(frame, (int(line_start[0]*w), int(line_start[1]*h)), (int(line_end[0]*w), int(line_end[1]*h)), (0, 0, 255), 2)
        # draw counts
        cv2.putText(frame, f"Total: {len(frame_rois)}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 128), 2)
        cv2.putText(frame, f"Entered: {tracker.enter_count}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 128), 2)
        cv2.putText(frame, f"Exited: {tracker.exit_count}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        video_out.write(frame)
    video_out.release()
if __name__ == "__main__":
    # CLI entry point: choose the detector and output file, then process frames.
    parser = argparse.ArgumentParser(description="Process frames with Groundlight detector and create output video")
    parser.add_argument(
        "--name", "-n",
        type=str,
        default="employees_with_groundlight_tshirt",
        help="Name of the Groundlight detector to use (default: employees_with_groundlight_tshirt)",
    )
    parser.add_argument(
        "--output-video", "-o",
        type=str,
        default="output.mp4",
        help="Output video name (default: output.mp4)",
    )

    args = parser.parse_args()
    process_frames(args.name, args.output_video)
33 changes: 22 additions & 11 deletions fetch_frames.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
import os
import cv2
import argparse

VIDEO_PATH = "./video/"
FRAMES_DIR = f"{VIDEO_PATH}/frames"

def fetch_frames(video_name):
    """Extract every frame of a video under VIDEO_PATH into FRAMES_DIR as JPEGs.

    Frames are written as zero-padded files (0000.jpg, 0001.jpg, ...) so a
    lexicographic sort preserves temporal order.

    Args:
        video_name: File name of the video inside VIDEO_PATH.
    """
    # create directory to save frames
    os.makedirs(FRAMES_DIR, exist_ok=True)
    # open mp4 file and read frames
    cap = cv2.VideoCapture(f"{VIDEO_PATH}/{video_name}")

    frame_num = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no more frames (or read error): stop extracting
            break
        cv2.imwrite(f"{FRAMES_DIR}/{frame_num:04d}.jpg", frame)
        frame_num += 1
    # release the capture handle so the video file is closed promptly
    cap.release()

if __name__ == "__main__":
    # CLI entry point: pick the video file, then extract its frames.
    cli = argparse.ArgumentParser(description="Extract frames from a video file")
    cli.add_argument(
        "--video",
        "-v",
        default="counting_video.mp4",
        help="Name of the video file to get frames from (default: our sample video, counting_video.mp4)",
    )

    cli_args = cli.parse_args()
    fetch_frames(cli_args.video)
Loading