From 9f3ccee8cfe9c71860e83d82d4eaa73805ec6601 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 07:48:07 -0400 Subject: [PATCH 01/23] Add frame metadata text parser --- .../dive_utils/serializers/frame_metadata.py | 166 ++++++++++++++++++ server/dive_utils/serializers/viame.py | 35 ++++ server/tests/test_frame_metadata.py | 135 ++++++++++++++ 3 files changed, 336 insertions(+) create mode 100644 server/dive_utils/serializers/frame_metadata.py create mode 100644 server/tests/test_frame_metadata.py diff --git a/server/dive_utils/serializers/frame_metadata.py b/server/dive_utils/serializers/frame_metadata.py new file mode 100644 index 000000000..db8dc90d3 --- /dev/null +++ b/server/dive_utils/serializers/frame_metadata.py @@ -0,0 +1,166 @@ +import csv +from dataclasses import dataclass +import io +import os +import re +from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Tuple + +from dive_utils import constants +from dive_utils.serializers import viame + + +@dataclass(frozen=True) +class ParsedFrameMetadata: + source_name: Optional[str] + header: List[str] + rows: List[Dict[str, str]] + join_columns: List[str] + payload_columns: List[str] + records: Dict[str, Dict[str, str]] + + +def normalize_key(value: str) -> str: + """Normalize a media filename the same way valid_image_names_dict keys images.""" + basename = os.path.basename(str(value).strip()) + stem, ext = os.path.splitext(basename) + if ext.lower().lstrip('.') in constants.allValidLargeImageFormats: + return stem + return basename + + +def parse_table(text: str) -> Tuple[List[str], List[Dict[str, str]]]: + raw_rows = _read_rows(text) + if not raw_rows: + return [], [] + + header = [cell.strip() for cell in raw_rows[0]] + if not all(header): + return [], [] + + rows: List[Dict[str, str]] = [] + for raw_row in raw_rows[1:]: + values = [cell.strip() for cell in raw_row] + if not any(values): + continue + values = values[: len(header)] + [''] * max(0, len(header) - len(values)) + 
rows.append(dict(zip(header, values))) + return header, rows + + +def find_join_columns( + header: Sequence[str], + rows: Iterable[Mapping[str, str]], + media_keys: Mapping[str, int], +) -> List[str]: + normalized_media_keys = _normalized_media_keys(media_keys) + materialized_rows = list(rows) + return [ + column + for column in header + if any( + row.get(column) and normalize_key(row[column]) in normalized_media_keys + for row in materialized_rows + ) + ] + + +def is_frame_metadata(text: str, media_keys: Mapping[str, int]) -> bool: + return parse_frame_metadata_source(text, media_keys) is not None + + +def parse_frame_metadata_source( + text: str, + media_keys: Mapping[str, int], + source_name: Optional[str] = None, +) -> Optional[ParsedFrameMetadata]: + if viame.is_viame_csv(text.splitlines(), dict(media_keys)): + return None + + header, rows = parse_table(text) + if not header or not rows: + return None + + join_columns = find_join_columns(header, rows, media_keys) + if not join_columns: + return None + + payload_columns = [column for column in header if column not in join_columns] + if not payload_columns: + return None + + records: Dict[str, Dict[str, str]] = {} + normalized_media_keys = _normalized_media_keys(media_keys) + for row in rows: + for column in join_columns: + key = normalize_key(row.get(column, '')) + if key in normalized_media_keys: + records[key] = {field: row.get(field, '') for field in header} + + if not records: + return None + + return ParsedFrameMetadata( + source_name=source_name, + header=list(header), + rows=rows, + join_columns=join_columns, + payload_columns=payload_columns, + records=records, + ) + + +def select_frame_metadata_source( + candidates: Iterable[Tuple[str, str]], + media_keys: Mapping[str, int], +) -> Optional[ParsedFrameMetadata]: + matches: List[ParsedFrameMetadata] = [] + for source_name, text in candidates: + if not _is_text_candidate(source_name): + continue + source = parse_frame_metadata_source(text, media_keys, 
source_name=source_name) + if source is not None: + matches.append(source) + + if len(matches) != 1: + return None + return matches[0] + + +def _read_rows(text: str) -> List[List[str]]: + first_line = _first_nonempty_line(text) + if first_line is None: + return [] + + delimiter = _sniff_delimiter(first_line) + if delimiter is None: + return [re.split(r'\s+', line.strip()) for line in text.splitlines() if line.strip()] + + reader = csv.reader(io.StringIO(text), delimiter=delimiter) + return [ + [cell.strip() for cell in row] + for row in reader + if row and any(cell.strip() for cell in row) + ] + + +def _first_nonempty_line(text: str) -> Optional[str]: + for line in text.splitlines(): + if line.strip(): + return line.strip() + return None + + +def _sniff_delimiter(line: str) -> Optional[str]: + if ',' in line: + return ',' + if '\t' in line: + return '\t' + return None + + +def _normalized_media_keys(media_keys: Mapping[str, int]) -> set: + return {normalize_key(key) for key in media_keys} + + +def _is_text_candidate(source_name: str) -> bool: + return os.path.splitext(source_name.lower())[1] in {'.txt', '.csv'} diff --git a/server/dive_utils/serializers/viame.py b/server/dive_utils/serializers/viame.py index ff8e34540..9a55e025b 100644 --- a/server/dive_utils/serializers/viame.py +++ b/server/dive_utils/serializers/viame.py @@ -63,6 +63,41 @@ def row_info(row: List[str]) -> Tuple[int, str, int, List[int], float]: return trackId, filename, frame, bounds, fish_length +def _is_viame_data_row(row: List[str]) -> bool: + if len(row) < 9: + return False + try: + row_info(row) + except (TypeError, ValueError): + return False + return True + + +def is_viame_csv(rows: List[str], imageMap: Optional[Dict[str, int]] = None) -> bool: + """Return true when rows look like a VIAME annotation CSV.""" + reader = csv.reader(row for row in rows) + has_header = False + has_data_row = False + has_matching_image = False + + for row in reader: + if not row: + continue + if 
row[0].startswith('#'): + has_header = has_header or row[0].startswith('# 1: Detection or Track-id') + continue + if not _is_viame_data_row(row): + continue + has_data_row = True + if imageMap: + imageName, _ = os.path.splitext(os.path.basename(row[1])) + has_matching_image = has_matching_image or imageName in imageMap + + if has_header and has_data_row: + return True + return has_data_row and has_matching_image + + def _resolve_detection_length( attributes: Optional[Dict[str, Any]], fish_length_from_column: float, diff --git a/server/tests/test_frame_metadata.py b/server/tests/test_frame_metadata.py new file mode 100644 index 000000000..bdcca567d --- /dev/null +++ b/server/tests/test_frame_metadata.py @@ -0,0 +1,135 @@ +from dive_utils.serializers.frame_metadata import ( + find_join_columns, + normalize_key, + parse_frame_metadata_source, + select_frame_metadata_source, +) + + +def test_normalize_key_matches_image_name_map_keys(): + assert normalize_key("nested/20191009.154056.00082_rect_color.tif") == ( + "20191009.154056.00082_rect_color" + ) + + +def test_parse_noaa_style_rows_with_multiple_image_columns(): + media_keys = { + "20191009.154056.00082_rect_color": 0, + "20191009.154056.00081_rect_color": 0, + } + text = """port_image date time latitude longitude water_depth altitude starboard_image +20191009.154056.00082_rect_color.tif 2019/10/09 15:40:56.1122 46.575870 -124.603094 192.80 2.78 20191009.154056.00081_rect_color.tif +""" + + source = parse_frame_metadata_source(text, media_keys, source_name="nav.txt") + + assert source is not None + assert source.source_name == "nav.txt" + assert source.header == [ + "port_image", + "date", + "time", + "latitude", + "longitude", + "water_depth", + "altitude", + "starboard_image", + ] + assert source.join_columns == ["port_image", "starboard_image"] + assert source.payload_columns == [ + "date", + "time", + "latitude", + "longitude", + "water_depth", + "altitude", + ] + assert set(source.records) == { + 
"20191009.154056.00082_rect_color", + "20191009.154056.00081_rect_color", + } + port_record = source.records["20191009.154056.00082_rect_color"] + assert list(port_record) == source.header + assert port_record["latitude"] == "46.575870" + assert all(isinstance(value, str) for value in port_record.values()) + + +def test_parse_comma_tab_and_whitespace_delimited_sources(): + media_keys = {"image_0001": 0} + + for text in ( + "filename,depth,latitude\nimage_0001.jpg,192.80,46.575870\n", + "filename\tdepth\tlatitude\nimage_0001.jpg\t192.80\t46.575870\n", + "filename depth latitude\nimage_0001.jpg 192.80 46.575870\n", + ): + source = parse_frame_metadata_source(text, media_keys) + + assert source is not None + assert source.header == ["filename", "depth", "latitude"] + assert source.join_columns == ["filename"] + assert source.records["image_0001"] == { + "filename": "image_0001.jpg", + "depth": "192.80", + "latitude": "46.575870", + } + + +def test_find_join_columns_matches_by_filename_value(): + rows = [ + { + "port_image": "20191009.154056.00082_rect_color.tif", + "latitude": "46.575870", + "starboard_image": "20191009.154056.00081_rect_color.tif", + } + ] + + assert find_join_columns( + ["port_image", "latitude", "starboard_image"], + rows, + { + "20191009.154056.00082_rect_color": 0, + "20191009.154056.00081_rect_color": 0, + }, + ) == ["port_image", "starboard_image"] + + +def test_rejects_viame_annotation_csv_even_when_image_column_matches(): + media_keys = {"20191009.154056.00082_rect_color": 0} + viame_csv = """# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes +1,20191009.154056.00082_rect_color.tif,0,0,0,10,10,1.0,-1,fish,0.9 +""" + + assert parse_frame_metadata_source(viame_csv, media_keys) is None + + +def test_rejects_bare_image_list_and_unrelated_text(): + 
media_keys = {"image_0001": 0} + + assert parse_frame_metadata_source("image\nimage_0001.jpg\n", media_keys) is None + assert parse_frame_metadata_source("note,value\nhello,world\n", media_keys) is None + + +def test_select_source_rejects_ambiguous_candidates_and_non_text_extensions(): + media_keys = {"image_0001": 0} + accepted_text = "filename,depth\nimage_0001.jpg,192.80\n" + + assert ( + select_frame_metadata_source( + [ + ("metadata.json", accepted_text), + ("telemetry-a.txt", accepted_text), + ], + media_keys, + ).source_name + == "telemetry-a.txt" + ) + assert ( + select_frame_metadata_source( + [ + ("telemetry-a.txt", accepted_text), + ("telemetry-b.csv", "filename,temperature\nimage_0001.jpg,4.2\n"), + ], + media_keys, + ) + is None + ) From 5edb4fbef77666067004824c58c6a42fa2c2a550 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 08:14:56 -0400 Subject: [PATCH 02/23] Gate VIAME detection on comment header to spare lookalike telemetry The headerless image-match fallback in is_viame_csv could misclassify a VIAME-shaped telemetry table (filename in column 1, leading integer columns) as an annotation CSV and reject it. DIVE's VIAME exports always carry the '# 1: Detection or Track-id' comment header, so key detection on that header and drop the now-dead imageMap fallback. 
--- .../dive_utils/serializers/frame_metadata.py | 2 +- server/dive_utils/serializers/viame.py | 23 +++++++++---------- server/tests/test_frame_metadata.py | 16 +++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/server/dive_utils/serializers/frame_metadata.py b/server/dive_utils/serializers/frame_metadata.py index db8dc90d3..391689fe9 100644 --- a/server/dive_utils/serializers/frame_metadata.py +++ b/server/dive_utils/serializers/frame_metadata.py @@ -73,7 +73,7 @@ def parse_frame_metadata_source( media_keys: Mapping[str, int], source_name: Optional[str] = None, ) -> Optional[ParsedFrameMetadata]: - if viame.is_viame_csv(text.splitlines(), dict(media_keys)): + if viame.is_viame_csv(text.splitlines()): return None header, rows = parse_table(text) diff --git a/server/dive_utils/serializers/viame.py b/server/dive_utils/serializers/viame.py index 9a55e025b..a52494741 100644 --- a/server/dive_utils/serializers/viame.py +++ b/server/dive_utils/serializers/viame.py @@ -73,12 +73,17 @@ def _is_viame_data_row(row: List[str]) -> bool: return True -def is_viame_csv(rows: List[str], imageMap: Optional[Dict[str, int]] = None) -> bool: - """Return true when rows look like a VIAME annotation CSV.""" +def is_viame_csv(rows: List[str]) -> bool: + """Return true when rows look like a VIAME annotation CSV. + + DIVE's VIAME exports always carry the ``# 1: Detection or Track-id`` comment + header, so detection keys on that header plus at least one VIAME-shaped data + row. A telemetry file has a plain field-name header and no ``#`` comment + header, so it passes even when one of its columns matches the media names. 
+ """ reader = csv.reader(row for row in rows) has_header = False has_data_row = False - has_matching_image = False for row in reader: if not row: @@ -86,16 +91,10 @@ def is_viame_csv(rows: List[str], imageMap: Optional[Dict[str, int]] = None) -> if row[0].startswith('#'): has_header = has_header or row[0].startswith('# 1: Detection or Track-id') continue - if not _is_viame_data_row(row): - continue - has_data_row = True - if imageMap: - imageName, _ = os.path.splitext(os.path.basename(row[1])) - has_matching_image = has_matching_image or imageName in imageMap + if _is_viame_data_row(row): + has_data_row = True - if has_header and has_data_row: - return True - return has_data_row and has_matching_image + return has_header and has_data_row def _resolve_detection_length( diff --git a/server/tests/test_frame_metadata.py b/server/tests/test_frame_metadata.py index bdcca567d..b43fa9f7b 100644 --- a/server/tests/test_frame_metadata.py +++ b/server/tests/test_frame_metadata.py @@ -102,6 +102,22 @@ def test_rejects_viame_annotation_csv_even_when_image_column_matches(): assert parse_frame_metadata_source(viame_csv, media_keys) is None +def test_accepts_viame_shaped_telemetry_without_viame_header(): + """Telemetry whose rows coincidentally match VIAME's numeric shape but lacks the + ``# 1: Detection or Track-id`` comment header is still accepted as telemetry.""" + media_keys = {"image_0001": 0} + text = ( + "index,image,frame,x,y,depth,altitude,heading,temperature\n" + "1,image_0001.jpg,100,46.5,-124.6,192.8,2.7,180.5,4.2\n" + ) + + source = parse_frame_metadata_source(text, media_keys) + + assert source is not None + assert source.join_columns == ["image"] + assert source.records["image_0001"]["depth"] == "192.8" + + def test_rejects_bare_image_list_and_unrelated_text(): media_keys = {"image_0001": 0} From f9c50c8ee5c93a2b44947aadb63982e0b4b755a9 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 08:20:35 -0400 Subject: [PATCH 03/23] Add single-camera frame 
metadata loader --- server/dive_server/crud_dataset.py | 53 +++++++- server/tests/test_frame_metadata_crud.py | 157 +++++++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 server/tests/test_frame_metadata_crud.py diff --git a/server/dive_server/crud_dataset.py b/server/dive_server/crud_dataset.py index c0f7dc24f..db15fd843 100644 --- a/server/dive_server/crud_dataset.py +++ b/server/dive_server/crud_dataset.py @@ -17,7 +17,7 @@ from dive_server import crud, crud_annotation from dive_tasks import tasks from dive_utils import TRUTHY_META_VALUES, asbool, calibration_format, constants, fromMeta, models, types -from dive_utils.serializers import kwcoco +from dive_utils.serializers import frame_metadata, kwcoco def get_url(dataset: types.GirderModel, item: types.GirderModel) -> str: @@ -378,6 +378,57 @@ def get_media( ) +def load_frame_metadata( + dsFolder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int = 0, + endFrame: Optional[int] = None, +) -> dict: + crud.verify_dataset(dsFolder) + if fromMeta(dsFolder, constants.TypeMarker) != constants.ImageSequenceType: + return {'cameras': {}} + + images = crud.valid_images(dsFolder, user) + media_keys = crud.valid_image_names_dict(images) + media_root = crud.getCloneRoot(user, dsFolder) + source = frame_metadata.select_frame_metadata_source( + _frame_metadata_candidate_texts(media_root), + media_keys, + ) + if source is None: + return {'cameras': {}} + + if endFrame is None: + endFrame = len(images) - 1 + + records = {} + for media_key, frame_number in media_keys.items(): + if startFrame <= frame_number <= endFrame and media_key in source.records: + records[str(frame_number)] = source.records[media_key] + + return {'cameras': {'singleCam': records}} + + +def _frame_metadata_candidate_texts(folder: types.GirderModel) -> Iterable[Tuple[str, str]]: + for item in Folder().childItems(folder): + if _is_frame_metadata_source_item(item): + yield item['name'], 
_download_item_text(item) + + +def _is_frame_metadata_source_item(item: types.GirderModel) -> bool: + return Path(item['name'].lower()).suffix in {'.txt', '.csv'} + + +def _download_item_text(item: types.GirderModel) -> str: + file = next(iter(Item().childFiles(item)), None) + if file is None: + return '' + chunks = File().download(file, headers=False)() + return b''.join( + chunk if isinstance(chunk, bytes) else str(chunk).encode('utf-8') for chunk in chunks + ).decode('utf-8') + + class MetadataMutableUpdateArgs(models.MetadataMutable): """Update schema for mutable metadata fields""" diff --git a/server/tests/test_frame_metadata_crud.py b/server/tests/test_frame_metadata_crud.py new file mode 100644 index 000000000..09ed71d77 --- /dev/null +++ b/server/tests/test_frame_metadata_crud.py @@ -0,0 +1,157 @@ +from unittest.mock import patch + +from dive_server import crud_dataset +from dive_utils import constants + + +def _dataset_folder(): + return { + '_id': 'dataset-id', + 'name': 'single-camera', + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + }, + } + + +def _image_item(name: str): + return { + '_id': f'{name}-id', + 'name': name, + } + + +def _source_item(name: str): + return { + '_id': f'{name}-id', + 'name': name, + } + + +def _wire_item_downloads(item_model, file_model, texts_by_name): + def child_files(item): + if item['name'] not in texts_by_name: + raise AssertionError(f'unexpected download for {item["name"]}') + return iter( + [ + { + '_id': f'{item["_id"]}-file', + 'itemId': item['_id'], + 'name': item['name'], + } + ] + ) + + def download(file, headers=False): + return lambda: [texts_by_name[file['name']].encode('utf-8')] + + item_model.childFiles.side_effect = child_files + file_model.download.side_effect = download + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') 
+@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_reads_co_located_source_and_applies_window( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + dataset = _dataset_folder() + source_root = {'_id': 'source-root-id', 'name': 'source-root', 'meta': dataset['meta']} + user = {'_id': 'user-id'} + valid_images.return_value = [ + _image_item('image_0001.jpg'), + _image_item('image_0002.jpg'), + _image_item('image_0003.jpg'), + ] + get_clone_root.return_value = source_root + folder_model = folder_cls.return_value + folder_model.childItems.return_value = [ + _source_item('image_0001.jpg'), + _source_item('frame_metadata.json'), + _source_item('navigation.txt'), + ] + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + "filename,depth,temperature\n" + "image_0001.jpg,192.80,4.0\n" + "image_0002.jpg,193.10,4.1\n" + "image_0003.jpg,193.40,4.2\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(dataset, user, startFrame=1, endFrame=2) + + assert result == { + 'cameras': { + 'singleCam': { + '1': { + 'filename': 'image_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + '2': { + 'filename': 'image_0003.jpg', + 'depth': '193.40', + 'temperature': '4.2', + }, + }, + }, + } + folder_model.childItems.assert_called_once_with(source_root) + item_model.childFiles.assert_called_once_with(_source_item('navigation.txt')) + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_returns_empty_cameras_without_text_source( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + dataset = _dataset_folder() + 
source_root = {'_id': 'source-root-id', 'name': 'source-root', 'meta': dataset['meta']} + user = {'_id': 'user-id'} + valid_images.return_value = [_image_item('image_0001.jpg')] + get_clone_root.return_value = source_root + folder_model = folder_cls.return_value + folder_model.childItems.return_value = [ + _source_item('frame_metadata.json'), + _source_item('notes.txt'), + ] + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'notes.txt': "note,value\nhello,world\n", + }, + ) + + result = crud_dataset.load_frame_metadata(dataset, user, startFrame=0, endFrame=0) + + assert result == {'cameras': {}} + item_model.childFiles.assert_called_once_with(_source_item('notes.txt')) + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() From 55678a557c8ee04f8951de13992f2b130fa1df9c Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 10:32:20 -0400 Subject: [PATCH 04/23] Add multicam frame metadata routing --- server/dive_server/crud_dataset.py | 95 +++++++- server/tests/test_frame_metadata_crud.py | 266 +++++++++++++++++++++++ 2 files changed, 360 insertions(+), 1 deletion(-) diff --git a/server/dive_server/crud_dataset.py b/server/dive_server/crud_dataset.py index db15fd843..f340a361e 100644 --- a/server/dive_server/crud_dataset.py +++ b/server/dive_server/crud_dataset.py @@ -385,7 +385,10 @@ def load_frame_metadata( endFrame: Optional[int] = None, ) -> dict: crud.verify_dataset(dsFolder) - if fromMeta(dsFolder, constants.TypeMarker) != constants.ImageSequenceType: + source_type = fromMeta(dsFolder, constants.TypeMarker) + if source_type == constants.MultiType: + return _load_multicam_frame_metadata(dsFolder, user, startFrame, endFrame) + if source_type != constants.ImageSequenceType: return {'cameras': {}} images = crud.valid_images(dsFolder, user) @@ -409,6 +412,96 @@ def load_frame_metadata( return {'cameras': 
{'singleCam': records}} +def _load_multicam_frame_metadata( + dsFolder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int, + endFrame: Optional[int], +) -> dict: + multi_cam = fromMeta(dsFolder, constants.MultiCamMarker) or {} + root = crud.getCloneRoot(user, dsFolder) + root_candidates = list(_frame_metadata_candidate_texts(root)) + cameras: Dict[str, Dict[str, Dict[str, str]]] = {} + has_source = False + + for camera_name in _multicam_camera_order(multi_cam): + cam_info = multi_cam['cameras'][camera_name] + child = Folder().load(cam_info['folderId'], level=AccessType.READ, user=user) + if child is None: + raise RestException( + f'Camera folder for "{camera_name}" was not found', + code=404, + ) + if fromMeta(child, constants.TypeMarker) != constants.ImageSequenceType: + continue + + child_root = crud.getCloneRoot(user, child) + candidates = root_candidates + list(_frame_metadata_candidate_texts(child_root)) + records = _load_camera_frame_metadata_records( + child, + user, + startFrame, + endFrame, + candidates, + ) + if records is not None: + has_source = True + cameras[camera_name] = records + else: + cameras[camera_name] = {} + + if not has_source: + return {'cameras': {}} + return {'cameras': cameras} + + +def _load_camera_frame_metadata_records( + folder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int, + endFrame: Optional[int], + candidates: Iterable[Tuple[str, str]], +) -> Optional[Dict[str, Dict[str, str]]]: + images = crud.valid_images(folder, user) + media_keys = crud.valid_image_names_dict(images) + if endFrame is None: + endFrame = len(images) - 1 + + sources = [ + source + for source in ( + frame_metadata.parse_frame_metadata_source(text, media_keys, source_name=name) + for name, text in candidates + ) + if source is not None + ] + if not sources: + return None + + frame_by_key = { + frame_metadata.normalize_key(media_key): frame_number + for media_key, frame_number in media_keys.items() + } + records: Dict[str, 
Dict[str, str]] = {} + collided_frames = set() + for source in sources: + for media_key, values in source.records.items(): + frame_number = frame_by_key.get(frame_metadata.normalize_key(media_key)) + if frame_number is None or not startFrame <= frame_number <= endFrame: + continue + + frame_key = str(frame_number) + if frame_key in collided_frames: + continue + if frame_key not in records: + records[frame_key] = values + elif records[frame_key] != values: + records.pop(frame_key, None) + collided_frames.add(frame_key) + + return records + + def _frame_metadata_candidate_texts(folder: types.GirderModel) -> Iterable[Tuple[str, str]]: for item in Folder().childItems(folder): if _is_frame_metadata_source_item(item): diff --git a/server/tests/test_frame_metadata_crud.py b/server/tests/test_frame_metadata_crud.py index 09ed71d77..180a3e4eb 100644 --- a/server/tests/test_frame_metadata_crud.py +++ b/server/tests/test_frame_metadata_crud.py @@ -15,6 +15,41 @@ def _dataset_folder(): } +def _multicam_parent_folder(): + return { + '_id': 'parent-id', + 'name': 'stereo-camera', + 'meta': { + 'annotate': True, + 'type': constants.MultiType, + 'fps': 5, + 'multiCam': { + 'defaultDisplay': 'port', + 'cameraOrder': ['port', 'starboard'], + 'cameras': { + 'port': {'folderId': 'port-id', 'type': constants.ImageSequenceType}, + 'starboard': { + 'folderId': 'starboard-id', + 'type': constants.ImageSequenceType, + }, + }, + }, + }, + } + + +def _camera_folder(folder_id: str, name: str): + return { + '_id': folder_id, + 'name': name, + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + 'fps': 5, + }, + } + + def _image_item(name: str): return { '_id': f'{name}-id', @@ -29,6 +64,14 @@ def _source_item(name: str): } +def _root_folder(folder_id: str, name: str): + return { + '_id': folder_id, + 'name': name, + 'meta': {}, + } + + def _wire_item_downloads(item_model, file_model, texts_by_name): def child_files(item): if item['name'] not in texts_by_name: @@ -50,6 +93,31 
@@ def download(file, headers=False): file_model.download.side_effect = download +def _wire_multicam_folders(folder_model, children, items_by_folder_id): + def load_folder(folder_id, level=None, user=None): + return children.get(folder_id) + + def child_items(folder): + return items_by_folder_id.get(folder['_id'], []) + + folder_model.load.side_effect = load_folder + folder_model.childItems.side_effect = child_items + + +def _wire_multicam_clone_roots(get_clone_root, roots_by_folder_id): + def clone_root(user, folder): + return roots_by_folder_id[folder['_id']] + + get_clone_root.side_effect = clone_root + + +def _wire_multicam_valid_images(valid_images, images_by_folder_id): + def images(folder, user): + return images_by_folder_id.get(folder['_id'], []) + + valid_images.side_effect = images + + @patch('dive_server.crud_dataset.File') @patch('dive_server.crud_dataset.Item') @patch('dive_server.crud_dataset.Folder') @@ -155,3 +223,201 @@ def test_load_frame_metadata_returns_empty_cameras_without_text_source( folder_model.save.assert_not_called() item_model.move.assert_not_called() file_model.save.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_routes_root_multicam_source_by_child_camera( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + parent = _multicam_parent_folder() + port = _camera_folder('port-id', 'port') + starboard = _camera_folder('starboard-id', 'starboard') + parent_root = _root_folder('parent-root-id', 'parent-root') + port_root = _root_folder('port-root-id', 'port-root') + starboard_root = _root_folder('starboard-root-id', 'starboard-root') + user = {'_id': 'user-id'} + + folder_model = folder_cls.return_value + _wire_multicam_folders( + folder_model, + {'port-id': port, 'starboard-id': 
starboard}, + { + 'parent-root-id': [_source_item('navigation.txt')], + 'port-root-id': [], + 'starboard-root-id': [], + }, + ) + _wire_multicam_clone_roots( + get_clone_root, + { + 'parent-id': parent_root, + 'port-id': port_root, + 'starboard-id': starboard_root, + }, + ) + _wire_multicam_valid_images( + valid_images, + { + 'port-id': [_image_item('port_0001.jpg'), _image_item('port_0002.jpg')], + 'starboard-id': [ + _image_item('starboard_0001.jpg'), + _image_item('starboard_0002.jpg'), + ], + }, + ) + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + "port_image,starboard_image,depth,temperature\n" + "port_0001.jpg,starboard_0001.jpg,192.80,4.0\n" + "port_0002.jpg,starboard_0002.jpg,193.10,4.1\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(parent, user, startFrame=0, endFrame=1) + + assert result == { + 'cameras': { + 'port': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + 'temperature': '4.0', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + }, + 'starboard': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + 'temperature': '4.0', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + }, + }, + } + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_omits_multicam_frame_on_distinct_record_collision( + get_clone_root, + 
valid_images, + folder_cls, + item_cls, + file_cls, +): + parent = _multicam_parent_folder() + port = _camera_folder('port-id', 'port') + starboard = _camera_folder('starboard-id', 'starboard') + parent_root = _root_folder('parent-root-id', 'parent-root') + port_root = _root_folder('port-root-id', 'port-root') + starboard_root = _root_folder('starboard-root-id', 'starboard-root') + user = {'_id': 'user-id'} + + folder_model = folder_cls.return_value + _wire_multicam_folders( + folder_model, + {'port-id': port, 'starboard-id': starboard}, + { + 'parent-root-id': [_source_item('navigation.txt')], + 'port-root-id': [_source_item('port_override.txt')], + 'starboard-root-id': [], + }, + ) + _wire_multicam_clone_roots( + get_clone_root, + { + 'parent-id': parent_root, + 'port-id': port_root, + 'starboard-id': starboard_root, + }, + ) + _wire_multicam_valid_images( + valid_images, + { + 'port-id': [_image_item('port_0001.jpg'), _image_item('port_0002.jpg')], + 'starboard-id': [ + _image_item('starboard_0001.jpg'), + _image_item('starboard_0002.jpg'), + ], + }, + ) + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + "port_image,starboard_image,depth\n" + "port_0001.jpg,starboard_0001.jpg,192.80\n" + "port_0002.jpg,starboard_0002.jpg,193.10\n" + ), + 'port_override.txt': ( + "port_image,starboard_image,depth\n" + "port_0001.jpg,starboard_0001.jpg,999.99\n" + "port_0002.jpg,starboard_0002.jpg,193.10\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(parent, user, startFrame=0, endFrame=1) + + assert result == { + 'cameras': { + 'port': { + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + }, + }, + 'starboard': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 
'depth': '193.10', + }, + }, + }, + } + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() From ec86c476106018bed960a4335b1730d25addf859 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 10:47:18 -0400 Subject: [PATCH 05/23] Add frame metadata window API --- client/dive-common/apispec.ts | 7 ++ client/platform/web-girder/App.vue | 2 + .../web-girder/api/dataset.service.spec.ts | 30 +++++++ .../web-girder/api/dataset.service.ts | 11 ++- server/dive_server/views_dataset.py | 32 +++++++ server/tests/test_frame_metadata_crud.py | 83 +++++++++++++++++++ 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 client/platform/web-girder/api/dataset.service.spec.ts diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 55fca437c..770144ba4 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -125,6 +125,10 @@ interface FrameImage { id?: string; } +interface FrameMetadataResponse { + cameras: Record>>; +} + export interface MultiCamImportFolderArgs { datasetName?: string; // Girder parent folder name (required on web) defaultDisplay: string; // In multicam the default camera to display @@ -266,6 +270,8 @@ interface Api { loadMetadata(datasetId: string): Promise; loadDetections(datasetId: string, revision?: number, set?: string): Promise; + loadFrameMetadata?(datasetId: string, startFrame: number, endFrame: number): + Promise; saveDetections(datasetId: string, args: SaveDetectionsArgs): Promise; saveMetadata(datasetId: string, metadata: DatasetMetaMutable): Promise; @@ -444,6 +450,7 @@ export { PipelineRuntimeParams, PipeMetadata, Pipelines, + FrameMetadataResponse, SaveDetectionsArgs, SaveAttributeArgs, SaveAttributeTrackFilterArgs, diff --git a/client/platform/web-girder/App.vue b/client/platform/web-girder/App.vue index 8786b2321..13a0300fc 100644 --- a/client/platform/web-girder/App.vue +++ 
b/client/platform/web-girder/App.vue @@ -24,6 +24,7 @@ import { saveAttributeTrackFilters, importAnnotationFile, loadDetections, + loadFrameMetadata, saveDetections, unwrap, getTiles, @@ -67,6 +68,7 @@ export default defineComponent({ getTrainingConfigurations: unwrap(getTrainingConfigurations), runTraining: unwrap(runTraining), loadDetections, + loadFrameMetadata: unwrap(loadFrameMetadata), saveDetections: unwrap(saveDetections), saveMetadata: unwrap(saveMetadata), saveAttributes: unwrap(saveAttributes), diff --git a/client/platform/web-girder/api/dataset.service.spec.ts b/client/platform/web-girder/api/dataset.service.spec.ts new file mode 100644 index 000000000..0dc5ca856 --- /dev/null +++ b/client/platform/web-girder/api/dataset.service.spec.ts @@ -0,0 +1,30 @@ +// @vitest-environment jsdom + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest'; + +import girderRest from '../plugins/girder'; +import { loadFrameMetadata } from './dataset.service'; + +describe('dataset.service frame metadata', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('requests frame metadata from the parent dataset with explicit window params', async () => { + const response = { data: { cameras: { port: { 3: { depth: '193.10' } } } } }; + const get = vi.spyOn(girderRest, 'get').mockResolvedValue(response as never); + + await expect(loadFrameMetadata('parent-id/port', 3, 7)).resolves.toBe(response); + + expect(get).toHaveBeenCalledWith('dive_dataset/parent-id/frame_metadata', { + params: { startFrame: 3, endFrame: 7 }, + }); + }); +}); diff --git a/client/platform/web-girder/api/dataset.service.ts b/client/platform/web-girder/api/dataset.service.ts index c2503d493..62d6915c2 100644 --- a/client/platform/web-girder/api/dataset.service.ts +++ b/client/platform/web-girder/api/dataset.service.ts @@ -1,7 +1,8 @@ import type { GirderModel } from 
'@girder/components/src'; import { - DatasetMetaMutable, FrameImage, SaveAttributeArgs, SaveAttributeTrackFilterArgs, + DatasetMetaMutable, FrameImage, FrameMetadataResponse, + SaveAttributeArgs, SaveAttributeTrackFilterArgs, } from 'dive-common/apispec'; import { calibrationFileMarker, jsonCalibrationFileMarker } from 'dive-common/constants'; import { parentDatasetId } from 'dive-common/compositeDatasetId'; @@ -64,6 +65,13 @@ async function getDatasetMedia(datasetId: string) { return girderRest.get(`dive_dataset/${folderId}/media`); } +function loadFrameMetadata(datasetId: string, startFrame: number, endFrame: number) { + return girderRest.get( + `dive_dataset/${parentDatasetId(datasetId)}/frame_metadata`, + { params: { startFrame, endFrame } }, + ); +} + function clone({ folderId, name, parentFolderId, revision, }: { @@ -318,6 +326,7 @@ export { hasCalibrationFile, getDatasetCalibration, importAnnotationFile, + loadFrameMetadata, makeViameFolder, saveAttributes, saveAttributeTrackFilters, diff --git a/server/dive_server/views_dataset.py b/server/dive_server/views_dataset.py index 7978590f0..eb3471ab4 100644 --- a/server/dive_server/views_dataset.py +++ b/server/dive_server/views_dataset.py @@ -43,6 +43,7 @@ def __init__(self, resourceName): self.route("GET", ("export",), self.export) self.route("GET", (":id", "configuration"), self.get_configuration) self.route("GET", (":id", "media", ":mediaId", "download"), self.download_media) + self.route("GET", (":id", "frame_metadata"), self.get_frame_metadata) self.route("POST", ("validate_files",), self.validate_files) self.route("PATCH", (":id",), self.patch_metadata) @@ -262,6 +263,37 @@ def get_configuration(self, folder): def get_media(self, folder): return crud_dataset.get_media(folder, self.getCurrentUser()).dict(exclude_none=True) + @access.user + @autoDescribeRoute( + Description("Get dataset frame metadata for an explicit frame window") + .modelParam("id", level=AccessType.READ, **DatasetModelParam) + .param( + 
"startFrame", + "Inclusive first frame to return", + paramType="query", + dataType="integer", + required=True, + ) + .param( + "endFrame", + "Inclusive last frame to return", + paramType="query", + dataType="integer", + required=True, + ) + ) + def get_frame_metadata(self, folder, startFrame: int, endFrame: int): + if startFrame < 0 or endFrame < 0: + raise RestException('Frame metadata window bounds must be non-negative', code=400) + if startFrame > endFrame: + raise RestException('startFrame must be less than or equal to endFrame', code=400) + return crud_dataset.load_frame_metadata( + folder, + self.getCurrentUser(), + startFrame=startFrame, + endFrame=endFrame, + ) + @access.public(scope=TokenScope.DATA_READ, cookie=True) @autoDescribeRoute( Description("Export all selected datasets") diff --git a/server/tests/test_frame_metadata_crud.py b/server/tests/test_frame_metadata_crud.py index 180a3e4eb..1fa0c06cc 100644 --- a/server/tests/test_frame_metadata_crud.py +++ b/server/tests/test_frame_metadata_crud.py @@ -1,6 +1,10 @@ from unittest.mock import patch +import pytest +from girder.exceptions import RestException + from dive_server import crud_dataset +from dive_server.views_dataset import DatasetResource from dive_utils import constants @@ -72,6 +76,19 @@ def _root_folder(folder_id: str, name: str): } +def _call_frame_metadata_route(folder, user, params): + with patch('dive_server.views_dataset.Folder'): + resource = DatasetResource('dive_dataset') + resource.getCurrentUser = lambda: user + method = DatasetResource.get_frame_metadata.__wrapped__.__wrapped__ + return method( + resource, + folder, + startFrame=int(params['startFrame']), + endFrame=int(params['endFrame']), + ) + + def _wire_item_downloads(item_model, file_model, texts_by_name): def child_files(item): if item['name'] not in texts_by_name: @@ -118,6 +135,72 @@ def images(folder, user): valid_images.side_effect = images +@patch('girder.api.rest.Resource.route') +def 
test_dataset_resource_registers_frame_metadata_route(route): + with patch('dive_server.views_dataset.Folder'): + resource = DatasetResource('dive_dataset') + + assert any( + call.args == ("GET", (":id", "frame_metadata"), resource.get_frame_metadata) + for call in route.call_args_list + ) + + +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_accepts_explicit_window(load_frame_metadata): + dataset = _dataset_folder() + user = {'_id': 'user-id'} + response = {'cameras': {'singleCam': {'1': {'depth': '193.10'}}}} + load_frame_metadata.return_value = response + + result = _call_frame_metadata_route( + dataset, + user, + {'startFrame': '1', 'endFrame': '2'}, + ) + + assert result == response + load_frame_metadata.assert_called_once_with( + dataset, + user, + startFrame=1, + endFrame=2, + ) + + +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_returns_empty_cameras_without_source(load_frame_metadata): + dataset = _dataset_folder() + user = {'_id': 'user-id'} + load_frame_metadata.return_value = {'cameras': {}} + + result = _call_frame_metadata_route( + dataset, + user, + {'startFrame': '0', 'endFrame': '0'}, + ) + + assert result == {'cameras': {}} + + +@pytest.mark.parametrize( + ('params', 'message'), + [ + ({'startFrame': '-1', 'endFrame': '0'}, 'non-negative'), + ({'startFrame': '2', 'endFrame': '1'}, 'less than or equal to endFrame'), + ], +) +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_rejects_invalid_window(load_frame_metadata, params, message): + dataset = _dataset_folder() + user = {'_id': 'user-id'} + + with pytest.raises(RestException, match=message): + _call_frame_metadata_route(dataset, user, params) + + load_frame_metadata.assert_not_called() + + @patch('dive_server.crud_dataset.File') @patch('dive_server.crud_dataset.Item') @patch('dive_server.crud_dataset.Folder') From 
3eb606d0518242c055683c5e8ddb549aa396393e Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 10:56:18 -0400 Subject: [PATCH 06/23] Add desktop frame metadata parser --- .../backend/serializers/frameMetadata.spec.ts | 152 ++++++++++ .../backend/serializers/frameMetadata.ts | 264 ++++++++++++++++++ 2 files changed, 416 insertions(+) create mode 100644 client/platform/desktop/backend/serializers/frameMetadata.spec.ts create mode 100644 client/platform/desktop/backend/serializers/frameMetadata.ts diff --git a/client/platform/desktop/backend/serializers/frameMetadata.spec.ts b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts new file mode 100644 index 000000000..565fa375f --- /dev/null +++ b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts @@ -0,0 +1,152 @@ +/// +import { + findJoinColumns, + normalizeKey, + parseFrameMetadataSource, + selectFrameMetadataSource, +} from 'platform/desktop/backend/serializers/frameMetadata'; + +describe('desktop frame metadata serializer', () => { + it('normalizes media keys the same way as image name maps', () => { + expect(normalizeKey('nested/20191009.154056.00082_rect_color.tif')).toBe( + '20191009.154056.00082_rect_color', + ); + }); + + it('parses NOAA-style rows with multiple image columns', () => { + const mediaKeys = new Map([ + ['20191009.154056.00082_rect_color', 0], + ['20191009.154056.00081_rect_color', 0], + ]); + const text = [ + 'port_image date time latitude longitude water_depth altitude starboard_image', + '20191009.154056.00082_rect_color.tif 2019/10/09 15:40:56.1122 46.575870 -124.603094 192.80 2.78 20191009.154056.00081_rect_color.tif', + '', + ].join('\n'); + + const source = parseFrameMetadataSource(text, mediaKeys, 'nav.txt'); + + expect(source).not.toBeNull(); + expect(source?.sourceName).toBe('nav.txt'); + expect(source?.header).toEqual([ + 'port_image', + 'date', + 'time', + 'latitude', + 'longitude', + 'water_depth', + 'altitude', + 'starboard_image', + ]); + 
expect(source?.joinColumns).toEqual(['port_image', 'starboard_image']); + expect(source?.payloadColumns).toEqual([ + 'date', + 'time', + 'latitude', + 'longitude', + 'water_depth', + 'altitude', + ]); + expect(Object.keys(source?.records || {}).sort()).toEqual([ + '20191009.154056.00081_rect_color', + '20191009.154056.00082_rect_color', + ]); + const portRecord = source?.records['20191009.154056.00082_rect_color']; + expect(Object.keys(portRecord || {})).toEqual(source?.header); + expect(portRecord?.latitude).toBe('46.575870'); + expect(Object.values(portRecord || {}).every((value) => typeof value === 'string')).toBe(true); + }); + + it('parses comma, tab, and whitespace delimited sources', () => { + const mediaKeys = new Map([['image_0001', 0]]); + + [ + 'filename,depth,latitude\nimage_0001.jpg,192.80,46.575870\n', + 'filename\tdepth\tlatitude\nimage_0001.jpg\t192.80\t46.575870\n', + 'filename depth latitude\nimage_0001.jpg 192.80 46.575870\n', + ].forEach((text) => { + const source = parseFrameMetadataSource(text, mediaKeys); + + expect(source).not.toBeNull(); + expect(source?.header).toEqual(['filename', 'depth', 'latitude']); + expect(source?.joinColumns).toEqual(['filename']); + expect(source?.records.image_0001).toEqual({ + filename: 'image_0001.jpg', + depth: '192.80', + latitude: '46.575870', + }); + }); + }); + + it('finds join columns by filename value matches', () => { + const rows = [ + { + port_image: '20191009.154056.00082_rect_color.tif', + latitude: '46.575870', + starboard_image: '20191009.154056.00081_rect_color.tif', + }, + ]; + + expect(findJoinColumns( + ['port_image', 'latitude', 'starboard_image'], + rows, + new Map([ + ['20191009.154056.00082_rect_color', 0], + ['20191009.154056.00081_rect_color', 0], + ]), + )).toEqual(['port_image', 'starboard_image']); + }); + + it('rejects VIAME annotation CSV even when its image column matches', () => { + const mediaKeys = new Map([['20191009.154056.00082_rect_color', 0]]); + const viameCsv = [ + '# 1: 
Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes', + '1,20191009.154056.00082_rect_color.tif,0,0,0,10,10,1.0,-1,fish,0.9', + '', + ].join('\n'); + + expect(parseFrameMetadataSource(viameCsv, mediaKeys)).toBeNull(); + }); + + it('accepts VIAME-shaped telemetry without the VIAME header', () => { + const mediaKeys = new Map([['image_0001', 0]]); + const text = [ + 'index,image,frame,x,y,depth,altitude,heading,temperature', + '1,image_0001.jpg,100,46.5,-124.6,192.8,2.7,180.5,4.2', + '', + ].join('\n'); + + const source = parseFrameMetadataSource(text, mediaKeys); + + expect(source).not.toBeNull(); + expect(source?.joinColumns).toEqual(['image']); + expect(source?.records.image_0001.depth).toBe('192.8'); + }); + + it('rejects bare image lists and unrelated text', () => { + const mediaKeys = new Map([['image_0001', 0]]); + + expect(parseFrameMetadataSource('image\nimage_0001.jpg\n', mediaKeys)).toBeNull(); + expect(parseFrameMetadataSource('note,value\nhello,world\n', mediaKeys)).toBeNull(); + }); + + it('rejects ambiguous candidates and non-text extensions', () => { + const mediaKeys = new Map([['image_0001', 0]]); + const acceptedText = 'filename,depth\nimage_0001.jpg,192.80\n'; + + expect(selectFrameMetadataSource( + [ + ['metadata.json', acceptedText], + ['telemetry-a.txt', acceptedText], + ], + mediaKeys, + )?.sourceName).toBe('telemetry-a.txt'); + expect(selectFrameMetadataSource( + [ + ['telemetry-a.txt', acceptedText], + ['telemetry-b.csv', 'filename,temperature\nimage_0001.jpg,4.2\n'], + ], + mediaKeys, + )).toBeNull(); + }); +}); diff --git a/client/platform/desktop/backend/serializers/frameMetadata.ts b/client/platform/desktop/backend/serializers/frameMetadata.ts new file mode 100644 index 000000000..ec54b25d6 --- /dev/null +++ 
b/client/platform/desktop/backend/serializers/frameMetadata.ts @@ -0,0 +1,264 @@ +import parseSync from 'csv-parse/lib/sync'; +import path from 'path'; + +type FrameMetadataRow = Record; +type MediaKeys = Map | Record; + +interface ParsedFrameMetadata { + sourceName?: string; + header: string[]; + rows: FrameMetadataRow[]; + joinColumns: string[]; + payloadColumns: string[]; + records: Record; +} + +const imageExtensions = new Set([ + 'png', + 'jpg', + 'jpeg', + 'sgi', + 'bmp', + 'pgm', + 'nitf', + 'tif', + 'tiff', + 'ntf', + 'vrt', + 'r0', + 'r1', + 'r2', + 'r3', + 'r4', + 'r5', + 'r6', +]); + +function normalizeKey(value: string): string { + const basename = path.basename(String(value).trim()); + const ext = path.extname(basename); + const extension = ext.toLowerCase().replace(/^\./, ''); + if (imageExtensions.has(extension)) { + return path.basename(basename, ext); + } + return basename; +} + +function parseTable(text: string): { header: string[]; rows: FrameMetadataRow[] } { + const rawRows = readRows(text); + if (rawRows.length === 0) { + return { header: [], rows: [] }; + } + + const header = rawRows[0].map((cell) => cell.trim()); + if (!header.every((cell) => cell.length > 0)) { + return { header: [], rows: [] }; + } + + const rows: FrameMetadataRow[] = []; + rawRows.slice(1).forEach((rawRow) => { + const values = rawRow.map((cell) => cell.trim()); + if (!values.some((cell) => cell.length > 0)) { + return; + } + const row: FrameMetadataRow = {}; + header.forEach((field, index) => { + row[field] = values[index] || ''; + }); + rows.push(row); + }); + return { header, rows }; +} + +function findJoinColumns( + header: string[], + rows: FrameMetadataRow[], + mediaKeys: MediaKeys, +): string[] { + const normalizedMediaKeys = normalizedKeySet(mediaKeys); + return header.filter((column) => rows.some((row) => ( + row[column] && normalizedMediaKeys.has(normalizeKey(row[column])) + ))); +} + +function isFrameMetadata(text: string, mediaKeys: MediaKeys): boolean { + 
return parseFrameMetadataSource(text, mediaKeys) !== null; +} + +function parseFrameMetadataSource( + text: string, + mediaKeys: MediaKeys, + sourceName?: string, +): ParsedFrameMetadata | null { + if (isViameCsv(text)) { + return null; + } + + const { header, rows } = parseTable(text); + if (header.length === 0 || rows.length === 0) { + return null; + } + + const joinColumns = findJoinColumns(header, rows, mediaKeys); + if (joinColumns.length === 0) { + return null; + } + + const payloadColumns = header.filter((column) => !joinColumns.includes(column)); + if (payloadColumns.length === 0) { + return null; + } + + const records: Record = {}; + const normalizedMediaKeys = normalizedKeySet(mediaKeys); + rows.forEach((row) => { + joinColumns.forEach((column) => { + const key = normalizeKey(row[column] || ''); + if (normalizedMediaKeys.has(key)) { + const record: FrameMetadataRow = {}; + header.forEach((field) => { + record[field] = row[field] || ''; + }); + records[key] = record; + } + }); + }); + + if (Object.keys(records).length === 0) { + return null; + } + + return { + sourceName, + header, + rows, + joinColumns, + payloadColumns, + records, + }; +} + +function selectFrameMetadataSource( + candidates: [string, string][], + mediaKeys: MediaKeys, +): ParsedFrameMetadata | null { + const matches: ParsedFrameMetadata[] = []; + candidates.forEach(([sourceName, text]) => { + if (!isTextCandidate(sourceName)) { + return; + } + const source = parseFrameMetadataSource(text, mediaKeys, sourceName); + if (source !== null) { + matches.push(source); + } + }); + + if (matches.length !== 1) { + return null; + } + return matches[0]; +} + +function readRows(text: string): string[][] { + const firstLine = firstNonemptyLine(text); + if (firstLine === null) { + return []; + } + + const delimiter = sniffDelimiter(firstLine); + if (delimiter === null) { + return text + .split(/\r?\n/) + .filter((line) => line.trim().length > 0) + .map((line) => line.trim().split(/\s+/)); + } + + return 
parseSync(text, { + delimiter, + relax_column_count: true, + skip_empty_lines: true, + }).map((row: string[]) => row.map((cell) => cell.trim())); +} + +function firstNonemptyLine(text: string): string | null { + const line = text.split(/\r?\n/).find((candidate) => candidate.trim().length > 0); + return line === undefined ? null : line.trim(); +} + +function sniffDelimiter(line: string): ',' | '\t' | null { + if (line.includes(',')) { + return ','; + } + if (line.includes('\t')) { + return '\t'; + } + return null; +} + +function normalizedKeySet(mediaKeys: MediaKeys): Set { + if (mediaKeys instanceof Map) { + return new Set(Array.from(mediaKeys.keys()).map((key) => normalizeKey(key))); + } + return new Set(Object.keys(mediaKeys).map((key) => normalizeKey(key))); +} + +function isTextCandidate(sourceName: string): boolean { + return ['.txt', '.csv'].includes(path.extname(sourceName).toLowerCase()); +} + +function isViameCsv(text: string): boolean { + const rows = readCsvRows(text); + let hasHeader = false; + let hasDataRow = false; + + rows.forEach((row) => { + if (row.length === 0) { + return; + } + if (row[0].startsWith('#')) { + hasHeader = hasHeader || row[0].startsWith('# 1: Detection or Track-id'); + return; + } + if (isViameDataRow(row)) { + hasDataRow = true; + } + }); + + return hasHeader && hasDataRow; +} + +function readCsvRows(text: string): string[][] { + return parseSync(text, { + delimiter: ',', + relax_column_count: true, + skip_empty_lines: true, + }).map((row: string[]) => row.map((cell) => cell.trim())); +} + +function isViameDataRow(row: string[]): boolean { + if (row.length < 9) { + return false; + } + const trackId = Number.parseInt(row[0], 10); + const frame = Number.parseInt(row[2], 10); + const bounds = row.slice(3, 7).map((value) => Number.parseFloat(value)); + const fishLength = Number.parseFloat(row[8]); + return ( + Number.isFinite(trackId) + && Number.isFinite(frame) + && bounds.every((value) => Number.isFinite(value)) + && 
Number.isFinite(fishLength) + ); +} + +export { + FrameMetadataRow, + MediaKeys, + ParsedFrameMetadata, + findJoinColumns, + isFrameMetadata, + normalizeKey, + parseFrameMetadataSource, + parseTable, + selectFrameMetadataSource, +}; From 1ce424ebda4d575e37a27c5e2f9ade8684f38ab1 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:00:44 -0400 Subject: [PATCH 07/23] Dedupe delimited-row parsing in desktop frame metadata parser --- .../desktop/backend/serializers/frameMetadata.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/client/platform/desktop/backend/serializers/frameMetadata.ts b/client/platform/desktop/backend/serializers/frameMetadata.ts index ec54b25d6..6b52bf475 100644 --- a/client/platform/desktop/backend/serializers/frameMetadata.ts +++ b/client/platform/desktop/backend/serializers/frameMetadata.ts @@ -173,6 +173,10 @@ function readRows(text: string): string[][] { .map((line) => line.trim().split(/\s+/)); } + return parseDelimited(text, delimiter); +} + +function parseDelimited(text: string, delimiter: ',' | '\t'): string[][] { return parseSync(text, { delimiter, relax_column_count: true, @@ -207,7 +211,7 @@ function isTextCandidate(sourceName: string): boolean { } function isViameCsv(text: string): boolean { - const rows = readCsvRows(text); + const rows = parseDelimited(text, ','); let hasHeader = false; let hasDataRow = false; @@ -227,14 +231,6 @@ function isViameCsv(text: string): boolean { return hasHeader && hasDataRow; } -function readCsvRows(text: string): string[][] { - return parseSync(text, { - delimiter: ',', - relax_column_count: true, - skip_empty_lines: true, - }).map((row: string[]) => row.map((cell) => cell.trim())); -} - function isViameDataRow(row: string[]): boolean { if (row.length < 9) { return false; From 8ff43e7a5dc2a85560436ed56745ab538e956938 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:08:54 -0400 Subject: [PATCH 08/23] Add desktop frame metadata 
read-time resolver --- client/platform/desktop/backend/ipcService.ts | 5 + .../desktop/backend/native/common.spec.ts | 81 ++++++ .../platform/desktop/backend/native/common.ts | 247 +++++++++++++++++- client/platform/desktop/constants.ts | 2 + client/platform/desktop/frontend/api.ts | 10 + 5 files changed, 344 insertions(+), 1 deletion(-) diff --git a/client/platform/desktop/backend/ipcService.ts b/client/platform/desktop/backend/ipcService.ts index 91a5664d1..a57726dcd 100644 --- a/client/platform/desktop/backend/ipcService.ts +++ b/client/platform/desktop/backend/ipcService.ts @@ -210,6 +210,11 @@ export default function register() { return ret; }); + ipcMain.handle('load-frame-metadata', async ( + event, + { datasetId, startFrame, endFrame }: { datasetId: string; startFrame: number; endFrame: number }, + ) => common.loadFrameMetadata(settings.get(), datasetId, startFrame, endFrame)); + ipcMain.handle('import-multicam-media', async (event, { args }: { args: MultiCamImportArgs }) => { const ret = await beginMultiCamImport(args); diff --git a/client/platform/desktop/backend/native/common.spec.ts b/client/platform/desktop/backend/native/common.spec.ts index 4598d323c..38dd75b7c 100644 --- a/client/platform/desktop/backend/native/common.spec.ts +++ b/client/platform/desktop/backend/native/common.spec.ts @@ -259,6 +259,22 @@ beforeEach(() => { 'file1.csv': '', 'file2.csv': '', }, + frameMetadataSource: { + 'image_0001.jpg': '', + 'image_0002.jpg': '', + 'image_0003.jpg': '', + 'navigation.txt': [ + 'filename,depth,temperature', + 'image_0001.jpg,192.80,4.0', + 'image_0002.jpg,193.10,4.1', + 'image_0003.jpg,193.40,4.2', + '', + ].join('\n'), + }, + frameMetadataNoSource: { + 'image_0001.jpg': '', + 'notes.txt': 'note,value\nhello,world\n', + }, }, '/home/user/viamedata': { // eslint-disable-next-line @typescript-eslint/naming-convention @@ -313,6 +329,45 @@ beforeEach(() => { 'result_whatever.json': JSON.stringify({}), auxiliary: {}, }, + projectidFrameMetadata: { + 
'meta.json': JSON.stringify({ + version: 1, + id: 'projectidFrameMetadata', + type: 'image-sequence', + fps: 5, + originalBasePath: '/home/user/data/frameMetadataSource', + originalImageFiles: [ + 'image_0001.jpg', + 'image_0002.jpg', + 'image_0003.jpg', + ], + frameMetadataFields: ['stale_project_field'], + }), + 'result_whatever.json': JSON.stringify({}), + 'frame_metadata.json': JSON.stringify({ + cameras: { + singleCam: { + 1: { stale_project_field: 'do-not-read' }, + }, + }, + }), + auxiliary: {}, + }, + projectidFrameMetadataNoSource: { + 'meta.json': JSON.stringify({ + version: 1, + id: 'projectidFrameMetadataNoSource', + type: 'image-sequence', + fps: 5, + originalBasePath: '/home/user/data/frameMetadataNoSource', + originalImageFiles: [ + 'image_0001.jpg', + ], + frameMetadataFields: ['stale_project_field'], + }), + 'result_whatever.json': JSON.stringify({}), + auxiliary: {}, + }, projectid2Bad: { 'meta.json': '{}', // Won't match @@ -468,6 +523,32 @@ describe('native.common', () => { .rejects.toThrow('Dataset: missingMulti is of type multiCam or stereo but contains no multiCam data'); }); + it('loadFrameMetadata reads a source sidecar next to imagery and applies the requested window', async () => { + const data = await common.loadFrameMetadata(settings, 'projectidFrameMetadata', 1, 2); + + expect(data).toEqual({ + cameras: { + singleCam: { + 1: { + filename: 'image_0002.jpg', + depth: '193.10', + temperature: '4.1', + }, + 2: { + filename: 'image_0003.jpg', + depth: '193.40', + temperature: '4.2', + }, + }, + }, + }); + }); + + it('loadFrameMetadata ignores non-telemetry text files', async () => { + await expect(common.loadFrameMetadata(settings, 'projectidFrameMetadataNoSource', 0, 0)) + .resolves.toEqual({ cameras: {} }); + }); + it('createWorkingDirectory creates pipeline run directories', async () => { await expect(createWorkingDirectory(settings, [], 'whatever.pipe')) .rejects.toThrow('At least 1 jsonMeta item'); diff --git 
a/client/platform/desktop/backend/native/common.ts b/client/platform/desktop/backend/native/common.ts index 6939811e0..41f760aa2 100644 --- a/client/platform/desktop/backend/native/common.ts +++ b/client/platform/desktop/backend/native/common.ts @@ -30,12 +30,18 @@ import { PipeMetadata, PipelineParamType, DatasetCalibrationResult, + FrameMetadataResponse, } from 'dive-common/apispec'; import * as viameSerializers from 'platform/desktop/backend/serializers/viame'; import * as nistSerializers from 'platform/desktop/backend/serializers/nist'; import * as dive from 'platform/desktop/backend/serializers/dive'; import * as coco from 'platform/desktop/backend/serializers/coco'; import kpf from 'platform/desktop/backend/serializers/kpf'; +import { + normalizeKey, + parseFrameMetadataSource, + selectFrameMetadataSource, +} from 'platform/desktop/backend/serializers/frameMetadata'; // TODO: Check to Refactor this // eslint-disable-next-line import/no-cycle import { checkMedia } from 'platform/desktop/backend/native/mediaJobs'; @@ -52,6 +58,7 @@ import { ExportConfigurationArgs, ExportMulticamEverythingArgs, JobsFolderName, JobsOutputFolderName, ProjectsFolderName, PipelinesFolderName, ConversionArgs, JobType, LastCalibrationBaseName, + FrameMetadataSourceExtensions, SingleCameraFrameMetadataKey, } from 'platform/desktop/constants'; import { cleanString, filterByGlob, makeid, strNumericCompare, @@ -439,6 +446,243 @@ async function loadDetections(settings: Settings, datasetId: string) { return loadAnnotationFile(projectDirData.trackFileAbsPath); } +interface ImageSequenceFrameMetadataSource { + originalBasePath: string; + originalImageFiles: string[]; + imageListPath?: string; +} + +type FrameMetadataCandidate = [string, string]; +type FrameMetadataRecords = Record>; + +function frameMetadataSourceDirectory(source: ImageSequenceFrameMetadataSource): string | null { + if (source.originalBasePath) { + return source.originalBasePath; + } + if (source.imageListPath) { + return 
npath.dirname(source.imageListPath); + } + const firstImage = source.originalImageFiles[0]; + if (firstImage && npath.isAbsolute(firstImage)) { + return npath.dirname(firstImage); + } + return null; +} + +async function frameMetadataCandidateTexts(directory: string | null): Promise { + if (!directory || !(await fs.pathExists(directory))) { + return []; + } + + const names = await fs.readdir(directory); + const candidates = await Promise.all(names + .filter((name) => ( + FrameMetadataSourceExtensions.includes( + npath.extname(name).toLowerCase() as typeof FrameMetadataSourceExtensions[number], + ) + )) + .map(async (name): Promise => { + const filePath = npath.join(directory, name); + const stat = await fs.stat(filePath); + if (!stat.isFile()) { + return null; + } + return [name, await fs.readFile(filePath, 'utf-8')]; + })); + + return candidates.filter((candidate): candidate is FrameMetadataCandidate => candidate !== null); +} + +function mediaKeyToFrameMap(mediaKeys: Map): Map { + return new Map( + Array.from(mediaKeys.entries()).map(([mediaKey, frameNumber]) => ( + [normalizeKey(mediaKey), frameNumber] + )), + ); +} + +function recordsForFrameWindow( + source: { records: Record> }, + mediaKeys: Map, + startFrame: number, + endFrame: number, +): FrameMetadataRecords { + const frameByKey = mediaKeyToFrameMap(mediaKeys); + const records: FrameMetadataRecords = {}; + Object.entries(source.records).forEach(([mediaKey, values]) => { + const frameNumber = frameByKey.get(normalizeKey(mediaKey)); + if (frameNumber !== undefined && startFrame <= frameNumber && frameNumber <= endFrame) { + records[String(frameNumber)] = values; + } + }); + return records; +} + +async function loadSingleCameraFrameMetadataRecords( + sourceMeta: ImageSequenceFrameMetadataSource, + startFrame: number, + endFrame?: number, +): Promise { + const mediaKeys = validImageNamesMap(sourceMeta); + if (!mediaKeys) { + return null; + } + const source = selectFrameMetadataSource( + await 
frameMetadataCandidateTexts(frameMetadataSourceDirectory(sourceMeta)), + mediaKeys, + ); + if (!source) { + return null; + } + return recordsForFrameWindow(source, mediaKeys, startFrame, endFrame ?? mediaKeys.size - 1); +} + +async function loadMultiCameraFrameMetadataRecords( + sourceMeta: ImageSequenceFrameMetadataSource, + candidates: FrameMetadataCandidate[], + startFrame: number, + endFrame?: number, +): Promise { + const mediaKeys = validImageNamesMap(sourceMeta); + if (!mediaKeys) { + return null; + } + + const sources = candidates + .map(([sourceName, text]) => parseFrameMetadataSource(text, mediaKeys, sourceName)) + .filter((source): source is NonNullable => source !== null); + if (!sources.length) { + return null; + } + + const frameByKey = mediaKeyToFrameMap(mediaKeys); + const records: FrameMetadataRecords = {}; + const collidedFrames = new Set(); + const windowEnd = endFrame ?? mediaKeys.size - 1; + sources.forEach((source) => { + Object.entries(source.records).forEach(([mediaKey, values]) => { + const frameNumber = frameByKey.get(normalizeKey(mediaKey)); + if (frameNumber === undefined || frameNumber < startFrame || frameNumber > windowEnd) { + return; + } + + const frameKey = String(frameNumber); + if (collidedFrames.has(frameKey)) { + return; + } + if (records[frameKey] === undefined) { + records[frameKey] = values; + } else if (JSON.stringify(records[frameKey]) !== JSON.stringify(values)) { + delete records[frameKey]; + collidedFrames.add(frameKey); + } + }); + }); + return records; +} + +function commonParentDirectory(paths: string[]): string | null { + const resolved = paths.filter((item) => item).map((item) => npath.resolve(item)); + if (!resolved.length) { + return null; + } + const [first, ...rest] = resolved; + const firstParts = first.split(npath.sep); + let { length } = firstParts; + rest.forEach((candidate) => { + const parts = candidate.split(npath.sep); + length = Math.min(length, parts.length); + for (let i = 0; i < length; i += 1) { + 
if (firstParts[i] !== parts[i]) { + length = i; + break; + } + } + }); + const prefix = firstParts.slice(0, length).join(npath.sep); + return prefix || npath.sep; +} + +async function loadMulticamFrameMetadata( + projectMetaData: JsonMeta, + startFrame: number, + endFrame?: number, +): Promise { + const { multiCam } = projectMetaData; + if (!multiCam) { + return { cameras: {} }; + } + + const cameraEntries = orderedMultiCamCameraNames({ + cameras: multiCam.cameras, + defaultDisplay: multiCam.defaultDisplay, + }).map((cameraName) => [cameraName, multiCam.cameras[cameraName]] as const); + + const rootDirectory = projectMetaData.originalBasePath + || commonParentDirectory(cameraEntries.map(([, camera]) => ( + frameMetadataSourceDirectory(camera) || camera.originalBasePath + ))); + const rootCandidates = await frameMetadataCandidateTexts(rootDirectory); + const cameras: FrameMetadataResponse['cameras'] = {}; + let hasSource = false; + + for (let i = 0; i < cameraEntries.length; i += 1) { + const [cameraName, cameraMeta] = cameraEntries[i]; + if (cameraMeta.type === 'image-sequence') { + const candidates = rootCandidates.concat( + // eslint-disable-next-line no-await-in-loop + await frameMetadataCandidateTexts(frameMetadataSourceDirectory(cameraMeta)), + ); + // eslint-disable-next-line no-await-in-loop + const records = await loadMultiCameraFrameMetadataRecords( + cameraMeta, + candidates, + startFrame, + endFrame, + ); + if (records !== null) { + hasSource = true; + cameras[cameraName] = records; + } else { + cameras[cameraName] = {}; + } + } + } + + if (!hasSource) { + return { cameras: {} }; + } + return { cameras }; +} + +async function loadFrameMetadata( + settings: Settings, + datasetId: string, + startFrame: number, + endFrame?: number, +): Promise { + const parentId = datasetId.split('/')[0]; + const projectDirData = await getValidatedProjectDir(settings, parentId); + const projectMetaData = await loadJsonMetadata(projectDirData.metaFileAbsPath); + + if 
(projectMetaData.type === MultiType) { + return loadMulticamFrameMetadata(projectMetaData, startFrame, endFrame); + } + if (projectMetaData.type !== 'image-sequence') { + return { cameras: {} }; + } + + const records = await loadSingleCameraFrameMetadataRecords( + projectMetaData, + startFrame, + endFrame, + ); + if (records === null) { + return { cameras: {} }; + } + return { cameras: { [SingleCameraFrameMetadataKey]: records } }; +} + /** * Look through DIVE project path, find subfolders that * look like datasets, and return them. @@ -1319,7 +1563,7 @@ async function beginMediaImport(path: string): Promise 0) { const imageMap = new Map(); jsonMeta.originalImageFiles.forEach((imgPath, i) => { @@ -2040,6 +2284,7 @@ export { loadJsonMetadata, loadAnnotationFile, loadDetections, + loadFrameMetadata, openLink, openPathInFileManager, ingestDataFiles, diff --git a/client/platform/desktop/constants.ts b/client/platform/desktop/constants.ts index 5b24868ff..11c3ac725 100644 --- a/client/platform/desktop/constants.ts +++ b/client/platform/desktop/constants.ts @@ -13,6 +13,8 @@ export const ProjectsFolderName = 'DIVE_Projects'; export const JobsFolderName = 'DIVE_Jobs'; export const JobsOutputFolderName = 'DIVE_Jobs_Output'; export const PipelinesFolderName = 'DIVE_Pipelines'; +export const FrameMetadataSourceExtensions = ['.txt', '.csv'] as const; +export const SingleCameraFrameMetadataKey = 'singleCam'; // Basename (without extension) of the saved "most recently used" calibration. // The stored file keeps the source file's real extension (e.g. last_calibration.npz). 
export const LastCalibrationBaseName = 'last_calibration'; diff --git a/client/platform/desktop/frontend/api.ts b/client/platform/desktop/frontend/api.ts index 4f1fb1c71..7f87e74eb 100644 --- a/client/platform/desktop/frontend/api.ts +++ b/client/platform/desktop/frontend/api.ts @@ -7,6 +7,7 @@ import type { DatasetCalibrationResult, SegmentationPredictRequest, SegmentationPredictResponse, SegmentationStatusResponse, SegmentationStereoSegmentRequest, SegmentationStereoSegmentResponse, + FrameMetadataResponse, } from 'dive-common/apispec'; import { @@ -548,6 +549,14 @@ async function loadDetections(datasetId: string) { }; } +function loadFrameMetadata( + datasetId: string, + startFrame: number, + endFrame: number, +): Promise { + return window.diveDesktop.invoke('load-frame-metadata', { datasetId, startFrame, endFrame }); +} + async function saveMetadata(id: string, args: DatasetMetaMutable) { const client = await getClient(); return client.post(`dataset/${id}/meta`, args); @@ -608,6 +617,7 @@ export { /* Standard Specification APIs */ loadMetadata, loadDetections, + loadFrameMetadata, getPipelineList, deleteTrainedPipeline, runPipeline, From 6f2ca3b0decff6544185042aefcc87d5d22eefc6 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:16:08 -0400 Subject: [PATCH 09/23] Drop redundant basePath fallback in multicam frame metadata root --- client/platform/desktop/backend/native/common.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/platform/desktop/backend/native/common.ts b/client/platform/desktop/backend/native/common.ts index 41f760aa2..2e36341eb 100644 --- a/client/platform/desktop/backend/native/common.ts +++ b/client/platform/desktop/backend/native/common.ts @@ -620,7 +620,7 @@ async function loadMulticamFrameMetadata( const rootDirectory = projectMetaData.originalBasePath || commonParentDirectory(cameraEntries.map(([, camera]) => ( - frameMetadataSourceDirectory(camera) || camera.originalBasePath + 
frameMetadataSourceDirectory(camera) ?? '' ))); const rootCandidates = await frameMetadataCandidateTexts(rootDirectory); const cameras: FrameMetadataResponse['cameras'] = {}; From 4c687dfe43c7286342ddad2801aee705ec477d32 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:22:46 -0400 Subject: [PATCH 10/23] Add client frame metadata window cache --- client/dive-common/apispec.ts | 17 +- client/dive-common/use/index.ts | 8 + .../use/useFrameMetadataWindow.spec.ts | 73 ++++++ .../dive-common/use/useFrameMetadataWindow.ts | 232 ++++++++++++++++++ 4 files changed, 329 insertions(+), 1 deletion(-) create mode 100644 client/dive-common/use/useFrameMetadataWindow.spec.ts create mode 100644 client/dive-common/use/useFrameMetadataWindow.ts diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 770144ba4..0e33a6490 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -125,8 +125,20 @@ interface FrameImage { id?: string; } +interface FrameMetadataValues { + [field: string]: string; +} + +interface FrameMetadataFrameMap { + [frame: string]: FrameMetadataValues; +} + +interface FrameMetadataCameraMap { + [camera: string]: FrameMetadataFrameMap; +} + interface FrameMetadataResponse { - cameras: Record>>; + cameras: FrameMetadataCameraMap; } export interface MultiCamImportFolderArgs { @@ -450,7 +462,10 @@ export { PipelineRuntimeParams, PipeMetadata, Pipelines, + FrameMetadataCameraMap, + FrameMetadataFrameMap, FrameMetadataResponse, + FrameMetadataValues, SaveDetectionsArgs, SaveAttributeArgs, SaveAttributeTrackFilterArgs, diff --git a/client/dive-common/use/index.ts b/client/dive-common/use/index.ts index e35d1597e..53d113c5d 100644 --- a/client/dive-common/use/index.ts +++ b/client/dive-common/use/index.ts @@ -2,10 +2,18 @@ import useModeManager from './useModeManager'; import useSave from './useSave'; import useRequest from './useRequest'; import { useLassoMode } from './useLassoMode'; +import { 
useFrameMetadataWindow } from './useFrameMetadataWindow'; export { + useFrameMetadataWindow, useModeManager, useRequest, useSave, useLassoMode, }; + +export type { + FrameMetadataWindowRange, + LoadFrameMetadata, + UseFrameMetadataWindow, +} from './useFrameMetadataWindow'; diff --git a/client/dive-common/use/useFrameMetadataWindow.spec.ts b/client/dive-common/use/useFrameMetadataWindow.spec.ts new file mode 100644 index 000000000..6677bf336 --- /dev/null +++ b/client/dive-common/use/useFrameMetadataWindow.spec.ts @@ -0,0 +1,73 @@ +import { ref } from 'vue'; + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + describe, expect, it, vi, +} from 'vitest'; + +import type { FrameMetadataResponse } from '../apispec'; +import { useFrameMetadataWindow } from './useFrameMetadataWindow'; + +describe('useFrameMetadataWindow', () => { + it('fetches bounded playhead windows and reads active-camera rows from cache', async () => { + const datasetId = ref('dataset-id'); + const frame = ref(10); + const selectedCamera = ref('port'); + const responses: FrameMetadataResponse[] = [ + { + cameras: { + port: { + 10: { latitude: '58.10', depth_m: '100' }, + 12: { latitude: '58.12', depth_m: '120' }, + }, + starboard: { + 10: { latitude: '59.10', depth_m: '200' }, + 12: { latitude: '59.12', depth_m: '220' }, + }, + }, + }, + { + cameras: { + port: { + 13: { latitude: '58.13', depth_m: '130' }, + }, + starboard: { + 13: { latitude: '59.13', depth_m: '230' }, + }, + }, + }, + ]; + const loadFrameMetadata = vi.fn(async () => responses.shift() ?? 
{ cameras: {} }); + + const metadata = useFrameMetadataWindow({ + datasetId, + frame, + selectedCamera, + loadFrameMetadata, + windowSize: 5, + }); + + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(loadFrameMetadata).toHaveBeenLastCalledWith('dataset-id', 8, 12); + expect(metadata.currentRows.value).toEqual({ latitude: '58.10', depth_m: '100' }); + + frame.value = 12; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(metadata.currentRows.value).toEqual({ latitude: '58.12', depth_m: '120' }); + + selectedCamera.value = 'starboard'; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(metadata.currentRows.value).toEqual({ latitude: '59.12', depth_m: '220' }); + + frame.value = 13; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(2); + expect(loadFrameMetadata).toHaveBeenLastCalledWith('dataset-id', 11, 15); + expect(metadata.windowRange.value).toEqual({ startFrame: 11, endFrame: 15 }); + expect(metadata.currentRows.value).toEqual({ latitude: '59.13', depth_m: '230' }); + expect(metadata.cameras.value.port[10]).toBeUndefined(); + }); +}); diff --git a/client/dive-common/use/useFrameMetadataWindow.ts b/client/dive-common/use/useFrameMetadataWindow.ts new file mode 100644 index 000000000..96e9c53e7 --- /dev/null +++ b/client/dive-common/use/useFrameMetadataWindow.ts @@ -0,0 +1,232 @@ +import { + computed, readonly, ref, watch, +} from 'vue'; +import type { Ref } from 'vue'; + +import type { + FrameMetadataCameraMap, + FrameMetadataResponse, + FrameMetadataValues, +} from '../apispec'; + +export const DEFAULT_FRAME_METADATA_WINDOW_SIZE = 101; + +export interface FrameMetadataWindowRange { + startFrame: number; + endFrame: number; +} + +export type LoadFrameMetadata = ( + datasetId: string, + startFrame: number, + endFrame: number +) => Promise; + +interface 
UseFrameMetadataWindowOptions { + datasetId: Readonly>; + frame: Readonly>; + selectedCamera: Readonly>; + loadFrameMetadata?: LoadFrameMetadata; + windowSize?: number; + maxFrame?: Readonly>; +} + +function finiteFloor(value: number, fallback: number) { + if (!Number.isFinite(value)) { + return fallback; + } + return Math.floor(value); +} + +function normalizeWindowSize(windowSize: number) { + return Math.max(1, finiteFloor(windowSize, DEFAULT_FRAME_METADATA_WINDOW_SIZE)); +} + +function normalizeMaxFrame(maxFrame: number | undefined) { + if (maxFrame === undefined || !Number.isFinite(maxFrame)) { + return undefined; + } + return Math.max(0, Math.floor(maxFrame)); +} + +function normalizeFrame(frame: number, maxFrame?: number) { + const safeFrame = Math.max(0, finiteFloor(frame, 0)); + if (maxFrame === undefined) { + return safeFrame; + } + return Math.min(safeFrame, maxFrame); +} + +function containsFrame(range: FrameMetadataWindowRange | null, frame: number) { + return !!range && range.startFrame <= frame && frame <= range.endFrame; +} + +function errorMessage(err: unknown) { + if (err instanceof Error) { + return err.message; + } + return String(err); +} + +export function frameMetadataWindowForFrame( + frame: number, + windowSize = DEFAULT_FRAME_METADATA_WINDOW_SIZE, + maxFrame?: number, +): FrameMetadataWindowRange { + const size = normalizeWindowSize(windowSize); + const safeMaxFrame = normalizeMaxFrame(maxFrame); + const targetFrame = normalizeFrame(frame, safeMaxFrame); + const framesBefore = Math.floor((size - 1) / 2); + + let startFrame = Math.max(0, targetFrame - framesBefore); + let endFrame = startFrame + size - 1; + + if (safeMaxFrame !== undefined && endFrame > safeMaxFrame) { + endFrame = safeMaxFrame; + startFrame = Math.max(0, endFrame - size + 1); + } + + return { startFrame, endFrame }; +} + +export function useFrameMetadataWindow({ + datasetId, + frame, + selectedCamera, + loadFrameMetadata, + windowSize = DEFAULT_FRAME_METADATA_WINDOW_SIZE, + 
maxFrame, +}: UseFrameMetadataWindowOptions) { + const cameras = ref({}); + const windowRange = ref(null); + const loadedDatasetId = ref(null); + const loading = ref(false); + const error = ref(null); + + let requestToken = 0; + let pendingRequest: Promise | null = null; + let pendingDatasetId: string | null = null; + let pendingRange: FrameMetadataWindowRange | null = null; + + function clearCache() { + cameras.value = {}; + windowRange.value = null; + loadedDatasetId.value = null; + } + + async function fetchWindow(targetFrame: number) { + if (!loadFrameMetadata || !datasetId.value) { + clearCache(); + return; + } + + const requestDatasetId = datasetId.value; + const requestRange = frameMetadataWindowForFrame( + targetFrame, + windowSize, + maxFrame?.value, + ); + + if ( + pendingRequest + && pendingDatasetId === requestDatasetId + && containsFrame(pendingRange, targetFrame) + ) { + return pendingRequest; + } + + if (loadedDatasetId.value !== requestDatasetId) { + clearCache(); + } + + const token = requestToken + 1; + requestToken = token; + pendingDatasetId = requestDatasetId; + pendingRange = requestRange; + loading.value = true; + error.value = null; + + pendingRequest = (async () => { + try { + const response = await loadFrameMetadata( + requestDatasetId, + requestRange.startFrame, + requestRange.endFrame, + ); + + if (token === requestToken) { + cameras.value = response.cameras; + windowRange.value = requestRange; + loadedDatasetId.value = requestDatasetId; + } + } catch (err) { + if (token === requestToken) { + error.value = errorMessage(err); + } + } finally { + if (token === requestToken) { + loading.value = false; + pendingRequest = null; + pendingDatasetId = null; + pendingRange = null; + } + } + })(); + + return pendingRequest; + } + + async function ensureFrameLoaded() { + const targetFrame = normalizeFrame(frame.value, normalizeMaxFrame(maxFrame?.value)); + if ( + loadedDatasetId.value === datasetId.value + && containsFrame(windowRange.value, 
targetFrame) + ) { + return; + } + + if ( + pendingRequest + && pendingDatasetId === datasetId.value + && containsFrame(pendingRange, targetFrame) + ) { + return pendingRequest; + } + + return fetchWindow(targetFrame); + } + + const currentFrameKey = computed(() => String( + normalizeFrame(frame.value, normalizeMaxFrame(maxFrame?.value)), + )); + const currentRows = computed(() => ( + cameras.value[selectedCamera.value]?.[currentFrameKey.value] ?? null + )); + const currentEntries = computed(() => ( + currentRows.value ? Object.entries(currentRows.value) : [] + )); + const hasMetadataSource = computed(() => Object.keys(cameras.value).length > 0); + const unsupported = computed(() => loadFrameMetadata === undefined); + + watch( + () => [datasetId.value, currentFrameKey.value, maxFrame?.value], + () => { + void ensureFrameLoaded(); + }, + { immediate: true }, + ); + + return { + cameras: readonly(cameras), + currentEntries, + currentRows, + ensureFrameLoaded, + error: readonly(error), + hasMetadataSource, + loading: readonly(loading), + unsupported, + windowRange: readonly(windowRange), + }; +} + +export type UseFrameMetadataWindow = ReturnType; From 9d08d21e3ad87ed536ed7ab4684d762009bc54c0 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:32:18 -0400 Subject: [PATCH 11/23] Fix frame metadata window cache lint violations --- client/dive-common/use/useFrameMetadataWindow.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/client/dive-common/use/useFrameMetadataWindow.ts b/client/dive-common/use/useFrameMetadataWindow.ts index 96e9c53e7..4256f0550 100644 --- a/client/dive-common/use/useFrameMetadataWindow.ts +++ b/client/dive-common/use/useFrameMetadataWindow.ts @@ -71,7 +71,7 @@ function errorMessage(err: unknown) { export function frameMetadataWindowForFrame( frame: number, windowSize = DEFAULT_FRAME_METADATA_WINDOW_SIZE, - maxFrame?: number, + maxFrame: number | undefined = undefined, ): FrameMetadataWindowRange { const 
size = normalizeWindowSize(windowSize); const safeMaxFrame = normalizeMaxFrame(maxFrame); @@ -117,7 +117,7 @@ export function useFrameMetadataWindow({ async function fetchWindow(targetFrame: number) { if (!loadFrameMetadata || !datasetId.value) { clearCache(); - return; + return undefined; } const requestDatasetId = datasetId.value; @@ -182,7 +182,7 @@ export function useFrameMetadataWindow({ loadedDatasetId.value === datasetId.value && containsFrame(windowRange.value, targetFrame) ) { - return; + return undefined; } if ( @@ -211,7 +211,7 @@ export function useFrameMetadataWindow({ watch( () => [datasetId.value, currentFrameKey.value, maxFrame?.value], () => { - void ensureFrameLoaded(); + ensureFrameLoaded(); }, { immediate: true }, ); From dfa5f55de23786a3644dab3659e4e62b3fa8bad6 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 11:51:03 -0400 Subject: [PATCH 12/23] Add frame info metadata panel --- .../dive-common/components/FrameInfo.spec.ts | 189 ++++++++++++++++++ client/dive-common/components/FrameInfo.vue | 121 +++++++++++ client/dive-common/store/context.ts | 5 + 3 files changed, 315 insertions(+) create mode 100644 client/dive-common/components/FrameInfo.spec.ts create mode 100644 client/dive-common/components/FrameInfo.vue diff --git a/client/dive-common/components/FrameInfo.spec.ts b/client/dive-common/components/FrameInfo.spec.ts new file mode 100644 index 000000000..c559879bc --- /dev/null +++ b/client/dive-common/components/FrameInfo.spec.ts @@ -0,0 +1,189 @@ +// @vitest-environment jsdom +// eslint-disable-next-line import/no-extraneous-dependencies -- Vue Test Utils is only used in tests +import { mount } from '@vue/test-utils'; +import { + defineComponent, nextTick, ref, +} from 'vue'; + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + describe, expect, it, vi, +} from 'vitest'; + +import { + DatasetMeta, + FrameMetadataResponse, + provideApi, +} from 'dive-common/apispec'; 
+import { + dummyHandler, + dummyState, + provideAnnotator, +} from 'vue-media-annotator/provides'; +import FrameInfo from './FrameInfo.vue'; + +function flushPromises() { + return new Promise((resolve) => { + window.setTimeout(resolve, 0); + }); +} + +function apiWithFrameMetadata( + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise, +): Parameters[0] { + return { + getPipelineList: async () => ({}), + runPipeline: async () => undefined, + deleteTrainedPipeline: async () => undefined, + exportTrainedPipeline: async () => undefined, + getDatasetCalibration: async () => null, + getTrainingConfigurations: async () => ({ training: { configs: [], default: '' }, models: {} }), + runTraining: async () => undefined, + loadMetadata: async () => ({} as DatasetMeta), + loadDetections: async () => ({ + version: 2, + tracks: [], + groups: [], + sets: [], + }), + loadFrameMetadata, + saveDetections: async () => undefined, + saveMetadata: async () => undefined, + saveAttributes: async () => undefined, + saveAttributeTrackFilters: async () => undefined, + openFromDisk: async () => ({ canceled: true, filePaths: [] }), + importAnnotationFile: async () => false, + }; +} + +function mountFrameInfo({ + response, + loadFrameMetadata, + selectedCamera = 'port', +}: { + response?: FrameMetadataResponse; + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise; + selectedCamera?: string; +} = {}) { + const state = dummyState(); + state.datasetId = ref('dataset-id'); + state.selectedCamera = ref(selectedCamera); + state.time = { + ...state.time, + frame: ref(10), + }; + const loader = loadFrameMetadata ?? ( + response === undefined + ? 
undefined + : vi.fn(async () => response) + ); + const api = apiWithFrameMetadata(loader); + + const Root = defineComponent({ + components: { FrameInfo }, + setup() { + provideApi(api); + provideAnnotator( + state, + dummyHandler(() => undefined), + {} as Parameters[2], + ); + return {}; + }, + template: '', + }); + + const wrapper = mount(Root); + return { wrapper, state, loadFrameMetadata: loader }; +} + +describe('FrameInfo', () => { + it('renders the active frame metadata fields in source order with raw values', async () => { + const { wrapper } = mountFrameInfo({ + response: { + cameras: { + port: { + 10: { + latitude: '58.10', + depth_m: '100', + note: ' raw text ', + }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.findAll('.frame-info-key').wrappers.map((item) => item.text())) + .toEqual(['latitude', 'depth_m', 'note']); + expect(wrapper.findAll('.frame-info-value').wrappers.map((item) => item.element.textContent)) + .toEqual(['58.10', '100', ' raw text ']); + expect(wrapper.text()).not.toContain('Frame 10'); + expect(wrapper.find('input').exists()).toBe(false); + expect(wrapper.find('button').exists()).toBe(false); + }); + + it('shows the unsupported platform state when no load API is provided', async () => { + const { wrapper } = mountFrameInfo(); + + await nextTick(); + + expect(wrapper.text()).toContain('Frame metadata is not supported on this platform.'); + }); + + it('shows the no-source state after an empty cameras response', async () => { + const { wrapper } = mountFrameInfo({ response: { cameras: {} } }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.text()).toContain('No frame metadata source found.'); + expect(wrapper.text()).toContain('Place a .txt or .csv telemetry file next to the imagery.'); + }); + + it('shows the no-current-frame state when the dataset has metadata but not this frame', async () => { + const { wrapper } = mountFrameInfo({ + response: { + cameras: { + port: { + 11: 
{ latitude: '58.11' }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.text()).toContain('No frame metadata for the current frame.'); + }); + + it('follows the active multicam camera from the cached frame window', async () => { + const loadFrameMetadata = vi.fn(async () => ({ + cameras: { + port: { + 10: { latitude: '58.10' }, + }, + starboard: { + 10: { latitude: '59.10' }, + }, + }, + })); + const { wrapper, state } = mountFrameInfo({ loadFrameMetadata }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.text()).toContain('58.10'); + state.selectedCamera.value = 'starboard'; + await nextTick(); + + expect(wrapper.text()).toContain('59.10'); + expect(wrapper.text()).not.toContain('58.10'); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + }); +}); diff --git a/client/dive-common/components/FrameInfo.vue b/client/dive-common/components/FrameInfo.vue new file mode 100644 index 000000000..dc99934d2 --- /dev/null +++ b/client/dive-common/components/FrameInfo.vue @@ -0,0 +1,121 @@ + + + + + diff --git a/client/dive-common/store/context.ts b/client/dive-common/store/context.ts index c6ee24ff6..41cfc3f95 100644 --- a/client/dive-common/store/context.ts +++ b/client/dive-common/store/context.ts @@ -7,6 +7,7 @@ import AttributesSideBar from 'dive-common/components/Attributes/AttributesSideB import MultiCamTools from 'dive-common/components/MultiCamTools.vue'; import AttributeTrackFilters from 'vue-media-annotator/components/AttributeTrackFilters.vue'; import DatasetInfo from 'dive-common/components/DatasetInfo.vue'; +import FrameInfo from 'dive-common/components/FrameInfo.vue'; interface ContextState { last: string; @@ -30,6 +31,10 @@ const componentMap: Record = { description: 'Dataset Info', component: DatasetInfo, }, + [FrameInfo.name]: { + description: 'Frame Info', + component: FrameInfo, + }, [TypeThreshold.name]: { description: 'Threshold Controls', component: TypeThreshold, From 
af9459c6068c0186e9ec4730286befa74683da2c Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 13:06:55 -0400 Subject: [PATCH 13/23] Remove frame metadata import export paths --- .../desktop/backend/native/common.spec.ts | 9 --- .../desktop/backend/serializers/coco.spec.ts | 11 +++ .../desktop/backend/serializers/viame.spec.ts | 17 +++++ server/dive_server/crud.py | 1 + server/dive_server/crud_rpc.py | 35 +++++++-- server/tests/test_deserialize_kwcoco_json.py | 2 + server/tests/test_serialize_viame_csv.py | 1 + server/tests/test_update_metadata.py | 75 +++++++++++++++++++ 8 files changed, 135 insertions(+), 16 deletions(-) diff --git a/client/platform/desktop/backend/native/common.spec.ts b/client/platform/desktop/backend/native/common.spec.ts index 38dd75b7c..abe12de01 100644 --- a/client/platform/desktop/backend/native/common.spec.ts +++ b/client/platform/desktop/backend/native/common.spec.ts @@ -341,16 +341,8 @@ beforeEach(() => { 'image_0002.jpg', 'image_0003.jpg', ], - frameMetadataFields: ['stale_project_field'], }), 'result_whatever.json': JSON.stringify({}), - 'frame_metadata.json': JSON.stringify({ - cameras: { - singleCam: { - 1: { stale_project_field: 'do-not-read' }, - }, - }, - }), auxiliary: {}, }, projectidFrameMetadataNoSource: { @@ -363,7 +355,6 @@ beforeEach(() => { originalImageFiles: [ 'image_0001.jpg', ], - frameMetadataFields: ['stale_project_field'], }), 'result_whatever.json': JSON.stringify({}), auxiliary: {}, diff --git a/client/platform/desktop/backend/serializers/coco.spec.ts b/client/platform/desktop/backend/serializers/coco.spec.ts index 774a48678..da1159e50 100644 --- a/client/platform/desktop/backend/serializers/coco.spec.ts +++ b/client/platform/desktop/backend/serializers/coco.spec.ts @@ -213,6 +213,17 @@ describe('COCO serializer', () => { expect(out.annotations[0].dive_notes).toEqual(['exported note']); }); + it('does not include frame metadata in COCO exports', async () => { + await 
serializeFile('/output/out.coco.json', annotationSchema, { + ...imageMeta, + frameMetadataFields: ['depth'], + frameMetadata: { singleCam: { 0: { depth: '192.80' } } }, + } as JsonMeta); + const out = await fs.readJSON('/output/out.coco.json'); + expect(out.info).not.toHaveProperty('dive_frame_metadata'); + expect(out.info.dive_extensions).not.toContain('dive_frame_metadata'); + }); + // --- datasetInfo passthrough --- const datasetInfo = { diff --git a/client/platform/desktop/backend/serializers/viame.spec.ts b/client/platform/desktop/backend/serializers/viame.spec.ts index a01b5d198..f64bd779d 100644 --- a/client/platform/desktop/backend/serializers/viame.spec.ts +++ b/client/platform/desktop/backend/serializers/viame.spec.ts @@ -350,6 +350,23 @@ describe('VIAME datasetInfo passthrough', () => { expect(fields?.some((field) => field.startsWith('dataset_info'))).toBe(false); }); + it('does not include frame metadata fields in VIAME exports', async () => { + const path = '/home/test.json'; + const stream = fs.createWriteStream(path); + await serialize(stream, data, { + ...meta, + frameMetadataFields: ['depth'], + frameMetadata: { singleCam: { 0: { depth: '192.80' } } }, + } as JsonMeta, new Set(), { + excludeBelowThreshold: false, + header: true, + }); + const output = fs.readFileSync(path).toString(); + expect(output).not.toContain('frameMetadataFields'); + expect(output).not.toContain('frameMetadata'); + expect(output).not.toContain('frame_metadata'); + }); + it('restores datasetInfo from the # metadata line on parse', async () => { const path = '/home/test.json'; const stream = fs.createWriteStream(path); diff --git a/server/dive_server/crud.py b/server/dive_server/crud.py index a584cb9dd..7ec0dfabe 100644 --- a/server/dive_server/crud.py +++ b/server/dive_server/crud.py @@ -24,6 +24,7 @@ class FileType(Enum): COCO_JSON = 3 DIVE_CONF = 4 MEVA_KPF = 5 + FRAME_METADATA = 6 def get_validated_model(model: BaseModel, **kwargs): diff --git 
a/server/dive_server/crud_rpc.py b/server/dive_server/crud_rpc.py index a5a416c97..a38847935 100644 --- a/server/dive_server/crud_rpc.py +++ b/server/dive_server/crud_rpc.py @@ -21,7 +21,7 @@ from dive_tasks.multicam_pipeline import is_stereo_or_multicam_pipeline, pipeline_requires_input from dive_utils import TRUTHY_META_VALUES, asbool, constants, fromMeta, models, types from dive_utils.constants import TrainingModelExtensions -from dive_utils.serializers import dive, kpf, kwcoco, viame +from dive_utils.serializers import dive, frame_metadata, kpf, kwcoco, viame class RunTrainingArgs(BaseModel): @@ -511,6 +511,10 @@ def run_training( ) +def _is_stored_frame_metadata_json(data: dict) -> bool: + return set(data.keys()) == {'cameras'} and isinstance(data.get('cameras'), dict) + + def _get_data_by_type( file: types.GirderModel, image_map: Optional[Dict[str, int]] = None, @@ -533,12 +537,17 @@ def _get_data_by_type( # Discover the type of the mystery file if file['exts'][-1] == 'csv': - as_type = crud.FileType.VIAME_CSV + if image_map is not None and frame_metadata.is_frame_metadata(file_string, image_map): + as_type = crud.FileType.FRAME_METADATA + else: + as_type = crud.FileType.VIAME_CSV elif file['exts'][-1] == 'json': data_dict = json.loads(file_string) if type(data_dict) is list: raise RestException('No array-type json objects are supported') - if kwcoco.is_coco_json(data_dict): + if _is_stored_frame_metadata_json(data_dict): + as_type = crud.FileType.FRAME_METADATA + elif kwcoco.is_coco_json(data_dict): as_type = crud.FileType.COCO_JSON elif models.MetadataMutable.is_dive_configuration(data_dict): data_dict = models.MetadataMutable(**data_dict).dict(exclude_none=True) @@ -569,6 +578,13 @@ def _get_data_by_type( 'attributes': attributes, 'type': as_type, }, warnings + if as_type == crud.FileType.FRAME_METADATA: + return { + 'annotations': None, + 'meta': None, + 'attributes': None, + 'type': as_type, + }, warnings if as_type == crud.FileType.MEVA_KPF: converted, 
attributes = kpf.convert(kpf.load(file_string)) return { @@ -650,10 +666,7 @@ def process_items( # Processing order: oldest to newest sort=[("created", pymongo.ASCENDING)], ) - auxiliary = crud.get_or_create_auxiliary_folder( - folder, - user, - ) + auxiliary = None aggregate_warnings = [] for item in unprocessed_items: file: Optional[types.GirderModel] = next(Item().childFiles(item), None) @@ -677,6 +690,14 @@ def process_items( Item().remove(item) raise RestException(f'Unknown file type for {file["name"]}') + if results['type'] == crud.FileType.FRAME_METADATA: + continue + + if auxiliary is None: + auxiliary = crud.get_or_create_auxiliary_folder( + folder, + user, + ) item['meta'][constants.ProcessedMarker] = True Item().move(item, auxiliary) if results['annotations']: diff --git a/server/tests/test_deserialize_kwcoco_json.py b/server/tests/test_deserialize_kwcoco_json.py index 396d29526..2139e48fe 100644 --- a/server/tests/test_deserialize_kwcoco_json.py +++ b/server/tests/test_deserialize_kwcoco_json.py @@ -754,6 +754,8 @@ def test_export_dive_as_coco_writes_dataset_info(): ) assert coco["info"]["dive_dataset_info"] == DATASET_INFO assert "dive_dataset_info" in coco["info"]["dive_extensions"] + assert "dive_frame_metadata" not in coco["info"] + assert "dive_frame_metadata" not in coco["info"]["dive_extensions"] @pytest.mark.parametrize("datasetInfo", [None, {}]) diff --git a/server/tests/test_serialize_viame_csv.py b/server/tests/test_serialize_viame_csv.py index ea647f0ec..f0667115a 100644 --- a/server/tests/test_serialize_viame_csv.py +++ b/server/tests/test_serialize_viame_csv.py @@ -523,6 +523,7 @@ def test_dataset_info_on_metadata_line(): assert isinstance(parsed['cruise'], int) assert isinstance(parsed['sta_lat'], float) assert isinstance(parsed['gfishsite_id'], str) + assert all('frame_metadata' not in field for field in fields) @pytest.mark.parametrize("datasetInfo", [None, {}]) diff --git a/server/tests/test_update_metadata.py 
b/server/tests/test_update_metadata.py index ba08140a7..5d7bf4db2 100644 --- a/server/tests/test_update_metadata.py +++ b/server/tests/test_update_metadata.py @@ -5,6 +5,7 @@ from dive_server import crud_dataset from dive_server.crud_rpc import process_items, resolve_imported_dataset_info +from dive_utils import constants @patch('dive_server.crud_dataset.Folder') @@ -153,3 +154,77 @@ def test_process_items_resolves_dataset_info_from_dive_configuration_import( assert update_payload['datasetInfo'] == expected assert update_payload['version'] == 1 assert verify is False + + +@patch('dive_server.crud_rpc.crud_annotation.save_annotations') +@patch('dive_server.crud_rpc.crud.saveImportAttributes') +@patch('dive_server.crud_rpc.crud_dataset.update_metadata') +@patch('dive_server.crud_rpc.crud.valid_images') +@patch('dive_server.crud_rpc.crud.get_or_create_auxiliary_folder') +@patch('dive_server.crud_rpc.File') +@patch('dive_server.crud_rpc.Item') +@patch('dive_server.crud_rpc.Folder') +@pytest.mark.parametrize( + ('name', 'exts', 'payload'), + [ + ( + 'navigation.csv', + ['csv'], + '\n'.join( + [ + 'filename,depth,temperature', + 'image_0001.jpg,192.80,4.0', + 'image_0002.jpg,193.10,4.1', + '', + ] + ), + ), + ( + 'frame_metadata.json', + ['json'], + json.dumps({'cameras': {'singleCam': {'0': {'depth': '192.80'}}}}), + ), + ], +) +def test_process_items_leaves_frame_metadata_import_sources_in_dataset_folder( + folder_cls, + item_cls, + file_cls, + get_auxiliary_folder, + valid_images, + update_metadata, + save_import_attributes, + save_annotations, + name, + exts, + payload, +): + folder = { + '_id': 'dataset-id', + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + 'fps': 5, + }, + } + item = {'_id': 'item-id', 'name': name, 'meta': {}} + file = {'_id': 'file-id', 'name': name, 'exts': exts} + + folder_cls.return_value.childItems.return_value = [item] + item_cls.return_value.childFiles.return_value = iter([file]) + 
file_cls.return_value.download.return_value = lambda: [payload.encode()] + valid_images.return_value = [ + {'name': 'image_0001.jpg'}, + {'name': 'image_0002.jpg'}, + ] + + warnings = process_items(folder, {'_id': 'user-id'}) + + assert warnings == [] + assert constants.ProcessedMarker not in item['meta'] + item_cls.return_value.move.assert_not_called() + item_cls.return_value.remove.assert_not_called() + get_auxiliary_folder.assert_not_called() + save_annotations.assert_not_called() + save_import_attributes.assert_not_called() + update_metadata.assert_not_called() From d986748b19419dcad277d0ae1f48451bbeedea1b Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 13:23:55 -0400 Subject: [PATCH 14/23] Add frame metadata fixture contract tests --- .../backend/serializers/frameMetadata.spec.ts | 123 ++++++++++++++++++ server/tests/test_frame_metadata.py | 93 +++++++++++++ 2 files changed, 216 insertions(+) diff --git a/client/platform/desktop/backend/serializers/frameMetadata.spec.ts b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts index 565fa375f..13a409ede 100644 --- a/client/platform/desktop/backend/serializers/frameMetadata.spec.ts +++ b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts @@ -1,11 +1,77 @@ /// +import fs from 'fs-extra'; +import path from 'path'; + import { + ParsedFrameMetadata, findJoinColumns, normalizeKey, parseFrameMetadataSource, selectFrameMetadataSource, } from 'platform/desktop/backend/serializers/frameMetadata'; +type ContractRecord = Record; +type ContractSource = { + header: string[]; + recordsByFrame: Record; + cameras: Record; +}; +type Contract = { + selectionStatus: Record<'missing' | 'ambiguous', 'none' | 'selected'>; + sources: Record; +}; + +const fixtureDir = path.resolve( + process.cwd(), + '../../..', + 'test-datasets', + 'fixtures', + 'frame-metadata', +); +const contractPath = path.join(fixtureDir, 'synthetic_auv_nav_expected.json'); + +function loadContract(): Contract { + 
return fs.readJSONSync(contractPath) as Contract; +} + +function fixtureText(sourceName: string): string { + return fs.readFileSync(path.join(fixtureDir, sourceName), 'utf8'); +} + +function mediaKeys( + cameraRecords: Record, + joinColumn: string, +): Map { + return new Map(Object.entries(cameraRecords).map(([frame, record]) => ( + [normalizeKey(record[joinColumn]), Number(frame)] + ))); +} + +function recordsByFrame( + source: ParsedFrameMetadata, + keys: Map, +): Record { + const records: Record = {}; + Array.from(keys.entries()) + .sort(([, frameA], [, frameB]) => frameA - frameB) + .forEach(([key, frame]) => { + if (source.records[key] !== undefined) { + records[String(frame)] = source.records[key]; + } + }); + return records; +} + +function sourceStatus( + source: ReturnType, +): 'none' | 'selected' { + return source === null ? 'none' : 'selected'; +} + describe('desktop frame metadata serializer', () => { it('normalizes media keys the same way as image name maps', () => { expect(normalizeKey('nested/20191009.154056.00082_rect_color.tif')).toBe( @@ -149,4 +215,61 @@ describe('desktop frame metadata serializer', () => { mediaKeys, )).toBeNull(); }); + + it('matches the shared synthetic AUV fixture contract', () => { + const contract = loadContract(); + + Object.entries(contract.sources).forEach(([sourceName, expected]) => { + const text = fixtureText(sourceName); + Object.entries(expected.cameras).forEach(([camera, cameraContract]) => { + const expectedRecords = Object.fromEntries( + cameraContract.frames.map((frame) => [frame, expected.recordsByFrame[frame]]), + ); + const { joinColumn } = cameraContract; + const keys = mediaKeys(expectedRecords, joinColumn); + const source = parseFrameMetadataSource(text, keys, sourceName); + + expect(source).not.toBeNull(); + if (source === null) { + throw new Error(`Expected ${sourceName} to parse for ${camera}`); + } + expect(source.sourceName).toBe(sourceName); + expect(source.header).toEqual(expected.header); + 
expect(source.joinColumns).toEqual([joinColumn]); + expect(source.payloadColumns).toEqual(cameraContract.payloadColumns); + expect(recordsByFrame(source, keys)).toEqual(expectedRecords); + expect(Object.values(source.records).every((record) => ( + Object.values(record).every((value) => typeof value === 'string') + ))).toBe(true); + }); + }); + }); + + it('matches shared missing and ambiguous source decisions', () => { + const contract = loadContract(); + const sourceContract = contract.sources['synthetic_auv_nav_rect.txt']; + const portContract = sourceContract.cameras.port; + const portRecords = Object.fromEntries( + portContract.frames.map((frame) => [frame, sourceContract.recordsByFrame[frame]]), + ); + const keys = mediaKeys(portRecords, portContract.joinColumn); + const rectText = fixtureText('synthetic_auv_nav_rect.txt'); + + const missingSource = selectFrameMetadataSource( + [['synthetic_auv_nav_jpg.txt', fixtureText('synthetic_auv_nav_jpg.txt')]], + keys, + ); + const ambiguousSource = selectFrameMetadataSource( + [ + ['synthetic_auv_nav_rect.txt', rectText], + ['synthetic_auv_nav_rect_copy.csv', rectText], + ], + keys, + ); + + expect({ + missing: sourceStatus(missingSource), + ambiguous: sourceStatus(ambiguousSource), + }).toEqual(contract.selectionStatus); + }); }); diff --git a/server/tests/test_frame_metadata.py b/server/tests/test_frame_metadata.py index b43fa9f7b..aabb7ad66 100644 --- a/server/tests/test_frame_metadata.py +++ b/server/tests/test_frame_metadata.py @@ -1,3 +1,6 @@ +import json +from pathlib import Path + from dive_utils.serializers.frame_metadata import ( find_join_columns, normalize_key, @@ -6,6 +9,39 @@ ) +FIXTURE_DIR = ( + Path(__file__).resolve().parents[4] / "test-datasets" / "fixtures" / "frame-metadata" +) +CONTRACT_PATH = FIXTURE_DIR / "synthetic_auv_nav_expected.json" + + +def _load_contract(): + return json.loads(CONTRACT_PATH.read_text(encoding="utf-8")) + + +def _fixture_text(source_name): + return (FIXTURE_DIR / 
source_name).read_text(encoding="utf-8") + + +def _media_keys(camera_records, join_column): + return { + normalize_key(record[join_column]): int(frame) + for frame, record in camera_records.items() + } + + +def _records_by_frame(source, media_keys): + return { + str(frame): source.records[key] + for key, frame in sorted(media_keys.items(), key=lambda item: item[1]) + if key in source.records + } + + +def _source_status(source): + return "none" if source is None else "selected" + + def test_normalize_key_matches_image_name_map_keys(): assert normalize_key("nested/20191009.154056.00082_rect_color.tif") == ( "20191009.154056.00082_rect_color" @@ -149,3 +185,60 @@ def test_select_source_rejects_ambiguous_candidates_and_non_text_extensions(): ) is None ) + + +def test_shared_synthetic_auv_fixture_contract(): + contract = _load_contract() + + for source_name, expected in contract["sources"].items(): + text = _fixture_text(source_name) + for camera, camera_contract in expected["cameras"].items(): + expected_records = { + frame: expected["recordsByFrame"][frame] + for frame in camera_contract["frames"] + } + join_column = camera_contract["joinColumn"] + media_keys = _media_keys(expected_records, join_column) + + source = parse_frame_metadata_source(text, media_keys, source_name=source_name) + + assert source is not None + assert source.source_name == source_name + assert source.header == expected["header"] + assert source.join_columns == [join_column] + assert source.payload_columns == camera_contract["payloadColumns"] + assert _records_by_frame(source, media_keys) == expected_records + assert all( + isinstance(value, str) + for record in source.records.values() + for value in record.values() + ) + + +def test_shared_synthetic_auv_selection_status_contract(): + contract = _load_contract() + source_contract = contract["sources"]["synthetic_auv_nav_rect.txt"] + port_contract = source_contract["cameras"]["port"] + port_records = { + frame: 
source_contract["recordsByFrame"][frame] + for frame in port_contract["frames"] + } + media_keys = _media_keys(port_records, port_contract["joinColumn"]) + rect_text = _fixture_text("synthetic_auv_nav_rect.txt") + + missing_source = select_frame_metadata_source( + [("synthetic_auv_nav_jpg.txt", _fixture_text("synthetic_auv_nav_jpg.txt"))], + media_keys, + ) + ambiguous_source = select_frame_metadata_source( + [ + ("synthetic_auv_nav_rect.txt", rect_text), + ("synthetic_auv_nav_rect_copy.csv", rect_text), + ], + media_keys, + ) + + assert { + "missing": _source_status(missing_source), + "ambiguous": _source_status(ambiguous_source), + } == contract["selectionStatus"] From 73e3bbf5adf2c53715d820eca3384657ee3ffd61 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 14:57:25 -0400 Subject: [PATCH 15/23] Document read-time frame metadata --- docs/DataFormats.md | 49 +++++++++++++++- docs/Frame-Metadata-Architecture.md | 90 +++++++++++++++++++++++++++++ docs/UI-FrameInfo.md | 73 +++++++++++++++++++++++ mkdocs.yml | 2 + 4 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 docs/Frame-Metadata-Architecture.md create mode 100644 docs/UI-FrameInfo.md diff --git a/docs/DataFormats.md b/docs/DataFormats.md index e9491e9fa..a7ad9e7a8 100644 --- a/docs/DataFormats.md +++ b/docs/DataFormats.md @@ -5,7 +5,9 @@ hide: # Data Formats -DIVE Desktop and Web support a number of annotation and configuration formats. The following formats can be uploaded or imported alongside your media and will be automatically parsed. +DIVE Desktop and Web support a number of annotation, configuration, and +media-side metadata formats. The annotation and configuration formats below can +be uploaded or imported alongside your media and will be automatically parsed. * DIVE Annotation JSON (default annotation format) * DIVE Configuration JSON @@ -13,6 +15,51 @@ DIVE Desktop and Web support a number of annotation and configuration formats. 
* KPF (KWIVER Packet Format) * COCO and KWCOCO +Per-frame telemetry sidecars are different: DIVE reads matching `.txt` and +`.csv` files from the image-sequence folder on demand. They are not imported into +annotations or exported in v1. + +## Per-frame Metadata Text Sidecars + +DIVE can display read-only per-frame telemetry in the +[Frame Info panel](UI-FrameInfo.md). The stored form is a delimited text file +placed next to the image sequence. DIVE reads it at view time and joins rows to +frames by filename value. + +Supported sidecar contract: + +* `.txt` or `.csv` file in the dataset folder for single-camera image sequences. +* For multicamera image sequences, either one shared `.txt` or `.csv` file in + the multicam parent folder, or one sidecar in each camera child folder. +* Header row with field names. +* Comma, tab, or whitespace delimiter. +* At least one filename column whose values match the image filenames. +* At least one metadata column beyond the filename column. + +Example: + +```text +image_file timestamp latitude longitude water_depth +img_0001.tif 15:40:56 46.575870 -124.603094 192.80 +img_0002.tif 15:41:04 46.575912 -124.603080 193.10 +``` + +Rows are matched by filename value, not by row order. DIVE ignores the filename +extension while matching, so `img_0001.tif` can match an image key of +`img_0001`. Rows that do not match an image are omitted. + +For multicamera data, a shared source can contain one filename column per camera, +such as `port_image` and `starboard_image`. Each active camera displays the rows +that matched that camera's imagery. + +Values are shown as raw strings in source field order. DIVE does not infer +types, units, or pinned display order for v1 frame telemetry. + +Frame metadata sidecars are read-only. They are not edited in DIVE, saved as +derived metadata, imported as annotations, or included in VIAME, DIVE JSON, COCO, +KWCOCO, or zip exports. 
Video telemetry, embedded KLV, embedded EXIF, and +manually selecting a source from another location are future work. + ## DIVE Annotation JSON !!! info diff --git a/docs/Frame-Metadata-Architecture.md b/docs/Frame-Metadata-Architecture.md new file mode 100644 index 000000000..affe49a25 --- /dev/null +++ b/docs/Frame-Metadata-Architecture.md @@ -0,0 +1,90 @@ +# Frame Metadata Architecture + +Per-frame metadata is read-only telemetry that describes the media at capture +time, such as timestamp, latitude, longitude, depth, or altitude. DIVE treats it +as a media-side property, not as editable annotation data. + +The stored source is the user's `.txt` or `.csv` file next to the imagery. +Everything DIVE serves or displays is a read-time projection of that source. + +## Source contract + +v1 supports delimited text sidecars for image sequences: + +* file extension `.txt` or `.csv`, +* header row, +* comma, tab, or whitespace delimiter, +* at least one column whose values match image filenames, +* at least one payload column beyond the filename column. + +The parser keeps payload values as raw strings and preserves source field order. +Filename matching is by value after normalizing the media key, so a reordered or +partial table cannot shift metadata onto the wrong frame. Rows without a filename +match are omitted. + +DIVE sniffs candidate text files in the dataset folder. Annotation and other +known DIVE formats are rejected before filename matching, including VIAME CSV. +Bare image lists and unrelated text files are ignored. If more than one distinct +candidate matches, DIVE skips frame metadata instead of guessing. + +## Read path + +The web backend exposes a windowed endpoint: + +```http +GET /dive_dataset/:id/frame_metadata?startFrame=0&endFrame=100 +``` + +`startFrame` and `endFrame` are inclusive, non-negative bounds. 
The response is +keyed by camera, then frame: + +```json +{ + "cameras": { + "singleCam": { + "0": { + "timestamp": "15:40:56", + "water_depth": "192.80" + } + } + } +} +``` + +Single-camera datasets use the camera key `singleCam`. Multicamera datasets use +their runtime camera names. A missing or unusable source returns an empty +`cameras` map. Only frames with matching metadata appear in the response. + +The desktop backend mirrors the same contract through `loadFrameMetadata`. + +## Multicamera routing + +For multicamera datasets, DIVE checks text sidecars at the parent folder and in +each child camera folder. Each camera builds its own media filename map and +selects matching rows for that camera. + +A shared root file can therefore route one row to multiple cameras through +different filename columns, for example `port_image` and `starboard_image`. +Per-camera child files are also supported. If two distinct records target the +same camera and frame, that frame is omitted rather than resolved by precedence. + +## Client behavior + +The client keeps a bounded frame window around the playhead. It fetches a new +window only when the active frame leaves the cached range. The Frame Info panel +reads from this cache and displays the active camera's current-frame values in +source order. + +Frame metadata is kept out of annotation, attribute, and dataset metadata stores. +Those stores have edit, save, revision, and export behavior that does not apply +to observed read-only telemetry. + +## Non-goals in v1 + +v1 does not write `frame_metadata.json`, maintain a field registry, import a +telemetry file into annotations, or include frame telemetry in VIAME, DIVE JSON, +COCO, KWCOCO, or zip exports. + +Video telemetry, embedded KLV, embedded EXIF, manually selected out-of-folder +sources, charting, training export, and server-side caching for very large +sources are future extensions behind the same read-time contract. 
diff --git a/docs/UI-FrameInfo.md b/docs/UI-FrameInfo.md new file mode 100644 index 000000000..7a238dd4a --- /dev/null +++ b/docs/UI-FrameInfo.md @@ -0,0 +1,73 @@ +# Frame Info + +The **Frame Info** panel shows read-only per-frame telemetry for the current +image. It is one pane of the +[context sidebar](UI-Navigation-Editing-Bar.md#context-sidebar-web). + +Frame telemetry is not an annotation stream. DIVE reads it from a `.txt` or +`.csv` sidecar file next to the imagery and displays the values for the active +frame. The sidecar remains the source of truth; DIVE does not import it into an +editable store or save a derived copy. + +## Source file + +Use a delimited text file with: + +* a header row, +* one or more columns containing image filenames, +* at least one metadata column beyond the filename column. + +The delimiter can be comma, tab, or whitespace. DIVE joins rows to frames by +matching filename values, not by row order. A row that does not match an image is +ignored instead of being shifted onto another frame. + +Example: + +```text +image_file timestamp latitude longitude water_depth +img_0001.tif 15:40:56 46.575870 -124.603094 192.80 +img_0002.tif 15:41:04 46.575912 -124.603080 193.10 +``` + +The filename extension is ignored during matching, so `img_0001.tif` matches the +image key `img_0001`. Values are displayed as raw strings in the order they +appear in the source file. + +## Placement + +For a single-camera image sequence, place the `.txt` or `.csv` file in the +dataset folder beside the images. + +For a multicamera image sequence, use either placement: + +* Place one shared file at the multicam parent folder. Each camera selects the + rows or filename column that match its own images. +* Place one file inside each camera child folder. Each file is read only for that + camera. 
+ +A shared multicam file can contain one filename column per camera, such as +`port_image` and `starboard_image`, or one filename column with separate rows for +each camera. The Frame Info panel follows the active camera, so switching cameras +switches the displayed records. + +## Display behavior + +Open **Frame Info** from the context sidebar while viewing an image-sequence +dataset. The panel updates as the playhead moves. + +The panel shows only the source fields for the active frame. It does not repeat +the current frame number or filename, which are already shown by the playback +controls. + +The panel may show an empty state when: + +* the platform or dataset type does not support frame metadata, +* no matching `.txt` or `.csv` source is present, +* the current frame has no matching row. + +Frame telemetry is read-only in v1. There is no edit, save, import, or export +flow for these values. Video telemetry, embedded KLV, embedded EXIF, and manual +selection of a source file from another location are future work. + +See [Data Formats](DataFormats.md#per-frame-metadata-text-sidecars) for the +sidecar file contract. 
diff --git a/mkdocs.yml b/mkdocs.yml index 3944a0ad8..5bc42d24e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,6 +69,7 @@ nav: - Attribute Track Filtering: UI-AttributeTrackFiltering.md - Group Manager: UI-Group-Manager.md - Dataset Info: UI-DatasetInfo.md + - Frame Info: UI-FrameInfo.md - Annotation Sets: Annotation-Sets.md - Keyboard Shortcut Reference: Mouse-Keyboard-Shortcuts.md - Advanced features: @@ -83,6 +84,7 @@ nav: - Data Formats: DataFormats.md - Developer Guide: - Codebase architecture: Architecture-For-New-Developers.md + - Frame metadata architecture: Frame-Metadata-Architecture.md - Administrator Guide: - Deployment Options Overview: Deployment-Overview.md - Provisioning Google Cloud: Deployment-Provision.md From 6f39af9e43e009fed39892d475cbff8521e558a1 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 30 Jun 2026 16:38:12 -0400 Subject: [PATCH 16/23] Move frame metadata into media metadata panel --- .../components/DatasetInfo.spec.ts | 265 ++++++++++++++ client/dive-common/components/DatasetInfo.vue | 329 +++++++++++------- .../dive-common/components/FrameInfo.spec.ts | 189 ---------- client/dive-common/components/FrameInfo.vue | 121 ------- client/dive-common/store/context.ts | 11 +- docs/DataFormats.md | 19 +- docs/Frame-Metadata-Architecture.md | 6 +- docs/UI-DatasetInfo.md | 82 ++++- docs/UI-FrameInfo.md | 73 ---- mkdocs.yml | 3 +- 10 files changed, 563 insertions(+), 535 deletions(-) create mode 100644 client/dive-common/components/DatasetInfo.spec.ts delete mode 100644 client/dive-common/components/FrameInfo.spec.ts delete mode 100644 client/dive-common/components/FrameInfo.vue delete mode 100644 docs/UI-FrameInfo.md diff --git a/client/dive-common/components/DatasetInfo.spec.ts b/client/dive-common/components/DatasetInfo.spec.ts new file mode 100644 index 000000000..2aa0ffe8f --- /dev/null +++ b/client/dive-common/components/DatasetInfo.spec.ts @@ -0,0 +1,265 @@ +// @vitest-environment jsdom +// eslint-disable-next-line 
import/no-extraneous-dependencies -- Vue Test Utils is only used in tests +import { mount } from '@vue/test-utils'; +import Vue, { + defineComponent, nextTick, ref, +} from 'vue'; + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + describe, expect, it, vi, +} from 'vitest'; + +import { + DatasetMeta, + FrameMetadataResponse, + provideApi, +} from 'dive-common/apispec'; +import { + dummyHandler, + dummyState, + provideAnnotator, +} from 'vue-media-annotator/provides'; +import DatasetInfo from './DatasetInfo.vue'; + +Vue.config.ignoredElements = [/^v-/]; + +/* Returns a promise resolved by a zero-delay window.setTimeout, so awaiting it yields to work queued earlier. */ function flushPromises() { + return new Promise((resolve) => { + window.setTimeout(resolve, 0); + }); +} + +/* Baseline DatasetMeta fixture; mountDatasetInfo uses it when no metadata override is passed. */ const defaultMetadata: DatasetMeta = { + id: 'dataset-id', + imageData: [], + videoUrl: undefined, + type: 'image-sequence', + fps: 5, + name: 'Mouss Set', + createdAt: '2024-01-02T03:04:05.000Z', + originalFps: 10, + subType: null, + multiCamMedia: null, + datasetInfo: { + cruise: '2403', + station: 'TXN-012', + }, +}; + +/* Builds the object passed to provideApi: loadMetadata, loadFrameMetadata and saveMetadata are injected by the caller, every other endpoint is an inert async stub. */ function apiWithMetadata({ + loadMetadata, + loadFrameMetadata, + saveMetadata, +}: { + loadMetadata: (datasetId: string) => Promise<DatasetMeta>; + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise<FrameMetadataResponse>; + saveMetadata: Parameters<typeof provideApi>[0]['saveMetadata']; +}): Parameters<typeof provideApi>[0] { + return { + getPipelineList: async () => ({}), + runPipeline: async () => undefined, + deleteTrainedPipeline: async () => undefined, + exportTrainedPipeline: async () => undefined, + getDatasetCalibration: async () => null, + getTrainingConfigurations: async () => ({ training: { configs: [], default: '' }, models: {} }), + runTraining: async () => undefined, + loadMetadata, + loadDetections: async () => ({ + version: 2, + tracks: [], + groups: [], + sets: [], + }), + loadFrameMetadata, + saveDetections: async () => undefined, + saveMetadata, + saveAttributes: async () => undefined, + saveAttributeTrackFilters: async () => undefined, + 
openFromDisk: async () => ({ canceled: true, filePaths: [] }), + importAnnotationFile: async () => false, + }; +} + +/* Mounts DatasetInfo inside a Root component that provides the stub API and annotator state (datasetId 'dataset-id', frame 10). An explicit loadFrameMetadata wins; otherwise `response` is wrapped in a vi.fn loader; with neither, no frame-metadata loader is provided. */ function mountDatasetInfo({ + response, + loadFrameMetadata, + selectedCamera = 'port', + readOnlyMode = true, + metadata = defaultMetadata, +}: { + response?: FrameMetadataResponse; + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise<FrameMetadataResponse>; + selectedCamera?: string; + readOnlyMode?: boolean; + metadata?: DatasetMeta; +} = {}) { + const state = dummyState(); + state.datasetId = ref('dataset-id'); + state.selectedCamera = ref(selectedCamera); + state.time = { + ...state.time, + frame: ref(10), + }; + state.readOnlyMode = ref(readOnlyMode); + + const loader = loadFrameMetadata ?? ( + response === undefined + ? undefined + : vi.fn(async () => response) + ); + const loadMetadata = vi.fn(async () => metadata); + const saveMetadata = vi.fn(async () => undefined); + const api = apiWithMetadata({ loadMetadata, loadFrameMetadata: loader, saveMetadata }); + + const Root = defineComponent({ + components: { DatasetInfo }, + setup() { + provideApi(api); + provideAnnotator( + state, + dummyHandler(() => undefined), + {} as Parameters<typeof provideAnnotator>[2], + ); + return {}; + }, + template: '<DatasetInfo />', + }); + + const wrapper = mount(Root, { + stubs: { + DatasetMetaEditorDialog: true, + }, + }); + return { + wrapper, + state, + loadFrameMetadata: loader, + loadMetadata, + saveMetadata, + }; +} + +describe('DatasetInfo', () => { + it('renders frame metadata above dataset info rows in source order', async () => { + const { wrapper } = mountDatasetInfo({ + response: { + cameras: { + port: { + 10: { + latitude: '58.10', + depth_m: '100', + note: ' raw text ', + }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + const frameRows = wrapper.findAll('.frame-metadata-row').wrappers; + expect(frameRows.map((item) => item.find('.frame-metadata-key').text())) + .toEqual(['latitude', 'depth_m', 'note']); + expect(frameRows.map((item) => 
item.find('.frame-metadata-value').element.textContent)) + .toEqual(['58.10', '100', ' raw text ']); + + const text = wrapper.text(); + expect(text.indexOf('Frame Metadata')).toBeLessThan(text.indexOf('Dataset Info')); + expect(text.indexOf('Dataset Info')).toBeLessThan(text.indexOf('Custom Metadata')); + expect(wrapper.find('.dataset-info-section').text()).toContain('Mouss Set'); + expect(wrapper.find('.dataset-info-section').text()).toContain('image-sequence'); + expect(wrapper.find('.custom-metadata-section').text()).toContain('cruise'); + expect(wrapper.find('.custom-metadata-section').text()).toContain('2403'); + }); + + it('keeps frame metadata read-only without edit controls', async () => { + const { wrapper } = mountDatasetInfo({ + readOnlyMode: false, + response: { + cameras: { + port: { + 10: { + latitude: '58.10', + }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + const frameSection = wrapper.find('.frame-metadata-section'); + expect(frameSection.find('v-text-field').exists()).toBe(false); + expect(frameSection.find('v-btn').exists()).toBe(false); + expect(wrapper.find('.custom-metadata-section').find('v-text-field').exists()).toBe(true); + expect(wrapper.find('.custom-metadata-section').find('v-btn').exists()).toBe(true); + }); + + it('shows the unsupported platform state when no load API is provided', async () => { + const { wrapper } = mountDatasetInfo(); + + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('Frame metadata is not supported on this platform.'); + }); + + it('shows the no-source state after an empty cameras response', async () => { + const { wrapper } = mountDatasetInfo({ response: { cameras: {} } }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('No frame metadata source found.'); + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('Place a .txt or .csv telemetry file next to the 
imagery.'); + }); + + it('shows the no-current-frame state when the dataset has metadata but not this frame', async () => { + const { wrapper } = mountDatasetInfo({ + response: { + cameras: { + port: { + 11: { latitude: '58.11' }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('No frame metadata for the current frame.'); + }); + + it('follows the active multicam camera from the cached frame window', async () => { + const loadFrameMetadata = vi.fn(async () => ({ + cameras: { + port: { + 10: { latitude: '58.10' }, + }, + starboard: { + 10: { latitude: '59.10' }, + }, + }, + })); + const { wrapper, state } = mountDatasetInfo({ loadFrameMetadata }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('58.10'); + state.selectedCamera.value = 'starboard'; + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('59.10'); + expect(wrapper.find('.frame-metadata-section').text()).not.toContain('58.10'); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + }); +}); diff --git a/client/dive-common/components/DatasetInfo.vue b/client/dive-common/components/DatasetInfo.vue index aa3f4e23a..5435d1541 100644 --- a/client/dive-common/components/DatasetInfo.vue +++ b/client/dive-common/components/DatasetInfo.vue @@ -2,9 +2,15 @@ import { computed, defineComponent, ref, watch, } from 'vue'; -import { useDatasetId, useReadOnlyMode } from 'vue-media-annotator/provides'; +import { + useDatasetId, + useReadOnlyMode, + useSelectedCamera, + useTime, +} from 'vue-media-annotator/provides'; import { useApi, DatasetMeta } from 'dive-common/apispec'; import { usePrompt } from 'dive-common/vue-utilities/prompt-service'; +import { useFrameMetadataWindow } from 'dive-common/use'; import DatasetMetaEditorDialog from 'dive-common/components/DatasetMetaEditorDialog.vue'; export default defineComponent({ @@ 
-15,7 +21,9 @@ export default defineComponent({ setup() { const datasetId = useDatasetId(); const readOnlyMode = useReadOnlyMode(); - const { loadMetadata, saveMetadata } = useApi(); + const selectedCamera = useSelectedCamera(); + const time = useTime(); + const { loadMetadata, saveMetadata, loadFrameMetadata } = useApi(); const { prompt } = usePrompt(); const meta = ref(null); const customMeta = ref>({}); @@ -37,6 +45,29 @@ export default defineComponent({ watch(datasetId, fetchMetadata, { immediate: true }); + const frameMetadata = useFrameMetadataWindow({ + datasetId, + frame: time.frame, + selectedCamera, + loadFrameMetadata, + }); + + const frameMetadataEmptyState = computed(() => { + if (frameMetadata.unsupported.value) { + return 'Frame metadata is not supported on this platform.'; + } + if (frameMetadata.loading.value && !frameMetadata.currentEntries.value.length) { + return 'Loading frame metadata...'; + } + if (frameMetadata.error.value) { + return `Unable to load frame metadata: ${frameMetadata.error.value}`; + } + if (!frameMetadata.hasMetadataSource.value) { + return 'No frame metadata source found. Place a .txt or .csv telemetry file next to the imagery.'; + } + return 'No frame metadata for the current frame.'; + }); + const infoRows = computed(() => { const m = meta.value; if (!m) { @@ -144,6 +175,8 @@ export default defineComponent({ return { readOnlyMode, + frameMetadataEntries: frameMetadata.currentEntries, + frameMetadataEmptyState, infoRows, customMeta, customMetaKeys, @@ -166,138 +199,182 @@ export default defineComponent({