diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 55fca437c..0e33a6490 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -125,6 +125,22 @@ interface FrameImage { id?: string; } +interface FrameMetadataValues { + [field: string]: string; +} + +interface FrameMetadataFrameMap { + [frame: string]: FrameMetadataValues; +} + +interface FrameMetadataCameraMap { + [camera: string]: FrameMetadataFrameMap; +} + +interface FrameMetadataResponse { + cameras: FrameMetadataCameraMap; +} + export interface MultiCamImportFolderArgs { datasetName?: string; // Girder parent folder name (required on web) defaultDisplay: string; // In multicam the default camera to display @@ -266,6 +282,8 @@ interface Api { loadMetadata(datasetId: string): Promise; loadDetections(datasetId: string, revision?: number, set?: string): Promise; + loadFrameMetadata?(datasetId: string, startFrame: number, endFrame: number): + Promise; saveDetections(datasetId: string, args: SaveDetectionsArgs): Promise; saveMetadata(datasetId: string, metadata: DatasetMetaMutable): Promise; @@ -444,6 +462,10 @@ export { PipelineRuntimeParams, PipeMetadata, Pipelines, + FrameMetadataCameraMap, + FrameMetadataFrameMap, + FrameMetadataResponse, + FrameMetadataValues, SaveDetectionsArgs, SaveAttributeArgs, SaveAttributeTrackFilterArgs, diff --git a/client/dive-common/components/DatasetInfo.vue b/client/dive-common/components/DatasetInfo.vue deleted file mode 100644 index aa3f4e23a..000000000 --- a/client/dive-common/components/DatasetInfo.vue +++ /dev/null @@ -1,324 +0,0 @@ - - - - - diff --git a/client/dive-common/components/MediaMetadata.spec.ts b/client/dive-common/components/MediaMetadata.spec.ts new file mode 100644 index 000000000..f057f5941 --- /dev/null +++ b/client/dive-common/components/MediaMetadata.spec.ts @@ -0,0 +1,265 @@ +// @vitest-environment jsdom +// eslint-disable-next-line import/no-extraneous-dependencies -- Vue Test Utils is only used in 
tests +import { mount } from '@vue/test-utils'; +import Vue, { + defineComponent, nextTick, ref, +} from 'vue'; + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + describe, expect, it, vi, +} from 'vitest'; + +import { + DatasetMeta, + FrameMetadataResponse, + provideApi, +} from 'dive-common/apispec'; +import { + dummyHandler, + dummyState, + provideAnnotator, +} from 'vue-media-annotator/provides'; +import MediaMetadata from './MediaMetadata.vue'; + +Vue.config.ignoredElements = [/^v-/]; + +function flushPromises() { + return new Promise((resolve) => { + window.setTimeout(resolve, 0); + }); +} + +const defaultMetadata: DatasetMeta = { + id: 'dataset-id', + imageData: [], + videoUrl: undefined, + type: 'image-sequence', + fps: 5, + name: 'Mouss Set', + createdAt: '2024-01-02T03:04:05.000Z', + originalFps: 10, + subType: null, + multiCamMedia: null, + datasetInfo: { + cruise: '2403', + station: 'TXN-012', + }, +}; + +function apiWithMetadata({ + loadMetadata, + loadFrameMetadata, + saveMetadata, +}: { + loadMetadata: (datasetId: string) => Promise; + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise; + saveMetadata: Parameters[0]['saveMetadata']; +}): Parameters[0] { + return { + getPipelineList: async () => ({}), + runPipeline: async () => undefined, + deleteTrainedPipeline: async () => undefined, + exportTrainedPipeline: async () => undefined, + getDatasetCalibration: async () => null, + getTrainingConfigurations: async () => ({ training: { configs: [], default: '' }, models: {} }), + runTraining: async () => undefined, + loadMetadata, + loadDetections: async () => ({ + version: 2, + tracks: [], + groups: [], + sets: [], + }), + loadFrameMetadata, + saveDetections: async () => undefined, + saveMetadata, + saveAttributes: async () => undefined, + saveAttributeTrackFilters: async () => undefined, + openFromDisk: async () => ({ canceled: true, filePaths: [] }), + 
importAnnotationFile: async () => false, + }; +} + +function mountMediaMetadata({ + response, + loadFrameMetadata, + selectedCamera = 'port', + readOnlyMode = true, + metadata = defaultMetadata, +}: { + response?: FrameMetadataResponse; + loadFrameMetadata?: (datasetId: string, startFrame: number, endFrame: number) => + Promise; + selectedCamera?: string; + readOnlyMode?: boolean; + metadata?: DatasetMeta; +} = {}) { + const state = dummyState(); + state.datasetId = ref('dataset-id'); + state.selectedCamera = ref(selectedCamera); + state.time = { + ...state.time, + frame: ref(10), + }; + state.readOnlyMode = ref(readOnlyMode); + + const loader = loadFrameMetadata ?? ( + response === undefined + ? undefined + : vi.fn(async () => response) + ); + const loadMetadata = vi.fn(async () => metadata); + const saveMetadata = vi.fn(async () => undefined); + const api = apiWithMetadata({ loadMetadata, loadFrameMetadata: loader, saveMetadata }); + + const Root = defineComponent({ + components: { MediaMetadata }, + setup() { + provideApi(api); + provideAnnotator( + state, + dummyHandler(() => undefined), + {} as Parameters[2], + ); + return {}; + }, + template: '', + }); + + const wrapper = mount(Root, { + stubs: { + DatasetMetaEditorDialog: true, + }, + }); + return { + wrapper, + state, + loadFrameMetadata: loader, + loadMetadata, + saveMetadata, + }; +} + +describe('MediaMetadata', () => { + it('renders frame metadata above dataset info rows in source order', async () => { + const { wrapper } = mountMediaMetadata({ + response: { + cameras: { + port: { + 10: { + latitude: '58.10', + depth_m: '100', + note: ' raw text ', + }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + const frameRows = wrapper.findAll('.frame-metadata-row').wrappers; + expect(frameRows.map((item) => item.find('.frame-metadata-key').text())) + .toEqual(['latitude', 'depth_m', 'note']); + expect(frameRows.map((item) => item.find('.frame-metadata-value').element.textContent)) + 
.toEqual(['58.10', '100', ' raw text ']); + + const text = wrapper.text(); + expect(text.indexOf('Frame Metadata')).toBeLessThan(text.indexOf('Dataset Info')); + expect(text.indexOf('Dataset Info')).toBeLessThan(text.indexOf('Custom Metadata')); + expect(wrapper.find('.dataset-info-section').text()).toContain('Mouss Set'); + expect(wrapper.find('.dataset-info-section').text()).toContain('image-sequence'); + expect(wrapper.find('.custom-metadata-section').text()).toContain('cruise'); + expect(wrapper.find('.custom-metadata-section').text()).toContain('2403'); + }); + + it('keeps frame metadata read-only without edit controls', async () => { + const { wrapper } = mountMediaMetadata({ + readOnlyMode: false, + response: { + cameras: { + port: { + 10: { + latitude: '58.10', + }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + const frameSection = wrapper.find('.frame-metadata-section'); + expect(frameSection.find('v-text-field').exists()).toBe(false); + expect(frameSection.find('v-btn').exists()).toBe(false); + expect(wrapper.find('.custom-metadata-section').find('v-text-field').exists()).toBe(true); + expect(wrapper.find('.custom-metadata-section').find('v-btn').exists()).toBe(true); + }); + + it('shows the unsupported platform state when no load API is provided', async () => { + const { wrapper } = mountMediaMetadata(); + + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('Frame metadata is not supported on this platform.'); + }); + + it('shows the no-source state after an empty cameras response', async () => { + const { wrapper } = mountMediaMetadata({ response: { cameras: {} } }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('No frame metadata source found.'); + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('Place a .txt or .csv telemetry file next to the imagery.'); + }); + + it('shows the no-current-frame 
state when the dataset has metadata but not this frame', async () => { + const { wrapper } = mountMediaMetadata({ + response: { + cameras: { + port: { + 11: { latitude: '58.11' }, + }, + }, + }, + }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()) + .toContain('No frame metadata for the current frame.'); + }); + + it('follows the active multicam camera from the cached frame window', async () => { + const loadFrameMetadata = vi.fn(async () => ({ + cameras: { + port: { + 10: { latitude: '58.10' }, + }, + starboard: { + 10: { latitude: '59.10' }, + }, + }, + })); + const { wrapper, state } = mountMediaMetadata({ loadFrameMetadata }); + + await flushPromises(); + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('58.10'); + state.selectedCamera.value = 'starboard'; + await nextTick(); + + expect(wrapper.find('.frame-metadata-section').text()).toContain('59.10'); + expect(wrapper.find('.frame-metadata-section').text()).not.toContain('58.10'); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + }); +}); diff --git a/client/dive-common/components/MediaMetadata.vue b/client/dive-common/components/MediaMetadata.vue new file mode 100644 index 000000000..96948ab8f --- /dev/null +++ b/client/dive-common/components/MediaMetadata.vue @@ -0,0 +1,401 @@ + + + + + diff --git a/client/dive-common/store/context.ts b/client/dive-common/store/context.ts index c6ee24ff6..1ea7c489c 100644 --- a/client/dive-common/store/context.ts +++ b/client/dive-common/store/context.ts @@ -6,7 +6,7 @@ import GroupSidebar from 'dive-common/components/GroupSidebar.vue'; import AttributesSideBar from 'dive-common/components/Attributes/AttributesSideBar.vue'; import MultiCamTools from 'dive-common/components/MultiCamTools.vue'; import AttributeTrackFilters from 'vue-media-annotator/components/AttributeTrackFilters.vue'; -import DatasetInfo from 'dive-common/components/DatasetInfo.vue'; +import MediaMetadata 
from 'dive-common/components/MediaMetadata.vue'; interface ContextState { last: string; @@ -20,15 +20,15 @@ interface ComponentMapItem { } const state: ContextState = reactive({ - last: 'TypeThreshold', + last: 'MediaMetadata', active: null, subCategory: null, }); const componentMap: Record = { - [DatasetInfo.name]: { - description: 'Dataset Info', - component: DatasetInfo, + [MediaMetadata.name]: { + description: 'Media Metadata', + component: MediaMetadata, }, [TypeThreshold.name]: { description: 'Threshold Controls', @@ -67,7 +67,7 @@ function unregister(item: ComponentMapItem) { } function resetActive() { - state.last = 'TypeThreshold'; + state.last = 'MediaMetadata'; state.active = null; } diff --git a/client/dive-common/use/index.ts b/client/dive-common/use/index.ts index e35d1597e..53d113c5d 100644 --- a/client/dive-common/use/index.ts +++ b/client/dive-common/use/index.ts @@ -2,10 +2,18 @@ import useModeManager from './useModeManager'; import useSave from './useSave'; import useRequest from './useRequest'; import { useLassoMode } from './useLassoMode'; +import { useFrameMetadataWindow } from './useFrameMetadataWindow'; export { + useFrameMetadataWindow, useModeManager, useRequest, useSave, useLassoMode, }; + +export type { + FrameMetadataWindowRange, + LoadFrameMetadata, + UseFrameMetadataWindow, +} from './useFrameMetadataWindow'; diff --git a/client/dive-common/use/useFrameMetadataWindow.spec.ts b/client/dive-common/use/useFrameMetadataWindow.spec.ts new file mode 100644 index 000000000..6f759e0ef --- /dev/null +++ b/client/dive-common/use/useFrameMetadataWindow.spec.ts @@ -0,0 +1,115 @@ +import { nextTick, ref } from 'vue'; + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + describe, expect, it, vi, +} from 'vitest'; + +import type { FrameMetadataResponse } from '../apispec'; +import { useFrameMetadataWindow } from './useFrameMetadataWindow'; + +describe('useFrameMetadataWindow', () => { + 
it('fetches bounded playhead windows and reads active-camera rows from cache', async () => { + const datasetId = ref('dataset-id'); + const frame = ref(10); + const selectedCamera = ref('port'); + const responses: FrameMetadataResponse[] = [ + { + cameras: { + port: { + 10: { latitude: '58.10', depth_m: '100' }, + 12: { latitude: '58.12', depth_m: '120' }, + }, + starboard: { + 10: { latitude: '59.10', depth_m: '200' }, + 12: { latitude: '59.12', depth_m: '220' }, + }, + }, + }, + { + cameras: { + port: { + 13: { latitude: '58.13', depth_m: '130' }, + }, + starboard: { + 13: { latitude: '59.13', depth_m: '230' }, + }, + }, + }, + ]; + const loadFrameMetadata = vi.fn(async () => responses.shift() ?? { cameras: {} }); + + const metadata = useFrameMetadataWindow({ + datasetId, + frame, + selectedCamera, + loadFrameMetadata, + windowSize: 5, + }); + + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(loadFrameMetadata).toHaveBeenLastCalledWith('dataset-id', 8, 12); + expect(metadata.currentRows.value).toEqual({ latitude: '58.10', depth_m: '100' }); + + frame.value = 12; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(metadata.currentRows.value).toEqual({ latitude: '58.12', depth_m: '120' }); + + selectedCamera.value = 'starboard'; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(1); + expect(metadata.currentRows.value).toEqual({ latitude: '59.12', depth_m: '220' }); + + frame.value = 13; + await metadata.ensureFrameLoaded(); + expect(loadFrameMetadata).toHaveBeenCalledTimes(2); + expect(loadFrameMetadata).toHaveBeenLastCalledWith('dataset-id', 11, 15); + expect(metadata.windowRange.value).toEqual({ startFrame: 11, endFrame: 15 }); + expect(metadata.currentRows.value).toEqual({ latitude: '59.13', depth_m: '230' }); + expect(metadata.cameras.value.port[10]).toBeUndefined(); + }); + + it('ignores an in-flight response after the 
/** Number of frames requested per window when the caller does not pass `windowSize`. */
export const DEFAULT_FRAME_METADATA_WINDOW_SIZE = 101;

/** Inclusive range of frame numbers covered by one metadata request. */
export interface FrameMetadataWindowRange {
  startFrame: number;
  endFrame: number;
}

/** Platform loader that returns the metadata rows for an inclusive frame range. */
export type LoadFrameMetadata = (
  datasetId: string,
  startFrame: number,
  endFrame: number
) => Promise<FrameMetadataResponse>;

interface UseFrameMetadataWindowOptions {
  datasetId: Readonly<Ref<string>>;
  frame: Readonly<Ref<number>>;
  selectedCamera: Readonly<Ref<string>>;
  /** Omitted on platforms without frame metadata support. */
  loadFrameMetadata?: LoadFrameMetadata;
  windowSize?: number;
  /** Last valid frame index; windows and the playhead are clamped to it when set. */
  maxFrame?: Readonly<Ref<number | undefined>>;
}

/** Floors `value`, or returns `fallback` when it is NaN or infinite. */
function finiteFloor(value: number, fallback: number) {
  if (!Number.isFinite(value)) {
    return fallback;
  }
  return Math.floor(value);
}

/** Coerces a requested window size to an integer of at least 1. */
function normalizeWindowSize(windowSize: number) {
  return Math.max(1, finiteFloor(windowSize, DEFAULT_FRAME_METADATA_WINDOW_SIZE));
}

/** Returns a non-negative integer upper bound, or undefined when there is no usable bound. */
function normalizeMaxFrame(maxFrame: number | undefined) {
  if (maxFrame === undefined || !Number.isFinite(maxFrame)) {
    return undefined;
  }
  return Math.max(0, Math.floor(maxFrame));
}

/** Clamps a playhead value to an integer in `[0, maxFrame]` (no upper clamp without `maxFrame`). */
function normalizeFrame(frame: number, maxFrame?: number) {
  const safeFrame = Math.max(0, finiteFloor(frame, 0));
  if (maxFrame === undefined) {
    return safeFrame;
  }
  return Math.min(safeFrame, maxFrame);
}

/** True when `range` is set and `frame` lies inside it (both ends inclusive). */
function containsFrame(range: FrameMetadataWindowRange | null, frame: number) {
  return !!range && range.startFrame <= frame && frame <= range.endFrame;
}

/**
 * Extracts a displayable message from a rejection value.
 * Loaders may reject with plain objects rather than Error instances, so a
 * string `message` property is used when present instead of "[object Object]".
 */
function errorMessage(err: unknown) {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null) {
    const { message } = err as { message?: unknown };
    if (typeof message === 'string' && message) {
      return message;
    }
  }
  return String(err);
}

/**
 * Computes the inclusive frame window to request around `frame`.
 *
 * The window is roughly centred on the frame, never starts below 0, and is
 * shifted back (keeping its size where possible) when it would pass `maxFrame`.
 *
 * @param frame playhead frame; non-finite or negative values are treated as 0
 * @param windowSize desired number of frames; non-finite values use the default
 * @param maxFrame optional last valid frame index
 */
export function frameMetadataWindowForFrame(
  frame: number,
  windowSize = DEFAULT_FRAME_METADATA_WINDOW_SIZE,
  maxFrame: number | undefined = undefined,
): FrameMetadataWindowRange {
  const size = normalizeWindowSize(windowSize);
  const safeMaxFrame = normalizeMaxFrame(maxFrame);
  const targetFrame = normalizeFrame(frame, safeMaxFrame);
  const framesBefore = Math.floor((size - 1) / 2);

  let startFrame = Math.max(0, targetFrame - framesBefore);
  let endFrame = startFrame + size - 1;

  if (safeMaxFrame !== undefined && endFrame > safeMaxFrame) {
    endFrame = safeMaxFrame;
    startFrame = Math.max(0, endFrame - size + 1);
  }

  return { startFrame, endFrame };
}

/**
 * Keeps a cached window of per-frame metadata around the playhead.
 *
 * A new window is fetched whenever the dataset changes or the playhead leaves
 * the cached range; switching `selectedCamera` only re-reads the cache.
 * Responses that arrive after a newer request (or after the dataset was
 * cleared) are discarded.
 *
 * @returns read-only reactive state plus `ensureFrameLoaded`, which resolves
 *   once the window containing the current frame has been loaded or has failed
 */
export function useFrameMetadataWindow({
  datasetId,
  frame,
  selectedCamera,
  loadFrameMetadata,
  windowSize = DEFAULT_FRAME_METADATA_WINDOW_SIZE,
  maxFrame,
}: UseFrameMetadataWindowOptions) {
  const cameras = ref<FrameMetadataCameraMap>({});
  const windowRange = ref<FrameMetadataWindowRange | null>(null);
  const loadedDatasetId = ref<string | null>(null);
  const loading = ref(false);
  const error = ref<string | null>(null);

  // Incremented for every request (and on reset) so stale responses can be recognised.
  let requestToken = 0;
  let pendingRequest: Promise<void> | null = null;
  let pendingDatasetId: string | null = null;
  let pendingRange: FrameMetadataWindowRange | null = null;

  function clearCache() {
    cameras.value = {};
    windowRange.value = null;
    loadedDatasetId.value = null;
  }

  function clearPendingRequest() {
    // Bumping the token orphans any in-flight request.
    requestToken += 1;
    pendingRequest = null;
    pendingDatasetId = null;
    pendingRange = null;
    loading.value = false;
    error.value = null;
  }

  async function fetchWindow(targetFrame: number) {
    if (!loadFrameMetadata || !datasetId.value) {
      clearPendingRequest();
      clearCache();
      return undefined;
    }

    const requestDatasetId = datasetId.value;
    const requestRange = frameMetadataWindowForFrame(
      targetFrame,
      windowSize,
      maxFrame?.value,
    );

    // Rows cached for another dataset must not be shown while this one loads.
    if (loadedDatasetId.value !== requestDatasetId) {
      clearCache();
    }

    const token = requestToken + 1;
    requestToken = token;
    pendingDatasetId = requestDatasetId;
    pendingRange = requestRange;
    loading.value = true;
    error.value = null;

    pendingRequest = (async () => {
      try {
        const response = await loadFrameMetadata(
          requestDatasetId,
          requestRange.startFrame,
          requestRange.endFrame,
        );

        if (token === requestToken) {
          // A malformed response without `cameras` is treated as "no metadata
          // source" so the computed getters below never read undefined.
          cameras.value = response?.cameras ?? {};
          windowRange.value = requestRange;
          loadedDatasetId.value = requestDatasetId;
        }
      } catch (err) {
        if (token === requestToken) {
          error.value = errorMessage(err);
        }
      } finally {
        if (token === requestToken) {
          loading.value = false;
          pendingRequest = null;
          pendingDatasetId = null;
          pendingRange = null;
        }
      }
    })();

    return pendingRequest;
  }

  async function ensureFrameLoaded() {
    const targetFrame = normalizeFrame(frame.value, normalizeMaxFrame(maxFrame?.value));
    if (
      loadedDatasetId.value === datasetId.value
      && containsFrame(windowRange.value, targetFrame)
    ) {
      return undefined;
    }

    // Reuse an in-flight request that already covers the frame instead of issuing a duplicate.
    if (
      pendingRequest
      && pendingDatasetId === datasetId.value
      && containsFrame(pendingRange, targetFrame)
    ) {
      return pendingRequest;
    }

    return fetchWindow(targetFrame);
  }

  const currentFrameKey = computed(() => String(
    normalizeFrame(frame.value, normalizeMaxFrame(maxFrame?.value)),
  ));
  const currentRows = computed<FrameMetadataValues | null>(() => (
    cameras.value[selectedCamera.value]?.[currentFrameKey.value] ?? null
  ));
  const currentEntries = computed(() => (
    currentRows.value ? Object.entries(currentRows.value) : []
  ));
  const hasMetadataSource = computed(() => Object.keys(cameras.value).length > 0);
  const unsupported = computed(() => loadFrameMetadata === undefined);

  watch(
    () => [datasetId.value, currentFrameKey.value, maxFrame?.value],
    () => {
      ensureFrameLoaded();
    },
    { immediate: true },
  );

  return {
    cameras: readonly(cameras),
    currentEntries,
    currentRows,
    ensureFrameLoaded,
    error: readonly(error),
    hasMetadataSource,
    loading: readonly(loading),
    unsupported,
    windowRange: readonly(windowRange),
  };
}

export type UseFrameMetadataWindow = ReturnType<typeof useFrameMetadataWindow>;
b/client/platform/desktop/backend/native/common.spec.ts @@ -259,6 +259,22 @@ beforeEach(() => { 'file1.csv': '', 'file2.csv': '', }, + frameMetadataSource: { + 'image_0001.jpg': '', + 'image_0002.jpg': '', + 'image_0003.jpg': '', + 'navigation.txt': [ + 'filename,depth,temperature', + 'image_0001.jpg,192.80,4.0', + 'image_0002.jpg,193.10,4.1', + 'image_0003.jpg,193.40,4.2', + '', + ].join('\n'), + }, + frameMetadataNoSource: { + 'image_0001.jpg': '', + 'notes.txt': 'note,value\nhello,world\n', + }, }, '/home/user/viamedata': { // eslint-disable-next-line @typescript-eslint/naming-convention @@ -313,6 +329,36 @@ beforeEach(() => { 'result_whatever.json': JSON.stringify({}), auxiliary: {}, }, + projectidFrameMetadata: { + 'meta.json': JSON.stringify({ + version: 1, + id: 'projectidFrameMetadata', + type: 'image-sequence', + fps: 5, + originalBasePath: '/home/user/data/frameMetadataSource', + originalImageFiles: [ + 'image_0001.jpg', + 'image_0002.jpg', + 'image_0003.jpg', + ], + }), + 'result_whatever.json': JSON.stringify({}), + auxiliary: {}, + }, + projectidFrameMetadataNoSource: { + 'meta.json': JSON.stringify({ + version: 1, + id: 'projectidFrameMetadataNoSource', + type: 'image-sequence', + fps: 5, + originalBasePath: '/home/user/data/frameMetadataNoSource', + originalImageFiles: [ + 'image_0001.jpg', + ], + }), + 'result_whatever.json': JSON.stringify({}), + auxiliary: {}, + }, projectid2Bad: { 'meta.json': '{}', // Won't match @@ -468,6 +514,32 @@ describe('native.common', () => { .rejects.toThrow('Dataset: missingMulti is of type multiCam or stereo but contains no multiCam data'); }); + it('loadFrameMetadata reads a source sidecar next to imagery and applies the requested window', async () => { + const data = await common.loadFrameMetadata(settings, 'projectidFrameMetadata', 1, 2); + + expect(data).toEqual({ + cameras: { + singleCam: { + 1: { + filename: 'image_0002.jpg', + depth: '193.10', + temperature: '4.1', + }, + 2: { + filename: 'image_0003.jpg', + 
depth: '193.40', + temperature: '4.2', + }, + }, + }, + }); + }); + + it('loadFrameMetadata ignores non-telemetry text files', async () => { + await expect(common.loadFrameMetadata(settings, 'projectidFrameMetadataNoSource', 0, 0)) + .resolves.toEqual({ cameras: {} }); + }); + it('createWorkingDirectory creates pipeline run directories', async () => { await expect(createWorkingDirectory(settings, [], 'whatever.pipe')) .rejects.toThrow('At least 1 jsonMeta item'); diff --git a/client/platform/desktop/backend/native/common.ts b/client/platform/desktop/backend/native/common.ts index 6939811e0..d313b2eab 100644 --- a/client/platform/desktop/backend/native/common.ts +++ b/client/platform/desktop/backend/native/common.ts @@ -30,12 +30,20 @@ import { PipeMetadata, PipelineParamType, DatasetCalibrationResult, + FrameMetadataResponse, } from 'dive-common/apispec'; import * as viameSerializers from 'platform/desktop/backend/serializers/viame'; import * as nistSerializers from 'platform/desktop/backend/serializers/nist'; import * as dive from 'platform/desktop/backend/serializers/dive'; import * as coco from 'platform/desktop/backend/serializers/coco'; import kpf from 'platform/desktop/backend/serializers/kpf'; +import { parentDatasetId } from 'dive-common/compositeDatasetId'; +import { + normalizeKey, + isFrameMetadataSourceName, + parseFrameMetadataSource, + selectFrameMetadataSource, +} from 'platform/desktop/backend/serializers/frameMetadata'; // TODO: Check to Refactor this // eslint-disable-next-line import/no-cycle import { checkMedia } from 'platform/desktop/backend/native/mediaJobs'; @@ -52,6 +60,7 @@ import { ExportConfigurationArgs, ExportMulticamEverythingArgs, JobsFolderName, JobsOutputFolderName, ProjectsFolderName, PipelinesFolderName, ConversionArgs, JobType, LastCalibrationBaseName, + SingleCameraFrameMetadataKey, } from 'platform/desktop/constants'; import { cleanString, filterByGlob, makeid, strNumericCompare, @@ -439,6 +448,269 @@ async function 
/** Fields of a dataset (or of one multicam camera) used to locate telemetry files beside the imagery. */
interface ImageSequenceFrameMetadataSource {
  originalBasePath: string;
  originalImageFiles: string[];
  imageListPath?: string;
}

/** `[fileName, fileText]` for one candidate telemetry file. */
type FrameMetadataCandidate = [string, string];
/** Metadata rows keyed by frame number (as a string), then by field name. */
type FrameMetadataRecords = Record<string, Record<string, string>>;

/**
 * Picks the directory to search for telemetry files: the base path, else the
 * image-list file's directory, else the directory of the first image when that
 * image path is absolute. Returns null when none of these is available.
 */
function frameMetadataSourceDirectory(source: ImageSequenceFrameMetadataSource): string | null {
  if (source.originalBasePath) {
    return source.originalBasePath;
  }
  if (source.imageListPath) {
    return npath.dirname(source.imageListPath);
  }
  const firstImage = source.originalImageFiles[0];
  if (firstImage && npath.isAbsolute(firstImage)) {
    return npath.dirname(firstImage);
  }
  return null;
}

/**
 * Reads every regular file in `directory` whose name passes
 * `isFrameMetadataSourceName`. A null or missing directory yields no candidates.
 */
async function frameMetadataCandidateTexts(
  directory: string | null,
): Promise<FrameMetadataCandidate[]> {
  if (!directory || !(await fs.pathExists(directory))) {
    return [];
  }

  const names = await fs.readdir(directory);
  const candidates = await Promise.all(names
    .filter(isFrameMetadataSourceName)
    .map(async (name): Promise<FrameMetadataCandidate | null> => {
      const filePath = npath.join(directory, name);
      const stat = await fs.stat(filePath);
      if (!stat.isFile()) {
        return null;
      }
      return [name, await fs.readFile(filePath, 'utf-8')];
    }));

  return candidates.filter((candidate): candidate is FrameMetadataCandidate => candidate !== null);
}

/** Re-keys the media-name-to-frame map by `normalizeKey(name)`. */
function mediaKeyToFrameMap(mediaKeys: Map<string, number>): Map<string, number> {
  return new Map(
    Array.from(mediaKeys.entries()).map(([mediaKey, frameNumber]) => (
      [normalizeKey(mediaKey), frameNumber]
    )),
  );
}

/**
 * Converts a parsed source's records into frame-keyed records, keeping only
 * those whose frame is known and lies within `[startFrame, endFrame]`.
 */
function frameRecordsForSource(
  source: { records: Record<string, Record<string, string>> },
  frameByKey: Map<string, number>,
  startFrame: number,
  endFrame: number,
): FrameMetadataRecords {
  const records: FrameMetadataRecords = {};
  Object.entries(source.records).forEach(([mediaKey, values]) => {
    const frameNumber = frameByKey.get(mediaKey);
    if (frameNumber !== undefined && startFrame <= frameNumber && frameNumber <= endFrame) {
      records[String(frameNumber)] = values;
    }
  });
  return records;
}

// Records are built in source-header order, so two equal records may differ in
// key order. Compare field-by-field (order-independent) to match the server's
// dict comparison, otherwise identical metadata is wrongly flagged as a collision.
function frameMetadataRecordsEqual(
  a: Record<string, string>,
  b: Record<string, string>,
): boolean {
  const aKeys = Object.keys(a);
  if (aKeys.length !== Object.keys(b).length) {
    return false;
  }
  return aKeys.every((key) => a[key] === b[key]);
}

/** Windowed, frame-keyed records for a source, given the raw media-name map. */
function recordsForFrameWindow(
  source: { records: Record<string, Record<string, string>> },
  mediaKeys: Map<string, number>,
  startFrame: number,
  endFrame: number,
): FrameMetadataRecords {
  return frameRecordsForSource(source, mediaKeyToFrameMap(mediaKeys), startFrame, endFrame);
}

/**
 * Merges `nextRecords` into a copy of `records`.
 *
 * A frame that receives two different records is dropped and remembered in
 * `collidedFrames` (mutated in place) so later sources cannot re-add it.
 */
function mergeFrameRecords(
  records: FrameMetadataRecords,
  collidedFrames: Set<string>,
  nextRecords: FrameMetadataRecords,
): FrameMetadataRecords {
  const mergedRecords = { ...records };
  Object.entries(nextRecords).forEach(([frameKey, values]) => {
    if (collidedFrames.has(frameKey)) {
      return;
    }
    if (mergedRecords[frameKey] === undefined) {
      mergedRecords[frameKey] = values;
    } else if (!frameMetadataRecordsEqual(mergedRecords[frameKey], values)) {
      delete mergedRecords[frameKey];
      collidedFrames.add(frameKey);
    }
  });
  return mergedRecords;
}

/**
 * Loads windowed records for a single image sequence from the one source
 * chosen by `selectFrameMetadataSource`. Returns null when the sequence has no
 * valid image names or no source is selected.
 */
async function loadSingleCameraFrameMetadataRecords(
  sourceMeta: ImageSequenceFrameMetadataSource,
  startFrame: number,
  endFrame?: number,
): Promise<FrameMetadataRecords | null> {
  const mediaKeys = validImageNamesMap(sourceMeta);
  if (!mediaKeys) {
    return null;
  }
  const source = selectFrameMetadataSource(
    await frameMetadataCandidateTexts(frameMetadataSourceDirectory(sourceMeta)),
    mediaKeys,
  );
  if (!source) {
    return null;
  }
  return recordsForFrameWindow(source, mediaKeys, startFrame, endFrame ?? mediaKeys.size - 1);
}

/**
 * Loads windowed records for one multicam camera by merging every candidate
 * that parses against the camera's image names. Returns null when the camera
 * has no valid image names or no candidate parses.
 */
async function loadMultiCameraFrameMetadataRecords(
  sourceMeta: ImageSequenceFrameMetadataSource,
  candidates: FrameMetadataCandidate[],
  startFrame: number,
  endFrame?: number,
): Promise<FrameMetadataRecords | null> {
  const mediaKeys = validImageNamesMap(sourceMeta);
  if (!mediaKeys) {
    return null;
  }

  const sources = candidates
    .map(([sourceName, text]) => parseFrameMetadataSource(text, mediaKeys, sourceName))
    .filter((source): source is NonNullable<typeof source> => source !== null);
  if (!sources.length) {
    return null;
  }

  const frameByKey = mediaKeyToFrameMap(mediaKeys);
  let records: FrameMetadataRecords = {};
  const collidedFrames = new Set<string>();
  const windowEnd = endFrame ?? mediaKeys.size - 1;
  sources.forEach((source) => {
    records = mergeFrameRecords(
      records,
      collidedFrames,
      frameRecordsForSource(source, frameByKey, startFrame, windowEnd),
    );
  });
  return records;
}

/**
 * Returns the deepest directory that contains every given path.
 *
 * Empty entries are ignored. Returns null when no paths remain or when the
 * paths do not share a filesystem root (for example different Windows drives),
 * so callers never fall back to scanning an unrelated root. When only the root
 * is shared, the root itself is returned ("/" or "C:\"), not a bare drive
 * prefix such as "C:".
 *
 * @param paths directories to compare
 * @param pathModule path implementation to use; defaults to the platform's
 */
function commonParentDirectory(
  paths: string[],
  pathModule: Pick<typeof npath, 'parse' | 'resolve' | 'sep'> = npath,
): string | null {
  const resolved = paths.filter((item) => item).map((item) => pathModule.resolve(item));
  if (!resolved.length) {
    return null;
  }
  const [first, ...rest] = resolved;
  const { root } = pathModule.parse(first);
  if (rest.some((candidate) => pathModule.parse(candidate).root !== root)) {
    return null;
  }
  const firstParts = first.split(pathModule.sep);
  let { length } = firstParts;
  rest.forEach((candidate) => {
    const parts = candidate.split(pathModule.sep);
    length = Math.min(length, parts.length);
    for (let i = 0; i < length; i += 1) {
      if (firstParts[i] !== parts[i]) {
        length = i;
        break;
      }
    }
  });
  const prefix = firstParts.slice(0, length).join(pathModule.sep);
  // Joining only the root segment drops its trailing separator ("" or "C:").
  return prefix.length < root.length ? root : prefix;
}

/**
 * Loads windowed frame metadata for every image-sequence camera of a multicam
 * dataset. Candidates come from the dataset root (its base path, else the
 * cameras' common parent directory) plus each camera's own directory.
 * Returns `{ cameras: {} }` when no camera has a usable source.
 */
async function loadMulticamFrameMetadata(
  projectMetaData: JsonMeta,
  startFrame: number,
  endFrame?: number,
): Promise<FrameMetadataResponse> {
  const { multiCam } = projectMetaData;
  if (!multiCam) {
    return { cameras: {} };
  }

  const cameraEntries = orderedMultiCamCameraNames({
    cameras: multiCam.cameras,
    defaultDisplay: multiCam.defaultDisplay,
  }).map((cameraName) => [cameraName, multiCam.cameras[cameraName]] as const);

  const rootDirectory = projectMetaData.originalBasePath
    || commonParentDirectory(cameraEntries.map(([, camera]) => (
      frameMetadataSourceDirectory(camera) ?? ''
    )));
  const rootCandidates = await frameMetadataCandidateTexts(rootDirectory);
  const resolvedRoot = rootDirectory ? npath.resolve(rootDirectory) : null;

  // Cameras are independent, so read them concurrently; Promise.all keeps input order.
  const loaded = await Promise.all(cameraEntries
    .filter(([, cameraMeta]) => cameraMeta.type === 'image-sequence')
    .map(async ([cameraName, cameraMeta]) => {
      const cameraDirectory = frameMetadataSourceDirectory(cameraMeta);
      // The root files are already loaded; re-reading the same directory would
      // only add duplicate candidates that merge to the same result.
      const sharesRoot = cameraDirectory !== null
        && npath.resolve(cameraDirectory) === resolvedRoot;
      const cameraCandidates = sharesRoot
        ? []
        : await frameMetadataCandidateTexts(cameraDirectory);
      const records = await loadMultiCameraFrameMetadataRecords(
        cameraMeta,
        rootCandidates.concat(cameraCandidates),
        startFrame,
        endFrame,
      );
      return [cameraName, records] as const;
    }));

  const cameras: FrameMetadataResponse['cameras'] = {};
  let hasSource = false;
  loaded.forEach(([cameraName, records]) => {
    if (records !== null) {
      hasSource = true;
    }
    cameras[cameraName] = records ?? {};
  });

  if (!hasSource) {
    return { cameras: {} };
  }
  return { cameras };
}

/**
 * Loads per-frame metadata for a dataset, limited to `[startFrame, endFrame]`.
 *
 * Multicam datasets return one entry per image-sequence camera; single
 * image-sequence datasets return one entry under `SingleCameraFrameMetadataKey`.
 * Any other dataset type, or a dataset without a usable source, yields
 * `{ cameras: {} }`.
 *
 * @param endFrame inclusive end of the window; defaults to the last frame
 */
async function loadFrameMetadata(
  settings: Settings,
  datasetId: string,
  startFrame: number,
  endFrame?: number,
): Promise<FrameMetadataResponse> {
  const parentId = parentDatasetId(datasetId);
  const projectDirData = await getValidatedProjectDir(settings, parentId);
  const projectMetaData = await loadJsonMetadata(projectDirData.metaFileAbsPath);

  if (projectMetaData.type === MultiType) {
    return loadMulticamFrameMetadata(projectMetaData, startFrame, endFrame);
  }
  if (projectMetaData.type !== 'image-sequence') {
    return { cameras: {} };
  }

  const records = await loadSingleCameraFrameMetadataRecords(
    projectMetaData,
    startFrame,
    endFrame,
  );
  if (records === null) {
    return { cameras: {} };
  }
  return { cameras: { [SingleCameraFrameMetadataKey]: records } };
}
@@ -1319,7 +1591,7 @@ async function beginMediaImport(path: string): Promise 0) { const imageMap = new Map(); jsonMeta.originalImageFiles.forEach((imgPath, i) => { @@ -2040,6 +2312,7 @@ export { loadJsonMetadata, loadAnnotationFile, loadDetections, + loadFrameMetadata, openLink, openPathInFileManager, ingestDataFiles, diff --git a/client/platform/desktop/backend/serializers/coco.spec.ts b/client/platform/desktop/backend/serializers/coco.spec.ts index 774a48678..da1159e50 100644 --- a/client/platform/desktop/backend/serializers/coco.spec.ts +++ b/client/platform/desktop/backend/serializers/coco.spec.ts @@ -213,6 +213,17 @@ describe('COCO serializer', () => { expect(out.annotations[0].dive_notes).toEqual(['exported note']); }); + it('does not include frame metadata in COCO exports', async () => { + await serializeFile('/output/out.coco.json', annotationSchema, { + ...imageMeta, + frameMetadataFields: ['depth'], + frameMetadata: { singleCam: { 0: { depth: '192.80' } } }, + } as JsonMeta); + const out = await fs.readJSON('/output/out.coco.json'); + expect(out.info).not.toHaveProperty('dive_frame_metadata'); + expect(out.info.dive_extensions).not.toContain('dive_frame_metadata'); + }); + // --- datasetInfo passthrough --- const datasetInfo = { diff --git a/client/platform/desktop/backend/serializers/frameMetadata.spec.ts b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts new file mode 100644 index 000000000..f4039ded7 --- /dev/null +++ b/client/platform/desktop/backend/serializers/frameMetadata.spec.ts @@ -0,0 +1,342 @@ +/// + +import { + ParsedFrameMetadata, + findJoinColumns, + normalizeKey, + parseFrameMetadataSource, + selectFrameMetadataSource, +} from 'platform/desktop/backend/serializers/frameMetadata'; + +type ContractRecord = Record; +type ContractSource = { + header: string[]; + recordsByFrame: Record; + cameras: Record; +}; +type Contract = { + selectionStatus: Record<'missing' | 'ambiguous', 'none' | 'selected'>; + sources: Record; +}; + 
+const syntheticHeader = [ + 'port_image', + 'depth_m', + 'heading', + 'starboard_image', +]; + +const syntheticSources: Record = { + 'synthetic_auv_nav_rect.txt': [ + ['rect_port_0001.tif', '192.80', '174.5', 'rect_starboard_0001.tif'], + ['rect_port_0002.tif', '193.05', '175.1', 'rect_starboard_0002.tif'], + ], + 'synthetic_auv_nav_jpg.txt': [ + ['jpg_port_0001.jpg', '88.40', '92.5', 'jpg_starboard_0001.jpg'], + ['jpg_port_0002.jpg', '88.72', '93.1', 'jpg_starboard_0002.jpg'], + ], +}; + +function sourceText(sourceName: string): string { + const rows = syntheticSources[sourceName]; + return [syntheticHeader.join(' '), ...rows.map((row) => row.join(' ')), ''].join('\n'); +} + +function sourceContract(rows: string[][]): ContractSource { + return { + header: syntheticHeader, + recordsByFrame: Object.fromEntries(rows.map((row, frame) => [ + String(frame), + Object.fromEntries(syntheticHeader.map((field, index) => [field, row[index]])), + ])), + cameras: { + port: cameraContract('port_image', rows), + starboard: cameraContract('starboard_image', rows), + }, + }; +} + +function cameraContract(joinColumn: string, rows: string[][]) { + return { + joinColumn, + payloadColumns: syntheticHeader.filter((column) => column !== joinColumn), + frames: rows.map((_, frame) => String(frame)), + }; +} + +function loadContract(): Contract { + return { + selectionStatus: { missing: 'none', ambiguous: 'none' }, + sources: Object.fromEntries( + Object.entries(syntheticSources).map(([sourceName, rows]) => [ + sourceName, + sourceContract(rows), + ]), + ), + }; +} + +function fixtureText(sourceName: string): string { + return sourceText(sourceName); +} + +function mediaKeys( + cameraRecords: Record, + joinColumn: string, +): Map { + return new Map(Object.entries(cameraRecords).map(([frame, record]) => ( + [normalizeKey(record[joinColumn]), Number(frame)] + ))); +} + +function recordsByFrame( + source: ParsedFrameMetadata, + keys: Map, +): Record { + const records: Record = {}; + 
Array.from(keys.entries()) + .sort(([, frameA], [, frameB]) => frameA - frameB) + .forEach(([key, frame]) => { + if (source.records[key] !== undefined) { + records[String(frame)] = source.records[key]; + } + }); + return records; +} + +function sourceStatus( + source: ReturnType, +): 'none' | 'selected' { + return source === null ? 'none' : 'selected'; +} + +describe('desktop frame metadata serializer', () => { + it('normalizes media keys the same way as image name maps', () => { + expect(normalizeKey('nested/20191009.154056.00082_rect_color.tif')).toBe( + '20191009.154056.00082_rect_color', + ); + }); + + it('parses multicamera rows with multiple image columns', () => { + const mediaKeys = new Map([ + ['20191009.154056.00082_rect_color', 0], + ['20191009.154056.00081_rect_color', 0], + ]); + const text = [ + 'port_image date time latitude longitude water_depth altitude starboard_image', + '20191009.154056.00082_rect_color.tif 2019/10/09 15:40:56.1122 46.575870 -124.603094 192.80 2.78 20191009.154056.00081_rect_color.tif', + '', + ].join('\n'); + + const source = parseFrameMetadataSource(text, mediaKeys, 'nav.txt'); + + expect(source).not.toBeNull(); + expect(source?.sourceName).toBe('nav.txt'); + expect(source?.header).toEqual([ + 'port_image', + 'date', + 'time', + 'latitude', + 'longitude', + 'water_depth', + 'altitude', + 'starboard_image', + ]); + expect(source?.joinColumns).toEqual(['port_image', 'starboard_image']); + expect(source?.payloadColumns).toEqual([ + 'date', + 'time', + 'latitude', + 'longitude', + 'water_depth', + 'altitude', + ]); + expect(Object.keys(source?.records || {}).sort()).toEqual([ + '20191009.154056.00081_rect_color', + '20191009.154056.00082_rect_color', + ]); + const portRecord = source?.records['20191009.154056.00082_rect_color']; + expect(Object.keys(portRecord || {})).toEqual(source?.header); + expect(portRecord?.latitude).toBe('46.575870'); + expect(Object.values(portRecord || {}).every((value) => typeof value === 
'string')).toBe(true); + }); + + it('parses comma, tab, and whitespace delimited sources', () => { + const mediaKeys = new Map([['image_0001', 0]]); + + [ + 'filename,depth,latitude\nimage_0001.jpg,192.80,46.575870\n', + 'filename\tdepth\tlatitude\nimage_0001.jpg\t192.80\t46.575870\n', + 'filename depth latitude\nimage_0001.jpg 192.80 46.575870\n', + ].forEach((text) => { + const source = parseFrameMetadataSource(text, mediaKeys); + + expect(source).not.toBeNull(); + expect(source?.header).toEqual(['filename', 'depth', 'latitude']); + expect(source?.joinColumns).toEqual(['filename']); + expect(source?.records.image_0001).toEqual({ + filename: 'image_0001.jpg', + depth: '192.80', + latitude: '46.575870', + }); + }); + }); + + it('finds join columns by filename value matches', () => { + const rows = [ + { + port_image: '20191009.154056.00082_rect_color.tif', + latitude: '46.575870', + starboard_image: '20191009.154056.00081_rect_color.tif', + }, + ]; + + expect(findJoinColumns( + ['port_image', 'latitude', 'starboard_image'], + rows, + new Map([ + ['20191009.154056.00082_rect_color', 0], + ['20191009.154056.00081_rect_color', 0], + ]), + )).toEqual(['port_image', 'starboard_image']); + }); + + it('rejects VIAME annotation CSV even when its image column matches', () => { + const mediaKeys = new Map([['20191009.154056.00082_rect_color', 0]]); + const viameCsv = [ + '# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes', + '1,20191009.154056.00082_rect_color.tif,0,0,0,10,10,1.0,-1,fish,0.9', + '', + ].join('\n'); + + expect(parseFrameMetadataSource(viameCsv, mediaKeys)).toBeNull(); + }); + + it('rejects a headerless VIAME annotation CSV', () => { + const mediaKeys = new Map([['frame_0001', 0], ['frame_0002', 1]]); + const headerlessViame = [ + 
'1,frame_0001.png,0,10,20,30,40,1.0,-1,fish,0.9', + '2,frame_0002.png,1,11,21,31,41,1.0,-1,fish,0.8', + '', + ].join('\n'); + + expect(parseFrameMetadataSource(headerlessViame, mediaKeys)).toBeNull(); + }); + + it('parses a sidecar containing a bare double-quote character', () => { + const mediaKeys = new Map([['image_0001', 0]]); + const text = [ + 'filename,depth', + 'image_0001.jpg,5"', + '', + ].join('\n'); + + const source = parseFrameMetadataSource(text, mediaKeys); + + expect(source).not.toBeNull(); + expect(source?.records.image_0001.depth).toBe('5"'); + }); + + it('accepts VIAME-shaped telemetry without the VIAME header', () => { + const mediaKeys = new Map([['image_0001', 0]]); + const text = [ + 'index,image,frame,x,y,depth,altitude,heading,temperature', + '1,image_0001.jpg,100,46.5,-124.6,192.8,2.7,180.5,4.2', + '', + ].join('\n'); + + const source = parseFrameMetadataSource(text, mediaKeys); + + expect(source).not.toBeNull(); + expect(source?.joinColumns).toEqual(['image']); + expect(source?.records.image_0001.depth).toBe('192.8'); + }); + + it('rejects bare image lists and unrelated text', () => { + const mediaKeys = new Map([['image_0001', 0]]); + + expect(parseFrameMetadataSource('image\nimage_0001.jpg\n', mediaKeys)).toBeNull(); + expect(parseFrameMetadataSource('note,value\nhello,world\n', mediaKeys)).toBeNull(); + }); + + it('rejects ambiguous candidates and non-text extensions', () => { + const mediaKeys = new Map([['image_0001', 0]]); + const acceptedText = 'filename,depth\nimage_0001.jpg,192.80\n'; + + expect(selectFrameMetadataSource( + [ + ['metadata.json', acceptedText], + ['telemetry-a.txt', acceptedText], + ], + mediaKeys, + )?.sourceName).toBe('telemetry-a.txt'); + expect(selectFrameMetadataSource( + [ + ['telemetry-a.txt', acceptedText], + ['telemetry-b.csv', 'filename,temperature\nimage_0001.jpg,4.2\n'], + ], + mediaKeys, + )).toBeNull(); + }); + + it('matches the shared synthetic AUV fixture contract', () => { + const contract = 
loadContract(); + + Object.entries(contract.sources).forEach(([sourceName, expected]) => { + const text = fixtureText(sourceName); + Object.entries(expected.cameras).forEach(([camera, cameraContract]) => { + const expectedRecords = Object.fromEntries( + cameraContract.frames.map((frame) => [frame, expected.recordsByFrame[frame]]), + ); + const { joinColumn } = cameraContract; + const keys = mediaKeys(expectedRecords, joinColumn); + const source = parseFrameMetadataSource(text, keys, sourceName); + + expect(source).not.toBeNull(); + if (source === null) { + throw new Error(`Expected ${sourceName} to parse for ${camera}`); + } + expect(source.sourceName).toBe(sourceName); + expect(source.header).toEqual(expected.header); + expect(source.joinColumns).toEqual([joinColumn]); + expect(source.payloadColumns).toEqual(cameraContract.payloadColumns); + expect(recordsByFrame(source, keys)).toEqual(expectedRecords); + expect(Object.values(source.records).every((record) => ( + Object.values(record).every((value) => typeof value === 'string') + ))).toBe(true); + }); + }); + }); + + it('matches shared missing and ambiguous source decisions', () => { + const contract = loadContract(); + const sourceContract = contract.sources['synthetic_auv_nav_rect.txt']; + const portContract = sourceContract.cameras.port; + const portRecords = Object.fromEntries( + portContract.frames.map((frame) => [frame, sourceContract.recordsByFrame[frame]]), + ); + const keys = mediaKeys(portRecords, portContract.joinColumn); + const rectText = fixtureText('synthetic_auv_nav_rect.txt'); + + const missingSource = selectFrameMetadataSource( + [['synthetic_auv_nav_jpg.txt', fixtureText('synthetic_auv_nav_jpg.txt')]], + keys, + ); + const ambiguousSource = selectFrameMetadataSource( + [ + ['synthetic_auv_nav_rect.txt', rectText], + ['synthetic_auv_nav_rect_copy.csv', rectText], + ], + keys, + ); + + expect({ + missing: sourceStatus(missingSource), + ambiguous: sourceStatus(ambiguousSource), + 
}).toEqual(contract.selectionStatus); + }); +}); diff --git a/client/platform/desktop/backend/serializers/frameMetadata.ts b/client/platform/desktop/backend/serializers/frameMetadata.ts new file mode 100644 index 000000000..57fe7d4d3 --- /dev/null +++ b/client/platform/desktop/backend/serializers/frameMetadata.ts @@ -0,0 +1,251 @@ +import parseSync from 'csv-parse/lib/sync'; +import path from 'path'; +import { FrameMetadataSourceExtensions } from 'platform/desktop/constants'; +import { isViameCsvRows } from 'platform/desktop/backend/serializers/viame'; + +type FrameMetadataRow = Record; +type MediaKeys = Map | Record; +type Delimiter = ',' | '\t' | null; + +interface ParsedFrameMetadata { + sourceName?: string; + header: string[]; + rows: FrameMetadataRow[]; + joinColumns: string[]; + payloadColumns: string[]; + records: Record; +} + +const imageExtensions = new Set([ + 'png', + 'jpg', + 'jpeg', + 'sgi', + 'bmp', + 'pgm', + 'nitf', + 'tif', + 'tiff', + 'ntf', + 'vrt', + 'r0', + 'r1', + 'r2', + 'r3', + 'r4', + 'r5', + 'r6', +]); +const frameMetadataSourceExtensions = new Set(FrameMetadataSourceExtensions); + +function normalizeKey(value: string): string { + const basename = path.basename(String(value).trim()); + const ext = path.extname(basename); + const extension = ext.toLowerCase().replace(/^\./, ''); + if (imageExtensions.has(extension)) { + return path.basename(basename, ext); + } + return basename; +} + +function parseTable(text: string): { header: string[]; rows: FrameMetadataRow[] } { + return parseTableRows(readRows(text)); +} + +function parseTableRows(rawRows: string[][]): { header: string[]; rows: FrameMetadataRow[] } { + if (rawRows.length === 0) { + return { header: [], rows: [] }; + } + + const header = rawRows[0].map((cell) => cell.trim()); + if (!header.every((cell) => cell.length > 0)) { + return { header: [], rows: [] }; + } + + const rows: FrameMetadataRow[] = []; + rawRows.slice(1).forEach((rawRow) => { + const values = rawRow.map((cell) => 
cell.trim()); + if (!values.some((cell) => cell.length > 0)) { + return; + } + const row: FrameMetadataRow = {}; + header.forEach((field, index) => { + row[field] = values[index] || ''; + }); + rows.push(row); + }); + return { header, rows }; +} + +function findJoinColumns( + header: string[], + rows: FrameMetadataRow[], + mediaKeys: MediaKeys, +): string[] { + return findJoinColumnsForKeys(header, rows, normalizedKeySet(mediaKeys)); +} + +function findJoinColumnsForKeys( + header: string[], + rows: FrameMetadataRow[], + normalizedMediaKeys: Set, +): string[] { + return header.filter((column) => rows.some((row) => ( + row[column] && normalizedMediaKeys.has(normalizeKey(row[column])) + ))); +} + +function isFrameMetadata(text: string, mediaKeys: MediaKeys): boolean { + return parseFrameMetadataSource(text, mediaKeys) !== null; +} + +function parseFrameMetadataSource( + text: string, + mediaKeys: MediaKeys, + sourceName?: string, +): ParsedFrameMetadata | null { + const { delimiter, rows: rawRows } = readRowsWithDelimiter(text); + if (delimiter === ',' && isViameCsvRows(rawRows)) { + return null; + } + + const { header, rows } = parseTableRows(rawRows); + if (header.length === 0 || rows.length === 0) { + return null; + } + + const normalizedMediaKeys = normalizedKeySet(mediaKeys); + const joinColumns = findJoinColumnsForKeys(header, rows, normalizedMediaKeys); + if (joinColumns.length === 0) { + return null; + } + + const payloadColumns = header.filter((column) => !joinColumns.includes(column)); + if (payloadColumns.length === 0) { + return null; + } + + const records: Record = {}; + rows.forEach((row) => { + joinColumns.forEach((column) => { + const key = normalizeKey(row[column] || ''); + if (normalizedMediaKeys.has(key)) { + const record: FrameMetadataRow = {}; + header.forEach((field) => { + record[field] = row[field] || ''; + }); + records[key] = record; + } + }); + }); + + if (Object.keys(records).length === 0) { + return null; + } + + return { + sourceName, + 
header, + rows, + joinColumns, + payloadColumns, + records, + }; +} + +function selectFrameMetadataSource( + candidates: [string, string][], + mediaKeys: MediaKeys, +): ParsedFrameMetadata | null { + const matches: ParsedFrameMetadata[] = []; + candidates.forEach(([sourceName, text]) => { + if (!isFrameMetadataSourceName(sourceName)) { + return; + } + const source = parseFrameMetadataSource(text, mediaKeys, sourceName); + if (source !== null) { + matches.push(source); + } + }); + + if (matches.length !== 1) { + return null; + } + return matches[0]; +} + +function readRows(text: string): string[][] { + return readRowsWithDelimiter(text).rows; +} + +function readRowsWithDelimiter(text: string): { delimiter: Delimiter; rows: string[][] } { + const firstLine = firstNonemptyLine(text); + if (firstLine === null) { + return { delimiter: null, rows: [] }; + } + + const delimiter = sniffDelimiter(firstLine); + if (delimiter === null) { + return { + delimiter, + rows: text + .split(/\r?\n/) + .filter((line) => line.trim().length > 0) + .map((line) => line.trim().split(/\s+/)), + }; + } + + return { delimiter, rows: parseDelimited(text, delimiter) }; +} + +function parseDelimited(text: string, delimiter: ',' | '\t'): string[][] { + return parseSync(text, { + delimiter, + relax_column_count: true, + // Telemetry sidecars routinely contain bare quote characters (e.g. depth + // 5"); `relax` treats a stray quote in an unquoted field as a literal + // character instead of throwing, matching Python's lenient csv.reader on + // the server while still honoring properly quoted fields. + relax: true, + skip_empty_lines: true, + }).map((row: string[]) => row.map((cell) => cell.trim())); +} + +function firstNonemptyLine(text: string): string | null { + const line = text.split(/\r?\n/).find((candidate) => candidate.trim().length > 0); + return line === undefined ? 
null : line.trim(); +} + +function sniffDelimiter(line: string): ',' | '\t' | null { + if (line.includes(',')) { + return ','; + } + if (line.includes('\t')) { + return '\t'; + } + return null; +} + +function normalizedKeySet(mediaKeys: MediaKeys): Set { + if (mediaKeys instanceof Map) { + return new Set(Array.from(mediaKeys.keys()).map((key) => normalizeKey(key))); + } + return new Set(Object.keys(mediaKeys).map((key) => normalizeKey(key))); +} + +function isFrameMetadataSourceName(sourceName: string): boolean { + return frameMetadataSourceExtensions.has(path.extname(sourceName).toLowerCase()); +} + +export { + FrameMetadataRow, + MediaKeys, + ParsedFrameMetadata, + findJoinColumns, + isFrameMetadata, + isFrameMetadataSourceName, + normalizeKey, + parseFrameMetadataSource, + parseTable, + selectFrameMetadataSource, +}; diff --git a/client/platform/desktop/backend/serializers/viame.spec.ts b/client/platform/desktop/backend/serializers/viame.spec.ts index a01b5d198..f64bd779d 100644 --- a/client/platform/desktop/backend/serializers/viame.spec.ts +++ b/client/platform/desktop/backend/serializers/viame.spec.ts @@ -350,6 +350,23 @@ describe('VIAME datasetInfo passthrough', () => { expect(fields?.some((field) => field.startsWith('dataset_info'))).toBe(false); }); + it('does not include frame metadata fields in VIAME exports', async () => { + const path = '/home/test.json'; + const stream = fs.createWriteStream(path); + await serialize(stream, data, { + ...meta, + frameMetadataFields: ['depth'], + frameMetadata: { singleCam: { 0: { depth: '192.80' } } }, + } as JsonMeta, new Set(), { + excludeBelowThreshold: false, + header: true, + }); + const output = fs.readFileSync(path).toString(); + expect(output).not.toContain('frameMetadataFields'); + expect(output).not.toContain('frameMetadata'); + expect(output).not.toContain('frame_metadata'); + }); + it('restores datasetInfo from the # metadata line on parse', async () => { const path = '/home/test.json'; const stream = 
fs.createWriteStream(path); diff --git a/client/platform/desktop/backend/serializers/viame.ts b/client/platform/desktop/backend/serializers/viame.ts index 25333581f..1ca8f1629 100644 --- a/client/platform/desktop/backend/serializers/viame.ts +++ b/client/platform/desktop/backend/serializers/viame.ts @@ -96,6 +96,53 @@ function _rowInfo(row: string[]) { }; } +function isViameDataRow(row: string[]): boolean { + try { + const info = _rowInfo(row); + return [ + info.id, + info.frame, + info.fishLength, + ...info.bounds, + ].every((value) => Number.isFinite(value)); + } catch { + return false; + } +} + +function isViameCsvRows(rows: string[][]): boolean { + let hasHeader = false; + let hasDataRow = false; + let firstRowIsDetection = false; + let seenDataRow = false; + + for (let i = 0; i < rows.length; i += 1) { + const row = rows[i]; + if (row.length) { + if (row[0].startsWith('#')) { + hasHeader = hasHeader || row[0].startsWith('# 1: Detection or Track-id'); + } else { + const rowIsDetection = isViameDataRow(row); + if (!seenDataRow) { + seenDataRow = true; + firstRowIsDetection = rowIsDetection; + } + if (rowIsDetection) { + hasDataRow = true; + if (hasHeader || firstRowIsDetection) { + return true; + } + } + } + } + } + + // A headerless VIAME CSV (no text header) leads with a detection row; a DIVE + // export carries the comment header. Telemetry leads with a field-name header + // that is not VIAME-shaped, so it is left for the frame metadata parser. + return hasDataRow && (hasHeader || firstRowIsDetection); +} + /** Resolve detection length from attributes.length or fishLength (either may be set). 
*/ function resolveDetectionLength( fishLength?: number, @@ -817,6 +864,7 @@ async function serializeFile( } export { + isViameCsvRows, parse, parseFile, serialize, diff --git a/client/platform/desktop/constants.ts b/client/platform/desktop/constants.ts index 5b24868ff..11c3ac725 100644 --- a/client/platform/desktop/constants.ts +++ b/client/platform/desktop/constants.ts @@ -13,6 +13,8 @@ export const ProjectsFolderName = 'DIVE_Projects'; export const JobsFolderName = 'DIVE_Jobs'; export const JobsOutputFolderName = 'DIVE_Jobs_Output'; export const PipelinesFolderName = 'DIVE_Pipelines'; +export const FrameMetadataSourceExtensions = ['.txt', '.csv'] as const; +export const SingleCameraFrameMetadataKey = 'singleCam'; // Basename (without extension) of the saved "most recently used" calibration. // The stored file keeps the source file's real extension (e.g. last_calibration.npz). export const LastCalibrationBaseName = 'last_calibration'; diff --git a/client/platform/desktop/frontend/api.ts b/client/platform/desktop/frontend/api.ts index 4f1fb1c71..7f87e74eb 100644 --- a/client/platform/desktop/frontend/api.ts +++ b/client/platform/desktop/frontend/api.ts @@ -7,6 +7,7 @@ import type { DatasetCalibrationResult, SegmentationPredictRequest, SegmentationPredictResponse, SegmentationStatusResponse, SegmentationStereoSegmentRequest, SegmentationStereoSegmentResponse, + FrameMetadataResponse, } from 'dive-common/apispec'; import { @@ -548,6 +549,14 @@ async function loadDetections(datasetId: string) { }; } +function loadFrameMetadata( + datasetId: string, + startFrame: number, + endFrame: number, +): Promise { + return window.diveDesktop.invoke('load-frame-metadata', { datasetId, startFrame, endFrame }); +} + async function saveMetadata(id: string, args: DatasetMetaMutable) { const client = await getClient(); return client.post(`dataset/${id}/meta`, args); @@ -608,6 +617,7 @@ export { /* Standard Specification APIs */ loadMetadata, loadDetections, + loadFrameMetadata, 
getPipelineList, deleteTrainedPipeline, runPipeline, diff --git a/client/platform/web-girder/App.vue b/client/platform/web-girder/App.vue index 8786b2321..13a0300fc 100644 --- a/client/platform/web-girder/App.vue +++ b/client/platform/web-girder/App.vue @@ -24,6 +24,7 @@ import { saveAttributeTrackFilters, importAnnotationFile, loadDetections, + loadFrameMetadata, saveDetections, unwrap, getTiles, @@ -67,6 +68,7 @@ export default defineComponent({ getTrainingConfigurations: unwrap(getTrainingConfigurations), runTraining: unwrap(runTraining), loadDetections, + loadFrameMetadata: unwrap(loadFrameMetadata), saveDetections: unwrap(saveDetections), saveMetadata: unwrap(saveMetadata), saveAttributes: unwrap(saveAttributes), diff --git a/client/platform/web-girder/api/dataset.service.spec.ts b/client/platform/web-girder/api/dataset.service.spec.ts new file mode 100644 index 000000000..0dc5ca856 --- /dev/null +++ b/client/platform/web-girder/api/dataset.service.spec.ts @@ -0,0 +1,30 @@ +// @vitest-environment jsdom + +// eslint-disable-next-line import/no-extraneous-dependencies -- Vitest is only used in tests +import { + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest'; + +import girderRest from '../plugins/girder'; +import { loadFrameMetadata } from './dataset.service'; + +describe('dataset.service frame metadata', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('requests frame metadata from the parent dataset with explicit window params', async () => { + const response = { data: { cameras: { port: { 3: { depth: '193.10' } } } } }; + const get = vi.spyOn(girderRest, 'get').mockResolvedValue(response as never); + + await expect(loadFrameMetadata('parent-id/port', 3, 7)).resolves.toBe(response); + + expect(get).toHaveBeenCalledWith('dive_dataset/parent-id/frame_metadata', { + params: { startFrame: 3, endFrame: 7 }, + }); + }); +}); diff --git a/client/platform/web-girder/api/dataset.service.ts 
b/client/platform/web-girder/api/dataset.service.ts index c2503d493..62d6915c2 100644 --- a/client/platform/web-girder/api/dataset.service.ts +++ b/client/platform/web-girder/api/dataset.service.ts @@ -1,7 +1,8 @@ import type { GirderModel } from '@girder/components/src'; import { - DatasetMetaMutable, FrameImage, SaveAttributeArgs, SaveAttributeTrackFilterArgs, + DatasetMetaMutable, FrameImage, FrameMetadataResponse, + SaveAttributeArgs, SaveAttributeTrackFilterArgs, } from 'dive-common/apispec'; import { calibrationFileMarker, jsonCalibrationFileMarker } from 'dive-common/constants'; import { parentDatasetId } from 'dive-common/compositeDatasetId'; @@ -64,6 +65,13 @@ async function getDatasetMedia(datasetId: string) { return girderRest.get(`dive_dataset/${folderId}/media`); } +function loadFrameMetadata(datasetId: string, startFrame: number, endFrame: number) { + return girderRest.get( + `dive_dataset/${parentDatasetId(datasetId)}/frame_metadata`, + { params: { startFrame, endFrame } }, + ); +} + function clone({ folderId, name, parentFolderId, revision, }: { @@ -318,6 +326,7 @@ export { hasCalibrationFile, getDatasetCalibration, importAnnotationFile, + loadFrameMetadata, makeViameFolder, saveAttributes, saveAttributeTrackFilters, diff --git a/docs/DataFormats.md b/docs/DataFormats.md index e9491e9fa..fb4f0af76 100644 --- a/docs/DataFormats.md +++ b/docs/DataFormats.md @@ -5,7 +5,9 @@ hide: # Data Formats -DIVE Desktop and Web support a number of annotation and configuration formats. The following formats can be uploaded or imported alongside your media and will be automatically parsed. +DIVE Desktop and Web support a number of annotation, configuration, and +media-side metadata formats. The annotation and configuration formats below can +be uploaded or imported alongside your media and will be automatically parsed. 
* DIVE Annotation JSON (default annotation format) * DIVE Configuration JSON @@ -13,6 +15,51 @@ DIVE Desktop and Web support a number of annotation and configuration formats. * KPF (KWIVER Packet Format) * COCO and KWCOCO +Per-frame telemetry sidecars are different: DIVE reads matching `.txt` and +`.csv` files from the image-sequence folder on demand. They are not imported into +annotations or exported in v1. + +## Per-frame Metadata Text Sidecars + +DIVE can display read-only per-frame telemetry in the +[Media Metadata panel](UI-DatasetInfo.md#frame-metadata). The stored form is a +delimited text file placed next to the image sequence. DIVE reads it at view time +and joins rows to frames by filename value. + +Supported sidecar contract: + +* `.txt` or `.csv` file in the dataset folder for single-camera image sequences. +* For multicamera image sequences, either one shared `.txt` or `.csv` file in + the multicam parent folder, or one sidecar in each camera child folder. +* Header row with field names. +* Comma, tab, or whitespace delimiter. +* At least one filename column whose values match the image filenames. +* At least one metadata column beyond the filename column. + +Example: + +```text +image_file timestamp latitude longitude water_depth +img_0001.tif 15:40:56 46.575870 -124.603094 192.80 +img_0002.tif 15:41:04 46.575912 -124.603080 193.10 +``` + +Rows are matched by filename value, not by row order. DIVE ignores the filename +extension while matching, so `img_0001.tif` can match an image key of +`img_0001`. Rows that do not match an image are omitted. + +For multicamera data, a shared source can contain one filename column per camera, +such as `port_image` and `starboard_image`. Each active camera displays the rows +that matched that camera's imagery. + +Values are shown as raw strings in source field order. DIVE does not infer +types, units, or pinned display order for v1 frame telemetry. + +Frame metadata sidecars are read-only. 
They are not edited in DIVE, saved as +derived metadata, imported as annotations, or included in VIAME, DIVE JSON, COCO, +KWCOCO, or zip exports. Video telemetry, embedded KLV, embedded EXIF, and +manually selecting a source from another location are future work. + ## DIVE Annotation JSON !!! info @@ -163,7 +210,7 @@ This information provides the specification for an individual dataset. It consi * Preset confidence filters for those types are defined in `confidenceFilters` * Track and Detection attribute specifications are defined in `attributes` * Free-form, dataset-level metadata (cruise id, station id, location, …) is stored in `datasetInfo` as a key/value object. - * Edited from the [Dataset Info panel](UI-DatasetInfo.md). + * Edited from the Dataset Info section of the [Media Metadata panel](UI-DatasetInfo.md). * Included in DIVE Configuration JSON as `datasetInfo`. * Included in [VIAME CSV](#viame-csv) and [COCO / KWCOCO](#coco-and-kwcoco) export, and restored on import. @@ -196,7 +243,7 @@ Read the [VIAME CSV Specification](https://viame.readthedocs.io/en/latest/sectio ### Dataset metadata in the header DIVE writes a `# metadata` comment line near the top of the CSV carrying dataset-level -values such as `fps`. When a dataset has [Dataset Info](UI-DatasetInfo.md) custom +values such as `fps`. When a dataset has [Media Metadata](UI-DatasetInfo.md) custom metadata, the whole `datasetInfo` object is added to that line as a single nested JSON entry keyed `dataset_info`: @@ -210,7 +257,8 @@ entry keyed `dataset_info`: * A CSV with no `dataset_info` entry leaves existing metadata untouched. * This is how dataset context, for example a `gfishsite_id` used to re-link annotations to an external database, travels with the exported annotations without - renaming files. See the [Dataset Info panel](UI-DatasetInfo.md) for how to populate it. + renaming files. See the Dataset Info section of the + [Media Metadata panel](UI-DatasetInfo.md) for how to populate it. 
### VIAME CSV polygons and length @@ -283,9 +331,9 @@ These extension keys are declared in the COCO `info` object as: ### Dataset-level metadata (`datasetInfo`) -The dataset's free-form [Dataset Info](UI-DatasetInfo.md) metadata (e.g. `gfishsite_id`, -cruise, station) is written to the COCO `info` block under a single `dive_dataset_info` key and -advertised in `info.dive_extensions`: +The dataset's free-form [Media Metadata](UI-DatasetInfo.md) custom metadata +(e.g. `gfishsite_id`, cruise, station) is written to the COCO `info` block under +a single `dive_dataset_info` key and advertised in `info.dive_extensions`: * `info.dive_dataset_info = { "gfishsite_id": "2024TXN012", "year": "2024", ... }` diff --git a/docs/Frame-Metadata-Architecture.md b/docs/Frame-Metadata-Architecture.md new file mode 100644 index 000000000..d1b2f66e6 --- /dev/null +++ b/docs/Frame-Metadata-Architecture.md @@ -0,0 +1,90 @@ +# Frame Metadata Architecture + +Per-frame metadata is read-only telemetry that describes the media at capture +time, such as timestamp, latitude, longitude, depth, or altitude. DIVE treats it +as a media-side property, not as editable annotation data. + +The stored source is the user's `.txt` or `.csv` file next to the imagery. +Everything DIVE serves or displays is a read-time projection of that source. + +## Source contract + +v1 supports delimited text sidecars for image sequences: + +* file extension `.txt` or `.csv`, +* header row, +* comma, tab, or whitespace delimiter, +* at least one column whose values match image filenames, +* at least one payload column beyond the filename column. + +The parser keeps payload values as raw strings and preserves source field order. +Filename matching is by value after normalizing the media key, so a reordered or +partial table cannot shift metadata onto the wrong frame. Rows without a filename +match are omitted. + +DIVE sniffs candidate text files in the dataset folder. 
Annotation and other +known DIVE formats are rejected before filename matching, including VIAME CSV. +Bare image lists and unrelated text files are ignored. If more than one distinct +candidate matches, DIVE skips frame metadata instead of guessing. + +## Read path + +The web backend exposes a windowed endpoint: + +```http +GET /dive_dataset/:id/frame_metadata?startFrame=0&endFrame=100 +``` + +`startFrame` and `endFrame` are inclusive, non-negative bounds. The response is +keyed by camera, then frame: + +```json +{ + "cameras": { + "singleCam": { + "0": { + "timestamp": "15:40:56", + "water_depth": "192.80" + } + } + } +} +``` + +Single-camera datasets use the camera key `singleCam`. Multicamera datasets use +their runtime camera names. A missing or unusable source returns an empty +`cameras` map. Only frames with matching metadata appear in the response. + +The desktop backend mirrors the same contract through `loadFrameMetadata`. + +## Multicamera routing + +For multicamera datasets, DIVE checks text sidecars at the parent folder and in +each child camera folder. Each camera builds its own media filename map and +selects matching rows for that camera. + +A shared root file can therefore route one row to multiple cameras through +different filename columns, for example `port_image` and `starboard_image`. +Per-camera child files are also supported. If two distinct records target the +same camera and frame, that frame is omitted rather than resolved by precedence. + +## Client behavior + +The client keeps a bounded frame window around the playhead. It fetches a new +window only when the active frame leaves the cached range. The Media Metadata +panel reads from this cache and displays the active camera's current-frame values +in source order. + +Frame metadata is kept out of annotation, attribute, and dataset metadata stores. +Those stores have edit, save, revision, and export behavior that does not apply +to observed read-only telemetry. 
+ +## Non-goals in v1 + +v1 does not write `frame_metadata.json`, maintain a field registry, import a +telemetry file into annotations, or include frame telemetry in VIAME, DIVE JSON, +COCO, KWCOCO, or zip exports. + +Video telemetry, embedded KLV, embedded EXIF, manually selected out-of-folder +sources, charting, training export, and server-side caching for very large +sources are future extensions behind the same read-time contract. diff --git a/docs/UI-DatasetInfo.md b/docs/UI-DatasetInfo.md index 38a9dfd85..e40d0479a 100644 --- a/docs/UI-DatasetInfo.md +++ b/docs/UI-DatasetInfo.md @@ -1,7 +1,7 @@ -# Dataset Info +# Media Metadata -The **Dataset Info** panel shows properties of the whole dataset and lets you attach -custom metadata to it. It is one pane of the +The **Media Metadata** panel shows read-only frame telemetry, properties of the +whole dataset, and custom metadata attached to it. It is one pane of the [context sidebar](UI-Navigation-Editing-Bar.md#context-sidebar-web). Metadata you add travels with the dataset: it is shown while annotating and written into @@ -11,7 +11,11 @@ re-link annotations to their source records. ## What it shows -![Dataset Info panel](images/General/DatasetInfo.png){ width=220px align=right } +![Media Metadata panel](images/General/DatasetInfo.png){ width=220px align=right } + +**Frame Metadata** (read-only): per-frame telemetry for the active image, such +as timestamp, latitude, longitude, depth, or altitude. The panel shows only the +source fields for the active frame, in the order they appear in the source file. **Standard information** (read-only): Name, Type, FPS, Original FPS and Subtype (when set), Created date, and ID (the Girder folder id). @@ -21,6 +25,76 @@ example a station id, cruise number, or dive number.
+## Frame Metadata + +Frame telemetry is not an annotation stream. DIVE reads it from a `.txt` or +`.csv` sidecar file next to the imagery and displays the values for the active +frame. The sidecar remains the source of truth; DIVE does not import it into an +editable store or save a derived copy. + +### Source file + +Use a delimited text file with: + +* a header row, +* one or more columns containing image filenames, +* at least one metadata column beyond the filename column. + +The delimiter can be comma, tab, or whitespace. DIVE joins rows to frames by +matching filename values, not by row order. A row that does not match an image is +ignored instead of being shifted onto another frame. + +Example: + +```text +image_file timestamp latitude longitude water_depth +img_0001.tif 15:40:56 46.575870 -124.603094 192.80 +img_0002.tif 15:41:04 46.575912 -124.603080 193.10 +``` + +The filename extension is ignored during matching, so `img_0001.tif` matches the +image key `img_0001`. Values are displayed as raw strings in the order they +appear in the source file. + +### Placement + +For a single-camera image sequence, place the `.txt` or `.csv` file in the +dataset folder beside the images. + +For a multicamera image sequence, use either placement: + +* Place one shared file at the multicam parent folder. Each camera selects the + rows or filename column that match its own images. +* Place one file inside each camera child folder. Each file is read only for that + camera. + +A shared multicam file can contain one filename column per camera, such as +`port_image` and `starboard_image`, or one filename column with separate rows for +each camera. The Media Metadata panel follows the active camera, so switching +cameras switches the displayed records. + +### Display behavior + +Open **Media Metadata** from the context sidebar while viewing an image-sequence +dataset. The Frame Metadata section updates as the playhead moves. 
+ +The section shows only the source fields for the active frame. It does not +repeat the current frame number or filename, which are already shown by the +playback controls. + +The section may show an empty state when: + +* the platform or dataset type does not support frame metadata, +* no matching `.txt` or `.csv` source is present, +* the current frame has no matching row. + +Frame telemetry is read-only in v1. There is no edit, save, import, or export +flow for these values. Video telemetry, embedded KLV, embedded EXIF, and manual +selection of a source file from another location are future work. + +See [Data Formats](DataFormats.md#per-frame-metadata-text-sidecars) for the +sidecar file contract. + ## Where the data is stored Custom metadata lives on the dataset's folder metadata under the `datasetInfo` key — the diff --git a/mkdocs.yml b/mkdocs.yml index 3944a0ad8..cb4f7bf6a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -68,7 +68,7 @@ nav: - Attributes Details: UI-AttributeDetails.md - Attribute Track Filtering: UI-AttributeTrackFiltering.md - Group Manager: UI-Group-Manager.md - - Dataset Info: UI-DatasetInfo.md + - Media Metadata: UI-DatasetInfo.md - Annotation Sets: Annotation-Sets.md - Keyboard Shortcut Reference: Mouse-Keyboard-Shortcuts.md - Advanced features: @@ -83,6 +83,7 @@ nav: - Data Formats: DataFormats.md - Developer Guide: - Codebase architecture: Architecture-For-New-Developers.md + - Frame metadata architecture: Frame-Metadata-Architecture.md - Administrator Guide: - Deployment Options Overview: Deployment-Overview.md - Provisioning Google Cloud: Deployment-Provision.md diff --git a/server/dive_server/crud.py b/server/dive_server/crud.py index a584cb9dd..7ec0dfabe 100644 --- a/server/dive_server/crud.py +++ b/server/dive_server/crud.py @@ -24,6 +24,7 @@ class FileType(Enum): COCO_JSON = 3 DIVE_CONF = 4 MEVA_KPF = 5 + FRAME_METADATA = 6 def get_validated_model(model: BaseModel, **kwargs): diff --git a/server/dive_server/crud_dataset.py 
b/server/dive_server/crud_dataset.py index c0f7dc24f..7b38606ce 100644 --- a/server/dive_server/crud_dataset.py +++ b/server/dive_server/crud_dataset.py @@ -17,7 +17,7 @@ from dive_server import crud, crud_annotation from dive_tasks import tasks from dive_utils import TRUTHY_META_VALUES, asbool, calibration_format, constants, fromMeta, models, types -from dive_utils.serializers import kwcoco +from dive_utils.serializers import frame_metadata, kwcoco def get_url(dataset: types.GirderModel, item: types.GirderModel) -> str: @@ -378,6 +378,153 @@ def get_media( ) +def load_frame_metadata( + dsFolder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int = 0, + endFrame: Optional[int] = None, +) -> dict: + crud.verify_dataset(dsFolder) + source_type = fromMeta(dsFolder, constants.TypeMarker) + if source_type == constants.MultiType: + return _load_multicam_frame_metadata(dsFolder, user, startFrame, endFrame) + if source_type != constants.ImageSequenceType: + return {'cameras': {}} + + images = crud.valid_images(dsFolder, user) + media_keys = crud.valid_image_names_dict(images) + media_root = crud.getCloneRoot(user, dsFolder) + source = frame_metadata.select_frame_metadata_source( + _frame_metadata_candidate_texts(media_root), + media_keys, + ) + if source is None: + return {'cameras': {}} + + if endFrame is None: + endFrame = len(images) - 1 + + records = {} + for media_key, frame_number in media_keys.items(): + if startFrame <= frame_number <= endFrame and media_key in source.records: + records[str(frame_number)] = source.records[media_key] + + return {'cameras': {'singleCam': records}} + + +def _load_multicam_frame_metadata( + dsFolder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int, + endFrame: Optional[int], +) -> dict: + multi_cam = fromMeta(dsFolder, constants.MultiCamMarker) or {} + root = crud.getCloneRoot(user, dsFolder) + root_candidates = list(_frame_metadata_candidate_texts(root)) + cameras: Dict[str, Dict[str, Dict[str, 
str]]] = {} + has_source = False + + for camera_name in _multicam_camera_order(multi_cam): + cam_info = multi_cam['cameras'][camera_name] + child = Folder().load(cam_info['folderId'], level=AccessType.READ, user=user) + if child is None: + raise RestException( + f'Camera folder for "{camera_name}" was not found', + code=404, + ) + if fromMeta(child, constants.TypeMarker) != constants.ImageSequenceType: + continue + + child_root = crud.getCloneRoot(user, child) + candidates = root_candidates + list(_frame_metadata_candidate_texts(child_root)) + records = _load_camera_frame_metadata_records( + child, + user, + startFrame, + endFrame, + candidates, + ) + if records is not None: + has_source = True + cameras[camera_name] = records + else: + cameras[camera_name] = {} + + if not has_source: + return {'cameras': {}} + return {'cameras': cameras} + + +def _load_camera_frame_metadata_records( + folder: types.GirderModel, + user: types.GirderUserModel, + startFrame: int, + endFrame: Optional[int], + candidates: Iterable[Tuple[str, str]], +) -> Optional[Dict[str, Dict[str, str]]]: + images = crud.valid_images(folder, user) + media_keys = crud.valid_image_names_dict(images) + if endFrame is None: + endFrame = len(images) - 1 + + sources = [ + source + for source in ( + frame_metadata.parse_frame_metadata_source(text, media_keys, source_name=name) + for name, text in candidates + ) + if source is not None + ] + if not sources: + return None + + frame_by_key = { + frame_metadata.normalize_key(media_key): frame_number + for media_key, frame_number in media_keys.items() + } + records: Dict[str, Dict[str, str]] = {} + collided_frames = set() + for source in sources: + for media_key, values in source.records.items(): + frame_number = frame_by_key.get(media_key) + if frame_number is None or not startFrame <= frame_number <= endFrame: + continue + + frame_key = str(frame_number) + if frame_key in collided_frames: + continue + if frame_key not in records: + records[frame_key] = values 
+ elif records[frame_key] != values: + records.pop(frame_key, None) + collided_frames.add(frame_key) + + return records + + +def _frame_metadata_candidate_texts(folder: types.GirderModel) -> Iterable[Tuple[str, str]]: + for item in Folder().childItems(folder): + if _is_frame_metadata_source_item(item): + yield item['name'], _download_item_text(item) + + +def _is_frame_metadata_source_item(item: types.GirderModel) -> bool: + return frame_metadata.is_frame_metadata_source_name(item['name']) + + +def _download_item_text(item: types.GirderModel) -> str: + file = next(iter(Item().childFiles(item)), None) + if file is None: + return '' + chunks = File().download(file, headers=False)() + # Scientific sidecars are often Latin-1/CP1252 (degree signs, etc.); decode + # leniently so one non-UTF-8 file does not 500 the whole frame-metadata route. + # The desktop path uses fs.readFile(..., 'utf-8'), which replaces likewise. + return b''.join( + chunk if isinstance(chunk, bytes) else str(chunk).encode('utf-8') for chunk in chunks + ).decode('utf-8', errors='replace') + + class MetadataMutableUpdateArgs(models.MetadataMutable): """Update schema for mutable metadata fields""" diff --git a/server/dive_server/crud_rpc.py b/server/dive_server/crud_rpc.py index a5a416c97..a38847935 100644 --- a/server/dive_server/crud_rpc.py +++ b/server/dive_server/crud_rpc.py @@ -21,7 +21,7 @@ from dive_tasks.multicam_pipeline import is_stereo_or_multicam_pipeline, pipeline_requires_input from dive_utils import TRUTHY_META_VALUES, asbool, constants, fromMeta, models, types from dive_utils.constants import TrainingModelExtensions -from dive_utils.serializers import dive, kpf, kwcoco, viame +from dive_utils.serializers import dive, frame_metadata, kpf, kwcoco, viame class RunTrainingArgs(BaseModel): @@ -511,6 +511,10 @@ def run_training( ) +def _is_stored_frame_metadata_json(data: dict) -> bool: + return set(data.keys()) == {'cameras'} and isinstance(data.get('cameras'), dict) + + def 
_get_data_by_type( file: types.GirderModel, image_map: Optional[Dict[str, int]] = None, @@ -533,12 +537,17 @@ def _get_data_by_type( # Discover the type of the mystery file if file['exts'][-1] == 'csv': - as_type = crud.FileType.VIAME_CSV + if image_map is not None and frame_metadata.is_frame_metadata(file_string, image_map): + as_type = crud.FileType.FRAME_METADATA + else: + as_type = crud.FileType.VIAME_CSV elif file['exts'][-1] == 'json': data_dict = json.loads(file_string) if type(data_dict) is list: raise RestException('No array-type json objects are supported') - if kwcoco.is_coco_json(data_dict): + if _is_stored_frame_metadata_json(data_dict): + as_type = crud.FileType.FRAME_METADATA + elif kwcoco.is_coco_json(data_dict): as_type = crud.FileType.COCO_JSON elif models.MetadataMutable.is_dive_configuration(data_dict): data_dict = models.MetadataMutable(**data_dict).dict(exclude_none=True) @@ -569,6 +578,13 @@ def _get_data_by_type( 'attributes': attributes, 'type': as_type, }, warnings + if as_type == crud.FileType.FRAME_METADATA: + return { + 'annotations': None, + 'meta': None, + 'attributes': None, + 'type': as_type, + }, warnings if as_type == crud.FileType.MEVA_KPF: converted, attributes = kpf.convert(kpf.load(file_string)) return { @@ -650,10 +666,7 @@ def process_items( # Processing order: oldest to newest sort=[("created", pymongo.ASCENDING)], ) - auxiliary = crud.get_or_create_auxiliary_folder( - folder, - user, - ) + auxiliary = None aggregate_warnings = [] for item in unprocessed_items: file: Optional[types.GirderModel] = next(Item().childFiles(item), None) @@ -677,6 +690,14 @@ def process_items( Item().remove(item) raise RestException(f'Unknown file type for {file["name"]}') + if results['type'] == crud.FileType.FRAME_METADATA: + continue + + if auxiliary is None: + auxiliary = crud.get_or_create_auxiliary_folder( + folder, + user, + ) item['meta'][constants.ProcessedMarker] = True Item().move(item, auxiliary) if results['annotations']: diff --git 
a/server/dive_server/views_dataset.py b/server/dive_server/views_dataset.py index 7978590f0..d8e5982b8 100644 --- a/server/dive_server/views_dataset.py +++ b/server/dive_server/views_dataset.py @@ -1,14 +1,16 @@ +import json from typing import List, Optional import cherrypy from girder.api import access from girder.api.describe import Description, autoDescribeRoute -from girder.api.rest import Resource, rawResponse +from girder.api.rest import Resource, rawResponse, setRawResponse, setResponseHeader from girder.constants import AccessType, SortDir, TokenScope from girder.exceptions import RestException from girder.models.file import File from girder.models.folder import Folder from girder.models.item import Item +from girder.utility import JsonEncoder from dive_utils import constants, setContentDisposition from dive_utils.models import MetadataMutable @@ -43,6 +45,7 @@ def __init__(self, resourceName): self.route("GET", ("export",), self.export) self.route("GET", (":id", "configuration"), self.get_configuration) self.route("GET", (":id", "media", ":mediaId", "download"), self.download_media) + self.route("GET", (":id", "frame_metadata"), self.get_frame_metadata) self.route("POST", ("validate_files",), self.validate_files) self.route("PATCH", (":id",), self.patch_metadata) @@ -202,7 +205,7 @@ def list_datasets( ) def get_meta(self, folder): return crud_dataset.get_dataset(folder, self.getCurrentUser()).dict(exclude_none=True) - + @access.user @autoDescribeRoute( Description("Get calibration information of dataset") @@ -262,6 +265,40 @@ def get_configuration(self, folder): def get_media(self, folder): return crud_dataset.get_media(folder, self.getCurrentUser()).dict(exclude_none=True) + @access.user + @autoDescribeRoute( + Description("Get dataset frame metadata for an explicit frame window") + .modelParam("id", level=AccessType.READ, **DatasetModelParam) + .param( + "startFrame", + "Inclusive first frame to return", + paramType="query", + dataType="integer", + 
required=True, + ) + .param( + "endFrame", + "Inclusive last frame to return", + paramType="query", + dataType="integer", + required=True, + ) + ) + def get_frame_metadata(self, folder, startFrame: int, endFrame: int): + if startFrame < 0 or endFrame < 0: + raise RestException('Frame metadata window bounds must be non-negative', code=400) + if startFrame > endFrame: + raise RestException('startFrame must be less than or equal to endFrame', code=400) + payload = crud_dataset.load_frame_metadata( + folder, + self.getCurrentUser(), + startFrame=startFrame, + endFrame=endFrame, + ) + setResponseHeader('Content-Type', 'application/json') + setRawResponse() + return json.dumps(payload, allow_nan=False, cls=JsonEncoder) + @access.public(scope=TokenScope.DATA_READ, cookie=True) @autoDescribeRoute( Description("Export all selected datasets") diff --git a/server/dive_utils/serializers/frame_metadata.py b/server/dive_utils/serializers/frame_metadata.py new file mode 100644 index 000000000..5bf942f08 --- /dev/null +++ b/server/dive_utils/serializers/frame_metadata.py @@ -0,0 +1,183 @@ +import csv +from dataclasses import dataclass +import io +import os +import re +from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Tuple + +from dive_utils import constants +from dive_utils.serializers import viame + +FRAME_METADATA_SOURCE_EXTENSIONS = {'.txt', '.csv'} + + +@dataclass(frozen=True) +class ParsedFrameMetadata: + source_name: Optional[str] + header: List[str] + rows: List[Dict[str, str]] + join_columns: List[str] + payload_columns: List[str] + records: Dict[str, Dict[str, str]] + + +def normalize_key(value: str) -> str: + """Normalize a media filename the same way valid_image_names_dict keys images.""" + basename = os.path.basename(str(value).strip()) + stem, ext = os.path.splitext(basename) + if ext.lower().lstrip('.') in constants.allValidLargeImageFormats: + return stem + return basename + + +def parse_table(text: str) -> Tuple[List[str], List[Dict[str, 
str]]]: + return _parse_table_rows(_read_rows(text)) + + +def _parse_table_rows(raw_rows: List[List[str]]) -> Tuple[List[str], List[Dict[str, str]]]: + if not raw_rows: + return [], [] + + header = [cell.strip() for cell in raw_rows[0]] + if not all(header): + return [], [] + + rows: List[Dict[str, str]] = [] + for raw_row in raw_rows[1:]: + values = [cell.strip() for cell in raw_row] + if not any(values): + continue + values = values[: len(header)] + [''] * max(0, len(header) - len(values)) + rows.append(dict(zip(header, values))) + return header, rows + + +def find_join_columns( + header: Sequence[str], + rows: Iterable[Mapping[str, str]], + media_keys: Mapping[str, int], +) -> List[str]: + return _find_join_columns_for_keys(header, rows, _normalized_media_keys(media_keys)) + + +def _find_join_columns_for_keys( + header: Sequence[str], + rows: Iterable[Mapping[str, str]], + normalized_media_keys: set, +) -> List[str]: + materialized_rows = list(rows) + return [ + column + for column in header + if any( + row.get(column) and normalize_key(row[column]) in normalized_media_keys + for row in materialized_rows + ) + ] + + +def is_frame_metadata(text: str, media_keys: Mapping[str, int]) -> bool: + return parse_frame_metadata_source(text, media_keys) is not None + + +def parse_frame_metadata_source( + text: str, + media_keys: Mapping[str, int], + source_name: Optional[str] = None, +) -> Optional[ParsedFrameMetadata]: + raw_rows, delimiter = _read_rows_with_delimiter(text) + if delimiter == ',' and viame.is_viame_csv_rows(raw_rows): + return None + + header, rows = _parse_table_rows(raw_rows) + if not header or not rows: + return None + + normalized_media_keys = _normalized_media_keys(media_keys) + join_columns = _find_join_columns_for_keys(header, rows, normalized_media_keys) + if not join_columns: + return None + + payload_columns = [column for column in header if column not in join_columns] + if not payload_columns: + return None + + records: Dict[str, Dict[str, str]] 
= {} + for row in rows: + for column in join_columns: + key = normalize_key(row.get(column, '')) + if key in normalized_media_keys: + records[key] = {field: row.get(field, '') for field in header} + + if not records: + return None + + return ParsedFrameMetadata( + source_name=source_name, + header=list(header), + rows=rows, + join_columns=join_columns, + payload_columns=payload_columns, + records=records, + ) + + +def select_frame_metadata_source( + candidates: Iterable[Tuple[str, str]], + media_keys: Mapping[str, int], +) -> Optional[ParsedFrameMetadata]: + matches: List[ParsedFrameMetadata] = [] + for source_name, text in candidates: + if not is_frame_metadata_source_name(source_name): + continue + source = parse_frame_metadata_source(text, media_keys, source_name=source_name) + if source is not None: + matches.append(source) + + if len(matches) != 1: + return None + return matches[0] + + +def _read_rows(text: str) -> List[List[str]]: + return _read_rows_with_delimiter(text)[0] + + +def _read_rows_with_delimiter(text: str) -> Tuple[List[List[str]], Optional[str]]: + first_line = _first_nonempty_line(text) + if first_line is None: + return [], None + + delimiter = _sniff_delimiter(first_line) + if delimiter is None: + return [re.split(r'\s+', line.strip()) for line in text.splitlines() if line.strip()], None + + reader = csv.reader(io.StringIO(text), delimiter=delimiter) + return [ + [cell.strip() for cell in row] + for row in reader + if row and any(cell.strip() for cell in row) + ], delimiter + + +def _first_nonempty_line(text: str) -> Optional[str]: + for line in text.splitlines(): + if line.strip(): + return line.strip() + return None + + +def _sniff_delimiter(line: str) -> Optional[str]: + if ',' in line: + return ',' + if '\t' in line: + return '\t' + return None + + +def _normalized_media_keys(media_keys: Mapping[str, int]) -> set: + return {normalize_key(key) for key in media_keys} + + +def is_frame_metadata_source_name(source_name: str) -> bool: + return 
os.path.splitext(source_name.lower())[1] in FRAME_METADATA_SOURCE_EXTENSIONS diff --git a/server/dive_utils/serializers/viame.py b/server/dive_utils/serializers/viame.py index ff8e34540..14a5f05cd 100644 --- a/server/dive_utils/serializers/viame.py +++ b/server/dive_utils/serializers/viame.py @@ -8,7 +8,7 @@ import json import os import re -from typing import Any, Dict, Generator, List, Optional, Tuple, Union +from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Union from dive_utils import constants, types from dive_utils.models import Feature, Track, interpolate @@ -63,6 +63,54 @@ def row_info(row: List[str]) -> Tuple[int, str, int, List[int], float]: return trackId, filename, frame, bounds, fish_length +def _is_viame_data_row(row: List[str]) -> bool: + if len(row) < 9: + return False + try: + row_info(row) + except (TypeError, ValueError): + return False + return True + + +def is_viame_csv(rows: List[str]) -> bool: + return is_viame_csv_rows(csv.reader(row for row in rows)) + + +def is_viame_csv_rows(rows: Iterable[List[str]]) -> bool: + """Return true when rows look like a VIAME annotation CSV. + + Two shapes are recognized. DIVE's own exports carry the ``# 1: Detection or + Track-id`` comment header, so any file with that header plus a VIAME-shaped + data row is VIAME. Headerless VIAME CSVs (produced by external tools) have no + text header at all, so their first non-comment row is itself a detection; + those are recognized too. A telemetry file instead leads with a plain + field-name header row that is *not* VIAME-shaped, so it is left for the frame + metadata parser even when one of its columns matches the media names. 
+ """ + has_header = False + has_data_row = False + first_row_is_detection = False + seen_data_row = False + + for row in rows: + if not row: + continue + if row[0].startswith('#'): + has_header = has_header or row[0].startswith('# 1: Detection or Track-id') + continue + row_is_detection = _is_viame_data_row(row) + if not seen_data_row: + seen_data_row = True + first_row_is_detection = row_is_detection + if row_is_detection: + has_data_row = True + if has_header or first_row_is_detection: + return True + + return has_data_row and (has_header or first_row_is_detection) + + def _resolve_detection_length( attributes: Optional[Dict[str, Any]], fish_length_from_column: float, diff --git a/server/tests/test_deserialize_kwcoco_json.py b/server/tests/test_deserialize_kwcoco_json.py index 396d29526..2139e48fe 100644 --- a/server/tests/test_deserialize_kwcoco_json.py +++ b/server/tests/test_deserialize_kwcoco_json.py @@ -754,6 +754,8 @@ def test_export_dive_as_coco_writes_dataset_info(): ) assert coco["info"]["dive_dataset_info"] == DATASET_INFO assert "dive_dataset_info" in coco["info"]["dive_extensions"] + assert "dive_frame_metadata" not in coco["info"] + assert "dive_frame_metadata" not in coco["info"]["dive_extensions"] @pytest.mark.parametrize("datasetInfo", [None, {}]) diff --git a/server/tests/test_frame_metadata.py b/server/tests/test_frame_metadata.py new file mode 100644 index 000000000..48e5bdf0d --- /dev/null +++ b/server/tests/test_frame_metadata.py @@ -0,0 +1,295 @@ +from dive_utils.serializers.frame_metadata import ( + find_join_columns, + normalize_key, + parse_frame_metadata_source, + select_frame_metadata_source, +) + +SYNTHETIC_HEADER = [ + "port_image", + "depth_m", + "heading", + "starboard_image", +] + +SYNTHETIC_RECT_ROWS = [ + ["rect_port_0001.tif", "192.80", "174.5", "rect_starboard_0001.tif"], + ["rect_port_0002.tif", "193.05", "175.1", "rect_starboard_0002.tif"], +] + +SYNTHETIC_JPG_ROWS = [ + ["jpg_port_0001.jpg", "88.40", "92.5", 
"jpg_starboard_0001.jpg"], + ["jpg_port_0002.jpg", "88.72", "93.1", "jpg_starboard_0002.jpg"], +] + +SYNTHETIC_SOURCES = { + "synthetic_auv_nav_rect.txt": SYNTHETIC_RECT_ROWS, + "synthetic_auv_nav_jpg.txt": SYNTHETIC_JPG_ROWS, +} + + +def _load_contract(): + return { + "selectionStatus": {"missing": "none", "ambiguous": "none"}, + "sources": { + source_name: _source_contract(rows) for source_name, rows in SYNTHETIC_SOURCES.items() + }, + } + + +def _fixture_text(source_name): + rows = SYNTHETIC_SOURCES[source_name] + return "\n".join([" ".join(SYNTHETIC_HEADER), *[" ".join(row) for row in rows], ""]) + + +def _source_contract(rows): + return { + "header": SYNTHETIC_HEADER, + "cameras": { + "port": _camera_contract("port_image", rows), + "starboard": _camera_contract("starboard_image", rows), + }, + "recordsByFrame": { + str(frame): dict(zip(SYNTHETIC_HEADER, row)) for frame, row in enumerate(rows) + }, + } + + +def _camera_contract(join_column, rows): + return { + "joinColumn": join_column, + "payloadColumns": [column for column in SYNTHETIC_HEADER if column != join_column], + "frames": [str(frame) for frame in range(len(rows))], + } + + +def _media_keys(camera_records, join_column): + return { + normalize_key(record[join_column]): int(frame) for frame, record in camera_records.items() + } + + +def _records_by_frame(source, media_keys): + return { + str(frame): source.records[key] + for key, frame in sorted(media_keys.items(), key=lambda item: item[1]) + if key in source.records + } + + +def _source_status(source): + return "none" if source is None else "selected" + + +def test_normalize_key_matches_image_name_map_keys(): + assert normalize_key("nested/20191009.154056.00082_rect_color.tif") == ( + "20191009.154056.00082_rect_color" + ) + + +def test_parse_multicamera_rows_with_multiple_image_columns(): + media_keys = { + "20191009.154056.00082_rect_color": 0, + "20191009.154056.00081_rect_color": 0, + } + text = """port_image date time latitude longitude 
water_depth altitude starboard_image +20191009.154056.00082_rect_color.tif 2019/10/09 15:40:56.1122 46.575870 -124.603094 192.80 2.78 20191009.154056.00081_rect_color.tif +""" + + source = parse_frame_metadata_source(text, media_keys, source_name="nav.txt") + + assert source is not None + assert source.source_name == "nav.txt" + assert source.header == [ + "port_image", + "date", + "time", + "latitude", + "longitude", + "water_depth", + "altitude", + "starboard_image", + ] + assert source.join_columns == ["port_image", "starboard_image"] + assert source.payload_columns == [ + "date", + "time", + "latitude", + "longitude", + "water_depth", + "altitude", + ] + assert set(source.records) == { + "20191009.154056.00082_rect_color", + "20191009.154056.00081_rect_color", + } + port_record = source.records["20191009.154056.00082_rect_color"] + assert list(port_record) == source.header + assert port_record["latitude"] == "46.575870" + assert all(isinstance(value, str) for value in port_record.values()) + + +def test_parse_comma_tab_and_whitespace_delimited_sources(): + media_keys = {"image_0001": 0} + + for text in ( + "filename,depth,latitude\nimage_0001.jpg,192.80,46.575870\n", + "filename\tdepth\tlatitude\nimage_0001.jpg\t192.80\t46.575870\n", + "filename depth latitude\nimage_0001.jpg 192.80 46.575870\n", + ): + source = parse_frame_metadata_source(text, media_keys) + + assert source is not None + assert source.header == ["filename", "depth", "latitude"] + assert source.join_columns == ["filename"] + assert source.records["image_0001"] == { + "filename": "image_0001.jpg", + "depth": "192.80", + "latitude": "46.575870", + } + + +def test_find_join_columns_matches_by_filename_value(): + rows = [ + { + "port_image": "20191009.154056.00082_rect_color.tif", + "latitude": "46.575870", + "starboard_image": "20191009.154056.00081_rect_color.tif", + } + ] + + assert find_join_columns( + ["port_image", "latitude", "starboard_image"], + rows, + { + 
"20191009.154056.00082_rect_color": 0, + "20191009.154056.00081_rect_color": 0, + }, + ) == ["port_image", "starboard_image"] + + +def test_rejects_viame_annotation_csv_even_when_image_column_matches(): + media_keys = {"20191009.154056.00082_rect_color": 0} + viame_csv = """# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes +1,20191009.154056.00082_rect_color.tif,0,0,0,10,10,1.0,-1,fish,0.9 +""" + + assert parse_frame_metadata_source(viame_csv, media_keys) is None + + +def test_rejects_headerless_viame_annotation_csv(): + """A headerless VIAME CSV (no comment header, first row is a detection) must + not be mistaken for telemetry, otherwise its detections are dropped on import. + """ + media_keys = {"frame_0001": 0, "frame_0002": 1} + headerless_viame = ( + "1,frame_0001.png,0,10,20,30,40,1.0,-1,fish,0.9\n" + "2,frame_0002.png,1,11,21,31,41,1.0,-1,fish,0.8\n" + ) + + assert parse_frame_metadata_source(headerless_viame, media_keys) is None + + +def test_accepts_viame_shaped_telemetry_without_viame_header(): + """Telemetry whose rows coincidentally match VIAME's numeric shape but lacks the + ``# 1: Detection or Track-id`` comment header is still accepted as telemetry. 
+ """ + media_keys = {"image_0001": 0} + text = ( + "index,image,frame,x,y,depth,altitude,heading,temperature\n" + "1,image_0001.jpg,100,46.5,-124.6,192.8,2.7,180.5,4.2\n" + ) + + source = parse_frame_metadata_source(text, media_keys) + + assert source is not None + assert source.join_columns == ["image"] + assert source.records["image_0001"]["depth"] == "192.8" + + +def test_rejects_bare_image_list_and_unrelated_text(): + media_keys = {"image_0001": 0} + + assert parse_frame_metadata_source("image\nimage_0001.jpg\n", media_keys) is None + assert parse_frame_metadata_source("note,value\nhello,world\n", media_keys) is None + + +def test_select_source_rejects_ambiguous_candidates_and_non_text_extensions(): + media_keys = {"image_0001": 0} + accepted_text = "filename,depth\nimage_0001.jpg,192.80\n" + + assert ( + select_frame_metadata_source( + [ + ("metadata.json", accepted_text), + ("telemetry-a.txt", accepted_text), + ], + media_keys, + ).source_name + == "telemetry-a.txt" + ) + assert ( + select_frame_metadata_source( + [ + ("telemetry-a.txt", accepted_text), + ("telemetry-b.csv", "filename,temperature\nimage_0001.jpg,4.2\n"), + ], + media_keys, + ) + is None + ) + + +def test_shared_synthetic_auv_fixture_contract(): + contract = _load_contract() + + for source_name, expected in contract["sources"].items(): + text = _fixture_text(source_name) + for camera_contract in expected["cameras"].values(): + expected_records = { + frame: expected["recordsByFrame"][frame] for frame in camera_contract["frames"] + } + join_column = camera_contract["joinColumn"] + media_keys = _media_keys(expected_records, join_column) + + source = parse_frame_metadata_source(text, media_keys, source_name=source_name) + + assert source is not None + assert source.source_name == source_name + assert source.header == expected["header"] + assert source.join_columns == [join_column] + assert source.payload_columns == camera_contract["payloadColumns"] + assert _records_by_frame(source, media_keys) 
== expected_records + assert all( + isinstance(value, str) + for record in source.records.values() + for value in record.values() + ) + + +def test_shared_synthetic_auv_selection_status_contract(): + contract = _load_contract() + source_contract = contract["sources"]["synthetic_auv_nav_rect.txt"] + port_contract = source_contract["cameras"]["port"] + port_records = { + frame: source_contract["recordsByFrame"][frame] for frame in port_contract["frames"] + } + media_keys = _media_keys(port_records, port_contract["joinColumn"]) + rect_text = _fixture_text("synthetic_auv_nav_rect.txt") + + missing_source = select_frame_metadata_source( + [("synthetic_auv_nav_jpg.txt", _fixture_text("synthetic_auv_nav_jpg.txt"))], + media_keys, + ) + ambiguous_source = select_frame_metadata_source( + [ + ("synthetic_auv_nav_rect.txt", rect_text), + ("synthetic_auv_nav_rect_copy.csv", rect_text), + ], + media_keys, + ) + + assert { + "missing": _source_status(missing_source), + "ambiguous": _source_status(ambiguous_source), + } == contract["selectionStatus"] diff --git a/server/tests/test_frame_metadata_crud.py b/server/tests/test_frame_metadata_crud.py new file mode 100644 index 000000000..3ed7db009 --- /dev/null +++ b/server/tests/test_frame_metadata_crud.py @@ -0,0 +1,528 @@ +import json +from unittest.mock import patch + +import pytest +from girder.exceptions import RestException + +from dive_server import crud_dataset +from dive_server.views_dataset import DatasetResource +from dive_utils import constants + + +def _dataset_folder(): + return { + '_id': 'dataset-id', + 'name': 'single-camera', + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + }, + } + + +def _multicam_parent_folder(): + return { + '_id': 'parent-id', + 'name': 'stereo-camera', + 'meta': { + 'annotate': True, + 'type': constants.MultiType, + 'fps': 5, + 'multiCam': { + 'defaultDisplay': 'port', + 'cameraOrder': ['port', 'starboard'], + 'cameras': { + 'port': {'folderId': 'port-id', 'type': 
constants.ImageSequenceType}, + 'starboard': { + 'folderId': 'starboard-id', + 'type': constants.ImageSequenceType, + }, + }, + }, + }, + } + + +def _camera_folder(folder_id: str, name: str): + return { + '_id': folder_id, + 'name': name, + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + 'fps': 5, + }, + } + + +def _image_item(name: str): + return { + '_id': f'{name}-id', + 'name': name, + } + + +def _source_item(name: str): + return { + '_id': f'{name}-id', + 'name': name, + } + + +def _root_folder(folder_id: str, name: str): + return { + '_id': folder_id, + 'name': name, + 'meta': {}, + } + + +def _call_frame_metadata_route(folder, user, params, return_mocks=False): + with ( + patch('dive_server.views_dataset.Folder'), + patch('dive_server.views_dataset.setRawResponse') as set_raw_response, + patch('dive_server.views_dataset.setResponseHeader') as set_response_header, + ): + resource = DatasetResource('dive_dataset') + resource.getCurrentUser = lambda: user + method = DatasetResource.get_frame_metadata.__wrapped__.__wrapped__ + result = method( + resource, + folder, + startFrame=int(params['startFrame']), + endFrame=int(params['endFrame']), + ) + if return_mocks: + return result, set_response_header, set_raw_response + return result + + +def _wire_item_downloads(item_model, file_model, texts_by_name): + def child_files(item): + if item['name'] not in texts_by_name: + raise AssertionError(f'unexpected download for {item["name"]}') + return iter( + [ + { + '_id': f'{item["_id"]}-file', + 'itemId': item['_id'], + 'name': item['name'], + } + ] + ) + + def download(file, headers=False): + return lambda: [texts_by_name[file['name']].encode('utf-8')] + + item_model.childFiles.side_effect = child_files + file_model.download.side_effect = download + + +def _wire_multicam_folders(folder_model, children, items_by_folder_id): + def load_folder(folder_id, level=None, user=None): + return children.get(folder_id) + + def child_items(folder): + return 
items_by_folder_id.get(folder['_id'], []) + + folder_model.load.side_effect = load_folder + folder_model.childItems.side_effect = child_items + + +def _wire_multicam_clone_roots(get_clone_root, roots_by_folder_id): + def clone_root(user, folder): + return roots_by_folder_id[folder['_id']] + + get_clone_root.side_effect = clone_root + + +def _wire_multicam_valid_images(valid_images, images_by_folder_id): + def images(folder, user): + return images_by_folder_id.get(folder['_id'], []) + + valid_images.side_effect = images + + +@patch('girder.api.rest.Resource.route') +def test_dataset_resource_registers_frame_metadata_route(route): + with patch('dive_server.views_dataset.Folder'): + resource = DatasetResource('dive_dataset') + + assert any( + call.args == ("GET", (":id", "frame_metadata"), resource.get_frame_metadata) + for call in route.call_args_list + ) + + +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_accepts_explicit_window(load_frame_metadata): + dataset = _dataset_folder() + user = {'_id': 'user-id'} + response = { + 'cameras': { + 'singleCam': { + '1': { + 'filename': 'image_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + }, + }, + } + load_frame_metadata.return_value = response + + result, set_response_header, set_raw_response = _call_frame_metadata_route( + dataset, + user, + {'startFrame': '1', 'endFrame': '2'}, + return_mocks=True, + ) + + assert json.loads(result) == response + assert result.index('"filename"') < result.index('"depth"') < result.index('"temperature"') + set_response_header.assert_called_once_with('Content-Type', 'application/json') + set_raw_response.assert_called_once_with() + load_frame_metadata.assert_called_once_with( + dataset, + user, + startFrame=1, + endFrame=2, + ) + + +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_returns_empty_cameras_without_source(load_frame_metadata): + dataset = 
_dataset_folder() + user = {'_id': 'user-id'} + load_frame_metadata.return_value = {'cameras': {}} + + result = _call_frame_metadata_route( + dataset, + user, + {'startFrame': '0', 'endFrame': '0'}, + ) + + assert json.loads(result) == {'cameras': {}} + + +@pytest.mark.parametrize( + ('params', 'message'), + [ + ({'startFrame': '-1', 'endFrame': '0'}, 'non-negative'), + ({'startFrame': '2', 'endFrame': '1'}, 'less than or equal to endFrame'), + ], +) +@patch('dive_server.views_dataset.crud_dataset.load_frame_metadata') +def test_get_frame_metadata_route_rejects_invalid_window(load_frame_metadata, params, message): + dataset = _dataset_folder() + user = {'_id': 'user-id'} + + with pytest.raises(RestException, match=message): + _call_frame_metadata_route(dataset, user, params) + + load_frame_metadata.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_reads_co_located_source_and_applies_window( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + dataset = _dataset_folder() + source_root = {'_id': 'source-root-id', 'name': 'source-root', 'meta': dataset['meta']} + user = {'_id': 'user-id'} + valid_images.return_value = [ + _image_item('image_0001.jpg'), + _image_item('image_0002.jpg'), + _image_item('image_0003.jpg'), + ] + get_clone_root.return_value = source_root + folder_model = folder_cls.return_value + folder_model.childItems.return_value = [ + _source_item('image_0001.jpg'), + _source_item('frame_metadata.json'), + _source_item('navigation.txt'), + ] + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + "filename,depth,temperature\n" + "image_0001.jpg,192.80,4.0\n" + "image_0002.jpg,193.10,4.1\n" + 
"image_0003.jpg,193.40,4.2\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(dataset, user, startFrame=1, endFrame=2) + + assert result == { + 'cameras': { + 'singleCam': { + '1': { + 'filename': 'image_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + '2': { + 'filename': 'image_0003.jpg', + 'depth': '193.40', + 'temperature': '4.2', + }, + }, + }, + } + folder_model.childItems.assert_called_once_with(source_root) + item_model.childFiles.assert_called_once_with(_source_item('navigation.txt')) + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_returns_empty_cameras_without_text_source( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + dataset = _dataset_folder() + source_root = {'_id': 'source-root-id', 'name': 'source-root', 'meta': dataset['meta']} + user = {'_id': 'user-id'} + valid_images.return_value = [_image_item('image_0001.jpg')] + get_clone_root.return_value = source_root + folder_model = folder_cls.return_value + folder_model.childItems.return_value = [ + _source_item('frame_metadata.json'), + _source_item('notes.txt'), + ] + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'notes.txt': "note,value\nhello,world\n", + }, + ) + + result = crud_dataset.load_frame_metadata(dataset, user, startFrame=0, endFrame=0) + + assert result == {'cameras': {}} + item_model.childFiles.assert_called_once_with(_source_item('notes.txt')) + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') 
+@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_routes_root_multicam_source_by_child_camera( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + parent = _multicam_parent_folder() + port = _camera_folder('port-id', 'port') + starboard = _camera_folder('starboard-id', 'starboard') + parent_root = _root_folder('parent-root-id', 'parent-root') + port_root = _root_folder('port-root-id', 'port-root') + starboard_root = _root_folder('starboard-root-id', 'starboard-root') + user = {'_id': 'user-id'} + + folder_model = folder_cls.return_value + _wire_multicam_folders( + folder_model, + {'port-id': port, 'starboard-id': starboard}, + { + 'parent-root-id': [_source_item('navigation.txt')], + 'port-root-id': [], + 'starboard-root-id': [], + }, + ) + _wire_multicam_clone_roots( + get_clone_root, + { + 'parent-id': parent_root, + 'port-id': port_root, + 'starboard-id': starboard_root, + }, + ) + _wire_multicam_valid_images( + valid_images, + { + 'port-id': [_image_item('port_0001.jpg'), _image_item('port_0002.jpg')], + 'starboard-id': [ + _image_item('starboard_0001.jpg'), + _image_item('starboard_0002.jpg'), + ], + }, + ) + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + "port_image,starboard_image,depth,temperature\n" + "port_0001.jpg,starboard_0001.jpg,192.80,4.0\n" + "port_0002.jpg,starboard_0002.jpg,193.10,4.1\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(parent, user, startFrame=0, endFrame=1) + + assert result == { + 'cameras': { + 'port': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + 'temperature': '4.0', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + 'temperature': 
'4.1', + }, + }, + 'starboard': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + 'temperature': '4.0', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + 'temperature': '4.1', + }, + }, + }, + } + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() + + +@patch('dive_server.crud_dataset.File') +@patch('dive_server.crud_dataset.Item') +@patch('dive_server.crud_dataset.Folder') +@patch('dive_server.crud_dataset.crud.valid_images') +@patch('dive_server.crud_dataset.crud.getCloneRoot') +def test_load_frame_metadata_omits_multicam_frame_on_distinct_record_collision( + get_clone_root, + valid_images, + folder_cls, + item_cls, + file_cls, +): + parent = _multicam_parent_folder() + port = _camera_folder('port-id', 'port') + starboard = _camera_folder('starboard-id', 'starboard') + parent_root = _root_folder('parent-root-id', 'parent-root') + port_root = _root_folder('port-root-id', 'port-root') + starboard_root = _root_folder('starboard-root-id', 'starboard-root') + user = {'_id': 'user-id'} + + folder_model = folder_cls.return_value + _wire_multicam_folders( + folder_model, + {'port-id': port, 'starboard-id': starboard}, + { + 'parent-root-id': [_source_item('navigation.txt')], + 'port-root-id': [_source_item('port_override.txt')], + 'starboard-root-id': [], + }, + ) + _wire_multicam_clone_roots( + get_clone_root, + { + 'parent-id': parent_root, + 'port-id': port_root, + 'starboard-id': starboard_root, + }, + ) + _wire_multicam_valid_images( + valid_images, + { + 'port-id': [_image_item('port_0001.jpg'), _image_item('port_0002.jpg')], + 'starboard-id': [ + _image_item('starboard_0001.jpg'), + _image_item('starboard_0002.jpg'), + ], + }, + ) + item_model = item_cls.return_value + file_model = file_cls.return_value + _wire_item_downloads( + item_model, + file_model, + { + 'navigation.txt': ( + 
"port_image,starboard_image,depth\n" + "port_0001.jpg,starboard_0001.jpg,192.80\n" + "port_0002.jpg,starboard_0002.jpg,193.10\n" + ), + 'port_override.txt': ( + "port_image,starboard_image,depth\n" + "port_0001.jpg,starboard_0001.jpg,999.99\n" + "port_0002.jpg,starboard_0002.jpg,193.10\n" + ), + }, + ) + + result = crud_dataset.load_frame_metadata(parent, user, startFrame=0, endFrame=1) + + assert result == { + 'cameras': { + 'port': { + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + }, + }, + 'starboard': { + '0': { + 'port_image': 'port_0001.jpg', + 'starboard_image': 'starboard_0001.jpg', + 'depth': '192.80', + }, + '1': { + 'port_image': 'port_0002.jpg', + 'starboard_image': 'starboard_0002.jpg', + 'depth': '193.10', + }, + }, + }, + } + folder_model.save.assert_not_called() + item_model.move.assert_not_called() + file_model.save.assert_not_called() diff --git a/server/tests/test_serialize_viame_csv.py b/server/tests/test_serialize_viame_csv.py index ea647f0ec..f0667115a 100644 --- a/server/tests/test_serialize_viame_csv.py +++ b/server/tests/test_serialize_viame_csv.py @@ -523,6 +523,7 @@ def test_dataset_info_on_metadata_line(): assert isinstance(parsed['cruise'], int) assert isinstance(parsed['sta_lat'], float) assert isinstance(parsed['gfishsite_id'], str) + assert all('frame_metadata' not in field for field in fields) @pytest.mark.parametrize("datasetInfo", [None, {}]) diff --git a/server/tests/test_update_metadata.py b/server/tests/test_update_metadata.py index ba08140a7..5d7bf4db2 100644 --- a/server/tests/test_update_metadata.py +++ b/server/tests/test_update_metadata.py @@ -5,6 +5,7 @@ from dive_server import crud_dataset from dive_server.crud_rpc import process_items, resolve_imported_dataset_info +from dive_utils import constants @patch('dive_server.crud_dataset.Folder') @@ -153,3 +154,77 @@ def test_process_items_resolves_dataset_info_from_dive_configuration_import( assert 
update_payload['datasetInfo'] == expected assert update_payload['version'] == 1 assert verify is False + + +@patch('dive_server.crud_rpc.crud_annotation.save_annotations') +@patch('dive_server.crud_rpc.crud.saveImportAttributes') +@patch('dive_server.crud_rpc.crud_dataset.update_metadata') +@patch('dive_server.crud_rpc.crud.valid_images') +@patch('dive_server.crud_rpc.crud.get_or_create_auxiliary_folder') +@patch('dive_server.crud_rpc.File') +@patch('dive_server.crud_rpc.Item') +@patch('dive_server.crud_rpc.Folder') +@pytest.mark.parametrize( + ('name', 'exts', 'payload'), + [ + ( + 'navigation.csv', + ['csv'], + '\n'.join( + [ + 'filename,depth,temperature', + 'image_0001.jpg,192.80,4.0', + 'image_0002.jpg,193.10,4.1', + '', + ] + ), + ), + ( + 'frame_metadata.json', + ['json'], + json.dumps({'cameras': {'singleCam': {'0': {'depth': '192.80'}}}}), + ), + ], +) +def test_process_items_leaves_frame_metadata_import_sources_in_dataset_folder( + folder_cls, + item_cls, + file_cls, + get_auxiliary_folder, + valid_images, + update_metadata, + save_import_attributes, + save_annotations, + name, + exts, + payload, +): + folder = { + '_id': 'dataset-id', + 'meta': { + 'annotate': True, + 'type': constants.ImageSequenceType, + 'fps': 5, + }, + } + item = {'_id': 'item-id', 'name': name, 'meta': {}} + file = {'_id': 'file-id', 'name': name, 'exts': exts} + + folder_cls.return_value.childItems.return_value = [item] + item_cls.return_value.childFiles.return_value = iter([file]) + file_cls.return_value.download.return_value = lambda: [payload.encode()] + valid_images.return_value = [ + {'name': 'image_0001.jpg'}, + {'name': 'image_0002.jpg'}, + ] + + warnings = process_items(folder, {'_id': 'user-id'}) + + assert warnings == [] + assert constants.ProcessedMarker not in item['meta'] + item_cls.return_value.move.assert_not_called() + item_cls.return_value.remove.assert_not_called() + get_auxiliary_folder.assert_not_called() + save_annotations.assert_not_called() + 
save_import_attributes.assert_not_called() + update_metadata.assert_not_called()