From 7610ca997240588f5be2d30ff2bdc3b08ca60e42 Mon Sep 17 00:00:00 2001 From: Alex Rawlings Date: Mon, 22 Jun 2026 16:37:58 -0600 Subject: [PATCH 1/4] Allow user to define their own segment boundaries --- __mocks__/lucide-react.tsx | 20 ++ contributions/localizedStrings.json | 5 +- .../components/Interlinearizer.test.tsx | 1 + .../components/InterlinearizerLoader.test.tsx | 113 ++++++++ .../components/PhraseStripParts.test.tsx | 107 ++++++- .../components/SegmentationStore.test.tsx | 60 ++++ .../components/TokenLinkIcon.test.tsx | 133 +++++++++ .../controls/ViewOptionsDropdown.test.tsx | 2 + src/__tests__/components/test-helpers.tsx | 1 + src/__tests__/hooks/useDraftProject.test.ts | 95 +++++++ src/__tests__/main.test.ts | 54 +++- .../parsers/papi/resegmentBook.test.ts | 103 +++++++ src/__tests__/services/projectStorage.test.ts | 39 +++ src/__tests__/utils/segmentation.test.ts | 213 ++++++++++++++ src/__tests__/utils/token-layout.test.ts | 48 ++++ src/components/Interlinearizer.tsx | 130 +++++---- src/components/InterlinearizerLoader.tsx | 72 ++++- src/components/PhraseStripParts.tsx | 135 +++++++-- src/components/SegmentationStore.tsx | 101 +++++++ src/components/TokenLinkIcon.tsx | 64 ++++- .../controls/ViewOptionsDropdown.tsx | 14 + src/hooks/useDraftProject.ts | 45 ++- src/main.ts | 29 +- src/parsers/papi/resegmentBook.ts | 130 +++++++++ src/services/projectStorage.ts | 16 +- src/types/interlinearizer.d.ts | 67 ++++- src/types/token-layout.ts | 12 +- src/types/type-guards.ts | 30 +- src/types/view-options.ts | 5 + src/utils/segmentation.ts | 265 ++++++++++++++++++ src/utils/token-layout.ts | 30 +- user-questions.md | 38 +++ 32 files changed, 2074 insertions(+), 103 deletions(-) create mode 100644 src/__tests__/components/SegmentationStore.test.tsx create mode 100644 src/__tests__/parsers/papi/resegmentBook.test.ts create mode 100644 src/__tests__/utils/segmentation.test.ts create mode 100644 src/components/SegmentationStore.tsx create mode 100644 
src/parsers/papi/resegmentBook.ts create mode 100644 src/utils/segmentation.ts diff --git a/__mocks__/lucide-react.tsx b/__mocks__/lucide-react.tsx index 9e5ba98e..fcc24400 100644 --- a/__mocks__/lucide-react.tsx +++ b/__mocks__/lucide-react.tsx @@ -74,3 +74,23 @@ export function Link2Off(props: Readonly<{ size?: number; className?: string }>) export function Settings(props: Readonly<{ size?: number; className?: string }>): ReactElement { return ; } + +/** + * Stub for the Combine icon used by the merge boundary control. + * + * @param props - SVG props forwarded from the component. + * @returns A ReactElement SVG element used as a merge icon stub in tests. + */ +export function Combine(props: Readonly<{ size?: number; className?: string }>): ReactElement { + return ; +} + +/** + * Stub for the Scissors icon used by the split boundary control. + * + * @param props - SVG props forwarded from the component. + * @returns A ReactElement SVG element used as a split icon stub in tests. + */ +export function Scissors(props: Readonly<{ size?: number; className?: string }>): ReactElement { + return ; +} diff --git a/contributions/localizedStrings.json b/contributions/localizedStrings.json index cd921cea..bd4ffd79 100644 --- a/contributions/localizedStrings.json +++ b/contributions/localizedStrings.json @@ -42,7 +42,10 @@ "%interlinearizer_morphemeGloss_label%": "Gloss for morpheme {form}", "%interlinearizer_tokenChip_editMorphemes%": "Edit morpheme breakdown for {token}", "%interlinearizer_tokenChip_defineMorphemes%": "Define morpheme breakdown for {token}", - "%interlinearizer_linkButton_crossSegmentDisabledTooltip%": "Cross-segment phrases are not supported. 
This link button is outside the current segment.", + "%interlinearizer_linkButton_crossSegmentDisabledTooltip%": "Only the edge token of an adjacent segment can be linked across a boundary.", + "%interlinearizer_viewOption_boundaryEditMode%": "Edit segment boundaries", + "%interlinearizer_boundaryControl_merge%": "Merge with previous segment", + "%interlinearizer_boundaryControl_split%": "Split segment here", "%interlinearizer_modal_create_title%": "Create Interlinear Project", "%interlinearizer_modal_create_name_label%": "Name (optional)", diff --git a/src/__tests__/components/Interlinearizer.test.tsx b/src/__tests__/components/Interlinearizer.test.tsx index c558403f..c8c3298d 100644 --- a/src/__tests__/components/Interlinearizer.test.tsx +++ b/src/__tests__/components/Interlinearizer.test.tsx @@ -423,6 +423,7 @@ function renderInterlinearizer({ chapterLabelInVerse, showMorphology, showFreeTranslation, + boundaryEditMode: false, }} />, navigate, diff --git a/src/__tests__/components/InterlinearizerLoader.test.tsx b/src/__tests__/components/InterlinearizerLoader.test.tsx index 2eb6425a..ac96dda7 100644 --- a/src/__tests__/components/InterlinearizerLoader.test.tsx +++ b/src/__tests__/components/InterlinearizerLoader.test.tsx @@ -16,6 +16,7 @@ import useOptimisticBooleanSetting from '../../hooks/useOptimisticBooleanSetting import { emptyAnalysis, emptyDraft } from '../../types/empty-factories'; import type { PhraseMode } from '../../types/phrase-mode'; import type { ViewOptions } from '../../types/view-options'; +import type { SegmentationDispatch } from '../../components/SegmentationStore'; import { GEN_1_1_BOOK, makeScrollGroupHook, makeWebViewState } from '../test-helpers'; jest.mock('../../hooks/useInterlinearizerBookData'); @@ -154,6 +155,7 @@ type CapturedInterlinearizerProps = { phraseMode: PhraseMode; setPhraseMode: Dispatch>; viewOptions: ViewOptions; + segmentationDispatch: SegmentationDispatch; }; let capturedInterlinearizerProps: 
CapturedInterlinearizerProps | undefined; let interlinearizerMountCount = 0; @@ -1024,6 +1026,115 @@ describe('InterlinearizerLoader', () => { }); }); + describe('segmentation dispatch', () => { + /** A two-verse book so boundary edits produce real, non-default deltas. */ + const TWO_VERSE_BOOK: Book = { + id: 'GEN', + bookRef: 'GEN', + textVersion: 'v1', + segments: [ + { + id: 'GEN 1:1', + startRef: { book: 'GEN', chapter: 1, verse: 1 }, + endRef: { book: 'GEN', chapter: 1, verse: 1 }, + baselineText: 'Alpha beta.', + tokens: [ + { + ref: 'GEN 1:1:0', + surfaceText: 'Alpha', + writingSystem: 'en', + type: 'word', + charStart: 0, + charEnd: 5, + }, + { + ref: 'GEN 1:1:6', + surfaceText: 'beta', + writingSystem: 'en', + type: 'word', + charStart: 6, + charEnd: 10, + }, + ], + }, + { + id: 'GEN 1:2', + startRef: { book: 'GEN', chapter: 1, verse: 2 }, + endRef: { book: 'GEN', chapter: 1, verse: 2 }, + baselineText: 'Gamma.', + tokens: [ + { + ref: 'GEN 1:2:0', + surfaceText: 'Gamma', + writingSystem: 'en', + type: 'word', + charStart: 0, + charEnd: 5, + }, + ], + }, + ], + }; + + /** + * Returns the segmentation delta from the most recent saveDraft call. + * + * @returns The persisted draft's `segmentation`, or `undefined` when not set / no call. + */ + function lastPersistedSegmentation(): DraftProject['segmentation'] { + const calls = mockSendCommand.mock.calls.filter(([c]) => c === 'interlinearizer.saveDraft'); + const last = calls[calls.length - 1]; + const json = last?.[2]; + return typeof json === 'string' ? 
JSON.parse(json).segmentation : undefined; + } + + it('persists split, merge, and move boundary edits made through the dispatch', async () => { + mockBookData({ book: TWO_VERSE_BOOK }); + await act(async () => { + renderLoader(); + }); + const dispatch = capturedInterlinearizerProps?.segmentationDispatch; + if (!dispatch) throw new Error('expected a captured segmentationDispatch'); + + jest.useFakeTimers(); + // Split verse 1 before "beta" — a non-default delta is persisted. + act(() => dispatch.split('GEN 1:1:6')); + act(() => jest.advanceTimersByTime(300)); + expect(lastPersistedSegmentation()).toEqual({ + removedVerseStarts: [], + addedStarts: ['GEN 1:1:6'], + }); + + // Merge verse 2 into its predecessor — adds a removed verse start. + act(() => dispatch.merge('GEN 1:2:0')); + act(() => jest.advanceTimersByTime(300)); + expect(lastPersistedSegmentation()?.removedVerseStarts).toContain('GEN 1:2:0'); + + // Move the verse-2 boundary back onto "beta". + act(() => dispatch.move('GEN 1:2:0', 'GEN 1:1:6')); + act(() => jest.advanceTimersByTime(300)); + jest.useRealTimers(); + expect(lastPersistedSegmentation()).toBeDefined(); + }); + + it('clears the segmentation field when an edit restores the default segmentation', async () => { + mockBookData({ book: TWO_VERSE_BOOK }); + await act(async () => { + renderLoader(); + }); + const dispatch = capturedInterlinearizerProps?.segmentationDispatch; + if (!dispatch) throw new Error('expected a captured segmentationDispatch'); + + jest.useFakeTimers(); + // Merging the book's first token is a no-op, so the result is the default segmentation and the + // persisted field is cleared to undefined. 
+ act(() => dispatch.merge('GEN 1:1:0')); + act(() => jest.advanceTimersByTime(300)); + jest.useRealTimers(); + expect(lastPersistedSegmentation()).toBeUndefined(); + }); + }); + describe('save command', () => { it('saves the draft analysis to the active project when Save is clicked with an active project', async () => { const draftAnalysis = emptyAnalysis(); @@ -1041,6 +1152,8 @@ describe('InterlinearizerLoader', () => { 'interlinearizer.saveAnalysis', 'proj-1', JSON.stringify(draftAnalysis), + // The draft has no custom boundaries, so Save sends "null" to clear any stored ones. + 'null', ); }); diff --git a/src/__tests__/components/PhraseStripParts.test.tsx b/src/__tests__/components/PhraseStripParts.test.tsx index 9eab0dfb..a71c6cee 100644 --- a/src/__tests__/components/PhraseStripParts.test.tsx +++ b/src/__tests__/components/PhraseStripParts.test.tsx @@ -2,8 +2,9 @@ /// /// +import { useLocalizedStrings } from '@papi/frontend/react'; import { fireEvent, render, screen } from '@testing-library/react'; -import type { PhraseAnalysisLink, Token } from 'interlinearizer'; +import type { PhraseAnalysisLink, Segment, Token } from 'interlinearizer'; import type { ReactElement } from 'react'; import { PhraseSlot, @@ -12,6 +13,10 @@ import { type StripItem, } from '../../components/PhraseStripParts'; import { PhraseStripProvider } from '../../components/PhraseStripContext'; +import { + SegmentationProvider, + type SegmentationContextValue, +} from '../../components/SegmentationStore'; import { emptyFocusContext } from '../../types/empty-factories'; import type { TokenGroup, LinkSlot, FocusContext } from '../../types/token-layout'; import { makePhraseLink, makePhraseStripContext, makeWordToken } from '../test-helpers'; @@ -279,6 +284,106 @@ describe('PhraseSlot', () => { }); }); +// --------------------------------------------------------------------------- +// PhraseSlot boundary controls (boundary-edit mode) +// 
--------------------------------------------------------------------------- + +describe('PhraseSlot boundary controls', () => { + // resetMocks clears the shared useLocalizedStrings implementation, so re-establish the + // key-to-itself mapping the BoundaryControl labels rely on. + beforeEach(() => { + jest + .mocked(useLocalizedStrings) + .mockImplementation((keys: readonly string[]) => [ + keys.reduce>((acc, k) => ({ ...acc, [k]: k }), {}), + false, + ]); + }); + + const groupA: TokenGroup = { + tokens: [makeWordToken('a')], + phraseLink: undefined, + firstIndex: 0, + punctuationBetween: [], + }; + const groupB: TokenGroup = { + tokens: [makeWordToken('b')], + phraseLink: undefined, + firstIndex: 1, + punctuationBetween: [], + }; + const slot: LinkSlot = { prevGroup: groupA, nextGroup: groupB, punctuation: [] }; + + /** A segment whose first token ref identifies the boundary the merge control removes. */ + const nextSegment: Segment = { + id: 'seg-2', + startRef: { book: 'GEN', chapter: 1, verse: 2 }, + endRef: { book: 'GEN', chapter: 1, verse: 2 }, + baselineText: 'b', + tokens: [makeWordToken('seg2-start')], + }; + + /** + * Renders a PhraseSlot inside both providers with boundary-edit mode on. + * + * @param props - Overrides for the slot props (e.g. prev/next segment ids). + * @param dispatch - The segmentation dispatch to capture calls on. + * @returns The render result. 
+ */ + function renderBoundary( + props: Partial[0]>, + dispatch = { + merge: jest.fn(), + split: jest.fn(), + move: jest.fn(), + }, + ) { + const value: SegmentationContextValue = { + dispatch, + boundaryEditMode: true, + segmentById: new Map([['seg-2', nextSegment]]), + segmentOrder: new Map([ + ['seg-1', 0], + ['seg-2', 1], + ]), + }; + render( + + + + + , + ); + return dispatch; + } + + it('shows a merge control on a cross-segment slot and merges on click', () => { + const dispatch = renderBoundary({ prevSegmentId: 'seg-1', nextSegmentId: 'seg-2' }); + const button = screen.getByTestId('boundary-merge-btn'); + fireEvent.click(button); + expect(dispatch.merge).toHaveBeenCalledWith('seg2-start'); + expect(screen.queryByTestId('boundary-split-btn')).not.toBeInTheDocument(); + }); + + it('shows a split control on an intra-segment slot and splits on click', () => { + const dispatch = renderBoundary({ prevSegmentId: 'seg-1', nextSegmentId: 'seg-1' }); + const button = screen.getByTestId('boundary-split-btn'); + fireEvent.click(button); + // The next group's first token ref is the split anchor. 
+ expect(dispatch.split).toHaveBeenCalledWith('b'); + expect(screen.queryByTestId('boundary-merge-btn')).not.toBeInTheDocument(); + }); + + it('renders no control at a leading slot with no previous segment', () => { + renderBoundary({ + prevSegmentId: undefined, + nextSegmentId: 'seg-1', + }); + expect(screen.queryByTestId('boundary-merge-btn')).not.toBeInTheDocument(); + expect(screen.queryByTestId('boundary-split-btn')).not.toBeInTheDocument(); + }); +}); + // --------------------------------------------------------------------------- // PhraseGroup // --------------------------------------------------------------------------- diff --git a/src/__tests__/components/SegmentationStore.test.tsx b/src/__tests__/components/SegmentationStore.test.tsx new file mode 100644 index 00000000..2052706d --- /dev/null +++ b/src/__tests__/components/SegmentationStore.test.tsx @@ -0,0 +1,60 @@ +/** @file Unit tests for components/SegmentationStore.tsx. */ +/// +/// + +import { render, screen } from '@testing-library/react'; +import type { Segment } from 'interlinearizer'; +import { + NO_OP_SEGMENTATION_DISPATCH, + SegmentationProvider, + useSegmentation, + type SegmentationContextValue, +} from '../../components/SegmentationStore'; + +/** A test consumer that renders the resolved context as text so tests can assert on it. 
*/ +function Probe() { + const { boundaryEditMode, segmentById, segmentOrder } = useSegmentation(); + return ( + + {String(boundaryEditMode)}:{segmentById.size}:{segmentOrder.size} + + ); +} + +describe('SegmentationStore', () => { + it('returns an inert default when no provider is present', () => { + render(); + expect(screen.getByTestId('probe')).toHaveTextContent('false:0:0'); + }); + + it('provides the supplied value to consumers within a provider', () => { + const segment: Segment = { + id: 'GEN 1:1', + startRef: { book: 'GEN', chapter: 1, verse: 1 }, + endRef: { book: 'GEN', chapter: 1, verse: 1 }, + baselineText: 'Hi.', + tokens: [], + }; + const value: SegmentationContextValue = { + dispatch: NO_OP_SEGMENTATION_DISPATCH, + boundaryEditMode: true, + segmentById: new Map([['GEN 1:1', segment]]), + segmentOrder: new Map([['GEN 1:1', 0]]), + }; + render( + + + , + ); + expect(screen.getByTestId('probe')).toHaveTextContent('true:1:1'); + }); + + it('exposes an inert no-op dispatch that does nothing when invoked', () => { + // Calling each method must not throw; this also exercises the no-op function bodies. 
+ expect(() => { + NO_OP_SEGMENTATION_DISPATCH.merge('GEN 1:1:0'); + NO_OP_SEGMENTATION_DISPATCH.split('GEN 1:1:6'); + NO_OP_SEGMENTATION_DISPATCH.move('GEN 1:1:0', 'GEN 1:1:6'); + }).not.toThrow(); + }); +}); diff --git a/src/__tests__/components/TokenLinkIcon.test.tsx b/src/__tests__/components/TokenLinkIcon.test.tsx index 4632feaf..a2edfcf8 100644 --- a/src/__tests__/components/TokenLinkIcon.test.tsx +++ b/src/__tests__/components/TokenLinkIcon.test.tsx @@ -4,12 +4,18 @@ import { render, screen } from '@testing-library/react'; import userEvent from '@testing-library/user-event'; +import type { Segment } from 'interlinearizer'; import type { ComponentProps, ReactElement } from 'react'; import { TokenLinkIcon } from '../../components/TokenLinkIcon'; import { PhraseStripProvider, type PhraseStripContextValue, } from '../../components/PhraseStripContext'; +import { + SegmentationProvider, + type SegmentationContextValue, + type SegmentationDispatch, +} from '../../components/SegmentationStore'; import type { SlotFocusInfo } from '../../types/token-layout'; import { makePhraseLink, makePhraseStripContext, makeWordToken } from '../test-helpers'; @@ -48,6 +54,7 @@ function slotFocus(overrides: Partial = {}): SlotFocusInfo { return { focusedSideIsPrev: undefined, isSameSegmentAsFocus: true, + isAdjacentEdgeOfFocus: false, focusedPhraseLink: undefined, focusedFreeToken: undefined, ...overrides, @@ -580,4 +587,130 @@ describe('TokenLinkIcon', () => { await userEvent.hover(screen.getByTestId('token-unlink-btn')); expect(onHoverSplitFreeTokens).not.toHaveBeenCalled(); }); + + // --------------------------------------------------------------------------- + // Cross-segment edge link (pulls an adjacent segment's edge token + moves the boundary) + // --------------------------------------------------------------------------- + + describe('cross-segment edge link', () => { + /** A fresh segmentation dispatch whose calls the tests assert on. 
*/ + function makeDispatch(): jest.Mocked { + return { merge: jest.fn(), split: jest.fn(), move: jest.fn() }; + } + + /** + * Builds an adjacent segment ("seg-B") containing the given token refs in order. + * + * @param refs - Word token refs composing the segment, in document order. + * @returns A segment with id `seg-B`. + */ + function segB(refs: string[]): Segment { + return { + id: 'seg-B', + startRef: { book: 'GEN', chapter: 1, verse: 2 }, + endRef: { book: 'GEN', chapter: 1, verse: 2 }, + baselineText: refs.join(' '), + tokens: refs.map((r) => makeWordToken(r)), + }; + } + + /** + * Renders a cross-segment edge `TokenLinkIcon` inside both providers. + * + * @param dispatch - The segmentation dispatch to capture calls on. + * @param opts - `focusedSideIsPrev` and the adjacent segment's tokens. + * @returns The render result. + */ + function renderEdge( + dispatch: SegmentationDispatch, + opts: { focusedSideIsPrev: boolean; segmentTokens: string[]; mapToken?: boolean }, + ) { + const segmentation: SegmentationContextValue = { + dispatch, + boundaryEditMode: false, + segmentById: new Map([['seg-B', segB(opts.segmentTokens)]]), + segmentOrder: new Map([ + ['seg-A', 0], + ['seg-B', 1], + ]), + }; + const tokenSegmentMap = + opts.mapToken === false ? new Map() : new Map([['tok-b', 'seg-B']]); + return render( + + + + + , + ); + } + + it('activates the link button at an adjacent edge even across a segment boundary', () => { + renderEdge(makeDispatch(), { focusedSideIsPrev: true, segmentTokens: ['tok-b', 'tok-c'] }); + expect(screen.getByTestId('token-link-btn')).toBeEnabled(); + }); + + it('pulls one token forward (move) when focus is the previous segment', async () => { + const dispatch = makeDispatch(); + renderEdge(dispatch, { focusedSideIsPrev: true, segmentTokens: ['tok-b', 'tok-c'] }); + await userEvent.click(screen.getByTestId('token-link-btn')); + // The adjacent segment B starts at tok-b; pulling tok-b leaves tok-c as B's new start. 
+ expect(dispatch.move).toHaveBeenCalledWith('tok-b', 'tok-c'); + expect(mockCreatePhrase).toHaveBeenCalled(); + }); + + it('merges the whole adjacent segment when it has only the pulled token', async () => { + const dispatch = makeDispatch(); + renderEdge(dispatch, { focusedSideIsPrev: true, segmentTokens: ['tok-b'] }); + await userEvent.click(screen.getByTestId('token-link-btn')); + expect(dispatch.merge).toHaveBeenCalledWith('tok-b'); + }); + + it('moves the boundary back to the pulled token when focus is the next segment', async () => { + const dispatch = makeDispatch(); + renderEdge(dispatch, { focusedSideIsPrev: false, segmentTokens: ['tok-b', 'tok-c'] }); + await userEvent.click(screen.getByTestId('token-link-btn')); + // Focus is segment B (starting at tok-b); pulling the previous segment's tok-a moves B's start to tok-a. + expect(dispatch.move).toHaveBeenCalledWith('tok-b', 'tok-a'); + }); + + it('skips the boundary move but still phrases when the pulled token maps to no segment', async () => { + const dispatch = makeDispatch(); + renderEdge(dispatch, { + focusedSideIsPrev: true, + segmentTokens: ['tok-b', 'tok-c'], + mapToken: false, + }); + await userEvent.click(screen.getByTestId('token-link-btn')); + expect(dispatch.move).not.toHaveBeenCalled(); + expect(dispatch.merge).not.toHaveBeenCalled(); + expect(mockCreatePhrase).toHaveBeenCalled(); + }); + + it('leaves the link button inactive (with tooltip) for a non-edge cross-segment slot', () => { + render( + + + , + ); + const button = screen.getByTestId('token-link-btn'); + expect(button).toBeDisabled(); + expect(button).toHaveAttribute('title', 'nope'); + }); + }); }); diff --git a/src/__tests__/components/controls/ViewOptionsDropdown.test.tsx b/src/__tests__/components/controls/ViewOptionsDropdown.test.tsx index 9f86c077..a00ba38c 100644 --- a/src/__tests__/components/controls/ViewOptionsDropdown.test.tsx +++ b/src/__tests__/components/controls/ViewOptionsDropdown.test.tsx @@ -31,6 +31,8 @@ const 
DEFAULT_PROPS = { onShowMorphologyChange: jest.fn(), showFreeTranslation: false, onShowFreeTranslationChange: jest.fn(), + boundaryEditMode: false, + onBoundaryEditModeChange: jest.fn(), }; describe('ViewOptionsDropdown', () => { diff --git a/src/__tests__/components/test-helpers.tsx b/src/__tests__/components/test-helpers.tsx index 0fcae20a..0de14434 100644 --- a/src/__tests__/components/test-helpers.tsx +++ b/src/__tests__/components/test-helpers.tsx @@ -24,4 +24,5 @@ export const allFalseViewOptions: ViewOptions = { chapterLabelInVerse: false, showMorphology: false, showFreeTranslation: false, + boundaryEditMode: false, }; diff --git a/src/__tests__/hooks/useDraftProject.test.ts b/src/__tests__/hooks/useDraftProject.test.ts index 04e07030..b0dda535 100644 --- a/src/__tests__/hooks/useDraftProject.test.ts +++ b/src/__tests__/hooks/useDraftProject.test.ts @@ -194,7 +194,84 @@ describe('useDraftProject', () => { }); }); + describe('autosaveSegmentation', () => { + it('stores the boundary delta on the draft, marks it dirty, and persists it', async () => { + const { result } = await renderLoaded(); + + jest.useFakeTimers(); + const delta = { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }; + act(() => { + result.current.autosaveSegmentation(delta); + }); + act(() => { + jest.advanceTimersByTime(300); + }); + jest.useRealTimers(); + + expect(result.current.dirty).toBe(true); + expect(result.current.getDraftSnapshot()?.segmentation).toEqual(delta); + expect(lastSavedDraft().segmentation).toEqual(delta); + }); + + it('replaces a pending debounced write when called again before it flushes', async () => { + const { result } = await renderLoaded(); + + jest.useFakeTimers(); + act(() => { + result.current.autosaveSegmentation({ removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }); + }); + // A second call before the debounce fires clears the pending timer and schedules a new write. 
+ act(() => { + result.current.autosaveSegmentation({ removedVerseStarts: [], addedStarts: ['GEN 1:1:6'] }); + }); + act(() => { + jest.advanceTimersByTime(300); + }); + jest.useRealTimers(); + + expect(lastSavedDraft().segmentation).toEqual({ + removedVerseStarts: [], + addedStarts: ['GEN 1:1:6'], + }); + }); + + it('clears the segmentation field when passed undefined (back to default segmentation)', async () => { + // Seed a draft that already has custom boundaries so clearing them is observable. + mockGetDraftResolves( + makeDraft({ segmentation: { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] } }), + ); + const { result } = await renderLoaded(); + + jest.useFakeTimers(); + act(() => { + result.current.autosaveSegmentation(undefined); + }); + act(() => { + jest.advanceTimersByTime(300); + }); + jest.useRealTimers(); + + expect(result.current.getDraftSnapshot()?.segmentation).toBeUndefined(); + expect(lastSavedDraft().segmentation).toBeUndefined(); + }); + }); + describe('loadFromProject', () => { + it('copies a project segmentation delta into the draft when present', async () => { + const { result } = await renderLoaded(); + + const delta = { removedVerseStarts: ['GEN 1:2:0'], addedStarts: ['GEN 1:1:6'] }; + act(() => { + result.current.loadFromProject({ + analysis: analysisWithToken('tok-open'), + analysisLanguages: ['de'], + segmentation: delta, + }); + }); + + expect(result.current.draft?.segmentation).toEqual(delta); + }); + it('copies analysis, analysis languages, and target, clears dirty, and bumps the version', async () => { const { result } = await renderLoaded(); const versionBefore = result.current.draftVersion; @@ -351,6 +428,24 @@ describe('useDraftProject', () => { expect(result.current.draftVersion).toBe(versionBefore + 1); expect(lastSavedDraft().dirty).toBe(false); }); + + it('clears any custom segment boundaries as part of the clean baseline', async () => { + mockGetDraftResolves( + makeDraft({ + analysis: analysisWithToken('tok-wipe-all'), 
+ dirty: true, + segmentation: { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }, + }), + ); + const { result } = await renderLoaded(); + + act(() => { + result.current.wipeAll(); + }); + + expect(result.current.draft?.segmentation).toBeUndefined(); + expect(lastSavedDraft().segmentation).toBeUndefined(); + }); }); describe('markSynced', () => { diff --git a/src/__tests__/main.test.ts b/src/__tests__/main.test.ts index 0fc3fd64..b3349cb6 100644 --- a/src/__tests__/main.test.ts +++ b/src/__tests__/main.test.ts @@ -170,9 +170,9 @@ const getGetProjectHandler = () => /** Activates the extension and returns the `interlinearizer.saveAnalysis` handler. */ const getSaveAnalysisHandler = () => - activateAndGetHandler<(id: string, analysisJson: string) => Promise>( - 'interlinearizer.saveAnalysis', - ); + activateAndGetHandler< + (id: string, analysisJson: string, segmentationJson?: string) => Promise + >('interlinearizer.saveAnalysis'); /** Activates the extension and returns the `interlinearizer.getDraft` handler. */ const getGetDraftHandler = () => @@ -910,7 +910,13 @@ describe('main', () => { await handler('proj-id', JSON.stringify(stubAnalysis)); - expect(mockUpdateAnalysis).toHaveBeenCalledWith(expect.anything(), 'proj-id', stubAnalysis); + // No segmentationJson passed ⇒ the 4th arg is undefined (leave stored boundaries unchanged). 
+ expect(mockUpdateAnalysis).toHaveBeenCalledWith( + expect.anything(), + 'proj-id', + stubAnalysis, + undefined, + ); }); it('logs the error, sends an error notification, and rethrows when storage throws', async () => { @@ -956,6 +962,46 @@ describe('main', () => { ); expect(mockUpdateAnalysis).not.toHaveBeenCalled(); }); + + it('passes a parsed segmentation delta through to updateAnalysis', async () => { + mockUpdateAnalysis.mockResolvedValue(undefined); + const handler = await getSaveAnalysisHandler(); + const segmentation = { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }; + + await handler('proj-id', JSON.stringify(stubAnalysis), JSON.stringify(segmentation)); + + expect(mockUpdateAnalysis).toHaveBeenCalledWith( + expect.anything(), + 'proj-id', + stubAnalysis, + segmentation, + ); + }); + + it('passes null through to updateAnalysis to clear boundaries when segmentationJson is "null"', async () => { + mockUpdateAnalysis.mockResolvedValue(undefined); + const handler = await getSaveAnalysisHandler(); + + await handler('proj-id', JSON.stringify(stubAnalysis), 'null'); + + // eslint-disable-next-line no-null/no-null -- asserting the clear-boundaries sentinel is forwarded + expect(mockUpdateAnalysis).toHaveBeenCalledWith( + expect.anything(), + 'proj-id', + stubAnalysis, + // eslint-disable-next-line no-null/no-null -- asserting the clear-boundaries sentinel is forwarded + null, + ); + }); + + it('rethrows when segmentationJson does not conform to SegmentationDelta', async () => { + const handler = await getSaveAnalysisHandler(); + + await expect( + handler('proj-id', JSON.stringify(stubAnalysis), JSON.stringify({ bogus: true })), + ).rejects.toThrow(TypeError); + expect(mockUpdateAnalysis).not.toHaveBeenCalled(); + }); }); describe('interlinearizer.getDraft command', () => { diff --git a/src/__tests__/parsers/papi/resegmentBook.test.ts b/src/__tests__/parsers/papi/resegmentBook.test.ts new file mode 100644 index 00000000..76ade277 --- /dev/null +++ 
b/src/__tests__/parsers/papi/resegmentBook.test.ts @@ -0,0 +1,103 @@ +/** @file Unit tests for {@link resegmentBook}. */ +/// + +import type { Book } from 'interlinearizer'; +import { tokenizeBook } from 'parsers/papi/bookTokenizer'; +import { resegmentBook } from 'parsers/papi/resegmentBook'; + +/** + * Builds a verse-tokenized GEN book from the given verses. + * + * @param verses - Verse SID + text pairs. + * @returns The tokenized book. + */ +function makeBook(verses: { sid: string; text: string }[]): Book { + return tokenizeBook({ bookCode: 'GEN', writingSystem: 'en', contentHash: 'h', verses }); +} + +const BOOK = makeBook([ + { sid: 'GEN 1:1', text: 'Alpha beta.' }, + { sid: 'GEN 1:2', text: 'Gamma delta.' }, + { sid: 'GEN 1:3', text: 'Epsilon.' }, +]); + +/** Asserts the segment-baseline/offset invariant for every token of every segment. */ +function expectInvariant(book: Book): void { + book.segments.forEach((seg) => { + seg.tokens.forEach((t) => { + expect(seg.baselineText.slice(t.charStart, t.charEnd)).toBe(t.surfaceText); + }); + }); +} + +describe('resegmentBook', () => { + it('returns the same book reference for an undefined delta', () => { + expect(resegmentBook(BOOK, undefined)).toBe(BOOK); + }); + + it('returns the same book reference for an empty delta', () => { + expect(resegmentBook(BOOK, { removedVerseStarts: [], addedStarts: [] })).toBe(BOOK); + }); + + it('reuses untouched verse Segment objects by reference when a delta is active elsewhere', () => { + // Merge verses 1+2; verse 3 is untouched and should be the same object. 
+ const result = resegmentBook(BOOK, { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }); + expect(result.segments[1]).toBe(BOOK.segments[2]); + }); + + it('merges two verses into one segment with concatenated baseline and shifted offsets', () => { + const result = resegmentBook(BOOK, { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }); + expect(result.segments).toHaveLength(2); + const merged = result.segments[0]; + expect(merged.baselineText).toBe('Alpha beta. Gamma delta.'); + // Token refs are preserved unchanged across the merge. + expect(merged.tokens.map((t) => t.ref)).toEqual([ + 'GEN 1:1:0', + 'GEN 1:1:6', + 'GEN 1:1:10', + 'GEN 1:2:0', + 'GEN 1:2:6', + 'GEN 1:2:11', + ]); + expectInvariant(result); + }); + + it('keeps the leading verse SID as the merged segment id', () => { + const result = resegmentBook(BOOK, { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }); + expect(result.segments[0].id).toBe('GEN 1:1'); + }); + + it('spans the merged range in startRef/endRef', () => { + const result = resegmentBook(BOOK, { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }); + expect(result.segments[0].startRef).toEqual({ book: 'GEN', chapter: 1, verse: 1 }); + expect(result.segments[0].endRef).toEqual({ book: 'GEN', chapter: 1, verse: 2 }); + }); + + it('splits a verse before a mid-verse token', () => { + // Split verse 1 before "beta" (charStart 6). + const result = resegmentBook(BOOK, { removedVerseStarts: [], addedStarts: ['GEN 1:1:6'] }); + expect(result.segments).toHaveLength(4); + const [firstHalf, secondHalf] = result.segments; + expect(firstHalf.id).toBe('GEN 1:1'); + expect(firstHalf.tokens.map((t) => t.ref)).toEqual(['GEN 1:1:0']); + // The second half begins mid-verse, so it takes its first token's ref as a fresh id. 
+ expect(secondHalf.id).toBe('GEN 1:1:6'); + expect(secondHalf.tokens.map((t) => t.ref)).toEqual(['GEN 1:1:6', 'GEN 1:1:10']); + expectInvariant(result); + }); + + it('carries a sub-verse charIndex on a split piece that begins mid-verse', () => { + const result = resegmentBook(BOOK, { removedVerseStarts: [], addedStarts: ['GEN 1:1:6'] }); + expect(result.segments[1].startRef).toEqual({ + book: 'GEN', + chapter: 1, + verse: 1, + charIndex: 6, + }); + }); + + it('ignores a drifted (nonexistent) anchor and yields the default grouping', () => { + const result = resegmentBook(BOOK, { removedVerseStarts: ['GEN 9:9:9'], addedStarts: [] }); + expect(result.segments.map((s) => s.id)).toEqual(['GEN 1:1', 'GEN 1:2', 'GEN 1:3']); + }); +}); diff --git a/src/__tests__/services/projectStorage.test.ts b/src/__tests__/services/projectStorage.test.ts index 713a611c..98e537e5 100644 --- a/src/__tests__/services/projectStorage.test.ts +++ b/src/__tests__/services/projectStorage.test.ts @@ -454,6 +454,45 @@ describe('projectStorage', () => { await expect(updateAnalysis(token, 'proj-id', newAnalysis)).rejects.toThrow('disk full'); }); + + it('writes a provided segmentation delta onto the project', async () => { + __mockReadUserData.mockResolvedValue(JSON.stringify(storedProject)); + const segmentation = { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }; + + const result = await updateAnalysis(token, 'proj-id', newAnalysis, segmentation); + + expect(result).toMatchObject({ analysis: newAnalysis, segmentation }); + expect(__mockWriteUserData).toHaveBeenCalledWith( + token, + 'project:proj-id', + JSON.stringify({ ...storedProject, analysis: newAnalysis, segmentation }), + ); + }); + + it('clears stored boundaries when segmentation is null', async () => { + const projectWithBoundaries = { + ...storedProject, + segmentation: { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }, + }; + __mockReadUserData.mockResolvedValue(JSON.stringify(projectWithBoundaries)); + + // 
eslint-disable-next-line no-null/no-null -- explicit "clear boundaries" sentinel under test + const result = await updateAnalysis(token, 'proj-id', newAnalysis, null); + + expect(result && 'segmentation' in result).toBe(false); + }); + + it('leaves existing boundaries unchanged when segmentation is undefined', async () => { + const projectWithBoundaries = { + ...storedProject, + segmentation: { removedVerseStarts: ['GEN 1:2:0'], addedStarts: [] }, + }; + __mockReadUserData.mockResolvedValue(JSON.stringify(projectWithBoundaries)); + + const result = await updateAnalysis(token, 'proj-id', newAnalysis); + + expect(result).toMatchObject({ segmentation: projectWithBoundaries.segmentation }); + }); }); describe('getProjectsForSource', () => { diff --git a/src/__tests__/utils/segmentation.test.ts b/src/__tests__/utils/segmentation.test.ts new file mode 100644 index 00000000..447bc968 --- /dev/null +++ b/src/__tests__/utils/segmentation.test.ts @@ -0,0 +1,213 @@ +/** @file Unit tests for the pure segmentation-delta transforms. */ +/// + +import type { Book, SegmentationDelta } from 'interlinearizer'; +import { tokenizeBook } from 'parsers/papi/bookTokenizer'; +import { + addBoundaryBefore, + defaultVerseStarts, + effectiveStarts, + isDefaultSegmentation, + mergeSegments, + moveBoundary, + removeBoundaryAt, + splitSegmentBefore, +} from '../../utils/segmentation'; + +/** + * Builds a verse-tokenized GEN book from the given verses for use as the `verseBook` argument. + * + * @param verses - Verse SID + text pairs. + * @returns The tokenized book. + */ +function makeBook(verses: { sid: string; text: string }[]): Book { + return tokenizeBook({ bookCode: 'GEN', writingSystem: 'en', contentHash: 'h', verses }); +} + +/** A three-verse fixture: "Alpha beta." / "Gamma delta." / "Epsilon." */ +const THREE_VERSES = makeBook([ + { sid: 'GEN 1:1', text: 'Alpha beta.' }, + { sid: 'GEN 1:2', text: 'Gamma delta.' }, + { sid: 'GEN 1:3', text: 'Epsilon.' 
}, +]); + +// First token refs of each verse (charStart 0): "GEN 1:1:0", "GEN 1:2:0", "GEN 1:3:0". +const V1_START = 'GEN 1:1:0'; +const V2_START = 'GEN 1:2:0'; +const V3_START = 'GEN 1:3:0'; +// Second word of verse 1 ("beta" at charStart 6). +const V1_BETA = 'GEN 1:1:6'; + +describe('defaultVerseStarts', () => { + it('returns the first-token ref of every verse', () => { + expect(defaultVerseStarts(THREE_VERSES)).toEqual(new Set([V1_START, V2_START, V3_START])); + }); + + it('skips verses with no tokens', () => { + const book = makeBook([ + { sid: 'GEN 1:1', text: ' ' }, + { sid: 'GEN 1:2', text: 'Word.' }, + ]); + expect(defaultVerseStarts(book)).toEqual(new Set(['GEN 1:2:0'])); + }); +}); + +describe('isDefaultSegmentation', () => { + it('is true for undefined', () => { + expect(isDefaultSegmentation(undefined)).toBe(true); + }); + + it('is true for empty arrays', () => { + expect(isDefaultSegmentation({ removedVerseStarts: [], addedStarts: [] })).toBe(true); + }); + + it('is false when a boundary is removed', () => { + expect(isDefaultSegmentation({ removedVerseStarts: [V2_START], addedStarts: [] })).toBe(false); + }); + + it('is false when a boundary is added', () => { + expect(isDefaultSegmentation({ removedVerseStarts: [], addedStarts: [V1_BETA] })).toBe(false); + }); +}); + +describe('effectiveStarts', () => { + it('returns all default verse starts for the default segmentation', () => { + expect(effectiveStarts(THREE_VERSES, undefined)).toEqual( + new Set([V1_START, V2_START, V3_START]), + ); + }); + + it('drops a removed verse start (merge)', () => { + const starts = effectiveStarts(THREE_VERSES, { + removedVerseStarts: [V2_START], + addedStarts: [], + }); + expect(starts).toEqual(new Set([V1_START, V3_START])); + }); + + it('adds a split start', () => { + const starts = effectiveStarts(THREE_VERSES, { + removedVerseStarts: [], + addedStarts: [V1_BETA], + }); + expect(starts).toEqual(new Set([V1_START, V1_BETA, V2_START, V3_START])); + }); + + it('ignores 
an added start whose token no longer exists (drift)', () => { + const starts = effectiveStarts(THREE_VERSES, { + removedVerseStarts: [], + addedStarts: ['GEN 9:9:9'], + }); + expect(starts).toEqual(new Set([V1_START, V2_START, V3_START])); + }); + + it('always keeps the book-first token as a start even if asked to remove it', () => { + const starts = effectiveStarts(THREE_VERSES, { + removedVerseStarts: [V1_START], + addedStarts: [], + }); + expect(starts.has(V1_START)).toBe(true); + }); +}); + +describe('addBoundaryBefore', () => { + it('records a mid-verse split as an added start', () => { + expect(addBoundaryBefore(THREE_VERSES, undefined, V1_BETA)).toEqual({ + removedVerseStarts: [], + addedStarts: [V1_BETA], + }); + }); + + it('un-merges a default verse start by dropping it from removedVerseStarts', () => { + const merged: SegmentationDelta = { removedVerseStarts: [V2_START], addedStarts: [] }; + expect(addBoundaryBefore(THREE_VERSES, merged, V2_START)).toEqual({ + removedVerseStarts: [], + addedStarts: [], + }); + }); + + it('is idempotent on an already-added start', () => { + const once = addBoundaryBefore(THREE_VERSES, undefined, V1_BETA); + expect(addBoundaryBefore(THREE_VERSES, once, V1_BETA)).toEqual(once); + }); +}); + +describe('removeBoundaryAt', () => { + it('records a default verse start as removed (merge)', () => { + expect(removeBoundaryAt(THREE_VERSES, undefined, V2_START)).toEqual({ + removedVerseStarts: [V2_START], + addedStarts: [], + }); + }); + + it('drops an added split rather than recording a removal', () => { + const split: SegmentationDelta = { removedVerseStarts: [], addedStarts: [V1_BETA] }; + expect(removeBoundaryAt(THREE_VERSES, split, V1_BETA)).toEqual({ + removedVerseStarts: [], + addedStarts: [], + }); + }); + + it('is a no-op for the book-first token', () => { + expect(removeBoundaryAt(THREE_VERSES, undefined, V1_START)).toEqual({ + removedVerseStarts: [], + addedStarts: [], + }); + }); +}); + +describe('moveBoundary', () => { + 
it('removes the old start and adds the new one', () => { + expect(moveBoundary(THREE_VERSES, undefined, V2_START, V1_BETA)).toEqual({ + removedVerseStarts: [V2_START], + addedStarts: [V1_BETA], + }); + }); +}); + +describe('mergeSegments / splitSegmentBefore aliases', () => { + it('mergeSegments removes the second segment start', () => { + expect(mergeSegments(THREE_VERSES, undefined, V2_START)).toEqual( + removeBoundaryAt(THREE_VERSES, undefined, V2_START), + ); + }); + + it('splitSegmentBefore adds a start', () => { + expect(splitSegmentBefore(THREE_VERSES, undefined, V1_BETA)).toEqual( + addBoundaryBefore(THREE_VERSES, undefined, V1_BETA), + ); + }); +}); + +describe('normalization', () => { + it('dedupes and sorts removed/added arrays by document order', () => { + const messy: SegmentationDelta = { + removedVerseStarts: [V3_START, V2_START, V2_START], + addedStarts: [], + }; + // Re-adding V1_BETA twice plus the messy removals exercises dedupe + sort. + const result = addBoundaryBefore( + THREE_VERSES, + addBoundaryBefore(THREE_VERSES, messy, V1_BETA), + V1_BETA, + ); + expect(result).toEqual({ removedVerseStarts: [V2_START, V3_START], addedStarts: [V1_BETA] }); + }); + + it('strips a removed ref that is not a default verse start', () => { + const bogus: SegmentationDelta = { removedVerseStarts: [V1_BETA], addedStarts: [] }; + // V1_BETA is mid-verse, not a default start, so it is not a valid removal. 
+ expect(removeBoundaryAt(THREE_VERSES, bogus, V3_START)).toEqual({ + removedVerseStarts: [V3_START], + addedStarts: [], + }); + }); + + it('strips an added ref that is actually a default verse start', () => { + const bogus: SegmentationDelta = { removedVerseStarts: [], addedStarts: [V2_START] }; + expect(addBoundaryBefore(THREE_VERSES, bogus, V1_BETA)).toEqual({ + removedVerseStarts: [], + addedStarts: [V1_BETA], + }); + }); +}); diff --git a/src/__tests__/utils/token-layout.test.ts b/src/__tests__/utils/token-layout.test.ts index 1e3cac63..7b33a7e9 100644 --- a/src/__tests__/utils/token-layout.test.ts +++ b/src/__tests__/utils/token-layout.test.ts @@ -148,6 +148,53 @@ describe('resolveSlotFocus', () => { expect(result.focusedPhraseLink).toBe(link); expect(result.focusedFreeToken).toBe(freeToken); }); + + /** Segment document order for the adjacent-edge tests: seg-0 < seg-1 < seg-2. */ + const order = new Map([ + ['seg-0', 0], + ['seg-1', 1], + ['seg-2', 2], + ]); + + it('marks isAdjacentEdgeOfFocus true when the focused segment borders the next one', () => { + const result = resolveSlotFocus('seg-1', 'seg-2', focusWithSegment('seg-1'), true, order); + expect(result.isAdjacentEdgeOfFocus).toBe(true); + }); + + it('marks isAdjacentEdgeOfFocus true when the focused segment borders the previous one', () => { + const result = resolveSlotFocus('seg-0', 'seg-1', focusWithSegment('seg-1'), false, order); + expect(result.isAdjacentEdgeOfFocus).toBe(true); + }); + + it('marks isAdjacentEdgeOfFocus false within one segment', () => { + const result = resolveSlotFocus('seg-1', 'seg-1', focusWithSegment('seg-1'), true, order); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); + + it('marks isAdjacentEdgeOfFocus false when nothing is focused', () => { + const result = resolveSlotFocus('seg-1', 'seg-2', focusWithSegment(undefined), true, order); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); + + it('marks isAdjacentEdgeOfFocus false when neither neighbor is 
the focused segment', () => { + const result = resolveSlotFocus('seg-0', 'seg-2', focusWithSegment('seg-1'), true, order); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); + + it('marks isAdjacentEdgeOfFocus false when the two segments are not adjacent', () => { + const result = resolveSlotFocus('seg-0', 'seg-2', focusWithSegment('seg-0'), true, order); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); + + it('marks isAdjacentEdgeOfFocus false when a leading slot has no previous segment', () => { + const result = resolveSlotFocus(undefined, 'seg-1', focusWithSegment('seg-1'), false, order); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); + + it('marks isAdjacentEdgeOfFocus false when segment order is unknown (default empty map)', () => { + const result = resolveSlotFocus('seg-1', 'seg-2', focusWithSegment('seg-1'), true); + expect(result.isAdjacentEdgeOfFocus).toBe(false); + }); }); // --------------------------------------------------------------------------- @@ -159,6 +206,7 @@ describe('NO_SLOT_FOCUS', () => { expect(NO_SLOT_FOCUS).toEqual({ focusedSideIsPrev: undefined, isSameSegmentAsFocus: false, + isAdjacentEdgeOfFocus: false, focusedPhraseLink: undefined, focusedFreeToken: undefined, }); diff --git a/src/components/Interlinearizer.tsx b/src/components/Interlinearizer.tsx index d1281246..68159409 100644 --- a/src/components/Interlinearizer.tsx +++ b/src/components/Interlinearizer.tsx @@ -3,6 +3,12 @@ import type { Book, ScriptureRef, Segment, TextAnalysis } from 'interlinearizer' import { useCallback, useEffect, useMemo, useState } from 'react'; import type { Dispatch, SetStateAction } from 'react'; import { AnalysisStoreProvider, usePhraseDispatch } from './AnalysisStore'; +import { + NO_OP_SEGMENTATION_DISPATCH, + SegmentationProvider, + type SegmentationContextValue, + type SegmentationDispatch, +} from './SegmentationStore'; import ContinuousView from './ContinuousView'; import EditPhraseControls from 
'./controls/EditPhraseControls'; import useBookIndexes from '../hooks/useBookIndexes'; @@ -62,6 +68,11 @@ type InterlinearizerProps = Readonly<{ setPhraseMode: Dispatch>; /** Bundled display toggles forwarded to the segment list and continuous views. */ viewOptions: ViewOptions; + /** + * Boundary-editing operations provided to the views via {@link SegmentationProvider}. Optional so + * isolated tests can omit it; the real loader always supplies it. Defaults to an inert no-op. + */ + segmentationDispatch?: SegmentationDispatch; }>; /** @@ -87,6 +98,7 @@ function InterlinearizerInner({ phraseMode, setPhraseMode, viewOptions, + segmentationDispatch = NO_OP_SEGMENTATION_DISPATCH, }: Omit) { // Navigation surface from the context: `navigate` writes the reference (classifying internal vs // external at the call site), `consumeInternalNav` lets the segment window suppress the fade for @@ -127,6 +139,24 @@ function InterlinearizerInner({ // Book-wide lookup indexes the views share, built in one pass over the segment list. const { segmentById, tokenDocOrder, tokenSegmentMap, wordTokenByRef } = useBookIndexes(book); + /** Segment id → its index in document order; used to test segment adjacency for boundary edits. */ + const segmentOrder = useMemo(() => { + const order = new Map(); + book.segments.forEach((seg, i) => order.set(seg.id, i)); + return order; + }, [book.segments]); + + /** Segmentation context shared by the views — the dispatch plus the lookups its call sites need. */ + const segmentationValue = useMemo( + () => ({ + dispatch: segmentationDispatch, + boundaryEditMode: viewOptions.boundaryEditMode, + segmentById, + segmentOrder, + }), + [segmentationDispatch, viewOptions.boundaryEditMode, segmentById, segmentOrder], + ); + /** PhraseId currently hovered anywhere in the interlinearizer; shared across all SegmentViews. */ const [hoveredPhraseId, setHoveredPhraseId] = useState(); @@ -255,59 +285,61 @@ function InterlinearizerInner({ ); return ( -
- {(phraseMode.kind === 'confirm-unlink' || phraseMode.kind === 'edit') && ( -
- {phraseMode.kind === 'confirm-unlink' ? ( - - ) : ( - - )} -
- )} -
- {displayContinuousScroll && ( -
- + +
+ {(phraseMode.kind === 'confirm-unlink' || phraseMode.kind === 'edit') && ( +
+ {phraseMode.kind === 'confirm-unlink' ? ( + + ) : ( + + )}
)} +
+ {displayContinuousScroll && ( +
+ +
+ )} - + +
-
+ ); } diff --git a/src/components/InterlinearizerLoader.tsx b/src/components/InterlinearizerLoader.tsx index 8db29fd0..230ee0ce 100644 --- a/src/components/InterlinearizerLoader.tsx +++ b/src/components/InterlinearizerLoader.tsx @@ -9,9 +9,17 @@ import { TabToolbar } from 'platform-bible-react'; import type { SelectMenuItemHandler } from 'platform-bible-react'; import { isPlatformError } from 'platform-bible-utils'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { resegmentBook } from 'parsers/papi/resegmentBook'; import useDraftProject from '../hooks/useDraftProject'; import useInterlinearizerBookData from '../hooks/useInterlinearizerBookData'; import useOptimisticBooleanSetting from '../hooks/useOptimisticBooleanSetting'; +import { + isDefaultSegmentation, + mergeSegments, + moveBoundary, + splitSegmentBefore, +} from '../utils/segmentation'; +import type { SegmentationDispatch } from './SegmentationStore'; import type { InterlinearProjectSummary } from '../types/interlinear-project-summary'; import Interlinearizer from './Interlinearizer'; import ViewOptionsDropdown from './controls/ViewOptionsDropdown'; @@ -143,6 +151,7 @@ function InterlinearizerLoaderInner({ draftVersion, dirty, autosaveAnalysis, + autosaveSegmentation, loadFromProject, newDraft, getDraftSnapshot, @@ -214,6 +223,10 @@ function InterlinearizerLoaderInner({ // the reference identical across the loader's frequent re-renders (driven by `useData`, // `useSetting`, etc.), so the `memo()` wrapping `SegmentView` can shallow-compare it away instead // of re-rendering every windowed segment when no toggle actually changed. + // Editing segment boundaries is a transient mode rather than a saved preference, so it lives in + // local state (not a persisted project setting) and resets to off whenever the WebView reloads. 
+ const [boundaryEditMode, setBoundaryEditMode] = useState(false); + const viewOptions = useMemo( () => ({ hideInactiveLinkButtons, @@ -221,6 +234,7 @@ function InterlinearizerLoaderInner({ chapterLabelInVerse, showMorphology, showFreeTranslation, + boundaryEditMode, }), [ hideInactiveLinkButtons, @@ -228,14 +242,63 @@ function InterlinearizerLoaderInner({ chapterLabelInVerse, showMorphology, showFreeTranslation, + boundaryEditMode, ], ); - const { book, isLoading, bookError, tokenizeError } = useInterlinearizerBookData({ + const { + book: verseBook, + isLoading, + bookError, + tokenizeError, + } = useInterlinearizerBookData({ projectId, scrRef, }); + /** The user's custom segment boundaries from the draft, or `undefined` for verse segmentation. */ + const segmentation = draft?.segmentation; + + /** + * The book the views render: the verse-tokenized book re-grouped into the user's custom segments. + * Identical (by reference) to `verseBook` when no custom boundaries are set, so the common case + * incurs no extra work. `verseBook` is retained separately because the segmentation operations + * need the default verse boundaries it carries. + */ + const book = useMemo( + () => (verseBook ? resegmentBook(verseBook, segmentation) : undefined), + [verseBook, segmentation], + ); + + /** + * Boundary-editing operations passed down to the views. Each reads the draft's latest boundary + * delta synchronously (so rapid edits compose correctly), applies the relevant pure transform + * against the original verse book, and auto-saves the normalized result — clearing the field back + * to `undefined` when the edit restores the default verse segmentation. + */ + const segmentationDispatch = useMemo(() => { + const apply = (next: ReturnType) => { + autosaveSegmentation(isDefaultSegmentation(next) ? 
undefined : next); + }; + return { + merge: (secondSegmentStartRef) => { + /* v8 ignore next -- boundary controls only render once the book has loaded */ + if (!verseBook) return; + apply(mergeSegments(verseBook, getDraftSnapshot()?.segmentation, secondSegmentStartRef)); + }, + split: (tokenRef) => { + /* v8 ignore next -- boundary controls only render once the book has loaded */ + if (!verseBook) return; + apply(splitSegmentBefore(verseBook, getDraftSnapshot()?.segmentation, tokenRef)); + }, + move: (fromRef, toRef) => { + /* v8 ignore next -- the cross-segment link only renders once the book has loaded */ + if (!verseBook) return; + apply(moveBoundary(verseBook, getDraftSnapshot()?.segmentation, fromRef, toRef)); + }, + }; + }, [verseBook, getDraftSnapshot, autosaveSegmentation]); + // The active reference handed to the interlinearizer. The host emits `verseNum: 0` both for a // chapter's verse-0 superscription (which has its own segment) and for a plain whole-chapter // selection (which does not). Keep verse 0 when the loaded book actually has a verse-0 segment for @@ -313,6 +376,10 @@ function InterlinearizerLoaderInner({ 'interlinearizer.saveAnalysis', activeProject.id, JSON.stringify(snapshot.analysis), + // Send the draft's boundary state on every Save; `null` clears any stored boundaries so a + // reverted segmentation propagates to the project rather than leaving it stale. + // eslint-disable-next-line no-null/no-null -- "null" is the JSON sentinel that clears boundaries + JSON.stringify(snapshot.segmentation ?? 
null), ); markSynced(snapshot.analysis); } catch (e) { @@ -434,6 +501,8 @@ function InterlinearizerLoaderInner({ onShowMorphologyChange={handleShowMorphologyChange} showFreeTranslation={showFreeTranslation} onShowFreeTranslationChange={handleShowFreeTranslationChange} + boundaryEditMode={boundaryEditMode} + onBoundaryEditModeChange={setBoundaryEditMode} /> ) : undefined } @@ -493,6 +562,7 @@ function InterlinearizerLoaderInner({ phraseMode={phraseMode} setPhraseMode={setPhraseMode} viewOptions={viewOptions} + segmentationDispatch={segmentationDispatch} /> )}
diff --git a/src/components/PhraseStripParts.tsx b/src/components/PhraseStripParts.tsx index e1efc91a..4de3ec2c 100644 --- a/src/components/PhraseStripParts.tsx +++ b/src/components/PhraseStripParts.tsx @@ -1,13 +1,84 @@ /** @file Shared render parts for the two phrase strips (SegmentView and ContinuousView). */ +import { useLocalizedStrings } from '@papi/frontend/react'; +import { Combine, Scissors } from 'lucide-react'; import { memo } from 'react'; import MemoizedPhraseBox from './PhraseBox'; import type { PhraseMode } from '../types/phrase-mode'; import { usePhraseStripContext } from './PhraseStripContext'; +import { useSegmentation } from './SegmentationStore'; import { InertTokenChip } from './TokenChip'; import MemoizedTokenLinkIcon from './TokenLinkIcon'; import type { FocusContext, LinkSlot, TokenGroup } from '../types/token-layout'; import { resolveSlotFocus } from '../utils/token-layout'; +/** Localized labels for the merge/split boundary controls; hoisted so the array reference is stable. */ +const BOUNDARY_STRING_KEYS = [ + '%interlinearizer_boundaryControl_merge%', + '%interlinearizer_boundaryControl_split%', +] as const satisfies `%${string}%`[]; + +/** Props for {@link BoundaryControl}. */ +type BoundaryControlProps = Readonly<{ + /** Segment id of the group before the slot, or `undefined` for the leading slot. */ + prevSegmentId: string | undefined; + /** Segment id of the group after the slot, or `undefined` for the trailing slot. */ + nextSegmentId: string | undefined; + /** First word token after the slot, used as the split anchor. */ + nextTokenRef: string | undefined; +}>; + +/** + * Renders the boundary-edit control for one slot. A slot straddling two different segments shows a + * merge control (combine the next segment into the previous one); a slot inside one segment shows a + * split control (start a new segment at the next token). Leading/trailing slots (one side missing) + * render nothing. + * + * @param props - Component props. 
+ * @param props.prevSegmentId - Segment id before the slot. + * @param props.nextSegmentId - Segment id after the slot. + * @param props.nextTokenRef - First word token after the slot (split anchor). + * @returns A merge or split button, or `undefined` when the slot is at a book edge. + */ +function BoundaryControl({ prevSegmentId, nextSegmentId, nextTokenRef }: BoundaryControlProps) { + const { dispatch, segmentById } = useSegmentation(); + const [localizedStrings] = useLocalizedStrings(BOUNDARY_STRING_KEYS); + if (prevSegmentId === undefined || nextSegmentId === undefined || nextTokenRef === undefined) { + return undefined; + } + if (prevSegmentId !== nextSegmentId) { + const mergeLabel = localizedStrings['%interlinearizer_boundaryControl_merge%']; + const secondStart = segmentById.get(nextSegmentId)?.tokens[0]?.ref; + return ( + + ); + } + const splitLabel = localizedStrings['%interlinearizer_boundaryControl_split%']; + return ( + + ); +} + /** * Duration, in milliseconds, of the link-slot opacity fade transition. 
Exported so `ContinuousView` * can re-center the focused phrase for exactly this long after `committedActiveSegmentId` flips, @@ -61,6 +132,7 @@ export function PhraseSlot({ hoveredPhraseId, }: PhraseSlotProps) { const { hideInactiveLinkButtons, activeSegmentId, skipLinkTransition } = usePhraseStripContext(); + const { boundaryEditMode, segmentOrder } = useSegmentation(); const { prevGroup, nextGroup, punctuation } = slot; if (!prevGroup && !nextGroup && punctuation.length === 0) return undefined; const prevToken = prevGroup?.tokens[prevGroup.tokens.length - 1]; @@ -71,7 +143,13 @@ export function PhraseSlot({ prevPhraseId !== undefined && prevPhraseId === nextPhraseId && (prevPhraseId === hoveredPhraseId || prevPhraseId === focus.focusedPhraseId); - const slotFocus = resolveSlotFocus(prevSegmentId, nextSegmentId, focus, focusedSideIsPrev); + const slotFocus = resolveSlotFocus( + prevSegmentId, + nextSegmentId, + focus, + focusedSideIsPrev, + segmentOrder, + ); // The slot is "in the active segment" only when both neighboring phrases belong to it. A link // that crosses a verse boundary (one side in the active verse, the other in an adjacent verse) is // therefore treated as inactive and hidden too. When hideInactiveLinkButtons is on, link buttons @@ -89,29 +167,38 @@ export function PhraseSlot({ data-link-slot="true" style={{ overflowAnchor: 'none' }} > - {hasLinkableNeighbors && ( - - - - )} + {hasLinkableNeighbors && + (boundaryEditMode ? ( + + + + ) : ( + + + + ))} {punctuation.length > 0 && ( {punctuation.map((punctToken) => ( diff --git a/src/components/SegmentationStore.tsx b/src/components/SegmentationStore.tsx new file mode 100644 index 00000000..a6a99498 --- /dev/null +++ b/src/components/SegmentationStore.tsx @@ -0,0 +1,101 @@ +/** + * @file Render-scoped context exposing segment-boundary editing to the deep leaves that trigger it + * (the cross-segment link icon and the merge/split boundary controls). 
+ * + * The {@link SegmentationDispatch} closes over the draft's current boundary delta and the original + * verse-tokenized book, applying the pure transforms in `utils/segmentation.ts` and auto-saving + * the result. Boundary edits flow draft → re-segmentation → new `book.segments`, so consumers + * only need to call a dispatch method; they never see the delta itself. + */ +import type { Segment } from 'interlinearizer'; +import { createContext, useContext } from 'react'; +import type { ReactNode } from 'react'; + +/** The boundary-editing operations available to leaf controls. Each one auto-saves the result. */ +export type SegmentationDispatch = Readonly<{ + /** + * Merges the segment that begins at `secondSegmentStartRef` into the segment before it. + * + * @param secondSegmentStartRef - First-token ref of the segment to merge into its predecessor. + */ + merge: (secondSegmentStartRef: string) => void; + /** + * Splits a segment so a new one begins at `tokenRef`. + * + * @param tokenRef - The token ref the new segment should begin at. + */ + split: (tokenRef: string) => void; + /** + * Moves a boundary from `fromRef` to `toRef` — used to pull a single edge token across a segment + * boundary when a cross-segment phrase link is made. + * + * @param fromRef - The current segment-start ref to remove. + * @param toRef - The new segment-start ref to add. + */ + move: (fromRef: string, toRef: string) => void; +}>; + +/** The strip-wide segmentation context: the dispatch plus the lookups its call sites need. */ +export type SegmentationContextValue = Readonly<{ + /** Boundary-editing operations. */ + dispatch: SegmentationDispatch; + /** + * When `true`, the link slots render merge/split boundary controls instead of phrase link icons. + * Toggled from the view-options menu. + */ + boundaryEditMode: boolean; + /** Segment id → segment, used to resolve a segment's first-token start ref. 
*/ + segmentById: ReadonlyMap; + /** Segment id → its index in document order, used to test segment adjacency. */ + segmentOrder: ReadonlyMap; +}>; + +/** No-op dispatch used as the default outside a provider (e.g. in isolated component tests). */ +export const NO_OP_SEGMENTATION_DISPATCH: SegmentationDispatch = { + merge: () => {}, + split: () => {}, + move: () => {}, +}; + +/** + * Default context for components rendered without a {@link SegmentationProvider}: boundary editing + * is off and the dispatch is inert. Lets `SegmentView` / `ContinuousView` / `TokenLinkIcon` be + * unit-tested in isolation without wiring a provider, while the real app always supplies one. + */ +const DEFAULT_VALUE: SegmentationContextValue = { + dispatch: NO_OP_SEGMENTATION_DISPATCH, + boundaryEditMode: false, + segmentById: new Map(), + segmentOrder: new Map(), +}; + +const SegmentationContext = createContext(undefined); + +/** Props for {@link SegmentationProvider}. */ +type SegmentationProviderProps = Readonly<{ + /** The segmentation context value; callers should memoize it to preserve leaf memoization. */ + value: SegmentationContextValue; + /** The subtree that can edit segment boundaries. */ + children: ReactNode; +}>; + +/** + * Provides the {@link SegmentationContextValue} to the interlinear views beneath it. + * + * @param props - Component props. + * @param props.value - The segmentation context value. + * @param props.children - The subtree. + * @returns The children wrapped in the context provider. + */ +export function SegmentationProvider({ value, children }: SegmentationProviderProps) { + return {children}; +} + +/** + * Reads the segmentation context, falling back to an inert default when no provider is present. + * + * @returns The current {@link SegmentationContextValue}, or an inert default outside a provider. + */ +export function useSegmentation(): SegmentationContextValue { + return useContext(SegmentationContext) ??
DEFAULT_VALUE; +} diff --git a/src/components/TokenLinkIcon.tsx b/src/components/TokenLinkIcon.tsx index dd445516..34de561a 100644 --- a/src/components/TokenLinkIcon.tsx +++ b/src/components/TokenLinkIcon.tsx @@ -4,6 +4,7 @@ import { Link2, Link2Off } from 'lucide-react'; import { memo, useCallback } from 'react'; import { usePhraseDispatch } from './AnalysisStore'; import { usePhraseStripContext } from './PhraseStripContext'; +import { useSegmentation } from './SegmentationStore'; import type { SlotFocusInfo } from '../types/token-layout'; import { computeSplitFreeRefs, sortByDocOrder, splitPhraseAtBoundary } from '../utils/phrase-arc'; @@ -65,17 +66,24 @@ export function TokenLinkIcon({ slotFocus, isPhraseRevealed, }: TokenLinkIconProps) { - const { focusedSideIsPrev, focusedPhraseLink, focusedFreeToken, isSameSegmentAsFocus } = - slotFocus; + const { + focusedSideIsPrev, + focusedPhraseLink, + focusedFreeToken, + isSameSegmentAsFocus, + isAdjacentEdgeOfFocus, + } = slotFocus; const { phraseMode, tokenDocOrder, + tokenSegmentMap, onHoverPhrase: onHoverCandidatePhrase, onHoverCandidateTokens, onHoverSplitFreeTokens, crossSegmentLinkTooltip, } = usePhraseStripContext(); const { createPhrase, updatePhrase, deletePhrase, mergePhrases } = usePhraseDispatch(); + const { dispatch: segmentationDispatch, segmentById } = useSegmentation(); const inSamePhrase = prevPhraseLink !== undefined && @@ -106,6 +114,37 @@ export function TokenLinkIcon({ tokenDocOrder, ]); + /** + * Moves the segment boundary at this slot so the pulled edge token joins the focused token's + * segment, when this is a cross-segment adjacent-edge link. The pulled token is the neighbor on + * the far side of the slot; moving the boundary by one token keeps both segments contiguous. + * + * `focusedSideIsPrev = true`: focus is the previous (left) segment; `nextToken` is the adjacent + * segment's first word, so the boundary moves forward to the token after it. 
`false`: focus is + * the next (right) segment; `prevToken` is the previous segment's last word, so the boundary + * moves back to start at it. + */ + const performBoundaryPull = useCallback(() => { + /* v8 ignore next -- only invoked from handleLinkClick after the same defined-token guards */ + if (!prevToken || !nextToken) return; + const adjacentSegmentId = tokenSegmentMap.get(nextToken.ref); + const adjacentSegment = + adjacentSegmentId === undefined ? undefined : segmentById.get(adjacentSegmentId); + if (!adjacentSegment) return; + const currentStart = adjacentSegment.tokens[0]?.ref; + /* v8 ignore next -- a rendered segment always has at least one token */ + if (currentStart === undefined) return; + if (focusedSideIsPrev) { + const index = adjacentSegment.tokens.findIndex((t) => t.ref === nextToken.ref); + const newStart = adjacentSegment.tokens[index + 1]?.ref; + // The adjacent segment had only the pulled token, so it merges wholly into the focused one. + if (newStart === undefined) segmentationDispatch.merge(currentStart); + else segmentationDispatch.move(currentStart, newStart); + } else { + segmentationDispatch.move(currentStart, prevToken.ref); + } + }, [prevToken, nextToken, focusedSideIsPrev, tokenSegmentMap, segmentById, segmentationDispatch]); + /** * Joins the neighbor on the far side of this slot into the focused phrase (or free token). * @@ -129,6 +168,10 @@ export function TokenLinkIcon({ /* v8 ignore next -- button only renders when both tokens exist and focus is defined */ if (!prevToken || !nextToken || focusedSideIsPrev === undefined) return; + // For a cross-segment edge link, first move the boundary so the pulled token joins the focused + // segment; the phrase mutation below then proceeds as for a within-segment link. + if (isAdjacentEdgeOfFocus) performBoundaryPull(); + // The neighbor is the token/phrase on the opposite side of this slot from focus. const neighborLink = focusedSideIsPrev ? 
nextPhraseLink : prevPhraseLink; const neighborToken = focusedSideIsPrev ? nextToken : prevToken; @@ -196,6 +239,8 @@ export function TokenLinkIcon({ focusedSideIsPrev, focusedPhraseLink, focusedFreeToken, + isAdjacentEdgeOfFocus, + performBoundaryPull, tokenDocOrder, createPhrase, updatePhrase, @@ -262,14 +307,21 @@ export function TokenLinkIcon({ ); } - // Link icon: active in view mode when focus is set and both neighbors are in the same segment. + // Link icon: active in view mode when focus is set and either both neighbors are in the focused + // segment (a within-segment link) or this slot is the adjacent edge of the focused segment (a + // cross-segment link that pulls the edge token across and moves the boundary). const isActive = - phraseMode.kind === 'view' && focusedSideIsPrev !== undefined && isSameSegmentAsFocus; + phraseMode.kind === 'view' && + focusedSideIsPrev !== undefined && + (isSameSegmentAsFocus || isAdjacentEdgeOfFocus); const linkDisabled = isUnlinkMode || isEditMode || !isActive; - // Show a tooltip only when inactive because the slot is outside the focused segment (not when + // Show a tooltip only when inactive because the slot is a non-edge cross-segment slot (not when // disabled for other reasons like unlink/edit mode where the reason is already visible in the UI). const crossSegmentDisabled = - phraseMode.kind === 'view' && focusedSideIsPrev !== undefined && !isSameSegmentAsFocus; + phraseMode.kind === 'view' && + focusedSideIsPrev !== undefined && + !isSameSegmentAsFocus && + !isAdjacentEdgeOfFocus; const linkTitle = crossSegmentDisabled ? crossSegmentLinkTooltip : undefined; // Highlight exactly what would be absorbed if the button were clicked — mirrors handleLinkClick. 
diff --git a/src/components/controls/ViewOptionsDropdown.tsx b/src/components/controls/ViewOptionsDropdown.tsx index 27b51f19..cadc1fb2 100644 --- a/src/components/controls/ViewOptionsDropdown.tsx +++ b/src/components/controls/ViewOptionsDropdown.tsx @@ -12,6 +12,7 @@ const STRING_KEYS = [ '%interlinearizer_viewOption_chapterLabelInVerse%', '%interlinearizer_viewOption_showMorphology%', '%interlinearizer_viewOption_showFreeTranslation%', + '%interlinearizer_viewOption_boundaryEditMode%', ] as const satisfies `%${string}%`[]; /** @@ -72,6 +73,10 @@ type ViewOptionsDropdownProps = Readonly<{ showFreeTranslation: boolean; /** Called when the show-free-translation toggle changes. */ onShowFreeTranslationChange: (checked: boolean) => void; + /** Current value of the edit-segment-boundaries toggle. */ + boundaryEditMode: boolean; + /** Called when the edit-segment-boundaries toggle changes. */ + onBoundaryEditModeChange: (checked: boolean) => void; }>; /** @@ -92,6 +97,8 @@ type ViewOptionsDropdownProps = Readonly<{ * @param props.onShowMorphologyChange - Show-morphology change callback. * @param props.showFreeTranslation - Current show-free-translation value. * @param props.onShowFreeTranslationChange - Show-free-translation change callback. + * @param props.boundaryEditMode - Current edit-segment-boundaries value. + * @param props.onBoundaryEditModeChange - Edit-segment-boundaries change callback. * @returns A gear button that opens a dropdown panel of view toggles. 
*/ export default function ViewOptionsDropdown({ @@ -107,6 +114,8 @@ export default function ViewOptionsDropdown({ onShowMorphologyChange, showFreeTranslation, onShowFreeTranslationChange, + boundaryEditMode, + onBoundaryEditModeChange, }: ViewOptionsDropdownProps) { const [localizedStrings] = useLocalizedStrings(STRING_KEYS); const [open, setOpen] = useState(false); @@ -220,6 +229,11 @@ export default function ViewOptionsDropdown({ label={localizedStrings['%interlinearizer_viewOption_chapterLabelInVerse%']} onCheckedChange={onChapterLabelInVerseChange} /> +
, document.body, diff --git a/src/hooks/useDraftProject.ts b/src/hooks/useDraftProject.ts index 0bfd289b..f5270c6a 100644 --- a/src/hooks/useDraftProject.ts +++ b/src/hooks/useDraftProject.ts @@ -1,6 +1,11 @@ /** @file Hook owning the always-present, auto-saved draft buffer for one source project. */ import papi, { logger } from '@papi/frontend'; -import type { DraftProject, InterlinearProject, TextAnalysis } from 'interlinearizer'; +import type { + DraftProject, + InterlinearProject, + SegmentationDelta, + TextAnalysis, +} from 'interlinearizer'; import { useCallback, useEffect, useRef, useState } from 'react'; import { emptyAnalysis, emptyDraft } from '../types/empty-factories'; import { removeBookFromAnalysis } from '../utils/analysis-book'; @@ -11,7 +16,7 @@ const AUTOSAVE_DEBOUNCE_MS = 300; /** The subset of an {@link InterlinearProject} needed to open it into the draft as a working copy. */ export type OpenableProject = Pick< InterlinearProject, - 'analysis' | 'analysisLanguages' | 'targetProjectId' + 'analysis' | 'analysisLanguages' | 'targetProjectId' | 'segmentation' >; /** Configuration for starting a fresh, empty draft via {@link UseDraftProjectResult.newDraft}. */ @@ -59,6 +64,13 @@ export type UseDraftProjectResult = { * @param analysis - The updated analysis from the store. */ autosaveAnalysis: (analysis: TextAnalysis) => void; + /** + * Persists an edited segment-boundary delta into the draft and marks it dirty. Pass `undefined` + * (or a default/empty delta) to clear custom boundaries back to the default verse segmentation. + * + * @param segmentation - The updated boundary delta, or `undefined` for the default segmentation. + */ + autosaveSegmentation: (segmentation: SegmentationDelta | undefined) => void; /** * Replaces the draft with a working copy of an existing project's analysis and config — the * "Open" flow. 
@@ -239,12 +251,35 @@ export default function useDraftProject( [persist], ); + const autosaveSegmentation = useCallback( + (segmentation: SegmentationDelta | undefined) => { + const { current } = draftRef; + /* v8 ignore next -- auto-save only fires from the mounted editor, which exists only post-load */ + if (!current) return; + + const next: DraftProject = { ...current, dirty: true }; + // Store custom boundaries when present; clear the field for the default segmentation so the + // persisted draft stays minimal. + if (segmentation === undefined) delete next.segmentation; + else next.segmentation = segmentation; + draftRef.current = next; + if (autosaveTimeoutRef.current !== undefined) clearTimeout(autosaveTimeoutRef.current); + autosaveTimeoutRef.current = setTimeout(() => { + autosaveTimeoutRef.current = undefined; + persist(next); + }, AUTOSAVE_DEBOUNCE_MS); + setDirty(true); + }, + [persist], + ); + const loadFromProject = useCallback( (project: OpenableProject) => { applyReplacement({ sourceProjectId, analysisLanguages: project.analysisLanguages, ...(project.targetProjectId !== undefined && { targetProjectId: project.targetProjectId }), + ...(project.segmentation !== undefined && { segmentation: project.segmentation }), analysis: project.analysis, dirty: false, }); @@ -292,7 +327,10 @@ export default function useDraftProject( // the unsaved-changes indicator (dirty: false) so the user is not nagged to save an empty // draft. The active project is intentionally left untouched, so a subsequent Save still targets // it. Per-book wipe stays dirty, since it is a partial edit the user will usually want to save. - applyReplacement({ ...current, analysis: emptyAnalysis(), dirty: false }); + // Custom segment boundaries are part of the working state, so a whole-draft wipe clears them too. 
+ const next: DraftProject = { ...current, analysis: emptyAnalysis(), dirty: false }; + delete next.segmentation; + applyReplacement(next); }, [applyReplacement]); const markSynced = useCallback( @@ -328,6 +366,7 @@ export default function useDraftProject( dirty, getDraftSnapshot, autosaveAnalysis, + autosaveSegmentation, loadFromProject, newDraft, wipeBook, diff --git a/src/main.ts b/src/main.ts index f82b7e65..71980410 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,10 +7,11 @@ import type { SavedWebViewDefinition, WebViewDefinition, } from '@papi/core'; +import type { SegmentationDelta } from 'interlinearizer'; import interlinearizerReact from './interlinearizer.web-view?inline'; import interlinearizerStyles from './interlinearizer.web-view.scss?inline'; import * as projectStorage from './services/projectStorage'; -import { isDraftProject, isTextAnalysis } from './types/type-guards'; +import { isDraftProject, isSegmentationDelta, isTextAnalysis } from './types/type-guards'; // #region WebView provider @@ -265,6 +266,8 @@ async function getInterlinearProject(interlinearProjectId: string): Promise { try { const analysis = JSON.parse(analysisJson); if (!isTextAnalysis(analysis)) { throw new TypeError('saveInterlinearAnalysis: analysisJson does not conform to TextAnalysis'); } - await projectStorage.updateAnalysis(executionToken, interlinearProjectId, analysis); + // undefined ⇒ leave boundaries unchanged; null ⇒ clear them; an object ⇒ set them. 
+ let segmentation: SegmentationDelta | null | undefined; + if (segmentationJson !== undefined) { + const parsed: unknown = JSON.parse(segmentationJson); + // eslint-disable-next-line no-null/no-null -- JSON.parse('null') yields null, the clear sentinel + if (parsed === null) { + // eslint-disable-next-line no-null/no-null -- explicit "clear boundaries" sentinel from the WebView + segmentation = null; + } else if (isSegmentationDelta(parsed)) { + segmentation = parsed; + } else { + throw new TypeError( + 'saveInterlinearAnalysis: segmentationJson does not conform to SegmentationDelta', + ); + } + } + await projectStorage.updateAnalysis( + executionToken, + interlinearProjectId, + analysis, + segmentation, + ); } catch (e) { logger.error('Interlinearizer: failed to save analysis', e); await papi.notifications diff --git a/src/parsers/papi/resegmentBook.ts b/src/parsers/papi/resegmentBook.ts new file mode 100644 index 00000000..81642f88 --- /dev/null +++ b/src/parsers/papi/resegmentBook.ts @@ -0,0 +1,130 @@ +/** + * @file Re-groups a verse-tokenized {@link Book} into the user's custom segments per a + * {@link SegmentationDelta}, without touching the text-layer tokenizer. + * + * {@link tokenizeBook} always produces one `Segment` per verse; this pass runs on its output and + * cuts the flat document-order token stream at the delta's effective boundaries. Token refs and + * token objects are preserved unchanged for untouched verses (reused by reference) so analyses + * keep resolving and React memoization is undisturbed; only merged or split segments are rebuilt, + * with `baselineText` and per-token char offsets recomputed so the `baselineText.slice(charStart, + * charEnd) === surfaceText` invariant still holds. 
+ */ +import type { Book, ScriptureRef, Segment, SegmentationDelta, Token } from 'interlinearizer'; + +import { effectiveStarts, isDefaultSegmentation } from '../../utils/segmentation'; + +/** Separator inserted between two verses' baseline text when they are merged into one segment. */ +const MERGE_SEPARATOR = ' '; + +/** A token paired with the original verse {@link Segment} it came from. */ +type SourcedToken = { token: Token; verse: Segment }; + +/** + * Rebuilds one custom {@link Segment} from a run of tokens that may span multiple original verses. + * The new `baselineText` is each contributing verse's text spliced to its covered span, joined by + * {@link MERGE_SEPARATOR} between verses; every token's char offset is shifted into the new string + * while its `ref` and `surfaceText` are preserved. + * + * @param run - The run's tokens in document order, each tagged with its source verse. Non-empty. + * @returns The rebuilt segment. + */ +function buildSegment(run: SourcedToken[]): Segment { + const firstSourced = run[0]; + const lastSourced = run[run.length - 1]; + const firstVerse = firstSourced.verse; + const lastVerse = lastSourced.verse; + + // A segment that begins at its first verse's first token keeps that verse's id (so an untouched or + // merged segment preserves the leading verse's segment-level analyses); a segment that begins + // mid-verse (a split's later piece) takes its first token's ref as a fresh, unique id. + const startsAtVerseBoundary = firstSourced.token.ref === firstVerse.tokens[0]?.ref; + const id = startsAtVerseBoundary ? 
firstVerse.id : firstSourced.token.ref; + + let baselineText = ''; + let cursor = 0; + let runIndex = 0; + const tokens: Token[] = []; + while (runIndex < run.length) { + const { verse } = run[runIndex]; + if (runIndex > 0) { + baselineText += MERGE_SEPARATOR; + cursor += MERGE_SEPARATOR.length; + } + // Consume the contiguous sub-run of tokens from this verse, shifting each token's offsets into + // the new concatenated baseline while keeping its ref and surface text unchanged. + const subStart = runIndex; + const base = run[subStart].token.charStart; + while (runIndex < run.length && run[runIndex].verse === verse) { + const { token } = run[runIndex]; + tokens.push({ + ...token, + charStart: cursor + (token.charStart - base), + charEnd: cursor + (token.charEnd - base), + }); + runIndex += 1; + } + const lastCharEnd = run[runIndex - 1].token.charEnd; + const piece = verse.baselineText.slice(base, lastCharEnd); + baselineText += piece; + cursor += piece.length; + } + + // Anchor the new range to the covered span; a mid-verse edge carries a sub-verse charIndex. + const startRef: ScriptureRef = startsAtVerseBoundary + ? firstVerse.startRef + : { ...firstVerse.startRef, charIndex: firstSourced.token.charStart }; + const endsAtVerseBoundary = + lastSourced.token.ref === lastVerse.tokens[lastVerse.tokens.length - 1]?.ref; + const endRef: ScriptureRef = endsAtVerseBoundary + ? lastVerse.endRef + : { ...lastVerse.endRef, charIndex: lastSourced.token.charEnd }; + + return { id, startRef, endRef, baselineText, tokens }; +} + +/** + * Re-groups `book`'s verse segments into the user's custom segments per `delta`. + * + * Returns `book` unchanged (by reference) for the default segmentation, so the common no-custom- + * boundaries case incurs no work and no identity churn. 
Otherwise the flat token stream is cut at + * the effective boundaries; a run that is exactly one original verse reuses that verse's `Segment` + * object verbatim, while merged or split runs are rebuilt via {@link buildSegment}. + * + * @param book - The verse-tokenized book from {@link tokenizeBook}. + * @param delta - The user's boundary delta, or `undefined` for the default verse segmentation. + * @returns A book with the custom segmentation applied, or `book` itself when `delta` is the + * default. + */ +export function resegmentBook(book: Book, delta: SegmentationDelta | undefined): Book { + if (isDefaultSegmentation(delta)) return book; + + const starts = effectiveStarts(book, delta); + + // Cut the flat token stream into runs, beginning a new run at each effective start (but never + // splitting off a run that has no word/structural content yet — leading tokens stay with the + // first run). + const runs: SourcedToken[][] = []; + let current: SourcedToken[] = []; + book.segments.forEach((verse) => { + verse.tokens.forEach((token) => { + if (starts.has(token.ref) && current.length > 0) { + runs.push(current); + current = []; + } + current.push({ token, verse }); + }); + }); + /* v8 ignore next -- a non-default delta always yields at least one token, so current is non-empty */ + if (current.length > 0) runs.push(current); + + const segments: Segment[] = runs.map((run) => { + const firstVerse = run[0].verse; + // Reuse the original verse Segment when the run is exactly that verse — preserves its id, + // baselineText, token offsets, and object identity. + const isWholeUntouchedVerse = + run.length === firstVerse.tokens.length && run.every((s) => s.verse === firstVerse); + return isWholeUntouchedVerse ? 
firstVerse : buildSegment(run); + }); + + return { ...book, segments }; +} diff --git a/src/services/projectStorage.ts b/src/services/projectStorage.ts index 42b242a3..7eeac53f 100644 --- a/src/services/projectStorage.ts +++ b/src/services/projectStorage.ts @@ -1,6 +1,11 @@ import papi, { logger } from '@papi/backend'; import type { ExecutionToken } from '@papi/core'; -import type { DraftProject, InterlinearProject, TextAnalysis } from 'interlinearizer'; +import type { + DraftProject, + InterlinearProject, + SegmentationDelta, + TextAnalysis, +} from 'interlinearizer'; import { emptyAnalysis, emptyDraft } from '../types/empty-factories'; import { isDraftProject } from '../types/type-guards'; @@ -256,11 +261,14 @@ export async function getProjectsForSource( } /** - * Replaces the analysis of an existing interlinearizer project. + * Replaces the analysis of an existing interlinearizer project, and optionally its custom segment + * boundaries, in one atomic write. * * @param token - The execution token for storage access. * @param id - The interlinearizer project UUID to update. * @param analysis - The new `TextAnalysis` to persist. + * @param segmentation - The new boundary delta to persist (`SegmentationDelta`), `null` to clear + * any stored boundaries, or `undefined` to leave the project's existing boundaries unchanged. * @returns The updated project record, or `undefined` if no project with the given ID exists. * @throws {SyntaxError} If the project's storage value contains invalid JSON. 
* @throws If `papi.storage.readUserData` or `papi.storage.writeUserData` rejects for a non-ENOENT @@ -270,11 +278,15 @@ export async function updateAnalysis( token: ExecutionToken, id: string, analysis: TextAnalysis, + segmentation?: SegmentationDelta | null, ): Promise { return enqueueProjectOp(id, async () => { const project = await getProject(token, id); if (!project) return undefined; const updated: InterlinearProject = { ...project, analysis }; + // eslint-disable-next-line no-null/no-null -- null is the explicit "clear stored boundaries" sentinel + if (segmentation === null) delete updated.segmentation; + else if (segmentation !== undefined) updated.segmentation = segmentation; await papi.storage.writeUserData(token, projectKey(id), JSON.stringify(updated)); return updated; }); diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts index 52dc08cb..b626446f 100644 --- a/src/types/interlinearizer.d.ts +++ b/src/types/interlinearizer.d.ts @@ -162,11 +162,15 @@ declare module 'papi-shared-types' { 'interlinearizer.getProject': (interlinearProjectId: string) => Promise; /** - * Persists an updated `TextAnalysis` for an interlinearizer project. Called from the WebView - * after each gloss write so that analysis changes survive tab restores and project switches. + * Persists an updated `TextAnalysis` (and optionally custom segment boundaries) for an + * interlinearizer project. Called from the WebView on Save so analysis and boundary changes + * survive tab restores and project switches. * * @param interlinearProjectId UUID of the interlinearizer project to update. * @param analysisJson JSON-stringified `TextAnalysis` to persist. + * @param segmentationJson Optional JSON-stringified `SegmentationDelta` to persist, or the + * string `"null"` to clear any stored custom boundaries. Omit entirely to leave the project's + * existing boundaries unchanged. * @returns Promise that resolves to void once the analysis has been written to storage. 
* @throws If JSON parsing or storage fails. Error is logged and an error notification is sent * before rethrowing so callers do not need to send a second notification. @@ -174,6 +178,7 @@ declare module 'papi-shared-types' { 'interlinearizer.saveAnalysis': ( interlinearProjectId: string, analysisJson: string, + segmentationJson?: string, ) => Promise; /** @@ -1095,7 +1100,47 @@ declare module 'interlinearizer' { } // --------------------------------------------------------------------------- - // §6 InterlinearProject — persisted project envelope + // §6 SegmentationDelta — user-defined segment boundaries + // --------------------------------------------------------------------------- + + /** + * A user's custom segment boundaries, stored as a **delta from the default one-segment-per-verse + * segmentation** rather than as explicit segment definitions. + * + * The text layer is rebuilt from USJ on every load as one `Segment` per verse (see {@link Book}). + * A segment is otherwise just a maximal contiguous run of the book's document-order token stream + * between "start" tokens; the default start tokens are each verse's first token. This delta + * records where the user's boundaries differ from that default: + * + * - A verse's first token listed in `removedVerseStarts` no longer starts a segment, so that verse + * is **merged** into the preceding segment. + * - A mid-verse token listed in `addedStarts` starts a new segment, **splitting** its verse. + * + * Boundaries are anchored to token refs (stable opaque ids), so the model degrades gracefully + * when the baseline text drifts: an anchor whose token no longer exists is ignored on load, + * leaving every other boundary intact. Because a segment can only ever be a contiguous run + * between start tokens, discontiguous segments are unrepresentable by construction. + * + * Absent (`undefined`) ⇒ the default verse segmentation. The empty delta (both arrays empty) is + * equivalent. 
+ */ + export interface SegmentationDelta { + /** + * Token refs (of any token type) that are a verse's first token in the default segmentation but + * should **not** start a segment — i.e. the verse is merged into the preceding segment. A ref + * whose token no longer exists is ignored on load. + */ + removedVerseStarts: string[]; + + /** + * Mid-verse word-token refs that should start a new segment — i.e. the verse is split before + * this token. A ref whose token no longer exists is ignored on load. + */ + addedStarts: string[]; + } + + // --------------------------------------------------------------------------- + // §7 InterlinearProject — persisted project envelope // --------------------------------------------------------------------------- /** @@ -1166,6 +1211,13 @@ declare module 'interlinearizer' { * aligns source and target tokens. */ links?: AlignmentLink[]; + + /** + * User-defined segment boundaries as a delta from the default verse segmentation. Absent + * (`undefined`) ⇒ the default one-segment-per-verse segmentation. See + * {@link SegmentationDelta}. + */ + segmentation?: SegmentationDelta; } /** @@ -1218,10 +1270,17 @@ declare module 'interlinearizer' { * project. */ dirty: boolean; + + /** + * User-defined segment boundaries being edited, as a delta from the default verse segmentation. + * Absent (`undefined`) ⇒ the default one-segment-per-verse segmentation. Carried to the active + * project on Save. See {@link SegmentationDelta}.
+ */ + segmentation?: SegmentationDelta; } // --------------------------------------------------------------------------- - // §7 ActiveProject — runtime pairing of project envelope and text layers + // §8 ActiveProject — runtime pairing of project envelope and text layers // --------------------------------------------------------------------------- /** diff --git a/src/types/token-layout.ts b/src/types/token-layout.ts index 7fd3cbd8..39ae209a 100644 --- a/src/types/token-layout.ts +++ b/src/types/token-layout.ts @@ -37,10 +37,18 @@ export type SlotFocusInfo = { */ focusedSideIsPrev: boolean | undefined; /** - * `true` when both slot neighbors are in the same segment as the focused token. Phrases cannot - * span segments, so the link button is disabled when this is `false`. + * `true` when both slot neighbors are in the same segment as the focused token. Within one + * segment the link button joins tokens into a phrase as usual. */ isSameSegmentAsFocus: boolean; + /** + * `true` when this slot is the boundary between the focused token's segment and an immediately + * adjacent segment — i.e. one neighbor is in the focused segment and the other is in the segment + * directly before or after it in document order. The cross-segment link button is active only at + * these edges, so pulling an adjacent segment's edge token into the focused phrase moves the + * boundary by exactly one token and keeps both segments contiguous. + */ + isAdjacentEdgeOfFocus: boolean; /** The phrase containing the focused token, or `undefined` when the focused token is free. */ focusedPhraseLink: PhraseAnalysisLink | undefined; /** The focused token when it is not part of any phrase ("free"); `undefined` otherwise. */ diff --git a/src/types/type-guards.ts b/src/types/type-guards.ts index 00dddc1f..a04b346a 100644 --- a/src/types/type-guards.ts +++ b/src/types/type-guards.ts @@ -1,5 +1,11 @@ /** @file Type guards for narrowing interlinearizer types and validating parsed JSON payloads. 
*/ -import type { AssignmentStatus, DraftProject, TextAnalysis, Token } from 'interlinearizer'; +import type { + AssignmentStatus, + DraftProject, + SegmentationDelta, + TextAnalysis, + Token, +} from 'interlinearizer'; import type { InterlinearProjectSummary } from './interlinear-project-summary'; /** @@ -234,6 +240,27 @@ export function isTextAnalysis(value: unknown): value is TextAnalysis { ); } +/** + * Type guard for {@link SegmentationDelta} parsed from unknown JSON. Both arrays must be present and + * contain only strings, so a malformed delta is rejected before it can corrupt re-segmentation. + * + * @param value - The value to test, typically a parsed JSON object of unknown shape. + * @returns `true` if `value` satisfies the {@link SegmentationDelta} shape, narrowing its type + * accordingly. + */ +export function isSegmentationDelta(value: unknown): value is SegmentationDelta { + return ( + !!value && + typeof value === 'object' && + 'removedVerseStarts' in value && + Array.isArray(value.removedVerseStarts) && + value.removedVerseStarts.every((r) => typeof r === 'string') && + 'addedStarts' in value && + Array.isArray(value.addedStarts) && + value.addedStarts.every((r) => typeof r === 'string') + ); +} + /** * Type guard for {@link DraftProject} parsed from unknown JSON. 
Validates the envelope fields and * delegates the `analysis` to {@link isTextAnalysis}, so malformed drafts are rejected before @@ -257,6 +284,7 @@ export function isDraftProject(value: unknown): value is DraftProject { (!('targetProjectId' in value) || typeof value.targetProjectId === 'string') && (!('suggestedName' in value) || typeof value.suggestedName === 'string') && (!('suggestedDescription' in value) || typeof value.suggestedDescription === 'string') && + (!('segmentation' in value) || isSegmentationDelta(value.segmentation)) && 'analysis' in value && isTextAnalysis(value.analysis) ); diff --git a/src/types/view-options.ts b/src/types/view-options.ts index 57a02496..eae8d57c 100644 --- a/src/types/view-options.ts +++ b/src/types/view-options.ts @@ -19,4 +19,9 @@ export type ViewOptions = Readonly<{ showMorphology: boolean; /** When true, a free-translation input is shown beneath each segment's tokens or baseline text. */ showFreeTranslation: boolean; + /** + * When true, the link slots between phrases show merge/split controls for editing segment + * boundaries instead of the phrase link/unlink icons. + */ + boundaryEditMode: boolean; }>; diff --git a/src/utils/segmentation.ts b/src/utils/segmentation.ts new file mode 100644 index 00000000..874d2577 --- /dev/null +++ b/src/utils/segmentation.ts @@ -0,0 +1,265 @@ +/** + * @file Pure transforms over a {@link SegmentationDelta} — the user's custom segment boundaries + * expressed as a delta from the default one-segment-per-verse segmentation. + * + * A segment is a maximal contiguous run of the book's document-order token stream between "start" + * tokens. The default start tokens are each verse's first token; the delta records where the + * user's boundaries differ (a removed verse start merges that verse into the previous segment; an + * added start splits a verse). Because a segment can only be a contiguous run between starts, + * discontiguous segments are unrepresentable. 
+ * + * Every function here is pure and store-free (mirrors `phrase-arc.ts`). They take the original + * verse-tokenized {@link Book} (from `tokenizeBook`, before re-segmentation) so they can derive + * the default verse starts; they never need the re-segmented book. + */ +import type { Book, SegmentationDelta } from 'interlinearizer'; + +/** An empty delta — equivalent to the default verse segmentation. */ +const EMPTY_DELTA: SegmentationDelta = { removedVerseStarts: [], addedStarts: [] }; + +/** + * The ref of the book's very first token — the start of the first segment, which can never be + * merged leftward. + * + * @param verseBook - The original verse-tokenized book. + * @returns The first token's ref, or `undefined` when the book has no tokens. + */ +function bookFirstTokenRef(verseBook: Book): string | undefined { + return verseBook.segments[0]?.tokens[0]?.ref; +} + +/** + * The default segment-start refs — each verse segment's first token (of any type, so a verse's + * leading punctuation stays with that verse). + * + * @param verseBook - The original verse-tokenized book. + * @returns The set of first-token refs, one per verse segment that has tokens. + */ +export function defaultVerseStarts(verseBook: Book): Set { + const starts = new Set(); + verseBook.segments.forEach((seg) => { + const first = seg.tokens[0]; + if (first) starts.add(first.ref); + }); + return starts; +} + +/** + * Every token ref in the book, used to drop delta anchors whose token no longer exists. + * + * @param verseBook - The original verse-tokenized book. + * @returns The set of all token refs. + */ +function allTokenRefs(verseBook: Book): Set { + const refs = new Set(); + verseBook.segments.forEach((seg) => seg.tokens.forEach((t) => refs.add(t.ref))); + return refs; +} + +/** + * Document-order index for every token ref, used to keep delta arrays canonically sorted. + * + * @param verseBook - The original verse-tokenized book. 
+ * @returns Map from token ref to its flat document index. + */ +function docOrder(verseBook: Book): Map { + const order = new Map(); + let i = 0; + verseBook.segments.forEach((seg) => + seg.tokens.forEach((t) => { + order.set(t.ref, i); + i += 1; + }), + ); + return order; +} + +/** + * The effective set of segment-start refs after applying `delta` to the default verse starts: + * `(defaults \ removedVerseStarts) ∪ addedStarts`, with added anchors dropped when their token no + * longer exists and the book's first token always forced to be a start. Shared with `resegmentBook` + * so re-segmentation and the editing operations agree on where boundaries fall. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The user's boundary delta, or `undefined` for the default segmentation. + * @returns The set of token refs that begin a segment. + */ +export function effectiveStarts( + verseBook: Book, + delta: SegmentationDelta | undefined, +): Set { + const defaults = defaultVerseStarts(verseBook); + const removed = new Set(delta?.removedVerseStarts ?? []); + const starts = new Set(); + defaults.forEach((ref) => { + if (!removed.has(ref)) starts.add(ref); + }); + if (delta) { + const all = allTokenRefs(verseBook); + delta.addedStarts.forEach((ref) => { + if (all.has(ref)) starts.add(ref); + }); + } + const first = bookFirstTokenRef(verseBook); + // The first segment can never be merged away, so its start is always present. + if (first !== undefined) starts.add(first); + return starts; +} + +/** + * Returns a canonicalized copy of `delta`: each array deduped, stripped of no-op entries (a removed + * ref that is not a default start, or an added ref that is already a default start or whose token + * is gone), and sorted by document order so equal segmentations serialize identically. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The delta to canonicalize. + * @returns A normalized {@link SegmentationDelta}. 
+ */ +function normalize(verseBook: Book, delta: SegmentationDelta): SegmentationDelta { + const defaults = defaultVerseStarts(verseBook); + const all = allTokenRefs(verseBook); + const order = docOrder(verseBook); + const first = bookFirstTokenRef(verseBook); + const byOrder = (a: string, b: string) => + /* v8 ignore next -- ?? 0 fallback for refs absent from order; filtered arrays only hold real refs */ + (order.get(a) ?? 0) - (order.get(b) ?? 0); + + const removedVerseStarts = [...new Set(delta.removedVerseStarts)] + .filter((ref) => defaults.has(ref) && ref !== first) + .sort(byOrder); + const addedStarts = [...new Set(delta.addedStarts)] + .filter((ref) => all.has(ref) && !defaults.has(ref)) + .sort(byOrder); + + return { removedVerseStarts, addedStarts }; +} + +/** + * Makes `ref` begin a segment — i.e. splits before it. + * + * - When `ref` is a default verse start that was merged away, it is un-merged (dropped from + * `removedVerseStarts`). + * - Otherwise `ref` is recorded as an added start. + * + * No-op (returns an equivalent normalized delta) when `ref` already begins a segment. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The current delta, or `undefined` for the default segmentation. + * @param ref - The token ref that should begin a segment. + * @returns The updated, normalized delta. + */ +export function addBoundaryBefore( + verseBook: Book, + delta: SegmentationDelta | undefined, + ref: string, +): SegmentationDelta { + const current = delta ?? EMPTY_DELTA; + const defaults = defaultVerseStarts(verseBook); + if (defaults.has(ref)) { + return normalize(verseBook, { + removedVerseStarts: current.removedVerseStarts.filter((r) => r !== ref), + addedStarts: current.addedStarts, + }); + } + return normalize(verseBook, { + removedVerseStarts: current.removedVerseStarts, + addedStarts: [...current.addedStarts, ref], + }); +} + +/** + * Stops `ref` from beginning a segment — i.e. merges it into the preceding segment. 
+ * + * - When `ref` is a default verse start, it is recorded in `removedVerseStarts`. + * - Otherwise (it was an added split) it is dropped from `addedStarts`. + * + * No-op when `ref` is the book's first token (the first segment cannot be merged leftward). + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The current delta, or `undefined` for the default segmentation. + * @param ref - The segment-start token ref to remove. + * @returns The updated, normalized delta. + */ +export function removeBoundaryAt( + verseBook: Book, + delta: SegmentationDelta | undefined, + ref: string, +): SegmentationDelta { + const current = delta ?? EMPTY_DELTA; + if (ref === bookFirstTokenRef(verseBook)) return normalize(verseBook, current); + const defaults = defaultVerseStarts(verseBook); + if (defaults.has(ref)) { + return normalize(verseBook, { + removedVerseStarts: [...current.removedVerseStarts, ref], + addedStarts: current.addedStarts, + }); + } + return normalize(verseBook, { + removedVerseStarts: current.removedVerseStarts, + addedStarts: current.addedStarts.filter((r) => r !== ref), + }); +} + +/** + * Moves a boundary from `fromRef` to `toRef` in one step — the primitive behind pulling a single + * edge token across a segment boundary. Removes the start at `fromRef` and adds one at `toRef`. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The current delta, or `undefined` for the default segmentation. + * @param fromRef - The current segment-start ref to remove. + * @param toRef - The new segment-start ref to add. + * @returns The updated, normalized delta. + */ +export function moveBoundary( + verseBook: Book, + delta: SegmentationDelta | undefined, + fromRef: string, + toRef: string, +): SegmentationDelta { + return addBoundaryBefore(verseBook, removeBoundaryAt(verseBook, delta, fromRef), toRef); +} + +/** + * Merges the segment that starts at `secondSegmentStartRef` into the segment before it. 
Thin alias + * for {@link removeBoundaryAt}, named for the explicit merge control. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The current delta, or `undefined` for the default segmentation. + * @param secondSegmentStartRef - The first-token ref of the segment being merged into its + * predecessor. + * @returns The updated, normalized delta. + */ +export function mergeSegments( + verseBook: Book, + delta: SegmentationDelta | undefined, + secondSegmentStartRef: string, +): SegmentationDelta { + return removeBoundaryAt(verseBook, delta, secondSegmentStartRef); +} + +/** + * Splits a segment so a new one begins at `ref`. Thin alias for {@link addBoundaryBefore}, named for + * the explicit split control. + * + * @param verseBook - The original verse-tokenized book. + * @param delta - The current delta, or `undefined` for the default segmentation. + * @param ref - The token ref the new segment should begin at. + * @returns The updated, normalized delta. + */ +export function splitSegmentBefore( + verseBook: Book, + delta: SegmentationDelta | undefined, + ref: string, +): SegmentationDelta { + return addBoundaryBefore(verseBook, delta, ref); +} + +/** + * Whether `delta` represents the default verse segmentation (absent or both arrays empty). + * + * @param delta - The delta to test. + * @returns `true` when applying `delta` yields the default segmentation. + */ +export function isDefaultSegmentation(delta: SegmentationDelta | undefined): boolean { + return !delta || (delta.removedVerseStarts.length === 0 && delta.addedStarts.length === 0); +} diff --git a/src/utils/token-layout.ts b/src/utils/token-layout.ts index 0c9f4c6d..4b3cd083 100644 --- a/src/utils/token-layout.ts +++ b/src/utils/token-layout.ts @@ -61,6 +61,9 @@ export function resolveFocusContext( * @param focus - Resolved focus context for the whole strip. * @param focusedSideIsPrev - The layout-specific bool indicating whether focus is start-ward of * this slot. 
+ * @param segmentOrder - Segment id → document-order index, used to detect when this slot is the + * boundary between the focused segment and an immediately adjacent one. Defaults to empty (no + * adjacency, e.g. single-segment SegmentView slots). * @returns Slot focus info ready to pass as `slotFocus` to `MemoizedTokenLinkIcon`. */ export function resolveSlotFocus( @@ -68,14 +71,34 @@ export function resolveSlotFocus( nextSegmentId: string | undefined, focus: FocusContext, focusedSideIsPrev: boolean | undefined, + segmentOrder: ReadonlyMap = new Map(), ): SlotFocusInfo { + const { focusedSegmentId } = focus; const isSameSegmentAsFocus = - focus.focusedSegmentId !== undefined && - prevSegmentId === focus.focusedSegmentId && - nextSegmentId === focus.focusedSegmentId; + focusedSegmentId !== undefined && + prevSegmentId === focusedSegmentId && + nextSegmentId === focusedSegmentId; + // The slot is an adjacent edge when exactly one neighbor is the focused segment, the other is a + // different segment, and the two are neighbors in document order. 
+ const isAdjacentEdgeOfFocus = (() => { + if ( + focusedSegmentId === undefined || + prevSegmentId === undefined || + nextSegmentId === undefined + ) + return false; + if (prevSegmentId === nextSegmentId) return false; + const focusedIsPrev = prevSegmentId === focusedSegmentId; + const focusedIsNext = nextSegmentId === focusedSegmentId; + if (!focusedIsPrev && !focusedIsNext) return false; + const prevIndex = segmentOrder.get(prevSegmentId); + const nextIndex = segmentOrder.get(nextSegmentId); + return prevIndex !== undefined && nextIndex !== undefined && nextIndex - prevIndex === 1; + })(); return { focusedSideIsPrev, isSameSegmentAsFocus, + isAdjacentEdgeOfFocus, focusedPhraseLink: focus.focusedPhraseLink, focusedFreeToken: focus.focusedFreeToken, }; @@ -88,6 +111,7 @@ export function resolveSlotFocus( export const NO_SLOT_FOCUS: SlotFocusInfo = { focusedSideIsPrev: undefined, isSameSegmentAsFocus: false, + isAdjacentEdgeOfFocus: false, focusedPhraseLink: undefined, focusedFreeToken: undefined, }; diff --git a/user-questions.md b/user-questions.md index 0b76a0f1..dfd691d4 100644 --- a/user-questions.md +++ b/user-questions.md @@ -86,3 +86,41 @@ Decisions made during development that we'd like reviewed: (the less destructive option) and disables that option when no book is loaded. Alternative: keep two separate menu items (each a single click, no scope step). Current choice: one menu item plus a scope-picker dialog. + +## User-defined segment boundaries + +Segments were previously fixed to verses (rebuilt from USJ on every load). Users can now define +their own segment boundaries: an **Edit segment boundaries** view toggle exposes per-slot **merge** +(combine a segment into the one before it) and **split** (start a new segment at a token) controls, +and linking a phrase across a verse boundary pulls the adjacent segment's **edge** token into the +focused segment (only the immediate adjacent-edge link buttons are active for this). 
Boundaries are +stored as a delta from the default verse segmentation on the draft and carried to the project on +Save; discontiguous segments are not supported. + +Decisions made during development that we'd like reviewed: + +1. **Merged-segment separator.** When two verses are merged into one segment, their baseline texts + are joined with a single space. This is reasonable for whitespace-delimited scripts but wrong for + scriptio continua (Chinese, Thai, …) and for cases where the USFM implied a different break. + Should the separator be configurable per project/writing system, or derived from the source? + +2. **Split-segment baseline display.** A segment created by splitting a verse currently keeps the + **whole verse's** baseline text (token offsets unchanged; the invariant holds trivially). In the + baseline-text display mode this duplicates the verse text under each half. The alternative is to + trim each half's baseline to just its tokens' span (cleaner, but drops edge whitespace and + punctuation). Current choice: keep the full-verse baseline for simplicity and safety. + +3. **Free translation when merging.** A segment's free translation is keyed by segment id. An + untouched or merged segment keeps the **leading** verse's id (so its free translation survives); + the **absorbed** verse's free translation is retained in storage but hidden while merged, and + reappears if the segments are split back apart. Splitting keeps the first half's free translation + and starts later halves blank. Is "hide-and-restore" the desired behavior, or should merging + prompt the user to keep/discard the absorbed verse's translation? + +4. **Boundary edits and the unsaved indicator.** Merging/splitting/pulling a boundary marks the + draft dirty (lighting the tab `●`), exactly like a gloss edit. Confirm this is desired, or whether + boundary edits should be treated differently from analysis edits. + +5. 
**Boundary editing is a transient mode.** The **Edit segment boundaries** toggle is local UI + state (off on reload), not a persisted project setting, since it changes what the link slots do + rather than a display preference. Confirm this is the right treatment. From 705b4b349235fd6603dba31c8eac5c5aa46145a4 Mon Sep 17 00:00:00 2001 From: Alex Rawlings Date: Wed, 24 Jun 2026 08:43:43 -0600 Subject: [PATCH 2/4] "Prevent" verse zero from being included (currently broken, pushing so I can work on this elsewhere) --- .../components/PhraseStripParts.test.tsx | 22 ++ .../components/SegmentationStore.test.tsx | 1 + .../components/TokenLinkIcon.test.tsx | 262 ++++++++++++++++++ src/__tests__/test-helpers.ts | 1 + src/__tests__/utils/segmentation.test.ts | 46 +++ src/components/ContinuousView.tsx | 1 + src/components/Interlinearizer.tsx | 18 +- src/components/PhraseStripContext.tsx | 6 + src/components/PhraseStripParts.tsx | 12 +- src/components/SegmentView.tsx | 1 + src/components/SegmentationStore.tsx | 7 + src/components/TokenLinkIcon.tsx | 168 +++++++++-- src/hooks/usePhraseStripSetup.ts | 5 + src/utils/segmentation.ts | 33 ++- 14 files changed, 559 insertions(+), 24 deletions(-) diff --git a/src/__tests__/components/PhraseStripParts.test.tsx b/src/__tests__/components/PhraseStripParts.test.tsx index a71c6cee..ae4230ce 100644 --- a/src/__tests__/components/PhraseStripParts.test.tsx +++ b/src/__tests__/components/PhraseStripParts.test.tsx @@ -337,6 +337,7 @@ describe('PhraseSlot boundary controls', () => { split: jest.fn(), move: jest.fn(), }, + verseZeroSegmentIds: ReadonlySet = new Set(), ) { const value: SegmentationContextValue = { dispatch, @@ -346,6 +347,7 @@ describe('PhraseSlot boundary controls', () => { ['seg-1', 0], ['seg-2', 1], ]), + verseZeroSegmentIds, }; render( @@ -374,6 +376,26 @@ describe('PhraseSlot boundary controls', () => { expect(screen.queryByTestId('boundary-merge-btn')).not.toBeInTheDocument(); }); + it('still shows the merge control when the 
next segment is a verse-0 superscription', () => { + const dispatch = renderBoundary( + { prevSegmentId: 'seg-1', nextSegmentId: 'seg-2' }, + undefined, + new Set(['seg-2']), + ); + fireEvent.click(screen.getByTestId('boundary-merge-btn')); + expect(dispatch.merge).toHaveBeenCalledWith('seg2-start'); + }); + + it('renders no split control inside a verse-0 superscription segment', () => { + renderBoundary( + { prevSegmentId: 'seg-1', nextSegmentId: 'seg-1' }, + undefined, + new Set(['seg-1']), + ); + expect(screen.queryByTestId('boundary-split-btn')).not.toBeInTheDocument(); + expect(screen.queryByTestId('boundary-merge-btn')).not.toBeInTheDocument(); + }); + it('renders no control at a leading slot with no previous segment', () => { renderBoundary({ prevSegmentId: undefined, diff --git a/src/__tests__/components/SegmentationStore.test.tsx b/src/__tests__/components/SegmentationStore.test.tsx index 2052706d..8f9df7ce 100644 --- a/src/__tests__/components/SegmentationStore.test.tsx +++ b/src/__tests__/components/SegmentationStore.test.tsx @@ -40,6 +40,7 @@ describe('SegmentationStore', () => { boundaryEditMode: true, segmentById: new Map([['GEN 1:1', segment]]), segmentOrder: new Map([['GEN 1:1', 0]]), + verseZeroSegmentIds: new Set(), }; render( diff --git a/src/__tests__/components/TokenLinkIcon.test.tsx b/src/__tests__/components/TokenLinkIcon.test.tsx index a2edfcf8..c0616094 100644 --- a/src/__tests__/components/TokenLinkIcon.test.tsx +++ b/src/__tests__/components/TokenLinkIcon.test.tsx @@ -633,6 +633,7 @@ describe('TokenLinkIcon', () => { ['seg-A', 0], ['seg-B', 1], ]), + verseZeroSegmentIds: new Set(), }; const tokenSegmentMap = opts.mapToken === false ? 
new Map() : new Map([['tok-b', 'seg-B']]); @@ -712,5 +713,266 @@ describe('TokenLinkIcon', () => { expect(button).toBeDisabled(); expect(button).toHaveAttribute('title', 'nope'); }); + + // ------------------------------------------------------------------------- + // Reach past a verse-0 superscription (sweep verse 0 + pull the token beyond it) + // ------------------------------------------------------------------------- + + describe('reaching past a verse-0 superscription', () => { + /** + * Builds a segment from word-token refs. + * + * @param id - Segment id. + * @param verse - Verse number (0 marks a superscription). + * @param refs - Word token refs in order. + * @returns The segment. + */ + function makeSeg(id: string, verse: number, refs: string[]): Segment { + return { + id, + startRef: { book: 'GEN', chapter: 1, verse }, + endRef: { book: 'GEN', chapter: 1, verse }, + baselineText: refs.join(' '), + tokens: refs.map((r) => makeWordToken(r)), + }; + } + + /** Options for {@link renderSkip}. */ + type SkipOpts = { + /** Pull direction: `true` = focus before verse 0 (forward), `false` = after it (backward). */ + focusedSideIsPrev: boolean; + /** Word refs of the segment before verse 0. */ + aTokens: string[]; + /** Word refs of the verse-0 superscription. */ + v0Tokens: string[]; + /** Word refs of the segment after verse 0; omit to model verse 0 at the book edge. */ + bTokens?: string[]; + /** A phrase containing the resolved beyond-token, to exercise the neighbor-phrase path. */ + beyondPhrase?: ReturnType; + /** Focused token ref override (defaults to the focused segment's edge word). */ + focusedRef?: string; + /** Whether the focused token is itself inside the verse-0 segment. */ + focusInVerseZero?: boolean; + }; + + /** + * Renders a link icon at the slot bordering a verse-0 superscription, with three segments (A + * + * | V0 | B) wired into both providers. + * + * @param dispatch - The segmentation dispatch to capture calls on. 
+ * @param opts - Segment layout and focus configuration. + * @returns The render result. + */ + function renderSkip(dispatch: SegmentationDispatch, opts: SkipOpts) { + const aSeg = makeSeg('seg-A', 1, opts.aTokens); + const v0Seg = makeSeg('seg-V0', 0, opts.v0Tokens); + const bSeg = opts.bTokens ? makeSeg('seg-B', 2, opts.bTokens) : undefined; + const segmentById = new Map([ + ['seg-A', aSeg], + ['seg-V0', v0Seg], + ]); + const segmentOrder = new Map([ + ['seg-A', 0], + ['seg-V0', 1], + ]); + if (bSeg) { + segmentById.set('seg-B', bSeg); + segmentOrder.set('seg-B', 2); + } + const tokenSegmentMap = new Map(); + opts.aTokens.forEach((r) => tokenSegmentMap.set(r, 'seg-A')); + opts.v0Tokens.forEach((r) => tokenSegmentMap.set(r, 'seg-V0')); + opts.bTokens?.forEach((r) => tokenSegmentMap.set(r, 'seg-B')); + + // Forward: slot sits between A's last word and verse 0's first word, focus is in A. + // Backward: slot sits between verse 0's last word and B's first word, focus is in B. + const lastAToken = opts.aTokens[opts.aTokens.length - 1]; + const firstBToken = + /* v8 ignore next -- backward/forward tests always pass bTokens */ + opts.bTokens?.[0] ?? 'none'; + const prevToken = opts.focusedSideIsPrev + ? makeWordToken(lastAToken) + : makeWordToken(opts.v0Tokens[opts.v0Tokens.length - 1]); + const nextToken = opts.focusedSideIsPrev + ? makeWordToken(opts.v0Tokens[0]) + : makeWordToken(firstBToken); + const edgeFocusedRef = opts.focusedSideIsPrev ? lastAToken : firstBToken; + const defaultFocusedRef = opts.focusInVerseZero ? opts.v0Tokens[0] : edgeFocusedRef; + + const segmentation: SegmentationContextValue = { + dispatch, + boundaryEditMode: false, + segmentById, + segmentOrder, + verseZeroSegmentIds: new Set(['seg-V0']), + }; + const allRefs = [...opts.aTokens, ...opts.v0Tokens, ...(opts.bTokens ?? 
[])]; + const tokenDocOrder = new Map(allRefs.map((r, i) => [r, i])); + const phraseLinkByRef = new Map>(); + if (opts.beyondPhrase) { + opts.beyondPhrase.tokens.forEach((t) => { + if (opts.beyondPhrase) phraseLinkByRef.set(t.tokenRef, opts.beyondPhrase); + }); + } + return render( + + + + + , + ); + } + + it('sweeps verse 0 and pulls the first word beyond it when focus is the previous segment', async () => { + const dispatch = makeDispatch(); + renderSkip(dispatch, { + focusedSideIsPrev: true, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1', 'z2'], + bTokens: ['b1', 'b2'], + }); + await userEvent.click(screen.getByTestId('token-link-btn')); + // Merge verse 0 into A, then pull b1 (leaving b2 as B's new start). + expect(dispatch.merge).toHaveBeenCalledWith('z1'); + expect(dispatch.move).toHaveBeenCalledWith('b1', 'b2'); + // The phrase links the focused A token with the token beyond verse 0, not a verse-0 token. + expect(mockCreatePhrase).toHaveBeenCalledWith([ + { tokenRef: 'a2', surfaceText: 'a2' }, + { tokenRef: 'b1', surfaceText: 'b1' }, + ]); + }); + + it('merges the whole segment beyond verse 0 when it has only the pulled token', async () => { + const dispatch = makeDispatch(); + renderSkip(dispatch, { + focusedSideIsPrev: true, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1'], + bTokens: ['b1'], + }); + await userEvent.click(screen.getByTestId('token-link-btn')); + expect(dispatch.merge).toHaveBeenCalledWith('z1'); + expect(dispatch.merge).toHaveBeenCalledWith('b1'); + }); + + it('sweeps verse 0 and pulls the last word before it when focus is the next segment', async () => { + const dispatch = makeDispatch(); + renderSkip(dispatch, { + focusedSideIsPrev: false, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1', 'z2'], + bTokens: ['b1', 'b2'], + }); + await userEvent.click(screen.getByTestId('token-link-btn')); + // Merge verse 0 into A, then move B's start back to A's last word a2. 
+ expect(dispatch.merge).toHaveBeenCalledWith('z1'); + expect(dispatch.move).toHaveBeenCalledWith('b1', 'a2'); + }); + + it('absorbs the focused token into the beyond-token phrase when one exists', async () => { + const dispatch = makeDispatch(); + renderSkip(dispatch, { + focusedSideIsPrev: true, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1', 'z2'], + bTokens: ['b1', 'b2'], + beyondPhrase: makePhraseLink('p-beyond', ['b1']), + }); + await userEvent.click(screen.getByTestId('token-link-btn')); + expect(dispatch.merge).toHaveBeenCalledWith('z1'); + // Focused free token a2 is absorbed into the phrase that already contains b1. + expect(mockUpdatePhrase).toHaveBeenCalledWith( + 'p-beyond', + expect.arrayContaining([{ tokenRef: 'a2', surfaceText: 'a2' }]), + ); + }); + + it('disables the link when verse 0 is at the book edge with nothing beyond it', () => { + renderSkip(makeDispatch(), { + focusedSideIsPrev: true, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1', 'z2'], + // No bTokens: verse 0 ends the book, so there is no segment to reach. 
+ }); + expect(screen.getByTestId('token-link-btn')).toBeDisabled(); + }); + + it('disables the link when the focused token is itself inside verse 0', () => { + renderSkip(makeDispatch(), { + focusedSideIsPrev: true, + aTokens: ['a1', 'a2'], + v0Tokens: ['z1', 'z2'], + bTokens: ['b1', 'b2'], + focusInVerseZero: true, + }); + expect(screen.getByTestId('token-link-btn')).toBeDisabled(); + }); + + it('highlights the token beyond verse 0 on hover', async () => { + const onHoverCandidateTokens = jest.fn(); + const dispatch = makeDispatch(); + const aSeg = makeSeg('seg-A', 1, ['a1', 'a2']); + const v0Seg = makeSeg('seg-V0', 0, ['z1', 'z2']); + const bSeg = makeSeg('seg-B', 2, ['b1', 'b2']); + const tokenSegmentMap = new Map([ + ['a1', 'seg-A'], + ['a2', 'seg-A'], + ['z1', 'seg-V0'], + ['z2', 'seg-V0'], + ['b1', 'seg-B'], + ['b2', 'seg-B'], + ]); + render( + + + + + , + ); + await userEvent.hover(screen.getByTestId('token-link-btn')); + expect(onHoverCandidateTokens).toHaveBeenCalledWith(['a2', 'b1']); + }); + }); }); }); diff --git a/src/__tests__/test-helpers.ts b/src/__tests__/test-helpers.ts index f65e0e41..84ecb159 100644 --- a/src/__tests__/test-helpers.ts +++ b/src/__tests__/test-helpers.ts @@ -78,6 +78,7 @@ export function makePhraseStripContext( editPhraseSegmentId: undefined, tokenSegmentMap: new Map(), tokenDocOrder: new Map(), + phraseLinkByRef: new Map(), onHoverPhrase: () => {}, onHoverCandidateTokens: () => {}, onHoverSplitFreeTokens: () => {}, diff --git a/src/__tests__/utils/segmentation.test.ts b/src/__tests__/utils/segmentation.test.ts index 447bc968..b848fd38 100644 --- a/src/__tests__/utils/segmentation.test.ts +++ b/src/__tests__/utils/segmentation.test.ts @@ -38,6 +38,22 @@ const V3_START = 'GEN 1:3:0'; // Second word of verse 1 ("beta" at charStart 6). const V1_BETA = 'GEN 1:1:6'; +/** + * A fixture with a mid-book verse-0 superscription: GEN 1:1, then GEN 2:0 (the superscription), + * then GEN 2:1. 
Verse 0 is deliberately not the book's first segment so its start-lock is distinct + * from the always-present book-first lock. + */ +const MID_VERSE_ZERO = makeBook([ + { sid: 'GEN 1:1', text: 'Alpha beta.' }, + { sid: 'GEN 2:0', text: 'Sup tee.' }, + { sid: 'GEN 2:1', text: 'Gamma.' }, +]); +// Verse-0 start, an interior verse-0 word ("tee" at charStart 4), and the start of the verse right +// after verse 0 — the three refs frozen by the verse-0 locks. +const VZ0_START = 'GEN 2:0:0'; +const VZ0_INTERIOR = 'GEN 2:0:4'; +const VZ_NEXT_START = 'GEN 2:1:0'; + describe('defaultVerseStarts', () => { it('returns the first-token ref of every verse', () => { expect(defaultVerseStarts(THREE_VERSES)).toEqual(new Set([V1_START, V2_START, V3_START])); @@ -130,6 +146,13 @@ describe('addBoundaryBefore', () => { const once = addBoundaryBefore(THREE_VERSES, undefined, V1_BETA); expect(addBoundaryBefore(THREE_VERSES, once, V1_BETA)).toEqual(once); }); + + it('is a no-op when splitting inside a verse-0 superscription (its tokens stay together)', () => { + expect(addBoundaryBefore(MID_VERSE_ZERO, undefined, VZ0_INTERIOR)).toEqual({ + removedVerseStarts: [], + addedStarts: [], + }); + }); }); describe('removeBoundaryAt', () => { @@ -154,6 +177,20 @@ describe('removeBoundaryAt', () => { addedStarts: [], }); }); + + it('merges a verse-0 superscription wholesale into the previous segment', () => { + expect(removeBoundaryAt(MID_VERSE_ZERO, undefined, VZ0_START)).toEqual({ + removedVerseStarts: [VZ0_START], + addedStarts: [], + }); + }); + + it('merges the verse after a superscription, sweeping verse 0 along with it', () => { + expect(removeBoundaryAt(MID_VERSE_ZERO, undefined, VZ_NEXT_START)).toEqual({ + removedVerseStarts: [VZ_NEXT_START], + addedStarts: [], + }); + }); }); describe('moveBoundary', () => { @@ -163,6 +200,15 @@ describe('moveBoundary', () => { addedStarts: [V1_BETA], }); }); + + it('moves a verse-0 boundary but never splits the superscription (add half is a no-op)', () 
=> { + // Moving the after-superscription boundary back onto an interior verse-0 token can't split + // verse 0, so it degrades to merging the following verse in. + expect(moveBoundary(MID_VERSE_ZERO, undefined, VZ_NEXT_START, VZ0_INTERIOR)).toEqual({ + removedVerseStarts: [VZ_NEXT_START], + addedStarts: [], + }); + }); }); describe('mergeSegments / splitSegmentBefore aliases', () => { diff --git a/src/components/ContinuousView.tsx b/src/components/ContinuousView.tsx index 2374d42b..d6ce6d91 100644 --- a/src/components/ContinuousView.tsx +++ b/src/components/ContinuousView.tsx @@ -649,6 +649,7 @@ export default function ContinuousView({ editPhraseSegmentId, tokenSegmentMap, tokenDocOrder, + phraseLinkByRef: committedPhraseLinkByRef, onHoverPhrase: setHoveredPhraseId, onHoverCandidateTokens: setCandidateTokenRefs, onHoverSplitFreeTokens: handleHoverSplitFreeTokens, diff --git a/src/components/Interlinearizer.tsx b/src/components/Interlinearizer.tsx index 68159409..08c29474 100644 --- a/src/components/Interlinearizer.tsx +++ b/src/components/Interlinearizer.tsx @@ -146,6 +146,15 @@ function InterlinearizerInner({ return order; }, [book.segments]); + /** Ids of verse-0 segments (superscriptions), whose boundaries are frozen against edits. */ + const verseZeroSegmentIds = useMemo(() => { + const ids = new Set(); + book.segments.forEach((seg) => { + if (seg.startRef.verse === 0) ids.add(seg.id); + }); + return ids; + }, [book.segments]); + /** Segmentation context shared by the views — the dispatch plus the lookups its call sites need. 
*/ const segmentationValue = useMemo( () => ({ @@ -153,8 +162,15 @@ function InterlinearizerInner({ boundaryEditMode: viewOptions.boundaryEditMode, segmentById, segmentOrder, + verseZeroSegmentIds, }), - [segmentationDispatch, viewOptions.boundaryEditMode, segmentById, segmentOrder], + [ + segmentationDispatch, + viewOptions.boundaryEditMode, + segmentById, + segmentOrder, + verseZeroSegmentIds, + ], ); /** PhraseId currently hovered anywhere in the interlinearizer; shared across all SegmentViews. */ diff --git a/src/components/PhraseStripContext.tsx b/src/components/PhraseStripContext.tsx index e0a7fb05..7e9cc51a 100644 --- a/src/components/PhraseStripContext.tsx +++ b/src/components/PhraseStripContext.tsx @@ -33,6 +33,12 @@ export type PhraseStripContextValue = Readonly<{ tokenSegmentMap: ReadonlyMap; /** Token ref → flat document index; used to keep merged phrase token lists in document order. */ tokenDocOrder: ReadonlyMap; + /** + * Token ref → the committed phrase link containing it, for the whole strip. Used by the + * cross-segment link icon to resolve the phrase of a token it must reach across a verse-0 + * superscription (which sits between the icon's slot and its real link target). + */ + phraseLinkByRef: ReadonlyMap; /** * Called with a phraseId (or `undefined`) when a phrase or a link/unlink candidate is hovered, so * the parent can highlight the relevant phrase box and arcs. Merges what used to be two separate diff --git a/src/components/PhraseStripParts.tsx b/src/components/PhraseStripParts.tsx index 4de3ec2c..29e4fe25 100644 --- a/src/components/PhraseStripParts.tsx +++ b/src/components/PhraseStripParts.tsx @@ -31,16 +31,19 @@ type BoundaryControlProps = Readonly<{ * Renders the boundary-edit control for one slot. A slot straddling two different segments shows a * merge control (combine the next segment into the previous one); a slot inside one segment shows a * split control (start a new segment at the next token). 
Leading/trailing slots (one side missing) - * render nothing. + * render nothing. A merge that involves a verse-0 segment (a superscription) is allowed — verse 0 + * merges as an intact unit — but a split _inside_ a verse-0 segment is suppressed, since its tokens + * must always stay together. * * @param props - Component props. * @param props.prevSegmentId - Segment id before the slot. * @param props.nextSegmentId - Segment id after the slot. * @param props.nextTokenRef - First word token after the slot (split anchor). - * @returns A merge or split button, or `undefined` when the slot is at a book edge. + * @returns A merge or split button, or `undefined` when the slot is at a book edge or would split a + * verse-0 segment. */ function BoundaryControl({ prevSegmentId, nextSegmentId, nextTokenRef }: BoundaryControlProps) { - const { dispatch, segmentById } = useSegmentation(); + const { dispatch, segmentById, verseZeroSegmentIds } = useSegmentation(); const [localizedStrings] = useLocalizedStrings(BOUNDARY_STRING_KEYS); if (prevSegmentId === undefined || nextSegmentId === undefined || nextTokenRef === undefined) { return undefined; @@ -63,6 +66,9 @@ function BoundaryControl({ prevSegmentId, nextSegmentId, nextTokenRef }: Boundar ); } + // An intra-segment slot inside a verse-0 superscription can't be split — its tokens must stay + // together — so render no split control there. + if (verseZeroSegmentIds.has(prevSegmentId)) return undefined; const splitLabel = localizedStrings['%interlinearizer_boundaryControl_split%']; return (