+
);
}
diff --git a/src/components/InterlinearizerLoader.tsx b/src/components/InterlinearizerLoader.tsx
index 8db29fd0..230ee0ce 100644
--- a/src/components/InterlinearizerLoader.tsx
+++ b/src/components/InterlinearizerLoader.tsx
@@ -9,9 +9,17 @@ import { TabToolbar } from 'platform-bible-react';
import type { SelectMenuItemHandler } from 'platform-bible-react';
import { isPlatformError } from 'platform-bible-utils';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
+import { resegmentBook } from 'parsers/papi/resegmentBook';
import useDraftProject from '../hooks/useDraftProject';
import useInterlinearizerBookData from '../hooks/useInterlinearizerBookData';
import useOptimisticBooleanSetting from '../hooks/useOptimisticBooleanSetting';
+import {
+ isDefaultSegmentation,
+ mergeSegments,
+ moveBoundary,
+ splitSegmentBefore,
+} from '../utils/segmentation';
+import type { SegmentationDispatch } from './SegmentationStore';
import type { InterlinearProjectSummary } from '../types/interlinear-project-summary';
import Interlinearizer from './Interlinearizer';
import ViewOptionsDropdown from './controls/ViewOptionsDropdown';
@@ -143,6 +151,7 @@ function InterlinearizerLoaderInner({
draftVersion,
dirty,
autosaveAnalysis,
+ autosaveSegmentation,
loadFromProject,
newDraft,
getDraftSnapshot,
@@ -214,6 +223,10 @@ function InterlinearizerLoaderInner({
// the reference identical across the loader's frequent re-renders (driven by `useData`,
// `useSetting`, etc.), so the `memo()` wrapping `SegmentView` can shallow-compare it away instead
// of re-rendering every windowed segment when no toggle actually changed.
+ // Editing segment boundaries is a transient mode rather than a saved preference, so it lives in
+ // local state (not a persisted project setting) and resets to off whenever the WebView reloads.
+ const [boundaryEditMode, setBoundaryEditMode] = useState(false);
+
const viewOptions = useMemo(
() => ({
hideInactiveLinkButtons,
@@ -221,6 +234,7 @@ function InterlinearizerLoaderInner({
chapterLabelInVerse,
showMorphology,
showFreeTranslation,
+ boundaryEditMode,
}),
[
hideInactiveLinkButtons,
@@ -228,14 +242,63 @@ function InterlinearizerLoaderInner({
chapterLabelInVerse,
showMorphology,
showFreeTranslation,
+ boundaryEditMode,
],
);
- const { book, isLoading, bookError, tokenizeError } = useInterlinearizerBookData({
+ const {
+ book: verseBook,
+ isLoading,
+ bookError,
+ tokenizeError,
+ } = useInterlinearizerBookData({
projectId,
scrRef,
});
+ /** The user's custom segment boundaries from the draft, or `undefined` for verse segmentation. */
+ const segmentation = draft?.segmentation;
+
+ /**
+ * The book the views render: the verse-tokenized book re-grouped into the user's custom segments.
+ * Identical (by reference) to `verseBook` when no custom boundaries are set, so the common case
+ * incurs no extra work. `verseBook` is retained separately because the segmentation operations
+ * need the default verse boundaries it carries.
+ */
+ const book = useMemo(
+ () => (verseBook ? resegmentBook(verseBook, segmentation) : undefined),
+ [verseBook, segmentation],
+ );
+
+ /**
+ * Boundary-editing operations passed down to the views. Each reads the draft's latest boundary
+ * delta synchronously (so rapid edits compose correctly), applies the relevant pure transform
+ * against the original verse book, and auto-saves the normalized result — clearing the field back
+ * to `undefined` when the edit restores the default verse segmentation.
+ */
+ const segmentationDispatch = useMemo<SegmentationDispatch>(() => {
+ const apply = (next: ReturnType<typeof mergeSegments>) => {
+ autosaveSegmentation(isDefaultSegmentation(next) ? undefined : next);
+ };
+ return {
+ merge: (secondSegmentStartRef) => {
+ /* v8 ignore next -- boundary controls only render once the book has loaded */
+ if (!verseBook) return;
+ apply(mergeSegments(verseBook, getDraftSnapshot()?.segmentation, secondSegmentStartRef));
+ },
+ split: (tokenRef) => {
+ /* v8 ignore next -- boundary controls only render once the book has loaded */
+ if (!verseBook) return;
+ apply(splitSegmentBefore(verseBook, getDraftSnapshot()?.segmentation, tokenRef));
+ },
+ move: (fromRef, toRef) => {
+ /* v8 ignore next -- the cross-segment link only renders once the book has loaded */
+ if (!verseBook) return;
+ apply(moveBoundary(verseBook, getDraftSnapshot()?.segmentation, fromRef, toRef));
+ },
+ };
+ }, [verseBook, getDraftSnapshot, autosaveSegmentation]);
+
// The active reference handed to the interlinearizer. The host emits `verseNum: 0` both for a
// chapter's verse-0 superscription (which has its own segment) and for a plain whole-chapter
// selection (which does not). Keep verse 0 when the loaded book actually has a verse-0 segment for
@@ -313,6 +376,10 @@ function InterlinearizerLoaderInner({
'interlinearizer.saveAnalysis',
activeProject.id,
JSON.stringify(snapshot.analysis),
+ // Send the draft's boundary state on every Save; `null` clears any stored boundaries so a
+ // reverted segmentation propagates to the project rather than leaving it stale.
+ // eslint-disable-next-line no-null/no-null -- "null" is the JSON sentinel that clears boundaries
+ JSON.stringify(snapshot.segmentation ?? null),
);
markSynced(snapshot.analysis);
} catch (e) {
@@ -434,6 +501,8 @@ function InterlinearizerLoaderInner({
onShowMorphologyChange={handleShowMorphologyChange}
showFreeTranslation={showFreeTranslation}
onShowFreeTranslationChange={handleShowFreeTranslationChange}
+ boundaryEditMode={boundaryEditMode}
+ onBoundaryEditModeChange={setBoundaryEditMode}
/>
) : undefined
}
@@ -493,6 +562,7 @@ function InterlinearizerLoaderInner({
phraseMode={phraseMode}
setPhraseMode={setPhraseMode}
viewOptions={viewOptions}
+ segmentationDispatch={segmentationDispatch}
/>
)}
diff --git a/src/components/PhraseStripParts.tsx b/src/components/PhraseStripParts.tsx
index e1efc91a..1c44ce37 100644
--- a/src/components/PhraseStripParts.tsx
+++ b/src/components/PhraseStripParts.tsx
@@ -1,13 +1,97 @@
/** @file Shared render parts for the two phrase strips (SegmentView and ContinuousView). */
+import { useLocalizedStrings } from '@papi/frontend/react';
+import { Combine, Scissors } from 'lucide-react';
import { memo } from 'react';
import MemoizedPhraseBox from './PhraseBox';
import type { PhraseMode } from '../types/phrase-mode';
import { usePhraseStripContext } from './PhraseStripContext';
+import { useSegmentation } from './SegmentationStore';
import { InertTokenChip } from './TokenChip';
import MemoizedTokenLinkIcon from './TokenLinkIcon';
import type { FocusContext, LinkSlot, TokenGroup } from '../types/token-layout';
import { resolveSlotFocus } from '../utils/token-layout';
+/** Localized labels for the merge/split boundary controls; hoisted so the array reference is stable. */
+const BOUNDARY_STRING_KEYS = [
+ '%interlinearizer_boundaryControl_merge%',
+ '%interlinearizer_boundaryControl_split%',
+] as const satisfies `%${string}%`[];
+
+/** Props for {@link BoundaryControl}. */
+type BoundaryControlProps = Readonly<{
+ /** Segment id of the group before the slot, or `undefined` for the leading slot. */
+ prevSegmentId: string | undefined;
+ /** Segment id of the group after the slot, or `undefined` for the trailing slot. */
+ nextSegmentId: string | undefined;
+ /** Ref of the first word token after the slot (the split anchor), or `undefined` if none. */
+ nextTokenRef: string | undefined;
+}>;
+
+/**
+ * Renders the boundary-edit control for one slot. A slot straddling two different segments shows a
+ * merge control (combine the next segment into the previous one); a slot inside one segment shows a
+ * split control (start a new segment at the next token). Leading/trailing slots (one side missing)
+ * render nothing.
+ *
+ * A verse-0 segment (a chapter superscription) is a hard wall: its tokens must stay together and it
+ * must never join a neighbor. So no control renders at a boundary touching one — neither the merge
+ * control at a slot where verse 0 is on either side, nor the split control inside it.
+ *
+ * @param props - Component props.
+ * @param props.prevSegmentId - Segment id before the slot.
+ * @param props.nextSegmentId - Segment id after the slot.
+ * @param props.nextTokenRef - First word token after the slot (split anchor).
+ * @returns A merge or split button, or `undefined` when the slot is at a book edge or borders a
+ * verse-0 superscription.
+ */
+function BoundaryControl({ prevSegmentId, nextSegmentId, nextTokenRef }: BoundaryControlProps) {
+ const { dispatch, segmentById, verseZeroSegmentIds } = useSegmentation();
+ const [localizedStrings] = useLocalizedStrings(BOUNDARY_STRING_KEYS);
+ if (prevSegmentId === undefined || nextSegmentId === undefined || nextTokenRef === undefined) {
+ return undefined;
+ }
+ if (prevSegmentId !== nextSegmentId) {
+ // A merge that would pull a verse-0 superscription into a neighbor, or pull a neighbor into one,
+ // is forbidden: render no merge control at a boundary where either side is verse 0.
+ if (verseZeroSegmentIds.has(prevSegmentId) || verseZeroSegmentIds.has(nextSegmentId)) {
+ return undefined;
+ }
+ const mergeLabel = localizedStrings['%interlinearizer_boundaryControl_merge%'];
+ const secondStart = segmentById.get(nextSegmentId)?.tokens[0]?.ref;
+ return (
+
+ );
+ }
+ // An intra-segment slot inside a verse-0 superscription can't be split — its tokens must stay
+ // together — so render no split control there.
+ if (verseZeroSegmentIds.has(prevSegmentId)) return undefined;
+ const splitLabel = localizedStrings['%interlinearizer_boundaryControl_split%'];
+ return (
+
+ );
+}
+
/**
* Duration, in milliseconds, of the link-slot opacity fade transition. Exported so `ContinuousView`
* can re-center the focused phrase for exactly this long after `committedActiveSegmentId` flips,
@@ -61,6 +145,7 @@ export function PhraseSlot({
hoveredPhraseId,
}: PhraseSlotProps) {
const { hideInactiveLinkButtons, activeSegmentId, skipLinkTransition } = usePhraseStripContext();
+ const { boundaryEditMode, segmentOrder } = useSegmentation();
const { prevGroup, nextGroup, punctuation } = slot;
if (!prevGroup && !nextGroup && punctuation.length === 0) return undefined;
const prevToken = prevGroup?.tokens[prevGroup.tokens.length - 1];
@@ -71,7 +156,13 @@ export function PhraseSlot({
prevPhraseId !== undefined &&
prevPhraseId === nextPhraseId &&
(prevPhraseId === hoveredPhraseId || prevPhraseId === focus.focusedPhraseId);
- const slotFocus = resolveSlotFocus(prevSegmentId, nextSegmentId, focus, focusedSideIsPrev);
+ const slotFocus = resolveSlotFocus(
+ prevSegmentId,
+ nextSegmentId,
+ focus,
+ focusedSideIsPrev,
+ segmentOrder,
+ );
// The slot is "in the active segment" only when both neighboring phrases belong to it. A link
// that crosses a verse boundary (one side in the active verse, the other in an adjacent verse) is
// therefore treated as inactive and hidden too. When hideInactiveLinkButtons is on, link buttons
@@ -89,29 +180,38 @@ export function PhraseSlot({
data-link-slot="true"
style={{ overflowAnchor: 'none' }}
>
- {hasLinkableNeighbors && (
-
-
-
- )}
+ {hasLinkableNeighbors &&
+ (boundaryEditMode ? (
+
+
+
+ ) : (
+
+
+
+ ))}
{punctuation.length > 0 && (
{punctuation.map((punctToken) => (
diff --git a/src/components/SegmentationStore.tsx b/src/components/SegmentationStore.tsx
new file mode 100644
index 00000000..9588ae85
--- /dev/null
+++ b/src/components/SegmentationStore.tsx
@@ -0,0 +1,108 @@
+/**
+ * @file Render-scoped context exposing segment-boundary editing to the deep leaves that trigger it
+ * (the cross-segment link icon and the merge/split boundary controls).
+ *
+ * The {@link SegmentationDispatch} reads the draft's latest boundary delta at call time and closes
+ * over the original verse-tokenized book, applying the pure transforms in `utils/segmentation.ts`
+ * and auto-saving the result. Boundary edits flow draft → re-segmentation → new `book.segments`,
+ * so consumers only need to call a dispatch method; they never see the delta itself.
+ */
+import type { Segment } from 'interlinearizer';
+import { createContext, useContext } from 'react';
+import type { ReactNode } from 'react';
+
+/** The boundary-editing operations available to leaf controls. Each one auto-saves the result. */
+export type SegmentationDispatch = Readonly<{
+ /**
+ * Merges the segment that begins at `secondSegmentStartRef` into the segment before it.
+ *
+ * @param secondSegmentStartRef - First-token ref of the segment to merge into its predecessor.
+ */
+ merge: (secondSegmentStartRef: string) => void;
+ /**
+ * Splits a segment so a new one begins at `tokenRef`.
+ *
+ * @param tokenRef - The token ref the new segment should begin at.
+ */
+ split: (tokenRef: string) => void;
+ /**
+ * Moves a boundary from `fromRef` to `toRef` — used to pull a single edge token across a segment
+ * boundary when a cross-segment phrase link is made.
+ *
+ * @param fromRef - The current segment-start ref to remove.
+ * @param toRef - The new segment-start ref to add.
+ */
+ move: (fromRef: string, toRef: string) => void;
+}>;
+
+/** The strip-wide segmentation context: the dispatch plus the lookups its call sites need. */
+export type SegmentationContextValue = Readonly<{
+ /** Boundary-editing operations. */
+ dispatch: SegmentationDispatch;
+ /**
+ * When `true`, the link slots render merge/split boundary controls instead of phrase link icons.
+ * Toggled from the view-options menu.
+ */
+ boundaryEditMode: boolean;
+ /** Segment id → segment, used to resolve a segment's first-token start ref. */
+ segmentById: ReadonlyMap<string, Segment>;
+ /** Segment id → its index in document order, used to test segment adjacency. */
+ segmentOrder: ReadonlyMap<string, number>;
+ /**
+ * Ids of the verse-0 segments (chapter superscriptions). Their boundaries are frozen: the
+ * merge/split controls and the cross-segment link pull are suppressed at any boundary touching
+ * one, so no token is ever moved into or out of a superscription.
+ */
+ verseZeroSegmentIds: ReadonlySet<string>;
+}>;
+
+/** No-op dispatch used as the default outside a provider (e.g. in isolated component tests). */
+export const NO_OP_SEGMENTATION_DISPATCH: SegmentationDispatch = {
+ merge: () => {},
+ split: () => {},
+ move: () => {},
+};
+
+/**
+ * Default context for components rendered without a {@link SegmentationProvider}: boundary editing
+ * is off and the dispatch is inert. Lets `SegmentView` / `ContinuousView` / `TokenLinkIcon` be
+ * unit-tested in isolation without wiring a provider, while the real app always supplies one.
+ */
+const DEFAULT_VALUE: SegmentationContextValue = {
+ dispatch: NO_OP_SEGMENTATION_DISPATCH,
+ boundaryEditMode: false,
+ segmentById: new Map(),
+ segmentOrder: new Map(),
+ verseZeroSegmentIds: new Set(),
+};
+
+const SegmentationContext = createContext<SegmentationContextValue | undefined>(undefined);
+
+/** Props for {@link SegmentationProvider}. */
+type SegmentationProviderProps = Readonly<{
+ /** The segmentation context value; callers should memoize it to preserve leaf memoization. */
+ value: SegmentationContextValue;
+ /** The subtree that can edit segment boundaries. */
+ children: ReactNode;
+}>;
+
+/**
+ * Provides the {@link SegmentationContextValue} to the interlinear views beneath it.
+ *
+ * @param props - Component props.
+ * @param props.value - The segmentation context value.
+ * @param props.children - The subtree.
+ * @returns The children wrapped in the context provider.
+ */
+export function SegmentationProvider({ value, children }: SegmentationProviderProps) {
+ return <SegmentationContext.Provider value={value}>{children}</SegmentationContext.Provider>;
+}
+
+/**
+ * Reads the segmentation context, falling back to an inert default when no provider is present.
+ *
+ * @returns The current {@link SegmentationContextValue}, or an inert default outside a provider.
+ */
+export function useSegmentation(): SegmentationContextValue {
+ return useContext(SegmentationContext) ?? DEFAULT_VALUE;
+}
diff --git a/src/components/TokenLinkIcon.tsx b/src/components/TokenLinkIcon.tsx
index dd445516..c7e99e97 100644
--- a/src/components/TokenLinkIcon.tsx
+++ b/src/components/TokenLinkIcon.tsx
@@ -4,6 +4,7 @@ import { Link2, Link2Off } from 'lucide-react';
import { memo, useCallback } from 'react';
import { usePhraseDispatch } from './AnalysisStore';
import { usePhraseStripContext } from './PhraseStripContext';
+import { useSegmentation } from './SegmentationStore';
import type { SlotFocusInfo } from '../types/token-layout';
import { computeSplitFreeRefs, sortByDocOrder, splitPhraseAtBoundary } from '../utils/phrase-arc';
@@ -65,17 +66,24 @@ export function TokenLinkIcon({
slotFocus,
isPhraseRevealed,
}: TokenLinkIconProps) {
- const { focusedSideIsPrev, focusedPhraseLink, focusedFreeToken, isSameSegmentAsFocus } =
- slotFocus;
+ const {
+ focusedSideIsPrev,
+ focusedPhraseLink,
+ focusedFreeToken,
+ isSameSegmentAsFocus,
+ isAdjacentEdgeOfFocus,
+ } = slotFocus;
const {
phraseMode,
tokenDocOrder,
+ tokenSegmentMap,
onHoverPhrase: onHoverCandidatePhrase,
onHoverCandidateTokens,
onHoverSplitFreeTokens,
crossSegmentLinkTooltip,
} = usePhraseStripContext();
const { createPhrase, updatePhrase, deletePhrase, mergePhrases } = usePhraseDispatch();
+ const { dispatch: segmentationDispatch, segmentById, verseZeroSegmentIds } = useSegmentation();
const inSamePhrase =
prevPhraseLink !== undefined &&
@@ -106,6 +114,37 @@ export function TokenLinkIcon({
tokenDocOrder,
]);
+ /**
+ * Moves the segment boundary at this slot so the pulled edge token joins the focused token's
+ * segment, when this is a cross-segment adjacent-edge link. The pulled token is the neighbor on
+ * the far side of the slot; moving the boundary by one token keeps both segments contiguous.
+ *
+ * `focusedSideIsPrev = true`: focus is the previous (left) segment; `nextToken` is the adjacent
+ * segment's first word, so the boundary moves forward to the token after it. `false`: focus is
+ * the next (right) segment; `prevToken` is the previous segment's last word, so the boundary
+ * moves back to start at it.
+ */
+ const performBoundaryPull = useCallback(() => {
+ /* v8 ignore next -- only invoked from handleLinkClick after the same defined-token guards */
+ if (!prevToken || !nextToken) return;
+ const adjacentSegmentId = tokenSegmentMap.get(nextToken.ref);
+ const adjacentSegment =
+ adjacentSegmentId === undefined ? undefined : segmentById.get(adjacentSegmentId);
+ if (!adjacentSegment) return;
+ const currentStart = adjacentSegment.tokens[0]?.ref;
+ /* v8 ignore next -- a rendered segment always has at least one token */
+ if (currentStart === undefined) return;
+ if (focusedSideIsPrev) {
+ const index = adjacentSegment.tokens.findIndex((t) => t.ref === nextToken.ref);
+ const newStart = adjacentSegment.tokens[index + 1]?.ref;
+ // The adjacent segment had only the pulled token, so it merges wholly into the focused one.
+ if (newStart === undefined) segmentationDispatch.merge(currentStart);
+ else segmentationDispatch.move(currentStart, newStart);
+ } else {
+ segmentationDispatch.move(currentStart, prevToken.ref);
+ }
+ }, [prevToken, nextToken, focusedSideIsPrev, tokenSegmentMap, segmentById, segmentationDispatch]);
+
/**
* Joins the neighbor on the far side of this slot into the focused phrase (or free token).
*
@@ -129,6 +168,10 @@ export function TokenLinkIcon({
/* v8 ignore next -- button only renders when both tokens exist and focus is defined */
if (!prevToken || !nextToken || focusedSideIsPrev === undefined) return;
+ // For a cross-segment edge link, first move the boundary so the pulled token joins the focused
+ // segment; the phrase mutation below then proceeds as for a within-segment link.
+ if (isAdjacentEdgeOfFocus) performBoundaryPull();
+
// The neighbor is the token/phrase on the opposite side of this slot from focus.
const neighborLink = focusedSideIsPrev ? nextPhraseLink : prevPhraseLink;
const neighborToken = focusedSideIsPrev ? nextToken : prevToken;
@@ -196,6 +239,8 @@ export function TokenLinkIcon({
focusedSideIsPrev,
focusedPhraseLink,
focusedFreeToken,
+ isAdjacentEdgeOfFocus,
+ performBoundaryPull,
tokenDocOrder,
createPhrase,
updatePhrase,
@@ -262,14 +307,41 @@ export function TokenLinkIcon({
);
}
- // Link icon: active in view mode when focus is set and both neighbors are in the same segment.
+ // A verse-0 superscription is a hard wall: it is never pulled into a neighbor and never absorbs a
+ // foreign token, so no cross-segment link may touch one. That holds whether focus is itself inside
+ // the superscription or the superscription is the segment adjacent across this slot.
+ const focusedRef = focusedFreeToken?.ref ?? focusedPhraseLink?.tokens[0]?.tokenRef;
+ const focusedSegmentId = focusedRef === undefined ? undefined : tokenSegmentMap.get(focusedRef);
+ const focusedIsVerseZero =
+ focusedSegmentId !== undefined && verseZeroSegmentIds.has(focusedSegmentId);
+ // Whether the adjacent (non-focused) segment is a verse-0 superscription.
+ const adjacentSegmentId =
+ focusedSideIsPrev === undefined
+ ? undefined
+ : tokenSegmentMap.get((focusedSideIsPrev ? nextToken : prevToken).ref);
+ const adjacentIsVerseZero =
+ adjacentSegmentId !== undefined && verseZeroSegmentIds.has(adjacentSegmentId);
+ // A cross-segment (adjacent-edge) pull is valid only when neither side of the boundary is a
+ // verse-0 superscription.
+ const adjacentEdgeValid = isAdjacentEdgeOfFocus && !focusedIsVerseZero && !adjacentIsVerseZero;
+
+ // Link icon: active in view mode when focus is set and either both neighbors are in the focused
+ // segment (a within-segment link) or this slot is a valid adjacent edge of the focused segment (a
+ // cross-segment link that pulls the edge token across and moves the boundary — never across a
+ // verse-0 superscription, which is a hard wall).
const isActive =
- phraseMode.kind === 'view' && focusedSideIsPrev !== undefined && isSameSegmentAsFocus;
+ phraseMode.kind === 'view' &&
+ focusedSideIsPrev !== undefined &&
+ (isSameSegmentAsFocus || adjacentEdgeValid);
const linkDisabled = isUnlinkMode || isEditMode || !isActive;
- // Show a tooltip only when inactive because the slot is outside the focused segment (not when
- // disabled for other reasons like unlink/edit mode where the reason is already visible in the UI).
+ // Show a tooltip only when inactive because the slot is a cross-segment slot that cannot host a
+ // link (not when disabled for other reasons like unlink/edit mode, where the reason is already
+ // visible in the UI).
const crossSegmentDisabled =
- phraseMode.kind === 'view' && focusedSideIsPrev !== undefined && !isSameSegmentAsFocus;
+ phraseMode.kind === 'view' &&
+ focusedSideIsPrev !== undefined &&
+ !isSameSegmentAsFocus &&
+ !adjacentEdgeValid;
const linkTitle = crossSegmentDisabled ? crossSegmentLinkTooltip : undefined;
// Highlight exactly what would be absorbed if the button were clicked — mirrors handleLinkClick.
diff --git a/src/components/controls/ViewOptionsDropdown.tsx b/src/components/controls/ViewOptionsDropdown.tsx
index 27b51f19..cadc1fb2 100644
--- a/src/components/controls/ViewOptionsDropdown.tsx
+++ b/src/components/controls/ViewOptionsDropdown.tsx
@@ -12,6 +12,7 @@ const STRING_KEYS = [
'%interlinearizer_viewOption_chapterLabelInVerse%',
'%interlinearizer_viewOption_showMorphology%',
'%interlinearizer_viewOption_showFreeTranslation%',
+ '%interlinearizer_viewOption_boundaryEditMode%',
] as const satisfies `%${string}%`[];
/**
@@ -72,6 +73,10 @@ type ViewOptionsDropdownProps = Readonly<{
showFreeTranslation: boolean;
/** Called when the show-free-translation toggle changes. */
onShowFreeTranslationChange: (checked: boolean) => void;
+ /** Current value of the edit-segment-boundaries toggle. */
+ boundaryEditMode: boolean;
+ /** Called when the edit-segment-boundaries toggle changes. */
+ onBoundaryEditModeChange: (checked: boolean) => void;
}>;
/**
@@ -92,6 +97,8 @@ type ViewOptionsDropdownProps = Readonly<{
* @param props.onShowMorphologyChange - Show-morphology change callback.
* @param props.showFreeTranslation - Current show-free-translation value.
* @param props.onShowFreeTranslationChange - Show-free-translation change callback.
+ * @param props.boundaryEditMode - Current edit-segment-boundaries value.
+ * @param props.onBoundaryEditModeChange - Edit-segment-boundaries change callback.
* @returns A gear button that opens a dropdown panel of view toggles.
*/
export default function ViewOptionsDropdown({
@@ -107,6 +114,8 @@ export default function ViewOptionsDropdown({
onShowMorphologyChange,
showFreeTranslation,
onShowFreeTranslationChange,
+ boundaryEditMode,
+ onBoundaryEditModeChange,
}: ViewOptionsDropdownProps) {
const [localizedStrings] = useLocalizedStrings(STRING_KEYS);
const [open, setOpen] = useState(false);
@@ -220,6 +229,11 @@ export default function ViewOptionsDropdown({
label={localizedStrings['%interlinearizer_viewOption_chapterLabelInVerse%']}
onCheckedChange={onChapterLabelInVerseChange}
/>
+
>,
document.body,
diff --git a/src/hooks/useDraftProject.ts b/src/hooks/useDraftProject.ts
index 0bfd289b..f5270c6a 100644
--- a/src/hooks/useDraftProject.ts
+++ b/src/hooks/useDraftProject.ts
@@ -1,6 +1,11 @@
/** @file Hook owning the always-present, auto-saved draft buffer for one source project. */
import papi, { logger } from '@papi/frontend';
-import type { DraftProject, InterlinearProject, TextAnalysis } from 'interlinearizer';
+import type {
+ DraftProject,
+ InterlinearProject,
+ SegmentationDelta,
+ TextAnalysis,
+} from 'interlinearizer';
import { useCallback, useEffect, useRef, useState } from 'react';
import { emptyAnalysis, emptyDraft } from '../types/empty-factories';
import { removeBookFromAnalysis } from '../utils/analysis-book';
@@ -11,7 +16,7 @@ const AUTOSAVE_DEBOUNCE_MS = 300;
/** The subset of an {@link InterlinearProject} needed to open it into the draft as a working copy. */
export type OpenableProject = Pick<
InterlinearProject,
- 'analysis' | 'analysisLanguages' | 'targetProjectId'
+ 'analysis' | 'analysisLanguages' | 'targetProjectId' | 'segmentation'
>;
/** Configuration for starting a fresh, empty draft via {@link UseDraftProjectResult.newDraft}. */
@@ -59,6 +64,13 @@ export type UseDraftProjectResult = {
* @param analysis - The updated analysis from the store.
*/
autosaveAnalysis: (analysis: TextAnalysis) => void;
+ /**
+ * Persists an edited segment-boundary delta into the draft and marks it dirty. Pass `undefined`
+ * to clear custom boundaries; any other delta is stored as given, so normalize defaults first.
+ *
+ * @param segmentation - The updated boundary delta, or `undefined` for the default segmentation.
+ */
+ autosaveSegmentation: (segmentation: SegmentationDelta | undefined) => void;
/**
* Replaces the draft with a working copy of an existing project's analysis and config — the
* "Open" flow.
@@ -239,12 +251,35 @@ export default function useDraftProject(
[persist],
);
+ const autosaveSegmentation = useCallback(
+ (segmentation: SegmentationDelta | undefined) => {
+ const { current } = draftRef;
+ /* v8 ignore next -- auto-save only fires from the mounted editor, which exists only post-load */
+ if (!current) return;
+ // NOTE(review): only draftRef/dirty change here — confirm draft.segmentation readers re-render.
+ const next: DraftProject = { ...current, dirty: true };
+ // Store custom boundaries when present; clear the field for the default segmentation so the
+ // persisted draft stays minimal.
+ if (segmentation === undefined) delete next.segmentation;
+ else next.segmentation = segmentation;
+ draftRef.current = next;
+ if (autosaveTimeoutRef.current !== undefined) clearTimeout(autosaveTimeoutRef.current);
+ autosaveTimeoutRef.current = setTimeout(() => {
+ autosaveTimeoutRef.current = undefined;
+ persist(next);
+ }, AUTOSAVE_DEBOUNCE_MS);
+ setDirty(true);
+ },
+ [persist],
+ );
+
const loadFromProject = useCallback(
(project: OpenableProject) => {
applyReplacement({
sourceProjectId,
analysisLanguages: project.analysisLanguages,
...(project.targetProjectId !== undefined && { targetProjectId: project.targetProjectId }),
+ ...(project.segmentation !== undefined && { segmentation: project.segmentation }),
analysis: project.analysis,
dirty: false,
});
@@ -292,7 +327,10 @@ export default function useDraftProject(
// the unsaved-changes indicator (dirty: false) so the user is not nagged to save an empty
// draft. The active project is intentionally left untouched, so a subsequent Save still targets
// it. Per-book wipe stays dirty, since it is a partial edit the user will usually want to save.
- applyReplacement({ ...current, analysis: emptyAnalysis(), dirty: false });
+ // Custom segment boundaries are part of the working state, so a whole-draft wipe clears them too.
+ const next: DraftProject = { ...current, analysis: emptyAnalysis(), dirty: false };
+ delete next.segmentation;
+ applyReplacement(next);
}, [applyReplacement]);
const markSynced = useCallback(
@@ -328,6 +366,7 @@ export default function useDraftProject(
dirty,
getDraftSnapshot,
autosaveAnalysis,
+ autosaveSegmentation,
loadFromProject,
newDraft,
wipeBook,
diff --git a/src/main.ts b/src/main.ts
index f82b7e65..71980410 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -7,10 +7,11 @@ import type {
SavedWebViewDefinition,
WebViewDefinition,
} from '@papi/core';
+import type { SegmentationDelta } from 'interlinearizer';
import interlinearizerReact from './interlinearizer.web-view?inline';
import interlinearizerStyles from './interlinearizer.web-view.scss?inline';
import * as projectStorage from './services/projectStorage';
-import { isDraftProject, isTextAnalysis } from './types/type-guards';
+import { isDraftProject, isSegmentationDelta, isTextAnalysis } from './types/type-guards';
// #region WebView provider
@@ -265,6 +266,8 @@ async function getInterlinearProject(interlinearProjectId: string): Promise {
try {
const analysis = JSON.parse(analysisJson);
if (!isTextAnalysis(analysis)) {
throw new TypeError('saveInterlinearAnalysis: analysisJson does not conform to TextAnalysis');
}
- await projectStorage.updateAnalysis(executionToken, interlinearProjectId, analysis);
+ // undefined ⇒ leave boundaries unchanged; null ⇒ clear them; an object ⇒ set them.
+ let segmentation: SegmentationDelta | null | undefined;
+ if (segmentationJson !== undefined) {
+ const parsed: unknown = JSON.parse(segmentationJson);
+ // eslint-disable-next-line no-null/no-null -- JSON.parse('null') yields null, the clear sentinel
+ if (parsed === null) {
+ // eslint-disable-next-line no-null/no-null -- explicit "clear boundaries" sentinel from the WebView
+ segmentation = null;
+ } else if (isSegmentationDelta(parsed)) {
+ segmentation = parsed;
+ } else {
+ throw new TypeError(
+ 'saveInterlinearAnalysis: segmentationJson does not conform to SegmentationDelta',
+ );
+ }
+ }
+ await projectStorage.updateAnalysis(
+ executionToken,
+ interlinearProjectId,
+ analysis,
+ segmentation,
+ );
} catch (e) {
logger.error('Interlinearizer: failed to save analysis', e);
await papi.notifications
diff --git a/src/parsers/papi/resegmentBook.ts b/src/parsers/papi/resegmentBook.ts
new file mode 100644
index 00000000..81642f88
--- /dev/null
+++ b/src/parsers/papi/resegmentBook.ts
@@ -0,0 +1,130 @@
+/**
+ * @file Re-groups a verse-tokenized {@link Book} into the user's custom segments per a
+ * {@link SegmentationDelta}, without touching the text-layer tokenizer.
+ *
+ * {@link tokenizeBook} always produces one `Segment` per verse; this pass runs on its output and
+ * cuts the flat document-order token stream at the delta's effective boundaries. Token refs and
+ * token objects are preserved unchanged for untouched verses (reused by reference) so analyses
+ * keep resolving and React memoization is undisturbed; only merged or split segments are rebuilt,
+ * with `baselineText` and per-token char offsets recomputed so the `baselineText.slice(charStart,
+ * charEnd) === surfaceText` invariant still holds.
+ */
+import type { Book, ScriptureRef, Segment, SegmentationDelta, Token } from 'interlinearizer';
+
+import { effectiveStarts, isDefaultSegmentation } from '../../utils/segmentation';
+
+/**
+ * Separator inserted between two verses' baseline text when they are merged into one segment. Its
+ * length is also added to the running char offset in {@link buildSegment}, so the offsets of tokens
+ * from later verses account for it.
+ */
+const MERGE_SEPARATOR = ' ';
+
+/**
+ * A token paired with the original verse {@link Segment} it came from. The verse is kept alongside
+ * the token so a run can be sliced out of the right verse's `baselineText` and so a run that is
+ * exactly one verse can be detected by identity.
+ */
+type SourcedToken = { token: Token; verse: Segment };
+
+/**
+ * Assembles a single custom {@link Segment} out of a document-order run of tokens, which may draw
+ * from several original verses.
+ *
+ * Each contributing verse supplies the slice of its `baselineText` that spans from its first token
+ * in the run to its last token in the run; those slices are concatenated with
+ * {@link MERGE_SEPARATOR} between verses. Every token is copied with `charStart`/`charEnd` re-based
+ * onto the concatenated string; all other token fields (including `ref` and `surfaceText`) are
+ * carried over as-is.
+ *
+ * @param run - The run's tokens in document order, each tagged with its source verse. Must be
+ *   non-empty.
+ * @returns The rebuilt segment.
+ */
+function buildSegment(run: SourcedToken[]): Segment {
+  const head = run[0];
+  const tail = run[run.length - 1];
+  const headVerse = head.verse;
+  const tailVerse = tail.verse;
+
+  // Whether the run opens on its first verse's first token / closes on its last verse's last token.
+  // A run that opens mid-verse is the later piece of a split.
+  const startsAtVerseBoundary = headVerse.tokens[0]?.ref === head.token.ref;
+  const endsAtVerseBoundary =
+    tailVerse.tokens[tailVerse.tokens.length - 1]?.ref === tail.token.ref;
+
+  const pieces: string[] = [];
+  const tokens: Token[] = [];
+  let offset = 0;
+  let index = 0;
+  while (index < run.length) {
+    // Handle one verse's contiguous share of the run per outer iteration.
+    const sourceVerse = run[index].verse;
+    const sliceStart = run[index].token.charStart;
+    // Every verse after the first is preceded by the separator in the joined text.
+    if (index > 0) offset += MERGE_SEPARATOR.length;
+    // Constant that maps an offset in the source verse onto the concatenated baseline.
+    const shift = offset - sliceStart;
+    let sliceEnd = sliceStart;
+    while (index < run.length && run[index].verse === sourceVerse) {
+      const { token } = run[index];
+      tokens.push({
+        ...token,
+        charStart: token.charStart + shift,
+        charEnd: token.charEnd + shift,
+      });
+      sliceEnd = token.charEnd;
+      index += 1;
+    }
+    const piece = sourceVerse.baselineText.slice(sliceStart, sliceEnd);
+    pieces.push(piece);
+    offset += piece.length;
+  }
+
+  // A run that opens on a verse boundary keeps that verse's id (so segment-level analyses keyed by
+  // the leading verse still resolve); a mid-verse run uses its first token's ref as a unique id.
+  const id = startsAtVerseBoundary ? headVerse.id : head.token.ref;
+
+  // Anchor the range to the covered span; a mid-verse edge carries a sub-verse charIndex.
+  const startRef: ScriptureRef = startsAtVerseBoundary
+    ? headVerse.startRef
+    : { ...headVerse.startRef, charIndex: head.token.charStart };
+  const endRef: ScriptureRef = endsAtVerseBoundary
+    ? tailVerse.endRef
+    : { ...tailVerse.endRef, charIndex: tail.token.charEnd };
+
+  return { id, startRef, endRef, baselineText: pieces.join(MERGE_SEPARATOR), tokens };
+}
+
+/**
+ * Applies the user's boundary `delta` to a verse-tokenized `book`, producing the custom segments.
+ *
+ * For the default segmentation the input `book` is handed back by reference — no work, no identity
+ * churn. Otherwise the flat document-order token stream is cut at the effective start refs: a run
+ * that is exactly one original verse reuses that verse's `Segment` object untouched, while merged
+ * or split runs are rebuilt via {@link buildSegment}.
+ *
+ * NOTE(review): only tokens are walked, so a verse segment with no tokens contributes nothing to
+ * any run and is absent from the result when `delta` is non-default — confirm that is intended.
+ *
+ * @param book - The verse-tokenized book from {@link tokenizeBook}.
+ * @param delta - The user's boundary delta, or `undefined` for the default verse segmentation.
+ * @returns A book with the custom segmentation applied, or `book` itself when `delta` is the
+ *   default.
+ */
+export function resegmentBook(book: Book, delta: SegmentationDelta | undefined): Book {
+  if (isDefaultSegmentation(delta)) return book;
+
+  const segmentStarts = effectiveStarts(book, delta);
+
+  // Walk the flat token stream, opening a new run at every effective start. The very first token
+  // always opens the first run, so no empty run is ever split off ahead of it.
+  const runs: SourcedToken[][] = [];
+  book.segments.forEach((verse) => {
+    verse.tokens.forEach((token) => {
+      if (runs.length === 0 || segmentStarts.has(token.ref)) {
+        runs.push([{ token, verse }]);
+      } else {
+        runs[runs.length - 1].push({ token, verse });
+      }
+    });
+  });
+
+  const segments = runs.map((run): Segment => {
+    const leadVerse = run[0].verse;
+    // A run made of every token of a single verse is that verse, unchanged: hand back the original
+    // object so its id, baselineText, token offsets, and identity all survive.
+    const coversWholeVerse =
+      run.length === leadVerse.tokens.length && run.every(({ verse }) => verse === leadVerse);
+    if (coversWholeVerse) return leadVerse;
+    return buildSegment(run);
+  });
+
+  return { ...book, segments };
+}
diff --git a/src/services/projectStorage.ts b/src/services/projectStorage.ts
index 42b242a3..7eeac53f 100644
--- a/src/services/projectStorage.ts
+++ b/src/services/projectStorage.ts
@@ -1,6 +1,11 @@
import papi, { logger } from '@papi/backend';
import type { ExecutionToken } from '@papi/core';
-import type { DraftProject, InterlinearProject, TextAnalysis } from 'interlinearizer';
+import type {
+ DraftProject,
+ InterlinearProject,
+ SegmentationDelta,
+ TextAnalysis,
+} from 'interlinearizer';
import { emptyAnalysis, emptyDraft } from '../types/empty-factories';
import { isDraftProject } from '../types/type-guards';
@@ -256,11 +261,14 @@ export async function getProjectsForSource(
}
/**
- * Replaces the analysis of an existing interlinearizer project.
+ * Replaces the analysis of an existing interlinearizer project, and optionally its custom segment
+ * boundaries, in one atomic write.
*
* @param token - The execution token for storage access.
* @param id - The interlinearizer project UUID to update.
* @param analysis - The new `TextAnalysis` to persist.
+ * @param segmentation - The new boundary delta to persist (`SegmentationDelta`), `null` to clear
+ * any stored boundaries, or `undefined` to leave the project's existing boundaries unchanged.
* @returns The updated project record, or `undefined` if no project with the given ID exists.
* @throws {SyntaxError} If the project's storage value contains invalid JSON.
* @throws If `papi.storage.readUserData` or `papi.storage.writeUserData` rejects for a non-ENOENT
@@ -270,11 +278,15 @@ export async function updateAnalysis(
token: ExecutionToken,
id: string,
analysis: TextAnalysis,
+ segmentation?: SegmentationDelta | null,
): Promise {
return enqueueProjectOp(id, async () => {
const project = await getProject(token, id);
if (!project) return undefined;
const updated: InterlinearProject = { ...project, analysis };
+ // eslint-disable-next-line no-null/no-null -- null is the explicit "clear stored boundaries" sentinel
+ if (segmentation === null) delete updated.segmentation;
+ else if (segmentation !== undefined) updated.segmentation = segmentation;
await papi.storage.writeUserData(token, projectKey(id), JSON.stringify(updated));
return updated;
});
diff --git a/src/types/interlinearizer.d.ts b/src/types/interlinearizer.d.ts
index 52dc08cb..b626446f 100644
--- a/src/types/interlinearizer.d.ts
+++ b/src/types/interlinearizer.d.ts
@@ -162,11 +162,15 @@ declare module 'papi-shared-types' {
'interlinearizer.getProject': (interlinearProjectId: string) => Promise;
/**
- * Persists an updated `TextAnalysis` for an interlinearizer project. Called from the WebView
- * after each gloss write so that analysis changes survive tab restores and project switches.
+ * Persists an updated `TextAnalysis` (and optionally custom segment boundaries) for an
+ * interlinearizer project. Called from the WebView on Save so analysis and boundary changes
+ * survive tab restores and project switches.
*
* @param interlinearProjectId UUID of the interlinearizer project to update.
* @param analysisJson JSON-stringified `TextAnalysis` to persist.
+ * @param segmentationJson Optional JSON-stringified `SegmentationDelta` to persist, or the
+ * string `"null"` to clear any stored custom boundaries. Omit entirely to leave the project's
+ * existing boundaries unchanged.
* @returns Promise that resolves to void once the analysis has been written to storage.
* @throws If JSON parsing or storage fails. Error is logged and an error notification is sent
* before rethrowing so callers do not need to send a second notification.
@@ -174,6 +178,7 @@ declare module 'papi-shared-types' {
'interlinearizer.saveAnalysis': (
interlinearProjectId: string,
analysisJson: string,
+ segmentationJson?: string,
) => Promise;
/**
@@ -1095,7 +1100,47 @@ declare module 'interlinearizer' {
}
// ---------------------------------------------------------------------------
- // §6 InterlinearProject — persisted project envelope
+ // §6 SegmentationDelta — user-defined segment boundaries
+ // ---------------------------------------------------------------------------
+
+ /**
+ * A user's custom segment boundaries, stored as a **delta from the default one-segment-per-verse
+ * segmentation** rather than as explicit segment definitions.
+ *
+ * The text layer is rebuilt from USJ on every load as one `Segment` per verse (see {@link Book}).
+ * A segment is otherwise just a maximal contiguous run of the book's document-order token stream
+ * between "start" tokens; the default start tokens are each verse's first token. This delta
+ * records where the user's boundaries differ from that default:
+ *
+ * - A verse's first token listed in `removedVerseStarts` no longer starts a segment, so that verse
+ * is **merged** into the preceding segment.
+ * - A mid-verse token listed in `addedStarts` starts a new segment, **splitting** its verse.
+ *
+ * Boundaries are anchored to token refs (stable opaque ids), so the model degrades gracefully
+ * when the baseline text drifts: an anchor whose token no longer exists is ignored on load,
+ * leaving every other boundary intact. Because a segment can only ever be a contiguous run
+ * between start tokens, discontiguous segments are unrepresentable by construction.
+ *
+ * Absent (`undefined`) ⇒ the default verse segmentation. The empty delta (both arrays empty) is
+ * equivalent.
+ */
+ export interface SegmentationDelta {
+ /**
+ * Token refs that are a verse's first token in the default segmentation but should **not**
+ * start a segment — i.e. the verse is merged into the preceding segment. The default start is
+ * the verse's first token of any type, so this may be a leading punctuation token rather than
+ * a word. A ref whose token no longer exists is ignored on load, and the book's very first
+ * token always starts a segment, so listing it here has no effect.
+ */
+ removedVerseStarts: string[];
+
+ /**
+ * Mid-verse word-token refs that should start a new segment — i.e. the verse is split before
+ * this token. A ref whose token no longer exists is ignored on load.
+ */
+ addedStarts: string[];
+ }
+
+ // ---------------------------------------------------------------------------
+ // §7 InterlinearProject — persisted project envelope
// ---------------------------------------------------------------------------
/**
@@ -1166,6 +1211,13 @@ declare module 'interlinearizer' {
* aligns source and target tokens.
*/
links?: AlignmentLink[];
+
+ /**
+ * User-defined segment boundaries as a delta from the default verse segmentation. Absent
+ * (`undefined`) ⇒ the default one-segment-per-verse segmentation. See
+ * {@link SegmentationDelta}.
+ */
+ segmentation?: SegmentationDelta;
}
/**
@@ -1218,10 +1270,17 @@ declare module 'interlinearizer' {
* project.
*/
dirty: boolean;
+
+ /**
+ * User-defined segment boundaries being edited, as a delta from the default verse segmentation.
+ * Absent (`undefined`) ⇒ the default one-segment-per-verse segmentation. Carried to the active
+ * project on Save. See {@link SegmentationDelta}.
+ */
+ segmentation?: SegmentationDelta;
}
// ---------------------------------------------------------------------------
- // §7 ActiveProject — runtime pairing of project envelope and text layers
+ // §8 ActiveProject — runtime pairing of project envelope and text layers
// ---------------------------------------------------------------------------
/**
diff --git a/src/types/token-layout.ts b/src/types/token-layout.ts
index 7fd3cbd8..39ae209a 100644
--- a/src/types/token-layout.ts
+++ b/src/types/token-layout.ts
@@ -37,10 +37,18 @@ export type SlotFocusInfo = {
*/
focusedSideIsPrev: boolean | undefined;
/**
- * `true` when both slot neighbors are in the same segment as the focused token. Phrases cannot
- * span segments, so the link button is disabled when this is `false`.
+ * `true` when both slot neighbors are in the same segment as the focused token. Within one
+ * segment the link button joins tokens into a phrase as usual.
*/
isSameSegmentAsFocus: boolean;
+ /**
+ * `true` when this slot is the boundary between the focused token's segment and an immediately
+ * adjacent segment — i.e. one neighbor is in the focused segment and the other is in the segment
+ * directly before or after it in document order. The cross-segment link button is active only at
+ * these edges, so pulling an adjacent segment's edge token into the focused phrase moves the
+ * boundary by exactly one token and keeps both segments contiguous.
+ */
+ isAdjacentEdgeOfFocus: boolean;
/** The phrase containing the focused token, or `undefined` when the focused token is free. */
focusedPhraseLink: PhraseAnalysisLink | undefined;
/** The focused token when it is not part of any phrase ("free"); `undefined` otherwise. */
diff --git a/src/types/type-guards.ts b/src/types/type-guards.ts
index 00dddc1f..a04b346a 100644
--- a/src/types/type-guards.ts
+++ b/src/types/type-guards.ts
@@ -1,5 +1,11 @@
/** @file Type guards for narrowing interlinearizer types and validating parsed JSON payloads. */
-import type { AssignmentStatus, DraftProject, TextAnalysis, Token } from 'interlinearizer';
+import type {
+ AssignmentStatus,
+ DraftProject,
+ SegmentationDelta,
+ TextAnalysis,
+ Token,
+} from 'interlinearizer';
import type { InterlinearProjectSummary } from './interlinear-project-summary';
/**
@@ -234,6 +240,27 @@ export function isTextAnalysis(value: unknown): value is TextAnalysis {
);
}
+/**
+ * Type guard for {@link SegmentationDelta} parsed from unknown JSON. Both arrays must be present and
+ * contain only strings, so a malformed delta is rejected before it can corrupt re-segmentation.
+ *
+ * Only the shape is validated: the refs are not checked against any book, and extra properties on
+ * `value` are tolerated.
+ *
+ * @param value - The value to test, typically a parsed JSON object of unknown shape.
+ * @returns `true` if `value` satisfies the {@link SegmentationDelta} shape, narrowing its type
+ * accordingly.
+ */
+export function isSegmentationDelta(value: unknown): value is SegmentationDelta {
+ return (
+ !!value && // rejects null, which would otherwise pass the typeof 'object' check below
+ typeof value === 'object' &&
+ 'removedVerseStarts' in value && // each `in` check must precede the matching property read
+ Array.isArray(value.removedVerseStarts) &&
+ value.removedVerseStarts.every((r) => typeof r === 'string') &&
+ 'addedStarts' in value &&
+ Array.isArray(value.addedStarts) &&
+ value.addedStarts.every((r) => typeof r === 'string')
+ );
+}
+
/**
* Type guard for {@link DraftProject} parsed from unknown JSON. Validates the envelope fields and
* delegates the `analysis` to {@link isTextAnalysis}, so malformed drafts are rejected before
@@ -257,6 +284,7 @@ export function isDraftProject(value: unknown): value is DraftProject {
(!('targetProjectId' in value) || typeof value.targetProjectId === 'string') &&
(!('suggestedName' in value) || typeof value.suggestedName === 'string') &&
(!('suggestedDescription' in value) || typeof value.suggestedDescription === 'string') &&
+ (!('segmentation' in value) || isSegmentationDelta(value.segmentation)) &&
'analysis' in value &&
isTextAnalysis(value.analysis)
);
diff --git a/src/types/view-options.ts b/src/types/view-options.ts
index 57a02496..eae8d57c 100644
--- a/src/types/view-options.ts
+++ b/src/types/view-options.ts
@@ -19,4 +19,9 @@ export type ViewOptions = Readonly<{
showMorphology: boolean;
/** When true, a free-translation input is shown beneath each segment's tokens or baseline text. */
showFreeTranslation: boolean;
+ /**
+ * When true, the link slots between phrases show merge/split controls for editing segment
+ * boundaries instead of the phrase link/unlink icons.
+ */
+ boundaryEditMode: boolean;
}>;
diff --git a/src/utils/segmentation.ts b/src/utils/segmentation.ts
new file mode 100644
index 00000000..5724ebbe
--- /dev/null
+++ b/src/utils/segmentation.ts
@@ -0,0 +1,328 @@
+/**
+ * @file Pure transforms over a {@link SegmentationDelta} — the user's custom segment boundaries
+ * expressed as a delta from the default one-segment-per-verse segmentation.
+ *
+ * A segment is a maximal contiguous run of the book's document-order token stream between "start"
+ * tokens. The default start tokens are each verse's first token; the delta records where the
+ * user's boundaries differ (a removed verse start merges that verse into the previous segment; an
+ * added start splits a verse). Because a segment can only be a contiguous run between starts,
+ * discontiguous segments are unrepresentable.
+ *
+ * Every function here is pure and store-free (mirrors `phrase-arc.ts`). They take the original
+ * verse-tokenized {@link Book} (from `tokenizeBook`, before re-segmentation) so they can derive
+ * the default verse starts; they never need the re-segmented book.
+ */
+import type { Book, SegmentationDelta } from 'interlinearizer';
+
+/**
+ * An empty delta — equivalent to the default verse segmentation. Used as the fallback when a
+ * caller passes `undefined`; the editing functions in this file only read its arrays and build new
+ * ones, so this shared object is not mutated by them.
+ */
+const EMPTY_DELTA: SegmentationDelta = { removedVerseStarts: [], addedStarts: [] };
+
+/**
+ * Looks up the ref of the first token of the book's first segment. That token opens the first
+ * segment, which has nothing before it to merge into, so its start is treated as permanent.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns The first token's ref, or `undefined` when the book has no segments or its first
+ *   segment has no tokens.
+ */
+function bookFirstTokenRef(verseBook: Book): string | undefined {
+  const leadingSegment = verseBook.segments[0];
+  const leadingToken = leadingSegment?.tokens[0];
+  return leadingToken?.ref;
+}
+
+/**
+ * The default segment-start refs — each verse segment's first token (of any type, so a verse's
+ * leading punctuation stays with that verse). Segments without tokens contribute no start.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns The set of first-token refs, one per verse segment that has tokens.
+ */
+export function defaultVerseStarts(verseBook: Book): Set<string> {
+  const starts = new Set<string>();
+  verseBook.segments.forEach((seg) => {
+    const first = seg.tokens[0];
+    // A token-less segment has no first token and therefore no default start.
+    if (first) starts.add(first.ref);
+  });
+  return starts;
+}
+
+/**
+ * Every token ref in the book, used to drop delta anchors whose token no longer exists.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns The set of all token refs across every segment.
+ */
+function allTokenRefs(verseBook: Book): Set<string> {
+  const refs = new Set<string>();
+  verseBook.segments.forEach((seg) => seg.tokens.forEach((t) => refs.add(t.ref)));
+  return refs;
+}
+
+/**
+ * The interior token refs of every verse-0 segment — each verse-0 segment's tokens except its
+ * first. A verse-0 segment is a chapter superscription (e.g. a Psalm title); its tokens must always
+ * stay together in one segment, so no boundary may ever fall strictly inside it.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns The set of token refs interior to a verse-0 segment.
+ */
+function verseZeroInteriorRefs(verseBook: Book): Set<string> {
+  const interior = new Set<string>();
+  verseBook.segments.forEach((seg) => {
+    if (seg.startRef.verse !== 0) return;
+    // The first token is the segment's own start, not an interior position, so skip it.
+    seg.tokens.slice(1).forEach((t) => interior.add(t.ref));
+  });
+  return interior;
+}
+
+/**
+ * The segment-start refs that bound a verse-0 segment on either side — the two boundaries that turn
+ * a chapter superscription into a hard wall. A verse-0 segment is a chapter superscription (e.g. a
+ * Psalm title); it must stay its own isolated segment, so neither of its boundaries may ever be
+ * removed: doing so would either pull the superscription into the previous chapter's last verse
+ * (its document-order predecessor) or sweep it forward into the verse that follows. For each
+ * verse-0 segment this contributes:
+ *
+ * - Its own first-token ref (the boundary _before_ verse 0), and
+ * - The first-token ref of the segment immediately after it in document order (the boundary _after_
+ *   verse 0), when such a segment exists and has tokens.
+ *
+ * Splitting verse 0 is handled separately by {@link verseZeroInteriorRefs}; this set is only about
+ * the boundaries that border it.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns The set of segment-start refs that may not be removed because they border a verse-0
+ *   segment.
+ */
+function verseZeroBoundaryRefs(verseBook: Book): Set<string> {
+  const boundaries = new Set<string>();
+  verseBook.segments.forEach((seg, index) => {
+    if (seg.startRef.verse !== 0) return;
+    const ownStart = seg.tokens[0]?.ref;
+    /* v8 ignore next -- a verse-0 segment is only emitted when it has text, so it always has tokens */
+    if (ownStart !== undefined) boundaries.add(ownStart);
+    // The segment after verse 0 (when one exists): the boundary after the superscription. Absent
+    // only when verse 0 ends the book, in which case there is no after-boundary to lock.
+    const nextStart = verseBook.segments[index + 1]?.tokens[0]?.ref;
+    if (nextStart !== undefined) boundaries.add(nextStart);
+  });
+  return boundaries;
+}
+
+/**
+ * Document-order index for every token ref, used to keep delta arrays canonically sorted.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @returns Map from token ref to its flat, zero-based document index.
+ */
+function docOrder(verseBook: Book): Map<string, number> {
+  const order = new Map<string, number>();
+  let i = 0;
+  verseBook.segments.forEach((seg) =>
+    seg.tokens.forEach((t) => {
+      order.set(t.ref, i);
+      // The counter runs across segment boundaries, giving one flat index space for the book.
+      i += 1;
+    }),
+  );
+  return order;
+}
+
+/**
+ * The effective set of segment-start refs after applying `delta` to the default verse starts:
+ * `(defaults \ removedVerseStarts) ∪ addedStarts`, with added anchors dropped when their token no
+ * longer exists and the book's first token always forced to be a start. Shared with `resegmentBook`
+ * so re-segmentation and the editing operations agree on where boundaries fall.
+ *
+ * A removed ref that is not a default start simply has no effect, since only defaults are filtered
+ * against the removed set.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The user's boundary delta, or `undefined` for the default segmentation.
+ * @returns The set of token refs that begin a segment.
+ */
+export function effectiveStarts(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+): Set<string> {
+  const defaults = defaultVerseStarts(verseBook);
+  const removed = new Set(delta?.removedVerseStarts ?? []);
+  const starts = new Set<string>();
+  defaults.forEach((ref) => {
+    if (!removed.has(ref)) starts.add(ref);
+  });
+  // Only walk every token in the book when there are added anchors to validate against it.
+  if (delta && delta.addedStarts.length > 0) {
+    const all = allTokenRefs(verseBook);
+    delta.addedStarts.forEach((ref) => {
+      if (all.has(ref)) starts.add(ref);
+    });
+  }
+  const first = bookFirstTokenRef(verseBook);
+  // The first segment can never be merged away, so its start is always present.
+  if (first !== undefined) starts.add(first);
+  return starts;
+}
+
+/**
+ * Produces the canonical form of `delta` so that equal segmentations serialize identically. Each
+ * array is deduplicated, filtered down to entries that actually change something, and sorted by
+ * document order:
+ *
+ * - `removedVerseStarts` keeps only refs that are default verse starts other than the book's first
+ *   token.
+ * - `addedStarts` keeps only refs whose token exists and that are not already default verse starts.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The delta to canonicalize.
+ * @returns A normalized {@link SegmentationDelta}; the input arrays are not modified.
+ */
+function normalize(verseBook: Book, delta: SegmentationDelta): SegmentationDelta {
+  const verseStarts = defaultVerseStarts(verseBook);
+  const knownRefs = allTokenRefs(verseBook);
+  const position = docOrder(verseBook);
+  const bookStart = bookFirstTokenRef(verseBook);
+
+  // Dedupe, keep only the meaningful entries, then order by position in the book.
+  const canonical = (refs: string[], keep: (ref: string) => boolean): string[] =>
+    Array.from(new Set(refs))
+      .filter(keep)
+      .sort(
+        (a, b) =>
+          /* v8 ignore next -- ?? 0 fallback for refs absent from order; filtered arrays only hold real refs */
+          (position.get(a) ?? 0) - (position.get(b) ?? 0),
+      );
+
+  return {
+    removedVerseStarts: canonical(
+      delta.removedVerseStarts,
+      (ref) => verseStarts.has(ref) && ref !== bookStart,
+    ),
+    addedStarts: canonical(
+      delta.addedStarts,
+      (ref) => knownRefs.has(ref) && !verseStarts.has(ref),
+    ),
+  };
+}
+
+/**
+ * Ensures a segment begins at `ref` — i.e. splits before it.
+ *
+ * - If `ref` is a default verse start, any earlier merge of it is undone by dropping it from
+ *   `removedVerseStarts`.
+ * - Any other `ref` is appended to `addedStarts`.
+ *
+ * Nothing changes (beyond normalization) when `ref` already begins a segment, or when `ref` is an
+ * interior token of a verse-0 segment: a superscription must stay whole, so it cannot be split.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The current delta, or `undefined` for the default segmentation.
+ * @param ref - The token ref that should begin a segment.
+ * @returns The updated, normalized delta.
+ */
+export function addBoundaryBefore(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+  ref: string,
+): SegmentationDelta {
+  const { removedVerseStarts, addedStarts } = delta ?? EMPTY_DELTA;
+
+  let next: SegmentationDelta;
+  if (verseZeroInteriorRefs(verseBook).has(ref)) {
+    // Guarded: splitting inside a superscription is not allowed, so leave the delta as it is.
+    next = { removedVerseStarts, addedStarts };
+  } else if (defaultVerseStarts(verseBook).has(ref)) {
+    // A verse start is a boundary by default; restoring it means forgetting its removal.
+    next = { removedVerseStarts: removedVerseStarts.filter((r) => r !== ref), addedStarts };
+  } else {
+    next = { removedVerseStarts, addedStarts: [...addedStarts, ref] };
+  }
+  return normalize(verseBook, next);
+}
+
+/**
+ * Ensures no segment begins at `ref` — i.e. merges the segment starting there into the one before.
+ *
+ * - If `ref` is a default verse start, it is appended to `removedVerseStarts`.
+ * - Any other `ref` (a previously added split) is dropped from `addedStarts`.
+ *
+ * Nothing changes (beyond normalization) when `ref` is the book's first token, since the first
+ * segment has nothing before it, or when `ref` borders a verse-0 superscription (see
+ * {@link verseZeroBoundaryRefs}): removing the boundary before it would pull it into the previous
+ * chapter's last verse, and removing the one after it would sweep it into the following verse.
+ * Splitting verse 0 is likewise forbidden — see {@link addBoundaryBefore}.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The current delta, or `undefined` for the default segmentation.
+ * @param ref - The segment-start token ref to remove.
+ * @returns The updated, normalized delta.
+ */
+export function removeBoundaryAt(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+  ref: string,
+): SegmentationDelta {
+  const { removedVerseStarts, addedStarts } = delta ?? EMPTY_DELTA;
+
+  const isLocked =
+    ref === bookFirstTokenRef(verseBook) || verseZeroBoundaryRefs(verseBook).has(ref);
+
+  let next: SegmentationDelta;
+  if (isLocked) {
+    // Guarded boundary: leave the delta as it is.
+    next = { removedVerseStarts, addedStarts };
+  } else if (defaultVerseStarts(verseBook).has(ref)) {
+    next = { removedVerseStarts: [...removedVerseStarts, ref], addedStarts };
+  } else {
+    next = { removedVerseStarts, addedStarts: addedStarts.filter((r) => r !== ref) };
+  }
+  return normalize(verseBook, next);
+}
+
+/**
+ * Shifts a boundary from `fromRef` to `toRef` — the primitive behind pulling a single edge token
+ * across a segment boundary. Implemented as {@link removeBoundaryAt} on `fromRef` followed by
+ * {@link addBoundaryBefore} on `toRef`.
+ *
+ * The two halves are guarded independently: the removal does nothing when `fromRef` is the book's
+ * first token or borders a superscription, and the addition does nothing when `toRef` is interior
+ * to one. A move therefore cannot pull a token into or out of a superscription. Note that the
+ * halves are not atomic — if the removal is blocked but the addition is allowed, the result gains
+ * a start at `toRef` while keeping the one at `fromRef`.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The current delta, or `undefined` for the default segmentation.
+ * @param fromRef - The current segment-start ref to remove.
+ * @param toRef - The new segment-start ref to add.
+ * @returns The updated, normalized delta.
+ */
+export function moveBoundary(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+  fromRef: string,
+  toRef: string,
+): SegmentationDelta {
+  const withoutOldStart = removeBoundaryAt(verseBook, delta, fromRef);
+  return addBoundaryBefore(verseBook, withoutOldStart, toRef);
+}
+
+/**
+ * Folds the segment beginning at `secondSegmentStartRef` into its predecessor. This is
+ * {@link removeBoundaryAt} under a name that matches the explicit merge control, and it inherits
+ * that function's guards.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The current delta, or `undefined` for the default segmentation.
+ * @param secondSegmentStartRef - The first-token ref of the segment being merged into its
+ *   predecessor.
+ * @returns The updated, normalized delta.
+ */
+export function mergeSegments(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+  secondSegmentStartRef: string,
+): SegmentationDelta {
+  const merged = removeBoundaryAt(verseBook, delta, secondSegmentStartRef);
+  return merged;
+}
+
+/**
+ * Cuts a segment in two so that a new one begins at `ref`. This is {@link addBoundaryBefore} under
+ * a name that matches the explicit split control, and it inherits that function's guards.
+ *
+ * @param verseBook - The original verse-tokenized book.
+ * @param delta - The current delta, or `undefined` for the default segmentation.
+ * @param ref - The token ref the new segment should begin at.
+ * @returns The updated, normalized delta.
+ */
+export function splitSegmentBefore(
+  verseBook: Book,
+  delta: SegmentationDelta | undefined,
+  ref: string,
+): SegmentationDelta {
+  const split = addBoundaryBefore(verseBook, delta, ref);
+  return split;
+}
+
+/**
+ * Reports whether `delta` leaves the default verse segmentation untouched: either no delta at all,
+ * or one whose two arrays are both empty. The check is purely structural — entries are not
+ * compared against any book.
+ *
+ * @param delta - The delta to test.
+ * @returns `true` when `delta` is absent or has no removed and no added starts.
+ */
+export function isDefaultSegmentation(delta: SegmentationDelta | undefined): boolean {
+  if (!delta) return true;
+  if (delta.removedVerseStarts.length !== 0) return false;
+  return delta.addedStarts.length === 0;
+}
diff --git a/src/utils/token-layout.ts b/src/utils/token-layout.ts
index 0c9f4c6d..4b3cd083 100644
--- a/src/utils/token-layout.ts
+++ b/src/utils/token-layout.ts
@@ -61,6 +61,9 @@ export function resolveFocusContext(
* @param focus - Resolved focus context for the whole strip.
* @param focusedSideIsPrev - The layout-specific bool indicating whether focus is start-ward of
* this slot.
+ * @param segmentOrder - Segment id → document-order index, used to detect when this slot is the
+ * boundary between the focused segment and an immediately adjacent one. Defaults to empty (no
+ * adjacency, e.g. single-segment SegmentView slots).
* @returns Slot focus info ready to pass as `slotFocus` to `MemoizedTokenLinkIcon`.
*/
export function resolveSlotFocus(
@@ -68,14 +71,34 @@ export function resolveSlotFocus(
nextSegmentId: string | undefined,
focus: FocusContext,
focusedSideIsPrev: boolean | undefined,
+ segmentOrder: ReadonlyMap = new Map(),
): SlotFocusInfo {
+ const { focusedSegmentId } = focus;
const isSameSegmentAsFocus =
- focus.focusedSegmentId !== undefined &&
- prevSegmentId === focus.focusedSegmentId &&
- nextSegmentId === focus.focusedSegmentId;
+ focusedSegmentId !== undefined &&
+ prevSegmentId === focusedSegmentId &&
+ nextSegmentId === focusedSegmentId;
+ // The slot is an adjacent edge when exactly one neighbor is the focused segment, the other is a
+ // different segment, and the two are neighbors in document order.
+ const isAdjacentEdgeOfFocus = (() => {
+ if (
+ focusedSegmentId === undefined ||
+ prevSegmentId === undefined ||
+ nextSegmentId === undefined
+ )
+ return false;
+ if (prevSegmentId === nextSegmentId) return false;
+ const focusedIsPrev = prevSegmentId === focusedSegmentId;
+ const focusedIsNext = nextSegmentId === focusedSegmentId;
+ if (!focusedIsPrev && !focusedIsNext) return false;
+ const prevIndex = segmentOrder.get(prevSegmentId);
+ const nextIndex = segmentOrder.get(nextSegmentId);
+ return prevIndex !== undefined && nextIndex !== undefined && nextIndex - prevIndex === 1;
+ })();
return {
focusedSideIsPrev,
isSameSegmentAsFocus,
+ isAdjacentEdgeOfFocus,
focusedPhraseLink: focus.focusedPhraseLink,
focusedFreeToken: focus.focusedFreeToken,
};
@@ -88,6 +111,7 @@ export function resolveSlotFocus(
export const NO_SLOT_FOCUS: SlotFocusInfo = {
focusedSideIsPrev: undefined,
isSameSegmentAsFocus: false,
+ isAdjacentEdgeOfFocus: false, // off, like isSameSegmentAsFocus, in this no-focus default
focusedPhraseLink: undefined,
focusedFreeToken: undefined,
};
diff --git a/user-questions.md b/user-questions.md
index 0b76a0f1..8b755aab 100644
--- a/user-questions.md
+++ b/user-questions.md
@@ -86,3 +86,66 @@ Decisions made during development that we'd like reviewed:
(the less destructive option) and disables that option when no book is loaded. Alternative: keep
two separate menu items (each a single click, no scope step). Current choice: one menu item plus a
scope-picker dialog.
+
+## User-defined segment boundaries
+
+Segments were previously fixed to verses (rebuilt from USJ on every load). Users can now define
+their own segment boundaries: an **Edit segment boundaries** view toggle exposes per-slot **merge**
+(combine a segment into the one before it) and **split** (start a new segment at a token) controls,
+and linking a phrase across a verse boundary pulls the adjacent segment's **edge** token into the
+focused segment (only the immediate adjacent-edge link buttons are active for this). Boundaries are
+stored on the draft as a delta from the default verse segmentation and carried to the project on
+Save; discontiguous segments are not supported.
+
+Decisions made during development that we'd like reviewed:
+
+1. **Merged-segment separator.** When two verses are merged into one segment, their baseline texts
+ are joined with a single space. This is reasonable for whitespace-delimited scripts but wrong for
+ scriptio continua (Chinese, Thai, …) and for cases where the USFM implied a different break.
+ Should the separator be configurable per project/writing system, or derived from the source?
+
+2. **Split-segment baseline display.** A segment created by splitting a verse currently keeps only
+ the baseline text spanning **its own tokens** — `buildSegment` slices the verse baseline from the
+ first token's `charStart` to the last token's `charEnd` and shifts each token's offsets into the
+ new string, so the `baselineText.slice(charStart, charEnd) === surfaceText` invariant still holds.
+ The trade-off is that any whitespace or punctuation sitting between the split boundary and the
+ adjacent token's edge is dropped from both halves (e.g. the space at position 5 between "Alpha"
+ and "beta" in "Alpha beta." belongs to neither half). The alternative — keeping the whole verse's
+ baseline text under each half — avoids dropping edge characters but duplicates the verse text in
+ the baseline-text display mode. Current choice: trim each half to its own span, accepting the
+ dropped edge whitespace.
+
+3. **Free translation when merging.** A segment's free translation is keyed by segment id. An
+ untouched or merged segment keeps the **leading** verse's id (so its free translation survives);
+ the **absorbed** verse's free translation is retained in storage but hidden while merged, and
+ reappears if the segments are split back apart. Splitting keeps the first half's free translation
+ and starts later halves blank. Is "hide-and-restore" the desired behavior, or should merging
+ prompt the user to keep/discard the absorbed verse's translation?
+
+4. **Boundary edits and the unsaved indicator.** Merging/splitting/pulling a boundary marks the
+ draft dirty (lighting the tab `●`), exactly like a gloss edit. Is this desired, or should
+ boundary edits be treated differently from analysis edits?
+
+5. **Boundary editing is a transient mode.** The **Edit segment boundaries** toggle is local UI
+ state (off on reload), not a persisted project setting, since it changes what the link slots do
+ rather than acting as a display preference. Confirm this is the right treatment.
+
+6. **Chapter superscriptions are a hard wall (interim).** A chapter heading (a `d` descriptive
+ title, e.g. a Psalm superscription) is extracted as a synthetic **verse 0** segment that sits in
+ document order between the previous chapter's last verse and the new chapter's verse 1. As an
+ interim fix, verse 0 is treated as a **hard wall**: no merge, split, move, or cross-segment link
+ may touch either of its boundaries, so its tokens always stay together and no neighboring token
+ is ever pulled into or across it. The cost is a lost capability — you **cannot currently draw a
+ segment that spans a superscription** (e.g. group the end of one chapter with the start
+ of the next when a heading sits between them). The stated goal is for verse 0 to be _invisible_ to
+ boundary redrawing (a redraw acts on the real verses on either side as if the superscription
+ weren't there, while the heading stays its own intact segment), but that conflicts with the
+ contiguous-run segment model and needs a design decision before implementation. Options and the
+ recommendation are worked out in
+ [design-verse-0-agnostic-segmentation.md](design-verse-0-agnostic-segmentation.md). Two questions
+ for stakeholders:
+ - Is the hard wall acceptable as the shipped behavior for now, or is spanning-a-superscription a
+ blocker that must be resolved before release?
+ - When a segment _does_ eventually absorb tokens across a superscription, **where should the
+ heading render and how should its free translation be handled?** (This parallels item 3 above —
+ "Free translation when merging.")