From 63e9220e290624041c8c84e15bd766914f5df4ca Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Thu, 26 Mar 2026 09:10:56 -0500
Subject: [PATCH 01/16] change(web): simplify model.predict() calls

Rather than copying over part of the existing context just to delete it, we can simplify prediction calls by pre-deleting the current token and then applying any relevant deleteLeft transform component to the resulting predictions afterward.

Build-bot: skip build:web
Test-bot: skip
---
 .../worker-thread/src/main/predict-helpers.ts | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 891dd923cc8..f1ddfe38792 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -321,6 +321,7 @@ export function determineContextTransition(
   return transition;
 }
 
+// TODO:  Remove this and its associated unit tests!
 /**
  * Determines where the context for prediction-generation should be rooted and how
  * much of the context it should replace.
@@ -428,8 +429,6 @@ export function determineSuggestionRange(
     }
   }
 
-  tokensToPredict.reverse();
-
   // Can occur when backspacing to the end of a previous word.
   if(tokensToPredict.length == 0) {
     if(tokenSetA.length == 0 || tokenSetB.length == 0) {
@@ -439,6 +438,7 @@ export function determineSuggestionRange(
     tokensToPredict.push(tokenSetB.pop());
   }
 
+  tokensToPredict.reverse();
   tokensToRemove.reverse();
 
   return {
@@ -465,33 +465,37 @@ export function buildAndMapPredictions(
 ): CorrectionPredictionTuple[] {
   const model = transition.final.model;
 
-  // No matter the prediction, once we know the root of the prediction, we'll
-  // always 'replace' the same amount of text.  We can handle this before the
-  // big 'prediction root' loop.
-  const { predictionContext, deleteLeft } = determineSuggestionAlignment(transition, tokenization, model);
+  const applicationTarget = transition.base.displayTokenization;
+  const { tokensToRemove, tokensToPredict } = determineSuggestionRange(applicationTarget, tokenization);
 
-  let correction = match.matchString;
-  let rootCost = match.totalCost;
+  const deleteLeft = tokensToPredict.length > 1 ? 0 : tokensToRemove.reduce((prev, curr) => prev + curr.searchModule.codepointLength, 0);
+
+  // Exists to be extended by the 'correctionTransform' below.
+  const emptyContext: Context = {
+    left: '',
+    startOfBuffer: false,
+    endOfBuffer: false
+  };
 
   // Replace the existing context with the correction.
   const correctionTransform: Transform = {
-    insert: correction,  // insert correction string
-    deleteLeft: deleteLeft,
+    insert: match.matchString,  // insert correction string
+    deleteLeft: 0,
     id: transition.transitionId // The correction should always be based on the most recent external transform/transcription ID.
   }
 
+  const rootCost = match.totalCost;
   const predictionRoot = {
     sample: correctionTransform,
     p: Math.exp(-rootCost * costFactor)
   };
 
-  // Worth considering:  extend Traversal to allow direct prediction lookups?
-  // let traversal = match.finalTraversal; // ...
-  let predictions = predictFromCorrections(model, [predictionRoot], predictionContext);
+  let predictions = predictFromCorrections(model, [predictionRoot], emptyContext);
   predictions.forEach((entry) => {
     entry.preservationTransform = tokenization.taillessTrueKeystroke;
     // // Will need an extra lookup layer if the suggestion is generated from within a cluster.
     // entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization);
+    entry.prediction.sample.transform.deleteLeft = deleteLeft;
   });
 
   return predictions;
@@ -1069,13 +1073,10 @@ export function finalizeSuggestions(
     //
     // Note:  may need adjustment if/when supporting phrase-level correction.
     if(tuple.preservationTransform) {
-      const presDL = tuple.preservationTransform.deleteLeft;
-      const mergedTransform = models.buildMergedTransform(tuple.preservationTransform, prediction.sample.transform);
-      // Any preserved delete-left is applied early because it directly affects the suggestion
-      // root; we need to remove that preserved delete-left here.
-      if(presDL > 0) {
-        mergedTransform.deleteLeft -= presDL;
-      }
+      const mergedTransform = {
+        ...models.buildMergedTransform(tuple.preservationTransform, {...prediction.sample.transform, deleteLeft: 0}),
+        deleteLeft: prediction.sample.transform.deleteLeft
+      };
 
       // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform.
       // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers

From 3f8323757f9f5ebf5c1ccb27ba023d2baa5051d9 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Mon, 27 Apr 2026 16:38:40 -0500
Subject: [PATCH 02/16] change(web): remove determineSuggestionAlignment method
 in favor of determineSuggestionRange

---
 .../worker-thread/src/main/predict-helpers.ts | 70 ---------------
 .../determine-suggestion-alignment.tests.ts   | 90 -------------------
 2 files changed, 160 deletions(-)
 delete mode 100644 web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index f1ddfe38792..ce6cea75695 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -321,76 +321,6 @@ export function determineContextTransition(
   return transition;
 }
 
-// TODO:  Remove this and its associated unit tests!
-/**
- * Determines where the context for prediction-generation should be rooted and how
- * much of the context it should replace.
- * @param transition
- * @param lexicalModel
- * @returns
- */
-export function determineSuggestionAlignment(
-  transition: ContextTransition,
-  tokenization: ContextTokenization,
-  lexicalModel: LexicalModel
-): {
-  /**
-   * The context to use directly for generating predictions from the model.
-   */
-  predictionContext: Context,
-  /**
-   * The total number of characters to delete for generated suggestions
-   * in order to replace the prediction root token entirely.
-   */
-  deleteLeft: number
-} {
-  const transitionEdits = tokenization.transitionEdits;
-  const context = transition.base.context;
-  const postContext = transition.final.context;
-  const inputTransform = transition.inputDistribution[0].sample;
-  let deleteLeft: number;
-
-  // If the context now has more tokens, the token we'll be 'predicting' didn't originally exist.
-  const wordbreak = determineModelWordbreaker(lexicalModel);
-
-  // Is the token under construction newly-constructed / is there no pre-existing root?
-  if(tokenization.taillessTrueKeystroke && transitionEdits?.addedNewTokens) {
-    return {
-      // If the new token is due to whitespace or due to a different input type
-      // that would likely imply a tokenization boundary, infer 'new word' mode.
-      // Apply any part of the context change that is not considered to be up
-      // for correction.
-      predictionContext: models.applyTransform(tokenization.taillessTrueKeystroke, context),
-      // As the word/token being corrected/predicted didn't originally exist,
-      // there's no part of it to 'replace'.  (Suggestions are applied to the
-      // pre-transform state.)
-      deleteLeft: 0
-    };
-    // If the tokenized context length is shorter... sounds like a backspace (or similar).
-  } else if (transitionEdits?.removedOldTokens) {
-    /* Ooh, we've dropped context here.  Almost certainly from a backspace or
-     * similar effect.  Even if we drop multiple tokens... well, we know exactly
-     * how many chars were actually deleted - `inputTransform.deleteLeft`. Since
-     * we replace a word being corrected/predicted, we take length of the
-     * remaining context's tail token in addition to however far was deleted to
-     * reach that state.
-     */
-    deleteLeft = KMWString.length(wordbreak(postContext)) + inputTransform.deleteLeft;
-  } else {
-    // Suggestions are applied to the pre-input context, so get the token's original length.
-    // We're on the same token, so just delete its text for the replacement op.
-    deleteLeft = KMWString.length(wordbreak(context));
-  }
-
-  // Did the wordbreaker (or similar) append a blank token before the caret?  If so,
-  // preserve that by preventing corrections from triggering left-deletion.
-  if(tokenization.tail.isEmptyToken) {
-    deleteLeft = 0;
-  }
-
-  return { predictionContext: context, deleteLeft };
-}
-
 /**
  * Given two ContextTokenizations related by context transition, this function
  * determines the tail-end range of the tokenization affected by the transition.
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts
deleted file mode 100644
index be2d1177571..00000000000
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-import { assert } from 'chai';
-
-import { LexicalModelTypes } from '@keymanapp/common-types';
-import { default as defaultBreaker } from '@keymanapp/models-wordbreakers';
-import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs';
-
-import { ContextState, ContextTransition, determineSuggestionAlignment, models } from "@keymanapp/lm-worker/test-index";
-
-import CasingFunction = LexicalModelTypes.CasingFunction;
-import Context = LexicalModelTypes.Context;
-import TrieModel = models.TrieModel;
-
-const plainApplyCasing: CasingFunction = function(caseToApply, text) {
-  switch(caseToApply) {
-    case 'lower':
-      return text.toLowerCase();
-    case 'upper':
-      return text.toUpperCase();
-    case 'initial':
-      return plainApplyCasing('upper', text.charAt(0)) . concat(text.substring(1));
-    default:
-      return text;
-  }
-};
-
-const plainCasedModel = new TrieModel(
-  jsonFixture('models/tries/english-1000'), {
-    languageUsesCasing: true,
-    applyCasing: plainApplyCasing,
-    wordBreaker: defaultBreaker,
-    searchTermToKey: function(text: string) {
-      // We're dealing with very simple English text; no need to normalize or remove diacritics here.
-      return plainApplyCasing('lower', text);
-    }
-  }
-);
-
-describe('determineSuggestionAlignment', () => {
-  it('handles standard cases well - same token, no preservationTransforms', () => {
-    const context: Context = {
-      left: 'this is techn',
-      startOfBuffer: true,
-      endOfBuffer: true
-    };
-    const baseState = new ContextState(context, plainCasedModel);
-
-    const transition = new ContextTransition(baseState, 0);
-    transition.finalize(transition.base, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]);
-
-    // transition, model
-    const results = determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel);
-
-    assert.deepEqual(results.predictionContext, context);
-    assert.equal(results.deleteLeft, "techn".length);
-  });
-
-  it('handles extension of prior token after backspace', () => {
-    const context: Context = {
-      left: 'this is tech ',
-      startOfBuffer: true,
-      endOfBuffer: true
-    };
-    const baseState = new ContextState(context, plainCasedModel);
-
-    const transition = baseState.analyzeTransition(context, [{sample: { insert: '', deleteLeft: 1 }, p: 1}])
-
-    // transition, model
-    const results = determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel);
-
-    assert.deepEqual(results.predictionContext, context);
-    assert.equal(results.deleteLeft, "tech".length + 1 /* for the deleted whitespace */);
-  });
-
-  it('handles extension of prior token after complex input with delete-left', () => {
-    const context: Context = {
-      left: 'this is tech ',
-      startOfBuffer: true,
-      endOfBuffer: true
-    };
-    const baseState = new ContextState(context, plainCasedModel);
-
-    const transition = baseState.analyzeTransition(context, [{sample: { insert: 'n', deleteLeft: 1 }, p: 1}])
-
-    // transition, model
-    const results = determineSuggestionAlignment(transition, transition.final.displayTokenization, plainCasedModel);
-
-    assert.deepEqual(results.predictionContext, context);
-    assert.equal(results.deleteLeft, "techn".length + 1 /* for the deleted whitespace */);
-  });
-});
\ No newline at end of file

From d558c868e3335cf821bac237ec01dc153262c46f Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Tue, 14 Apr 2026 14:42:12 -0500
Subject: [PATCH 03/16] feat(web): add prepareTokenizationSearch helper method

This method is designed to determine which range of tokens, within each context variant, should be eligible for correction when generating predictions and corrections.

Build-bot: skip build:web
Test-bot: skip
---
 .../main/correction/tokenization-corrector.ts |  2 +-
 .../worker-thread/src/main/predict-helpers.ts | 34 +++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
index e47c6b04fad..b111aa9d394 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
@@ -154,7 +154,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
   constructor(
     tokenization: ContextTokenization,
     tailCorrectionLength: number,
-    filterClosure: (token: ContextToken) => boolean
+    filterClosure: (token: ContextToken, index?: number) => boolean
   ) {
     this.tokenization = tokenization;
     this.tailCorrectionLength = tailCorrectionLength;
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index ce6cea75695..4b87f0c3e39 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -13,6 +13,7 @@ import { ContextTransition } from './correction/context-transition.js';
 import { ExecutionTimer } from './correction/execution-timer.js';
 import ModelCompositor from './model-compositor.js';
 import { getBestTokenMatches } from './correction/distance-modeler.js';
+import { TokenizationCorrector } from './correction/tokenization-corrector.js';
 import { TokenResultMapping } from './correction/token-result-mapping.js';
 
 const searchForProperty = defaultWordbreaker.searchForProperty;
@@ -431,6 +432,39 @@ export function buildAndMapPredictions(
   return predictions;
 }
 
+export function prepareTokenizationSearch(
+  transition: ContextTransition,
+  tokenizations: ContextTokenization[]
+) {
+  // Goal - determine what parts of each tokenization are searchable & prep them for correction-search.
+  const tokenizationAnalyses = tokenizations.map((tokenization) => {
+    return {
+      tokenization: tokenization,
+      analysis: determineSuggestionRange(transition.base.displayTokenization, tokenization)
+    };
+  });
+
+  const biggestCommonRemoval = tokenizationAnalyses.reduce(
+    (biggest, current) => biggest.length > current.analysis.tokensToRemove.length ? biggest : current.analysis.tokensToRemove,
+    [] as ContextToken[]
+  );
+
+  const tokenizationSetup = tokenizationAnalyses.map((tuple) => {
+    // These tokens are unaffected by the input whatsoever, though their
+    // probability may affect thresholding for the non-locked tokens.
+    const unaffectedTokenCount = biggestCommonRemoval.length - tuple.analysis.tokensToRemove.length;
+
+    const mutatedLength = tuple.analysis.tokensToPredict.length;
+    return new TokenizationCorrector(tuple.tokenization, mutatedLength, (token, index) => {
+      return index >= unaffectedTokenCount  // is a modified token
+        && index == mutatedLength - 1       // TEMP: adjacent to the caret (TO BE REMOVED)
+        && correctionValidForAutoSelect(token.exampleInput);  // and is eligible text-correction
+    });
+  });
+
+  return tokenizationSetup;
+}
+
 /**
  * This method performs the correction-search and model-lookup operations for
  * prediction generation by using the user's context state and potential

From fdd65c0f7476ab21ce84a5c13f0a636557c2f0b5 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Thu, 30 Apr 2026 08:28:43 -0500
Subject: [PATCH 04/16] feat(web): rework buildAndMapPredictions for
 multi-token predictions

Build-bot: skip build:web
Test-bot: skip
---
 .../main/correction/token-result-mapping.ts   |   4 +
 .../main/correction/tokenization-corrector.ts |  13 +-
 .../worker-thread/src/main/predict-helpers.ts | 144 +++++++++++++++---
 3 files changed, 132 insertions(+), 29 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts
index c85e9e9b6d4..862ac1f740f 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts
@@ -75,6 +75,10 @@ export class TokenResultMapping implements CorrectionResultMapping<SearchNode>,
     return this.node;
   }
 
+  get inputCount(): number {
+    return this.matchingSpace.inputCount;
+  }
+
   get inputSequence(): ProbabilityMass<Transform>[] {
     return this.node.priorInput;
   }
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
index b111aa9d394..3b71b2eb4f8 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
@@ -35,7 +35,8 @@ export type TokenResult = {
   matchString: string,
   inputSamplingCost: number,
   knownCost: number,
-  totalCost: number
+  totalCost: number,
+  inputCount: number
 }
 
 /**
@@ -195,9 +196,10 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
       const lockedResult = uncorrectable.bestExample;
       this._generatedTokenResults.set(uncorrectable.spaceId, {
         matchString: lockedResult.text,
-        inputSamplingCost: -Math.log(lockedResult.p),
-        knownCost: 0,
-        totalCost: -Math.log(lockedResult.p)
+        inputSamplingCost: 0,
+        knownCost: -Math.log(lockedResult.p),
+        totalCost: -Math.log(lockedResult.p),
+        inputCount: uncorrectable.inputCount
       });
     });
 
@@ -299,7 +301,8 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
           matchString: lockedResult.text,
           inputSamplingCost: -Math.log(lockedResult.p),
           knownCost: MAX_EDIT_THRESHOLD_FACTOR, // we'll use the same threshold at which further search is terminated.
-          totalCost: -Math.log(lockedResult.p) + MAX_EDIT_THRESHOLD_FACTOR * EDIT_DISTANCE_COST_SCALE
+          totalCost: -Math.log(lockedResult.p) + MAX_EDIT_THRESHOLD_FACTOR * EDIT_DISTANCE_COST_SCALE,
+          inputCount: correctableToUpdate.inputCount
         });
       }
 
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 4b87f0c3e39..ba892d76062 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -12,9 +12,9 @@ import { ContextState, determineContextSlideTransform } from './correction/conte
 import { ContextTransition } from './correction/context-transition.js';
 import { ExecutionTimer } from './correction/execution-timer.js';
 import ModelCompositor from './model-compositor.js';
-import { getBestTokenMatches } from './correction/distance-modeler.js';
+import { EDIT_DISTANCE_COST_SCALE, getBestTokenMatches } from './correction/distance-modeler.js';
 import { TokenizationCorrector } from './correction/tokenization-corrector.js';
-import { TokenResultMapping } from './correction/token-result-mapping.js';
+import { TokenizationResultMapping } from './correction/tokenization-result-mapping.js';
 
 const searchForProperty = defaultWordbreaker.searchForProperty;
 
@@ -390,11 +390,10 @@ export function determineSuggestionRange(
  */
 export function buildAndMapPredictions(
   transition: ContextTransition,
-  tokenization: ContextTokenization,
-  match: Readonly<TokenResultMapping>,
-  costFactor: number
+  tokenizationCorrection: TokenizationResultMapping,
 ): CorrectionPredictionTuple[] {
   const model = transition.final.model;
+  const tokenization = tokenizationCorrection.matchingSpace.tokenization;
 
   const applicationTarget = transition.base.displayTokenization;
   const { tokensToRemove, tokensToPredict } = determineSuggestionRange(applicationTarget, tokenization);
@@ -408,28 +407,123 @@ export function buildAndMapPredictions(
     endOfBuffer: false
   };
 
-  // Replace the existing context with the correction.
-  const correctionTransform: Transform = {
-    insert: match.matchString,  // insert correction string
-    deleteLeft: 0,
-    id: transition.transitionId // The correction should always be based on the most recent external transform/transcription ID.
-  }
+  const correctionTransforms = tokenizationCorrection.matchedResult.map((correction, i) => {
+    return {
+      insert: correction.matchString,  // insert correction string
+      deleteLeft: i == 0 ? deleteLeft : 0,
+      id: transition.transitionId // The correction should always be based on the most recent external transform/transcription ID.
+    };
+  });
 
-  const rootCost = match.totalCost;
-  const predictionRoot = {
-    sample: correctionTransform,
-    p: Math.exp(-rootCost * costFactor)
-  };
+  const correctionCost = tokenizationCorrection.matchedResult.map((correction) => {
+    let rootCost = correction.totalCost;
+    /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost
+     * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if
+     * there are significantly more likely words.  We only need this to allow very minor fat-finger
+     * adjustments for 100% keystroke-sequence corrections in order to prevent finickiness on
+     * key borders.
+     *
+     * Technically, the probabilities this produces won't be normalized as-is... but there's no
+     * true NEED to do so for it, even if it'd be 'nice to have'.  Consistently tracking when
+     * to apply it could become tricky, so it's simpler to leave out.
+     *
+     * Worst-case, it's possible to temporarily add normalization if a code deep-dive
+     * is needed in the future.
+     */
+    if(correction.inputCount <= 1) {
+      /* Suppose a key distribution:  most likely with p=0.5, second-most with 0.4 - a pretty
+       * ambiguous case that would only arise very near the center of the boundary between two keys.
+       * Raising (0.5/0.4)^16 ~= 35.53.  (At time of writing, SINGLE_CHAR_KEY_PROB_EXPONENT = 16.)
+       * That seems 'within reason' for correction very near boundaries.
+       *
+       * So, with the second-most-likely key being that close in probability, its best suggestion
+       * must be ~ 35.5x more likely than that of the truly-most-likely key to "win".  So, it's not
+       * a HARD cutoff, but more of a 'soft' one.  Keeping the principles in mind documented above,
+       * it's possible to tweak this to a more harsh or lenient setting if desired, rather than
+       * being totally "all or nothing" on which key is taken for highly-ambiguous keypresses.
+       */
+      rootCost *= ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT;  // note the `Math.exp` below.
+    }
+
+    return Math.exp(-rootCost);
+  }).reduce((accum, curr) => accum * curr, 1);
 
-  let predictions = predictFromCorrections(model, [predictionRoot], emptyContext);
-  predictions.forEach((entry) => {
-    entry.preservationTransform = tokenization.taillessTrueKeystroke;
-    // // Will need an extra lookup layer if the suggestion is generated from within a cluster.
-    // entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization);
-    entry.prediction.sample.transform.deleteLeft = deleteLeft;
+  const predictionComponents = correctionTransforms.map((correctionTransform, i) => {
+    const predictions = model.predict(correctionTransform, emptyContext);
+
+    // Failsafe:  if there are no matching predictions, create a fake prediction
+    // matching the original text.
+    if(predictions.length == 0) {
+      predictions.push({
+        sample: {
+          transform: correctionTransform,
+          displayAs: correctionTransform.insert
+        },
+        // It's not found in the lexicon, so we'll take a low probability for it.
+        //
+        // Edit penalties will be applied via the correction component separately later on.
+        p: -Math.exp(EDIT_DISTANCE_COST_SCALE)
+      });
+    }
+
+    // Regardless of origin, overwrite the transform's deleteLeft value with what it should actually hold.
+    predictions.forEach((entry) => {
+      entry.sample.transform.deleteLeft = deleteLeft;
+    });
+
+    // Use traversals if possible - extract the most likely entry that is on the traversal,
+    // rather than predicting (and possibly extending) tokens not adjacent to the caret.
+    //
+    // Also, fall back to the actual correction string should prediction not be valid here.
+    return i == correctionTransforms.length - 1 ? predictions : [predictions[0]];
+  });
+
+  // Constructs a common prefix for all but the final token's component.
+  const predictionPrefix = predictionComponents
+    .slice(0, predictionComponents.length-1)
+    .reduce((accum, curr) => models.buildMergedTransform(accum, curr[0].sample.transform), { insert: '', deleteLeft: 0 });
+  const prefixProb = predictionComponents
+    .slice(0, predictionComponents.length-1)
+    .reduce((accum, curr) => accum * curr[0].p, 1)
+
+  const completePredictionTuples: CorrectionPredictionTuple[] = predictionComponents[predictionComponents.length-1].map((prediction) => {
+    const predictionCost = prediction.p * prefixProb;
+    return {
+      // Will need to do this differently.  We want to have each component
+      // individualized b/c casing. Case should be maintained for prior tokens
+      // and managed independently for each.
+      //
+      // detectCurrentCasing is designed to determine casing based on context;
+      // makes sense for 'context up to each token'.
+      //
+      // applySuggestionCasing applies onto suggestions, so we'll want to build
+      // the FULL suggestion AFTER applying casing changes (to each token's
+      // suggestion component).
+      prediction: {
+        sample: {
+          transformId: transition.transitionId,
+          transform: models.buildMergedTransform(predictionPrefix, prediction.sample.transform),
+          displayAs: models.buildMergedTransform(predictionPrefix, prediction.sample.transform).insert // should composite the displayAs strings instead...
+        },
+        p: predictionCost,
+      },
+      correction: {
+        // Is used partly for word-casing, partly for auto-select enabling.
+        sample: '', // plain correction string instead...
+        p: correctionCost
+      },
+      totalProb: predictionCost * correctionCost,
+      matchLevel: SuggestionSimilarity.none,
+      // Long-term, we shouldn't have `.preservationTransform` here.
+      //
+      // Needed for now until the search actually operates based on
+      // TokenizationCorrector, rather than the half-converted use currently in
+      // place.
+      preservationTransform: tokenization.taillessTrueKeystroke
+    }
   });
 
-  return predictions;
+  return completePredictionTuples;
 }
 
 export function prepareTokenizationSearch(
@@ -576,7 +670,9 @@ export async function correctAndEnumerate(
      */
     const costFactor = (tokenization.tail.inputCount <= 1) ? ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1;
 
-    const predictions = buildAndMapPredictions(transition, tokenization, match, costFactor);
+    const suggestionRange = determineSuggestionRange(transition.base.displayTokenization, tokenization)
+    const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true);
+    const predictions = buildAndMapPredictions(transition, new TokenizationResultMapping([match], corrector));
 
     // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions.
     if(predictions.length > 0 && bestCorrectionCost === undefined) {

From 6c1170d27a3dfc108201e3ce2ae52a2fcbc30bd5 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Wed, 6 May 2026 13:21:08 -0500
Subject: [PATCH 05/16] change(web): simplify mapWhitespacedTokenization
 requirements

To better handle inputs that shift the word-boundary in some custom models and models released before Keyman 14.0, this PR provides generalized re-use of the whitespace-based token-transition algorithm used for our most prominently-supported models.

Build-bot: skip build:web
Test-bot: skip
---
 .../main/correction/context-tokenization.ts   | 341 ++++++++++--------
 1 file changed, 187 insertions(+), 154 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
index fc2f81615c1..95d3125c2a9 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
@@ -334,7 +334,7 @@ export class ContextTokenization {
   }
 
   /**
-   * Given the existing tokenization and an incoming input `Transform`, this
+   * Given this existing tokenization and an incoming input `Transform`, this
    * method precomputes how both the current, pre-application tokenization will
    * be altered and how the incoming Transform will be tokenized.
    *
@@ -351,158 +351,7 @@ export class ContextTokenization {
     transform: Transform,
     edgeOptions?: EdgeWindowOptions
   ): TokenizationTransitionEdits {
-    // Step 4:  now that our window's been properly updated, determine what the
-    // input's effects on the context is.
-    //
-    // Context does not slide within this function.
-    //
-    // Assumption:  this alignment cannot fail; we KNOW there's a solid
-    // before-and-after relationship here, and we can base it on the results of
-    // a prior syncToSourceWindow call.
-    //
-    // We don't wish to do the full tokenization here - we only want to check
-    // over the last few tokens that might reasonably shift.  We also want to
-    // batch effects.
-
-    // Do not mutate the original transform; it can cause unexpected assertion
-    // effects in unit tests.
-    const edgeTransform = {...transform, deleteRight: transform.deleteRight || 0};
-    const edgeWindow = buildEdgeWindow(this.tokens, edgeTransform, false, edgeOptions);
-    const {
-      retokenizationText,
-      editBoundary,
-      sliceIndex: edgeSliceIndex
-    } = edgeWindow;
-    // Prevent mutation of the original return property.
-    const stackedDeletes = edgeWindow.deleteLengths.slice();
-
-    const tokenize = determineModelTokenizer(lexicalModel);
-    const postTokenization = tokenize({left: retokenizationText + transform.insert, startOfBuffer: true, endOfBuffer: true}).left.map(t => t.text);
-    if(postTokenization.length == 0) {
-      postTokenization.push('');
-    }
-    const { stackedInserts, firstInsertPostIndex } = traceInsertEdits(postTokenization, transform);
-
-    // What does the edge's retokenization look like when we remove the inserted portions?
-    const retokenizedEdge = postTokenization.slice(0, firstInsertPostIndex);
-    const insertBoundaryToken = postTokenization[firstInsertPostIndex];
-
-    // Note:  requires that helpers have not mutated `stackedInserts`.
-    const uninsertedBoundaryToken = KMWString.substring(insertBoundaryToken, 0, KMWString.lastIndexOf(insertBoundaryToken, stackedInserts[0]));
-
-    // Do not preserve empty tokens here, even if tokenization normally would produce one.
-    // It's redundant and replaceable for tokenization batching efforts.
-    if(uninsertedBoundaryToken != '') {
-      retokenizedEdge.push(uninsertedBoundaryToken);
-    }
-
-    // We've found the root token within the root context state to which deletes (and inserts)
-    // may be applied.
-    // We've also found the last post-application token to which transform changes contributed.
-    // How do these indices line up - we need to properly construct and index our transforms,
-    // but 'merge' and 'split' edits can mess up that indexing.
-
-    const currentTokens = this.tokens;
-    const preTokenization = currentTokens
-      .slice(edgeSliceIndex, editBoundary.tokenIndex+1)
-      .map(t => t.exampleInput);
-
-    // Determine the effects of splits & merges as applied to the original
-    // cached context state.
-    const { mergeOffset, splitOffset, editPath, merges, splits } = analyzePathMergesAndSplits(
-      preTokenization,
-      postTokenization.slice(0, firstInsertPostIndex+1)
-    );
-
-    /*
-     * Final steps:  We can now safely index the transforms.  Let's do it!
-     * 1. Determine the first index a Transform may align to
-     * 2. Build the transforms
-     *
-     * Notes:
-     * - text applied to the end of a 'merged' token at the tail:  should have
-     *   index 0, not -1.
-     *   - pretokenization index will mismatch by -1: -SUM(merge size - 1)
-     *   - Ex: can + ' + t => can't
-     *          -1   0          0
-     * - text applied to the end of a 'split' token at the tail:  should also
-     *   have index 0, not 1.
-     *   - posttokenization index will mismatch by +1: SUM(split size - 1)
-     *   - new token after 'split':  index 1
-     *   - Ex: can' + ? => can + ' + ?
-     *          0          -1    0   1
-     *
-     * The first transform applies at the end of the retokenized zone and its
-     * associated index.  The question:  were there deletes that occurred?
-     */
-
-    const lastEditedPreTokenIndex = editBoundary.tokenIndex - edgeSliceIndex;
-    let shiftDeletes = false;
-    // first popped entry == 0 - a delete no-op.
-    if(stackedDeletes[stackedDeletes.length - 1] == 0) {
-      // the boundary indices found by both methods above differ
-      if(lastEditedPreTokenIndex + mergeOffset != firstInsertPostIndex + splitOffset) {
-        shiftDeletes = true;
-      }
-
-      // there are no inserts, so we don't affect the boundary token we landed on.
-      if(stackedDeletes.length > 1 && transform.insert == '') {
-        shiftDeletes = true;
-      }
-    }
-
-    if(shiftDeletes) {
-      // Do not add a zero-length delete if we're not actually altering the
-      // corresponding token at all.
-      stackedDeletes.pop();
-    }
-
-    // The first delete always applies to index 0. If the built edge window
-    // omits a context-final empty-string, adjust the tokenization indices
-    // accordingly.
-    const tailIndex = 0 - (stackedDeletes.length - 1) + (editBoundary.omitsEmptyToken ? -1 : 0);
-    // Mutates stackedInserts, stackedDeletes.
-    const baseRemovedTokenCount = Math.max(0, stackedDeletes.length - stackedInserts.length);
-    const transformMap = assembleTransforms(stackedInserts, stackedDeletes, tailIndex);
-
-    // If there's an empty transform in the 0 position and we already know we're
-    // dropping tokens - and only deleting - we're dropping an
-    // otherwise-untracked empty token - make sure it's included!
-    const droppedFinalTransform = baseRemovedTokenCount > 0 && transform.insert == '' && TransformUtils.isEmpty(transformMap.get(0));
-    // Past that, if we have more delete entries than insert entries for our transforms, we
-    // dropped some tokens outright.
-    const removedTokenCount = baseRemovedTokenCount + (droppedFinalTransform ? 1 : 0);
-
-    // Final step:  check for any unexpected boundary shifts not mappable to 'merge' / 'split'
-    // and not caused by transforms.  All transforms always apply in sequence at the end.
-    const unmappedEdits: EditTuple<EditOperation>[] = [];
-    for(let i = 0; i < editPath.length - transformMap.size; i++) {
-      const op = editPath[i].op;
-      switch(op) {
-        case 'merge':
-        case 'split':
-          // already calculated
-          // can fall through to the `continue;` line.
-        case 'match':
-          continue;
-        default:
-          // Should only be substitutions here.
-          // We may wish to add extra analysis in the future when supporting
-          // prediction from multiple competing tokenizations.
-          unmappedEdits.push(editPath[i] as EditTuple<EditOperation>);
-      }
-    }
-
-    return {
-      alignment: {
-        edgeWindow: {...edgeWindow, retokenization: retokenizedEdge},
-        merges,
-        splits,
-        unmappedEdits,
-        removedTokenCount
-      },
-      tokenizedTransform: transformMap,
-    };
+    return mapWhitespacedTokenization(this.tokens, lexicalModel, transform, edgeOptions);
   }
 
   /**
@@ -763,6 +612,190 @@ interface RetokenizedEdgeWindow extends EdgeWindow {
   retokenization: string[];
 }
 
+export interface ContextTokenLike {
+  exampleInput: string;
+  isPartial?: boolean;
+  sourceRangeKey?: string;
+}
+
+/**
+ * Given an existing tokenization and an incoming input `Transform`, this
+ * method precomputes how both the current, pre-application tokenization will
+ * be altered and how the incoming Transform will be tokenized.
+ *
+ * This function is able to operate with a reduced interface, not requiring
+ * the full ContextToken/ContextState/etc subsystem and its related
+ * SearchQuotientNode requirements.
+ *
+ * Note that this method is designed for use with languages that employ
+ * classical space-based wordbreaking.  Do not use it for languages that need
+ * dictionary-based wordbreaking support!
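+ * @param tokens       The existing tokenization, as a sequence of token-like entries.
+ * @param lexicalModel The model whose tokenizer is used to retokenize the affected text.
+ * @param transform    The incoming input `Transform` to map onto the tokenization.
+ * @param edgeOptions  Optional settings forwarded to `buildEdgeWindow`.
+ * @returns The alignment edits for the existing tokenization and the tokenized form of `transform`.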
+ */
+export function mapWhitespacedTokenization(
+  tokens: ContextTokenLike[],
+  lexicalModel: LexicalModel,
+  transform: Transform,
+  edgeOptions?: EdgeWindowOptions
+): TokenizationTransitionEdits {
+  // Step 4:  now that our window's been properly updated, determine what the
+  // input's effects on the context are.
+  //
+  // Context does not slide within this function.
+  //
+  // Assumption:  this alignment cannot fail; we KNOW there's a solid
+  // before-and-after relationship here, and we can base it on the results of
+  // a prior syncToSourceWindow call.
+  //
+  // We don't wish to do the full tokenization here - we only want to check
+  // over the last few tokens that might reasonably shift.  We also want to
+  // batch effects.
+
+  // Do not mutate the original transform; it can cause unexpected assertion
+  // effects in unit tests.
+  const edgeTransform = {...transform, deleteRight: transform.deleteRight || 0};
+  const edgeWindow = buildEdgeWindow(tokens, edgeTransform, false, edgeOptions);
+  const {
+    retokenizationText,
+    editBoundary,
+    sliceIndex: edgeSliceIndex
+  } = edgeWindow;
+  // Prevent mutation of the original return property.
+  const stackedDeletes = edgeWindow.deleteLengths.slice();
+
+  const tokenize = determineModelTokenizer(lexicalModel);
+  const postTokenization = tokenize({left: retokenizationText + transform.insert, startOfBuffer: true, endOfBuffer: true}).left.map(t => t.text);
+  if(postTokenization.length == 0) {
+    postTokenization.push('');
+  }
+  const { stackedInserts, firstInsertPostIndex } = traceInsertEdits(postTokenization, transform);
+
+  // What does the edge's retokenization look like when we remove the inserted portions?
+  const retokenizedEdge = postTokenization.slice(0, firstInsertPostIndex);
+  const insertBoundaryToken = postTokenization[firstInsertPostIndex];
+
+  // Note:  requires that helpers have not mutated `stackedInserts`.
+  const uninsertedBoundaryToken = KMWString.substring(insertBoundaryToken, 0, KMWString.lastIndexOf(insertBoundaryToken, stackedInserts[0]));
+
+  // Do not preserve empty tokens here, even if tokenization normally would produce one.
+  // It's redundant and replaceable for tokenization batching efforts.
+  if(uninsertedBoundaryToken != '') {
+    retokenizedEdge.push(uninsertedBoundaryToken);
+  }
+
+  // We've found the root token within the root context state to which deletes (and inserts)
+  // may be applied.
+  // We've also found the last post-application token to which transform changes contributed.
+  // How do these indices line up - we need to properly construct and index our transforms,
+  // but 'merge' and 'split' edits can mess up that indexing.
+
+  const currentTokens = tokens;
+  const preTokenization = currentTokens
+    .slice(edgeSliceIndex, editBoundary.tokenIndex+1)
+    .map(t => t.exampleInput);
+
+  // Determine the effects of splits & merges as applied to the original
+  // cached context state.
+  const { mergeOffset, splitOffset, editPath, merges, splits } = analyzePathMergesAndSplits(
+    preTokenization,
+    postTokenization.slice(0, firstInsertPostIndex+1)
+  );
+
+  /*
+    * Final steps:  We can now safely index the transforms.  Let's do it!
+    * 1. Determine the first index a Transform may align to
+    * 2. Build the transforms
+    *
+    * Notes:
+    * - text applied to the end of a 'merged' token at the tail:  should have
+    *   index 0, not -1.
+    *   - pretokenization index will mismatch by -1: -SUM(merge size - 1)
+    *   - Ex: can + ' + t => can't
+    *          -1   0          0
+    * - text applied to the end of a 'split' token at the tail:  should also
+    *   have index 0, not 1.
+    *   - posttokenization index will mismatch by +1: SUM(split size - 1)
+    *   - new token after 'split':  index 1
+    *   - Ex: can' + ? => can + ' + ?
+    *          0          -1    0   1
+    *
+    * The first transform applies at the end of the retokenized zone and its
+    * associated index.  The question:  were there deletes that occurred?
+    */
+
+  const lastEditedPreTokenIndex = editBoundary.tokenIndex - edgeSliceIndex;
+  let shiftDeletes = false;
+  // first popped entry == 0 - a delete no-op.
+  if(stackedDeletes[stackedDeletes.length - 1] == 0) {
+    // the boundary indices found by both methods above differ
+    if(lastEditedPreTokenIndex + mergeOffset != firstInsertPostIndex + splitOffset) {
+      shiftDeletes = true;
+    }
+
+    // there are no inserts, so we don't affect the boundary token we landed on.
+    if(stackedDeletes.length > 1 && transform.insert == '') {
+      shiftDeletes = true;
+    }
+  }
+
+  if(shiftDeletes) {
+    // Do not add a zero-length delete if we're not actually altering the
+    // corresponding token at all.
+    stackedDeletes.pop();
+  }
+
+  // The first delete always applies to index 0. If the built edge window
+  // omits a context-final empty-string, adjust the tokenization indices
+  // accordingly.
+  const tailIndex = 0 - (stackedDeletes.length - 1) + (editBoundary.omitsEmptyToken ? -1 : 0);
+  // Mutates stackedInserts, stackedDeletes.
+  const baseRemovedTokenCount = Math.max(0, stackedDeletes.length - stackedInserts.length);
+  const transformMap = assembleTransforms(stackedInserts, stackedDeletes, tailIndex);
+
+  // If there's an empty transform in the 0 position and we already know we're
+  // dropping tokens - and only deleting - we're dropping an
+  // otherwise-untracked empty token - make sure it's included!
+  const droppedFinalTransform = baseRemovedTokenCount > 0 && transform.insert == '' && TransformUtils.isEmpty(transformMap.get(0));
+  // Past that, if we have more delete entries than insert entries for our transforms, we
+  // dropped some tokens outright.
+  const removedTokenCount = baseRemovedTokenCount + (droppedFinalTransform ? 1 : 0);
+
+  // Final step:  check for any unexpected boundary shifts not mappable to 'merge' / 'split'
+  // and not caused by transforms.  All transforms always apply in sequence at the end.
+  const unmappedEdits: EditTuple<EditOperation>[] = [];
+  for(let i = 0; i < editPath.length - transformMap.size; i++) {
+    const op = editPath[i].op;
+    switch(op) {
+      case 'merge':
+      case 'split':
+        // already calculated
+        // can fall through to the `continue;` line.
+      case 'match':
+        continue;
+      default:
+        // Should only be substitutions here.
+        // We may wish to add extra analysis in the future when supporting
+        // prediction from multiple competing tokenizations.
+        unmappedEdits.push(editPath[i] as EditTuple<EditOperation>);
+    }
+  }
+
+  return {
+    alignment: {
+      edgeWindow: {...edgeWindow, retokenization: retokenizedEdge},
+      merges,
+      splits,
+      unmappedEdits,
+      removedTokenCount
+    },
+    tokenizedTransform: transformMap,
+  };
+}
+
 /**
  * Constructs a window on one side of the represented context that is aligned to
  * existing tokenization.
@@ -777,7 +810,7 @@ interface RetokenizedEdgeWindow extends EdgeWindow {
  * @returns
  */
 export function buildEdgeWindow(
-  currentTokens: ContextToken[],
+  currentTokens: ContextTokenLike[],
   // Requires deleteRight be explicitly set.
   transform: Transform & { deleteRight: number },
   applyAtFront: boolean,

From daea6e561f016f41f9d60d4c4b95dbaef4a7ff94 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Wed, 6 May 2026 15:07:43 -0500
Subject: [PATCH 06/16] change(web): rework traversalless prediction, add mild
 whitespace-correction

Build-bot: skip build:web
Test-bot: skip
---
 .../templates/src/tokenization.ts             |   4 +
 .../worker-thread/src/main/model-helpers.ts   |   3 +-
 .../worker-thread/src/main/predict-helpers.ts | 131 +++++++-----------
 .../predict-from-corrections.tests.ts         |   8 +-
 4 files changed, 58 insertions(+), 88 deletions(-)

diff --git a/web/src/engine/predictive-text/templates/src/tokenization.ts b/web/src/engine/predictive-text/templates/src/tokenization.ts
index fd8ed28d5ca..47ef927fa5b 100644
--- a/web/src/engine/predictive-text/templates/src/tokenization.ts
+++ b/web/src/engine/predictive-text/templates/src/tokenization.ts
@@ -95,6 +95,10 @@ export function tokenize(
     currentIndex = nextIndex;
   }
 
+  if(tokenization.left.length == 0) {
+    tokenization.left.push({text: '', isWhitespace: false});
+  }
+
   // New step 2: handle any rejoins needed.
 
   // Handle any desired special handling for directly-pre-caret scenarios - where for this
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-helpers.ts
index 071cad588c5..7b5115e308f 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/model-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/model-helpers.ts
@@ -71,7 +71,8 @@ export function determineModelTokenizer(model: LexicalModel) {
     if(model.wordbreaker) {
       return models.tokenize(model.wordbreaker, context);
     } else {
-      return null;
+      // Not ideal for pre-14.0 models, but it'll do for now.
+      return models.tokenize(wordBreakers.default, context);
     }
   }
 }
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index ba892d76062..4d73e53e8d3 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -5,7 +5,7 @@ import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbre
 
 import TransformUtils from './transformUtils.js';
 import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
-import { ContextTokenization } from './correction/context-tokenization.js';
+import { ContextTokenization, mapWhitespacedTokenization } from './correction/context-tokenization.js';
 import { ContextTracker } from './correction/context-tracker.js';
 import { ContextToken } from './correction/context-token.js';
 import { ContextState, determineContextSlideTransform } from './correction/context-state.js';
@@ -155,77 +155,6 @@ export function tupleDisplayOrderSort(a: CorrectionPredictionTuple, b: Correctio
   return b.totalProb - a.totalProb;
 }
 
-export async function correctAndEnumerateWithoutTraversals(
-  lexicalModel: LexicalModel,
-  transformDistribution: Distribution<Transform>,
-  context: Context
-): Promise<{
-  /**
-   * For models that support correction-search caching, this provides the
-   * cached object corresponding to this method's operation.
-   *
-   * Otherwise, is `null`.
-   */
-  postContextState?: ContextState;
-
-  /**
-   * The suggestions generated based on the user's input state.
-   */
-  rawPredictions: CorrectionPredictionTuple[];
-
-  /**
-   * The id of a prior ContextTransition event that triggered a Suggestion found
-   * at the end of the Context.  Will be undefined if no edits have occurred
-   * since the Suggestion was applied.
-   */
-  revertableTransitionId?: number
-}> {
-  const inputTransform = transformDistribution[0].sample;
-  let rawPredictions: CorrectionPredictionTuple[] = [];
-
-  let predictionRoots: ProbabilityMass<Transform>[];
-
-  // Only allow new-word suggestions if space was the most likely keypress.
-  const allowSpace = TransformUtils.isWhitespace(inputTransform);
-  const allowBksp = TransformUtils.isBackspace(inputTransform);
-
-  // Generates raw prediction distributions for each valid input.  Can only 'correct'
-  // against the final input.
-  //
-  // This is the old, 12.0-13.0 'correction' style.
-  if(allowSpace) {
-    // Detect start of new word; prevent whitespace loss here.
-    predictionRoots = [{sample: inputTransform, p: 1.0}];
-  } else {
-    predictionRoots = transformDistribution.map((alt) => {
-      let transform = alt.sample;
-
-      // Filter out special keys unless they're expected.
-      if(TransformUtils.isWhitespace(transform) && !allowSpace) {
-        return null;
-      } else if(TransformUtils.isBackspace(transform) && !allowBksp) {
-        return null;
-      }
-
-      return alt;
-    });
-  }
-
-  // Remove `null` entries.
-  predictionRoots = predictionRoots.filter(tuple => !!tuple);
-
-  // Running in bulk over all suggestions, duplicate entries may be possible.
-  rawPredictions = predictFromCorrections(lexicalModel, predictionRoots, context);
-  if(allowSpace) {
-    rawPredictions.forEach((entry) => entry.preservationTransform = inputTransform);
-  }
-
-  return {
-    postContextState: null,
-    rawPredictions: rawPredictions
-  };
-}
-
 /**
  * Determines the most recent ContextState corresponding to the incoming
  * Context, assuming no context-reset operations have occurred.  Their contents
@@ -602,7 +531,10 @@ export async function correctAndEnumerate(
   // It's mostly here to support models compiled before Keyman 14.0, which was
   // when the `LexiconTraversal` pattern was established.
   if(!contextTracker) {
-    return correctAndEnumerateWithoutTraversals(lexicalModel, transformDistribution, context);
+    return {
+      postContextState: null,
+      rawPredictions: correctAndEnumerateWithoutTraversals(lexicalModel, transformDistribution, context)
+    };
   }
 
   // 'else':  the current, 14.0+ pattern, which is able to leverage
@@ -744,35 +676,68 @@ export function shouldStopSearchingEarly(
  * @param context
  * @returns
  */
-export function predictFromCorrections(
+export function correctAndEnumerateWithoutTraversals(
   lexicalModel: LexicalModel,
   corrections: ProbabilityMass<Transform>[],
   context: Context
 ): CorrectionPredictionTuple[] {
   let returnedPredictions: CorrectionPredictionTuple[] = [];
+
   const wordbreak = determineModelWordbreaker(lexicalModel);
+  const tokenizer = determineModelTokenizer(lexicalModel);
 
+  const tokenization = tokenizer(context);
   for(let correction of corrections) {
-    let predictions = lexicalModel.predict(correction.sample, context);
+    // Step 1:  determine tokenization effects.  We can't use the
+    // ContextTokenization pattern due to the model's lack of LexiconTraversal
+    // support, though.
+
+    const tokenizedCorrection = mapWhitespacedTokenization(tokenization.left.map((t) => { return {exampleInput: t.text} }), lexicalModel, correction.sample).tokenizedTransform;
+    const deleteLeft = [...tokenizedCorrection.values()].reduce((total, curr) => total + curr.deleteLeft, 0);
+
+    const tokenizedCorrectionEntries = [...tokenizedCorrection.entries()];
+    const preservationTransform = tokenizedCorrectionEntries.slice(0, -1).map((e) => e[1]).reduce((accum, curr) => {
+      return models.buildMergedTransform(accum, {...curr, deleteLeft: 0});
+    }, { insert: '', deleteLeft: 0, id: correction.sample.id});
+    preservationTransform.deleteLeft = deleteLeft;
+
+    // Step 2:  predict based on the final token.
+    const emptyContext: Context = {
+      left: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
 
-    const { sample: correctionTransform, p: correctionProb } = correction;
-    const correctionRoot = wordbreak(models.applyTransform(correction.sample, context));
+    const tailCorrection = tokenizedCorrectionEntries[tokenizedCorrectionEntries.length-1][1];
+    let predictions = lexicalModel.predict(tailCorrection, emptyContext);
 
+    // Step 3: create the intermediate prediction data entries for each generated prediction
     let predictionSet = predictions.map((pair: ProbabilityMass<Suggestion>) => {
       // Let's not rely on the model to copy transform IDs.
       // Only bother is there IS an ID to copy.
-      if(correctionTransform.id !== undefined) {
-        pair.sample.transformId = correctionTransform.id;
+      if(correction.sample.id !== undefined) {
+        pair.sample.transformId = correction.sample.id;
+      }
+
+      let correctionText: string;
+      if(tokenizedCorrectionEntries.length != 1) {
+        correctionText = correction.sample.insert;
+        // deleteLeft: 0; it's pre-applied within preservationTransform.
+      } else {
+        // Use the deleteLeft & tokenize.
+        const postContext = models.applyTransform(correction.sample, context);
+        correctionText = wordbreak(postContext);
       }
 
       let tuple: CorrectionPredictionTuple = {
         prediction: pair,
         correction: {
-          sample: correctionRoot,
-          p: correctionProb
+          sample: correctionText,
+          p: correction.p
         },
-        totalProb: pair.p * correctionProb,
-        matchLevel: SuggestionSimilarity.none
+        totalProb: pair.p * correction.p,
+        matchLevel: SuggestionSimilarity.none,
+        preservationTransform
       };
       return tuple;
     });
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
index 54eec729554..a99187defa5 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
@@ -4,7 +4,7 @@ import { assert } from 'chai';
 import { deepCopy } from "@keymanapp/web-utils";
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { models, predictFromCorrections, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index";
+import { models, correctAndEnumerateWithoutTraversals, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index";
 
 import CasingFunction = LexicalModelTypes.CasingFunction;
 import Context = LexicalModelTypes.Context;
@@ -112,7 +112,7 @@ describe('predictFromCorrections', () => {
       futureSuggestions: [ dummied_suggestions ]
     });
 
-    const predictions = predictFromCorrections(model, correctionDistribution, context);
+    const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
     predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its'));
     predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6));
     predictions.sort(tupleDisplayOrderSort);
@@ -164,7 +164,7 @@ describe('predictFromCorrections', () => {
       futureSuggestions: [ dummied_suggestions ]
     });
 
-    const predictions = predictFromCorrections(model, correctionDistribution, context);
+    const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
     predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its'));
     predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6));
     predictions.sort(tupleDisplayOrderSort);
@@ -247,7 +247,7 @@ describe('predictFromCorrections', () => {
       futureSuggestions: dummied_suggestions
     });
 
-    const predictions = predictFromCorrections(model, correctionDistribution, context);
+    const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
     predictions.sort(tupleDisplayOrderSort);
 
     assert.sameOrderedMembers(predictions.map((entry) => entry.prediction.sample.displayAs), ["is", "it's", "isn't", "its"]);

From 9364e0018196caa4636aea5858011446a5a279f9 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Thu, 7 May 2026 16:36:58 -0500
Subject: [PATCH 07/16] fix(web): implement prediction-data correction string

Lack of this string can break auto-correction and casing behaviors - and actually _did_ within engine/main!
---
 .../predictive-text/worker-thread/src/main/predict-helpers.ts   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index ba892d76062..ee7595d0eed 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -509,7 +509,7 @@ export function buildAndMapPredictions(
       },
       correction: {
         // Is used partly for word-casing, partly for auto-select enabling.
-        sample: '', // plain correction string instead...
+        sample: correctionTransforms[correctionTransforms.length-1].insert, // plain correction string instead...
         p: correctionCost
       },
       totalProb: predictionCost * correctionCost,

From 698be9bc70c19dc4558e4a78f791574932945b72 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Thu, 7 May 2026 16:38:21 -0500
Subject: [PATCH 08/16] docs(web): extend comment for last commit's change

---
 .../predictive-text/worker-thread/src/main/predict-helpers.ts  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index ee7595d0eed..8611c6be249 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -509,7 +509,8 @@ export function buildAndMapPredictions(
       },
       correction: {
         // Is used partly for word-casing, partly for auto-select enabling.
-        sample: correctionTransforms[correctionTransforms.length-1].insert, // plain correction string instead...
+        // Is already the full word, as that's what is provided by TokenizationCorrector.
+        sample: correctionTransforms[correctionTransforms.length-1].insert,
         p: correctionCost
       },
       totalProb: predictionCost * correctionCost,

From dacfc136ceb70146dc46bb45ddd793b9439351aa Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Fri, 8 May 2026 15:45:19 -0500
Subject: [PATCH 09/16] fix(web): adjust tokenization unit test expectations to
 match

---
 .../predictive-text/templates/tokenization.tests.ts       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/web/src/test/auto/headless/engine/predictive-text/templates/tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/templates/tokenization.tests.ts
index 3bc636c4128..0aa4f9551ed 100644
--- a/web/src/test/auto/headless/engine/predictive-text/templates/tokenization.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/templates/tokenization.tests.ts
@@ -175,7 +175,7 @@ describe('Tokenization functions', function() {
     });
 
     it('properly handles empty-context cases', function() {
-      // Wordbreaking on a empty space => no word.
+      // Wordbreaking on an empty space => no word, but an empty initial token.
       let context = {
         left: '', startOfBuffer: true,
         right: '', endOfBuffer: true
@@ -184,7 +184,7 @@ describe('Tokenization functions', function() {
       let tokenization = models.tokenize(wordBreakers.default, context);
 
       let expectedResult: models.Tokenization = {
-        left: [],
+        left: [{text: '', isWhitespace: false}],
         right: [],
         caretSplitsToken: false
       };
@@ -193,11 +193,11 @@ describe('Tokenization functions', function() {
     });
 
     it('properly handles null context cases', function() {
-      // Wordbreaking on a empty space => no word.
+      // Wordbreaking on an empty space => no word, but empty initial token.
       let tokenization = models.tokenize(wordBreakers.default, null);
 
       let expectedResult: models.Tokenization = {
-        left: [],
+        left: [{text: '', isWhitespace: false}],
         right: [],
         caretSplitsToken: false
       };

From 023a560afeeb427342532ba67fbcb023643649d4 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Tue, 12 May 2026 11:57:47 -0500
Subject: [PATCH 10/16] fix(web): fix bugs in createDefaultKeep, extend unit
 testing

It turns out that #15766 did not perfectly address all cases for generation of default "keep" suggestions.  This PR will remedy the situation.

Build-bot: skip build:web
Test-bot: skip
---
 .../src/main/model-compositor.ts              |   2 +-
 .../worker-thread/src/main/predict-helpers.ts |  16 +-
 .../create-default-keep.tests.ts              | 212 +++++++++++++++++-
 3 files changed, 219 insertions(+), 11 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
index a518642d2ce..8a4a53035cf 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
@@ -177,7 +177,7 @@ export class ModelCompositor {
     // the token, also add a 'keep' suggestion (with `.matchesModel = false`)
     // matching it.
     if(!hasExistingKeep) {
-      const baseTuple = createDefaultKeep(this.lexicalModel, context, transformDistribution[0]);
+      const baseTuple = createDefaultKeep(this.lexicalModel, postContext, transformDistribution[0]);
 
       // Will be re-sorted shortly after this; just use the simple O(1) method here
       // and let sorting put it in place.
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 28968916e44..c8a953b79c2 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -909,27 +909,29 @@ export function processSimilarity(
  * This method is designed for use when no appropriate 'keep' suggestion was
  * generated by the correction-search process.
  * @param lexicalModel
- * @param context
+ * @param postContext
  * @param trueInput
  * @returns
  */
 export function createDefaultKeep(
   lexicalModel: LexicalModel,
-  context: Context,
+  postContext: Context,
   trueInput: ProbabilityMass<Transform>
 ): CorrectionPredictionTuple {
   const { sample: inputTransform, p: inputTransformProb } = trueInput;
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
-  const postContext = models.applyTransform(inputTransform, context);
   const truePrefix = wordbreak(postContext);
+  const truePrefixLen = KMWString.length(truePrefix);
+  const inputInsertLen = KMWString.length(trueInput.sample.insert)
+  const tokenPrefixLen = truePrefixLen - Math.max(0, inputInsertLen - trueInput.sample.deleteLeft);
 
-  // Generate a full-word 'keep' replacement like other suggestions when one is not otherwise
-  // produced; we want to replace the full token in the same manner used for other suggestions.
-  const basePrefixLength = KMWString.length(truePrefix) - KMWString.length(inputTransform.insert) + inputTransform.deleteLeft;
+  // Generate a full-word 'keep' replacement like other suggestions when one is
+  // not otherwise produced; we want to replace the full token in the same
+  // manner used for other suggestions.
   const keepTransform = {
     insert: truePrefix,
-    deleteLeft: basePrefixLength
+    deleteLeft: Math.max(0, trueInput.sample.deleteLeft - inputInsertLen) + (tokenPrefixLen < 0 ? truePrefixLen : tokenPrefixLen)
   };
 
   let keepSuggestion = models.transformToSuggestion(keepTransform);
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
index 6b5981c8343..40fd2dafc6f 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
@@ -11,6 +11,7 @@ import { assert } from 'chai';
 
 import { LexicalModelTypes } from "@keymanapp/common-types";
 import * as wordBreakers from '@keymanapp/models-wordbreakers';
+import { applyTransform } from '@keymanapp/models-templates';
 
 import { CorrectionPredictionTuple, createDefaultKeep, models, SuggestionSimilarity } from "@keymanapp/lm-worker/test-index";
 
@@ -91,8 +92,8 @@ const testModelWithCasing = new DummyModel({
   // No suggestions needed here, so we don't define any.
 });
 
-describe('produceKeep', () => {
-  it(`creates an 'exact'-match suggestion based on primary input and current context`, () => {
+describe('createDefaultKeep', () => {
+  it(`creates an 'exact'-match suggestion based on simple primary input`, () => {
     const context: Context = {
       left: 'iphon',
       right: '',
@@ -129,7 +130,212 @@ describe('produceKeep', () => {
       matchLevel: SuggestionSimilarity.exact
     };
 
-    const tuple = createDefaultKeep(testModelWithCasing, context, trueInput);
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
+    assert.deepEqual(tuple, expectedKeep);
+  });
+
+  it(`creates an 'exact'-match suggestion based on full word after a backspace`, () => {
+    const context: Context = {
+      left: 'iphone ',
+      right: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
+
+    const trueInput: ProbabilityMass<Transform> = {
+      sample: {
+        insert: '',
+        deleteLeft: 1
+      },
+      p: 1
+    };
+
+    const expectedKeep: CorrectionPredictionTuple = {
+      correction: {
+        sample: 'iphone',
+        p: 1
+      },
+      prediction: {
+        sample: {
+          transform: {
+            insert: 'iphone',
+            deleteLeft: 7
+          },
+          displayAs: '<iphone>',
+          matchesModel: false,
+          tag: 'keep'
+        },
+        p: 1
+      },
+      totalProb: 1,
+      matchLevel: SuggestionSimilarity.exact
+    };
+
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
+    assert.deepEqual(tuple, expectedKeep);
+  });
+
+  it(`creates an 'exact'-match suggestion based on complex deletion`, () => {
+    const context: Context = {
+      left: 'iphone a',
+      right: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
+
+    const trueInput: ProbabilityMass<Transform> = {
+      sample: {
+        insert: 'e',
+        deleteLeft: 3
+      },
+      p: 1
+    };
+
+    const expectedKeep: CorrectionPredictionTuple = {
+      correction: {
+        sample: 'iphone',
+        p: 1
+      },
+      prediction: {
+        sample: {
+          transform: {
+            insert: 'iphone',
+            deleteLeft: 8
+          },
+          displayAs: '<iphone>',
+          matchesModel: false,
+          tag: 'keep'
+        },
+        p: 1
+      },
+      totalProb: 1,
+      matchLevel: SuggestionSimilarity.exact
+    };
+
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
+    assert.deepEqual(tuple, expectedKeep);
+  });
+
+  it(`creates an 'exact'-match suggestion based on complex insertion`, () => {
+    const context: Context = {
+      left: 'iphon',
+      right: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
+
+    const trueInput: ProbabilityMass<Transform> = {
+      sample: {
+        insert: 'es and',
+        deleteLeft: 0
+      },
+      p: 1
+    };
+
+    const expectedKeep: CorrectionPredictionTuple = {
+      correction: {
+        sample: 'and',
+        p: 1
+      },
+      prediction: {
+        sample: {
+          transform: {
+            insert: 'and',
+            deleteLeft: 3
+          },
+          displayAs: '<and>',
+          matchesModel: false,
+          tag: 'keep'
+        },
+        p: 1
+      },
+      totalProb: 1,
+      matchLevel: SuggestionSimilarity.exact
+    };
+
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
+    assert.deepEqual(tuple, expectedKeep);
+  });
+
+  it(`creates an 'exact'-match suggestion based on complex replacement`, () => {
+    const context: Context = {
+      left: 'iphone ',
+      right: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
+
+    const trueInput: ProbabilityMass<Transform> = {
+      sample: {
+        insert: 's',
+        deleteLeft: 1
+      },
+      p: 1
+    };
+
+    const expectedKeep: CorrectionPredictionTuple = {
+      correction: {
+        sample: 'iphones',
+        p: 1
+      },
+      prediction: {
+        sample: {
+          transform: {
+            insert: 'iphones',
+            deleteLeft: 7
+          },
+          displayAs: '<iphones>',
+          matchesModel: false,
+          tag: 'keep'
+        },
+        p: 1
+      },
+      totalProb: 1,
+      matchLevel: SuggestionSimilarity.exact
+    };
+
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
+    assert.deepEqual(tuple, expectedKeep);
+  });
+
+  it(`creates an empty 'exact'-match suggestion after adding a wordbreak`, () => {
+    const context: Context = {
+      left: 'iphon',
+      right: '',
+      startOfBuffer: true,
+      endOfBuffer: true
+    };
+
+    const trueInput: ProbabilityMass<Transform> = {
+      sample: {
+        insert: 'e ',
+        deleteLeft: 0
+      },
+      p: 1
+    };
+
+    const expectedKeep: CorrectionPredictionTuple = {
+      correction: {
+        sample: '',
+        p: 1
+      },
+      prediction: {
+        sample: {
+          transform: {
+            insert: '',
+            deleteLeft: 0
+          },
+          displayAs: '<>',
+          matchesModel: false,
+          tag: 'keep'
+        },
+        p: 1
+      },
+      totalProb: 1,
+      matchLevel: SuggestionSimilarity.exact
+    };
+
+    const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
     assert.deepEqual(tuple, expectedKeep);
   });
 });
\ No newline at end of file

From b4a87712fb977d590f916051efe70f58e9c403b5 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Tue, 12 May 2026 14:15:17 -0500
Subject: [PATCH 11/16] refactor(web): refactor intermediate composited
 prediction type

This reorganizes the type formerly known as CorrectionPredictionTuple, preparing it to share similarities with a new incoming type handling an earlier, tokenized intermediate stage that will be needed for some aspects of suggestion generation.

Build-bot: skip build:web
Test-bot: skip
---
 .../main/correction/tokenization-corrector.ts |   8 +-
 .../src/main/model-compositor.ts              |   9 +-
 .../worker-thread/src/main/predict-helpers.ts | 295 +++++-----
 .../early-correction-search-stopping.tests.ts |  29 +-
 .../prediction-helpers/auto-correct.tests.ts  | 544 ++++++++++--------
 .../create-default-keep.tests.ts              | 140 +++--
 .../predict-from-corrections.tests.ts         |  38 +-
 .../suggestion-deduplication.tests.ts         |  90 +--
 .../suggestion-finalization.tests.ts          |  98 ++--
 .../suggestion-similarity.tests.ts            | 188 +++---
 .../worker-custom-punctuation.tests.ts        |  14 +
 .../worker-model-compositor.tests.ts          |   4 +
 12 files changed, 801 insertions(+), 656 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
index 3b71b2eb4f8..885cdb0ed2b 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
@@ -58,6 +58,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
   private _previousResults: TokenizationResultMapping[] = [];
 
   // fully private
+  public readonly modelsCorrectables: boolean;
   private selectionQueue: PriorityQueue<QuotientNodeFinalizer>;
   private tokenCostMap: Map<number, number>;
   private tokenLookupMap: Map<number, ContextToken>;
@@ -172,13 +173,16 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
     this._correctables = [];
 
     this.tokenLookupMap = new Map();
+    let modelsCorrectables = false;
 
     orderedTokens.forEach((token, index) => {
       // New issue:  this mangles the space IDs!  We almost certainly need some
       // sort of proper map to the source token.
       const searchModule = new QuotientNodeFinalizer(token.searchModule, index == orderedTokens.length - 1);
       this.tokenLookupMap.set(searchModule.spaceId, token);
-      if(!filterClosure(token)) {
+      const passesFilter = filterClosure(token);
+      modelsCorrectables ||= passesFilter;
+      if(!passesFilter) {
         this._uncorrectables.push(searchModule);
       } else if(index == tailCorrectionLength - 1) {
         // The sole assignment case for this field.  It may only be assigned for
@@ -189,6 +193,8 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
         this._correctables.push(searchModule);
       }
     });
+    // Set a readonly flag indicating if this Corrector started with correctable entries.
+    this.modelsCorrectables = modelsCorrectables;
 
     this._generatedTokenResults = new Map();
     const uncorrectables = this._uncorrectables;
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
index 8a4a53035cf..08e0f8b9f89 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
@@ -1,11 +1,11 @@
 import * as models from '@keymanapp/models-templates';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import * as correction from './correction/index.js'
-import TransformUtils from './transformUtils.js';
 import { applySuggestionCasing, correctAndEnumerate, createDefaultKeep, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js';
 import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
+import TransformUtils from './transformUtils.js';
 
+import * as correction from './correction/index.js'
 import { ContextTracker } from './correction/context-tracker.js';
 import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/distance-modeler.js';
 
@@ -162,10 +162,13 @@ export class ModelCompositor {
     // lexicon for a word.  (Example:  "Apple" the company vs "apple" the fruit.)
     for(let tuple of rawPredictions) {
       if(currentCasing && currentCasing != 'lower') {
-        applySuggestionCasing(tuple.prediction.sample, basePrefix, this.lexicalModel, currentCasing);
+        applySuggestionCasing(tuple.components.prediction, basePrefix, this.lexicalModel, currentCasing);
       }
     }
 
+    // Open question:  should suggestions be fused together here, after the 'apply casing' step?
+    // Deduplication, etc. would function fine from a fused-prediction perspective here.
+
     // We want to dedupe before trimming the list so that we can present a full set
     // of viable distinct suggestions if available.
     const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context);
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index c8a953b79c2..177eef0a694 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -76,24 +76,53 @@ export const CORRECTION_SEARCH_THRESHOLDS = {
   REPLACEMENT_SEARCH_THRESHOLD: 4 as const // e^-4 = 0.0183156388.  Allows "80%" of an extra edit.
 }
 
-/**
- * Collates information related to suggestions during the suggestion generation
- * process.
- */
-export type CorrectionPredictionTuple = {
+export interface CompositedPredictionData {
   /**
    * The potential Suggestion (or Keep)
    */
-  prediction: ProbabilityMass<Suggestion | Keep>,
+  prediction: Suggestion | Keep;
   /**
    * The correction upon which the Suggestion (or Keep) is based
    */
-  correction: ProbabilityMass<string>,
+  correction: string
+}
+
+export interface PredictionProbabilities {
   /**
-   * The likelihood of the prediction - its lexical-model likelihood multiplied
-   * by the keystroke-sequence + correction likelihood.
+   * The probability of the word itself, separate from corrections, as
+   * determined by the LexicalModel itself.
    */
-  totalProb: number;
+  prediction: number;
+
+  /**
+   * The probability of text-correction steps taken to build the correction upon
+   * which the prediction is based.
+   */
+  correction: number;
+
+  /**
+   * The likelihood of the represented prediction, combining both the
+   * `prediction` and `correction` components into a single value.
+   */
+  total: number;
+}
+
+/**
+ * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context.
+ */
+export interface PredictionMetadata {
+  /**
+   * Tracks the relevant probability components contributing to a generated
+   * prediction.
+   */
+  probabilities: PredictionProbabilities;
+
+  /**
+   * Indicates that the 'suggestion' represents context changes that qualify for
+   * auto-selection.
+   */
+  autoSelectable: boolean;
+
   /**
    * How directly the prediction matches the current token in the context.
    *
@@ -101,12 +130,26 @@ export type CorrectionPredictionTuple = {
    * available upon initial construction of this type.
    */
   matchLevel?: SuggestionSimilarity;
+
   /**
    * Text from the triggering input that should _not_ be affected by the
    * prediction.
    */
   preservationTransform?: Transform;
-};
+}
+
+export interface IntermediateCompositedPrediction {
+  /**
+   * Contains the fully composited predictive-text Suggestion and its underlying correction string.
+   */
+  components: CompositedPredictionData;
+  /**
+   * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context.
+   */
+  metadata: PredictionMetadata;
+}
+
+type IntermediatePrediction = IntermediateCompositedPrediction;
 
 /**
  * An enum to be used when categorizing the level of similarity between
@@ -144,15 +187,15 @@ export enum SuggestionSimilarity {
   exact = 3
 }
 
-export function tupleDisplayOrderSort(a: CorrectionPredictionTuple, b: CorrectionPredictionTuple) {
+export function tupleDisplayOrderSort(a: IntermediatePrediction, b: IntermediatePrediction) {
   // Similarity distance
-  const simDist = (b.matchLevel ?? 0) - (a.matchLevel ?? 0);
+  const simDist = (b.metadata.matchLevel ?? 0) - (a.metadata.matchLevel ?? 0);
   if(simDist != 0) {
     return simDist;
   }
 
   // Probability distance
-  return b.totalProb - a.totalProb;
+  return b.metadata.probabilities.total - a.metadata.probabilities.total;
 }
 
 /**
@@ -320,7 +363,7 @@ export function determineSuggestionRange(
 export function buildAndMapPredictions(
   transition: ContextTransition,
   tokenizationCorrection: TokenizationResultMapping,
-): CorrectionPredictionTuple[] {
+): IntermediateCompositedPrediction[] {
   const model = transition.final.model;
   const tokenization = tokenizationCorrection.matchingSpace.tokenization;
 
@@ -398,6 +441,10 @@ export function buildAndMapPredictions(
     // Regardless of origin, overwrite the transform's deleteLeft value with what it should actually hold.
     predictions.forEach((entry) => {
       entry.sample.transform.deleteLeft = deleteLeft;
+      if(transition.transitionId !== undefined) {
+        entry.sample.transformId = transition.transitionId;
+        entry.sample.transform.id = transition.transitionId;
+      }
     });
 
     // Use traversals if possible - extract the most likely entry that is on the traversal,
@@ -415,8 +462,9 @@ export function buildAndMapPredictions(
     .slice(0, predictionComponents.length-1)
     .reduce((accum, curr) => accum * curr[0].p, 1)
 
-  const completePredictionTuples: CorrectionPredictionTuple[] = predictionComponents[predictionComponents.length-1].map((prediction) => {
+  const completePredictionTuples: IntermediateCompositedPrediction[] = predictionComponents[predictionComponents.length-1].map((prediction) => {
     const predictionCost = prediction.p * prefixProb;
+
     return {
       // Will need to do this differently.  We want to have each component
       // individualized b/c casing. Case should be maintained for prior tokens
@@ -428,28 +476,30 @@ export function buildAndMapPredictions(
       // applySuggestionCasing applies onto suggestions, so we'll want to build
       // the FULL suggestion AFTER applying casing changes (to each token's
       // suggestion component).
-      prediction: {
-        sample: {
+      components: {
+        prediction: {
           transformId: transition.transitionId,
           transform: models.buildMergedTransform(predictionPrefix, prediction.sample.transform),
           displayAs: models.buildMergedTransform(predictionPrefix, prediction.sample.transform).insert // should composite the displayAs strings instead...
         },
-        p: predictionCost,
+        correction: correctionTransforms[correctionTransforms.length-1].insert
       },
-      correction: {
-        // Is used partly for word-casing, partly for auto-select enabling.
-        // Is already the full word, as that's what is provided by TokenizationCorrector.
-        sample: correctionTransforms[correctionTransforms.length-1].insert,
-        p: correctionCost
-      },
-      totalProb: predictionCost * correctionCost,
-      matchLevel: SuggestionSimilarity.none,
-      // Long-term, we shouldn't have `.preservationTransform` here.
-      //
-      // Needed for now until the search actually operates based on
-      // TokenizationCorrector, rather than the half-converted use currently in
-      // place.
-      preservationTransform: tokenization.taillessTrueKeystroke
+      metadata: {
+        probabilities: {
+          prediction: predictionCost,
+          correction: correctionCost,
+          total: predictionCost * correctionCost
+        },
+        matchLevel: SuggestionSimilarity.none,
+        autoSelectable: tokenizationCorrection.matchingSpace.modelsCorrectables,
+
+        // Long-term, we shouldn't have `.preservationTransform` here.
+        //
+        // Needed for now until the search actually operates based on
+        // TokenizationCorrector, rather than the half-converted use currently in
+        // place.
+        preservationTransform: tokenization.taillessTrueKeystroke
+      }
     }
   });
 
@@ -515,7 +565,7 @@ export async function correctAndEnumerate(
   /**
    * The suggestions generated based on the user's input state.
    */
-  rawPredictions: CorrectionPredictionTuple[];
+  rawPredictions: IntermediateCompositedPrediction[];
 
   /**
    * The id of a prior ContextTransition event that triggered a Suggestion found
@@ -567,9 +617,8 @@ export async function correctAndEnumerate(
   const searchModules = tokenizations.map(t => t.tail.searchModule);
 
   // Only run the correction search when corrections are enabled.
-  let rawPredictions: CorrectionPredictionTuple[] = [];
+  let rawPredictions: IntermediateCompositedPrediction[] = [];
   let bestCorrectionCost: number;
-  const correctionPredictionMap: Record<string, Distribution<Suggestion>> = {};
   for await(const match of getBestTokenMatches(searchModules, timer)) {
     // Corrections obtained:  now to predict from them!
     const tokenization = tokenizations.find(t => t.spaceId == match.spaceId);
@@ -588,38 +637,15 @@ export async function correctAndEnumerate(
       continue;
     }
 
-    /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost
-     * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if
-     * there are significantly more likely words.  We only need this to allow very minor fat-finger
-     * adjustments for 100% keystroke-sequence corrections in order to prevent finickiness on
-     * key borders.
-     *
-     * Technically, the probabilities this produces won't be normalized as-is... but there's no
-     * true NEED to do so for it, even if it'd be 'nice to have'.  Consistently tracking when
-     * to apply it could become tricky, so it's simpler to leave out.
-     *
-     * Worst-case, it's possible to temporarily add normalization if a code deep-dive
-     * is needed in the future.
-     */
-    const costFactor = (tokenization.tail.inputCount <= 1) ? ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1;
-
     const suggestionRange = determineSuggestionRange(transition.base.displayTokenization, tokenization)
     const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true);
     const predictions = buildAndMapPredictions(transition, new TokenizationResultMapping([match], corrector));
 
     // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions.
     if(predictions.length > 0 && bestCorrectionCost === undefined) {
-      bestCorrectionCost = match.totalCost * costFactor;
-    }
-
-    // If we're getting the same prediction again, it's lower-cost.  Update!
-    let oldPredictionSet = correctionPredictionMap[match.matchString];
-    if(oldPredictionSet) {
-      rawPredictions = rawPredictions.filter((entry) => !oldPredictionSet.find((match) => entry.prediction.sample == match.sample));
+      bestCorrectionCost = predictions[0].metadata.probabilities.correction;
     }
 
-    correctionPredictionMap[match.matchString] = predictions.map((entry) => entry.prediction);
-
     rawPredictions = rawPredictions.concat(predictions);
 
     if(shouldStopSearchingEarly(bestCorrectionCost, match.totalCost, rawPredictions)) {
@@ -640,7 +666,7 @@ export async function correctAndEnumerate(
 export function shouldStopSearchingEarly(
   bestCorrectionCost: number,
   currentCorrectionCost: number,
-  rawPredictions: CorrectionPredictionTuple[]
+  rawPredictions: IntermediateCompositedPrediction[]
 ) {
   if(currentCorrectionCost >= bestCorrectionCost + CORRECTION_SEARCH_THRESHOLDS.MAX_SEARCH_THRESHOLD) {
     return true;
@@ -656,7 +682,7 @@ export function shouldStopSearchingEarly(
       // If the best suggestion from the search's current tier fails to beat the worst
       // pending suggestion from previous tiers, assume all further corrections will
       // similarly fail to win; terminate the search-loop.
-      if(rawPredictions[ModelCompositor.MAX_SUGGESTIONS-1].totalProb > Math.exp(-currentCorrectionCost)) {
+      if(rawPredictions[ModelCompositor.MAX_SUGGESTIONS-1].metadata.probabilities.total > Math.exp(-currentCorrectionCost)) {
         return true;
       }
     }
@@ -681,8 +707,8 @@ export function correctAndEnumerateWithoutTraversals(
   lexicalModel: LexicalModel,
   corrections: ProbabilityMass<Transform>[],
   context: Context
-): CorrectionPredictionTuple[] {
-  let returnedPredictions: CorrectionPredictionTuple[] = [];
+): IntermediateCompositedPrediction[] {
+  let returnedPredictions: IntermediateCompositedPrediction[] = [];
 
   const wordbreak = determineModelWordbreaker(lexicalModel);
   const tokenizer = determineModelTokenizer(lexicalModel);
@@ -730,15 +756,21 @@ export function correctAndEnumerateWithoutTraversals(
         correctionText = wordbreak(postContext);
       }
 
-      let tuple: CorrectionPredictionTuple = {
-        prediction: pair,
-        correction: {
-          sample: correctionText,
-          p: correction.p
+      let tuple: IntermediateCompositedPrediction = {
+        components: {
+          prediction: pair.sample,
+          correction: correctionText
         },
-        totalProb: pair.p * correction.p,
-        matchLevel: SuggestionSimilarity.none,
-        preservationTransform
+        metadata: {
+          probabilities: {
+            prediction: pair.p,
+            correction: correction.p,
+            total: pair.p * correction.p
+          },
+          autoSelectable: correctionValidForAutoSelect(tailCorrection.insert),
+          matchLevel: SuggestionSimilarity.none,
+          preservationTransform
+        }
       };
       return tuple;
     });
@@ -784,17 +816,17 @@ export function applySuggestionCasing(suggestion: Suggestion, baseWord: string,
  */
 export function dedupeSuggestions(
   lexicalModel: LexicalModel,
-  rawPredictions: CorrectionPredictionTuple[],
+  rawPredictions: IntermediateCompositedPrediction[],
   context: Context
 ) {
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
-  let suggestionDistribMap: {[key: string]: CorrectionPredictionTuple} = {};
-  let suggestionDistribution: CorrectionPredictionTuple[] = [];
+  let suggestionDistribMap: {[key: string]: IntermediateCompositedPrediction} = {};
+  let suggestionDistribution: IntermediateCompositedPrediction[] = [];
 
   // Deduplicator + annotator of 'keep' suggestions.
   for(let tuple of rawPredictions) {
-    const predictedWord = wordbreak(models.applyTransform(tuple.prediction.sample.transform, context));
+    const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context));
 
     // Assumption:  suggestions that have the same net result should have the
     // same displayAs string.  (We could try to pick the one with highest net
@@ -804,7 +836,7 @@ export function dedupeSuggestions(
     // Merge 'em!
     const existingSuggestion = suggestionDistribMap[predictedWord];
     if(existingSuggestion) {
-      existingSuggestion.totalProb += tuple.totalProb;
+      existingSuggestion.metadata.probabilities.total += tuple.metadata.probabilities.total;
     } else {
       suggestionDistribMap[predictedWord] = tuple;
     }
@@ -832,15 +864,16 @@ export function dedupeSuggestions(
  *   current text
  * - any other suggestion
  *
+ * @param lexicalModel
  * @param suggestionDistribution
- * @param context
- * @param trueInput inputTransform + its assigned probability
+ * @param baseContext
+ * @param finalContext
  * @returns true if an existing suggestion fulfills the role of 'keep';
  * otherwise, false.
  */
 export function processSimilarity(
   lexicalModel: LexicalModel,
-  suggestionDistribution: CorrectionPredictionTuple[],
+  suggestionDistribution: IntermediateCompositedPrediction[],
   context: Context,
   trueInput: ProbabilityMass<Transform>
 ): boolean {
@@ -860,38 +893,38 @@ export function processSimilarity(
   for(let tuple of suggestionDistribution) {
     // Don't set it unnecessarily; this can have side-effects in some automated tests.
     if(inputTransform.id !== undefined) {
-      tuple.prediction.sample.transformId = inputTransform.id;
+      tuple.components.prediction.transformId = inputTransform.id;
     }
 
-    const predictedWord = wordbreak(models.applyTransform(tuple.prediction.sample.transform, context));
+    const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context));
 
     // Is the suggestion an exact match (or, "similar enough") to the
     // actually-typed context?  If so, we wish to note this fact and to
     // prioritize such a suggestion over suggestions that are not.
-    if(keyed(tuple.correction.sample) == keyedPrefix) {
+    if(keyed(tuple.components.correction) == keyedPrefix) {
       if(predictedWord == truePrefix) {
         // Exact match:  it's a perfect 'keep' suggestion.
-        tuple.matchLevel = SuggestionSimilarity.exact;
-        keepOption = toAnnotatedSuggestion(lexicalModel, tuple.prediction.sample, 'keep',  models.QuoteBehavior.noQuotes);
+        tuple.metadata.matchLevel = SuggestionSimilarity.exact;
+        keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep',  models.QuoteBehavior.noQuotes);
 
         // Indicates that this suggestion exists directly within the lexical
         // model as a valid suggestion.  (We actively display it if it's an
         // exact match, but hide it if not, only preserving it for reversions
         // if/when needed.)
         keepOption.matchesModel = true;
-        Object.assign(tuple.prediction.sample, keepOption);
-        keepOption = tuple.prediction.sample as Outcome<Keep>;
+        Object.assign(tuple.components.prediction, keepOption);
+        keepOption = tuple.components.prediction as Outcome<Keep>;
       } else if(keyCased(predictedWord) == lowercasedPrefix) {
         // Case-insensitive match.  No diacritic differences; the ONLY difference is casing.
-        tuple.matchLevel = SuggestionSimilarity.sameText;
+        tuple.metadata.matchLevel = SuggestionSimilarity.sameText;
       } else if(keyed(predictedWord) == keyedPrefix) {
         // Diacritic-insensitive / exact-key match.
-        tuple.matchLevel = SuggestionSimilarity.sameKey;
+        tuple.metadata.matchLevel = SuggestionSimilarity.sameKey;
       } else {
-        tuple.matchLevel = SuggestionSimilarity.none;
+        tuple.metadata.matchLevel = SuggestionSimilarity.none;
       }
     } else {
-      tuple.matchLevel = SuggestionSimilarity.none;
+      tuple.metadata.matchLevel = SuggestionSimilarity.none;
     }
   }
 
@@ -917,7 +950,7 @@ export function createDefaultKeep(
   lexicalModel: LexicalModel,
   postContext: Context,
   trueInput: ProbabilityMass<Transform>
-): CorrectionPredictionTuple {
+): IntermediateCompositedPrediction {
   const { sample: inputTransform, p: inputTransformProb } = trueInput;
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
@@ -946,19 +979,19 @@ export function createDefaultKeep(
 
   // Insert our synthetic keepOption as a prediction tuple.
   return {
-    // Product of the two p's below.
-    totalProb: inputTransformProb * MAX_PROB,
-    prediction: {
-      sample: keepOption,
-      // We always show the keep option if it doesn't directly match,
-      // so max probability is fine.
-      p: MAX_PROB,
+    components: {
+      prediction: keepOption,
+      correction: truePrefix
     },
-    correction: {
-      sample: truePrefix,
-      p: inputTransformProb * MAX_PROB
-    },
-    matchLevel: SuggestionSimilarity.exact
+    metadata: {
+      probabilities: {
+        prediction: MAX_PROB,
+        correction: inputTransformProb,
+        total: inputTransformProb * MAX_PROB
+      },
+      autoSelectable: false,
+      matchLevel: SuggestionSimilarity.exact
+    }
   };
 }
 
@@ -991,12 +1024,12 @@ export function correctionValidForAutoSelect(correction: string) {
   return false;
 }
 
-export function predictionAutoSelect(suggestionDistribution: CorrectionPredictionTuple[]) {
+export function predictionAutoSelect(suggestionDistribution: IntermediateCompositedPrediction[]) {
   if(suggestionDistribution.length == 0) {
     return;
   }
 
-  const keepOption = suggestionDistribution[0].prediction.sample as Outcome<Keep>;
+  const keepOption = suggestionDistribution[0].components.prediction as Outcome<Keep>;
   if(keepOption.tag == 'keep' && keepOption.matchesModel) {
     // Auto-select it for auto-acceptance; we don't correct away from perfectly-valid
     // lexical entries, even if they are comparatively low-frequency.
@@ -1010,19 +1043,19 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio
 
   if(suggestionDistribution.length == 1) {
     // Prevent auto-acceptance when the root doesn't meet validation criteria.
-    if(!correctionValidForAutoSelect(suggestionDistribution[0].correction.sample)) {
+    if(!suggestionDistribution[0].metadata.autoSelectable) {
       return;
     }
 
     // Mark for auto-acceptance; there are no alternatives.
-    suggestionDistribution[0].prediction.sample.autoAccept = true;
+    suggestionDistribution[0].components.prediction.autoAccept = true;
     return;
   }
 
   // Is it reasonable to auto-accept any of our suggestions?
   const bestSuggestion = suggestionDistribution[0];
 
-  const baseCorrection = bestSuggestion.correction.sample;
+  const baseCorrection = bestSuggestion.components.correction;
   if(baseCorrection.length == 0) {
     // If the correction is rooted on an empty root, there's no basis for
     // auto-correcting to this suggestion.
@@ -1031,8 +1064,8 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio
 
   // Find the highest probability for any correction that led to a valid prediction.
   // No need to full-on re-sort everything, though.
-  const bestCorrection = suggestionDistribution.reduce((prev, current) => prev?.correction.p > current.correction.p ? prev : current, null).correction;
-  if(bestCorrection.p > bestSuggestion.correction.p) {
+  const bestCorrectionP = suggestionDistribution.reduce((prev, current) => Math.max(prev, current.metadata.probabilities.correction), 0);
+  if(bestCorrectionP > bestSuggestion.metadata.probabilities.correction) {
     // Here, the best suggestion didn't come from the best correction.
     // Is it actually reasonable to auto-correct?  We're probably just very
     // biased toward its frequency.  (Maybe a threshold should be considered?)
@@ -1043,28 +1076,28 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio
   // - such as replacing `cant` with `can't` if the latter is much more frequent -
   // we may wish to group matchLevel values below by 'mapping' them with an appropriate
   // function.  (Both on the next line and within the reduce functor.)
-  const bestSuggestionTier = bestSuggestion.matchLevel;
+  const bestSuggestionTier = bestSuggestion.metadata.matchLevel;
 
   // compare best vs other probabilities of compatible tier.
   const probSum = suggestionDistribution.reduce((accum, current) => {
     // If the suggestion is from a different similarity tier, do not count it against
     // the required auto-select probability ratio threshold.  That threshold should
     // only apply within the suggestion's tier.
-    return accum + (current.matchLevel == bestSuggestionTier ? current.totalProb : 0)
+    return accum + (current.metadata.matchLevel == bestSuggestionTier ? current.metadata.probabilities.total : 0)
   }, 0);
-  const proportionOfBest = bestSuggestion.totalProb / probSum;
+  const proportionOfBest = bestSuggestion.metadata.probabilities.total / probSum;
   if(proportionOfBest < AUTOSELECT_PROPORTION_THRESHOLD) {
     return;
   }
 
-  if(!correctionValidForAutoSelect(bestSuggestion.correction.sample)) {
+  if(!bestSuggestion.metadata.autoSelectable) {
     return;
   }
 
   // compare correction-cost aspects?  We disable if the base correction is lower than best,
   // but should we do other comparisons too?
 
-  bestSuggestion.prediction.sample.autoAccept = true;
+  bestSuggestion.components.prediction.autoAccept = true;
 }
 
 /**
@@ -1085,7 +1118,7 @@ export function predictionAutoSelect(suggestionDistribution: CorrectionPredictio
  */
 export function finalizeSuggestions(
   lexicalModel: LexicalModel,
-  deduplicatedSuggestionTuples: CorrectionPredictionTuple[],
+  deduplicatedSuggestionTuples: IntermediateCompositedPrediction[],
   context: Context,
   inputTransform: Transform,
   verbose?: boolean
@@ -1094,42 +1127,44 @@ export function finalizeSuggestions(
   const tokenize = determineModelTokenizer(lexicalModel);
 
   const suggestions = deduplicatedSuggestionTuples.map((tuple) => {
-    const prediction = tuple.prediction;
+    const prediction = tuple.components.prediction;
 
     // If this is a suggestion after any form of wordbreak input, make sure we preserve any components
     // from prior tokens!
     //
     // Note:  may need adjustment if/when supporting phrase-level correction.
-    if(tuple.preservationTransform) {
+    if(tuple.metadata.preservationTransform) {
       const mergedTransform = {
-        ...models.buildMergedTransform(tuple.preservationTransform, {...prediction.sample.transform, deleteLeft: 0}),
-        deleteLeft: prediction.sample.transform.deleteLeft
+        ...models.buildMergedTransform(tuple.metadata.preservationTransform, {...prediction.transform, deleteLeft: 0}),
+        deleteLeft: prediction.transform.deleteLeft
       };
 
       // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform.
       // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers
-      let mutableSuggestion = prediction.sample as {-readonly [transform in keyof Suggestion]: Suggestion[transform]};
+      let mutableSuggestion = prediction as {-readonly [transform in keyof Suggestion]: Suggestion[transform]};
 
       // Assignment via by-reference behavior, as suggestion is an object
       mutableSuggestion.transform = mergedTransform;
     }
 
     // Is sometimes not set during unit tests.
-    if(prediction.sample.transformId !== undefined) {
-      prediction.sample.transform.id = prediction.sample.transformId;
+    if(prediction.transformId !== undefined) {
+      prediction.transform.id = prediction.transformId;
     }
 
+    const probs = tuple.metadata.probabilities;
+
     if(!verbose) {
       return {
-        ...prediction.sample,
-        p: tuple.totalProb
+        ...prediction,
+        p: probs.total
       };
     } else {
       const sample: Outcome<Suggestion | Keep> = {
-        ...prediction.sample,
-        p: tuple.totalProb,
-        "lexical-p": prediction.p,
-        "correction-p": tuple.correction.p
+        ...prediction,
+        p: probs.total,
+        "lexical-p": probs.prediction,
+        "correction-p": probs.correction
       }
 
       return sample;
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
index bf45e3b94e1..430d9c6c7e0 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
@@ -1,6 +1,16 @@
 import { assert } from 'chai';
 
-import { CORRECTION_SEARCH_THRESHOLDS, CorrectionPredictionTuple, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index";
+import { CORRECTION_SEARCH_THRESHOLDS, IntermediateCompositedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index";
+
+function mockIntermediatePrediction(value: number) {
+  return {
+    metadata: {
+      probabilities: {
+        total: value
+      }
+    }
+  } as IntermediateCompositedPrediction
+}
 
 describe('correction-search: shouldStopSearchingEarly', () => {
   it('stops early once new corrections are less likely than currently discovered predictions', () => {
@@ -12,12 +22,7 @@ describe('correction-search: shouldStopSearchingEarly', () => {
     assert.equal(predictionProbs.length, ModelCompositor.MAX_SUGGESTIONS, "test setup no longer valid");
 
-    // The only part for each entry we actually care about here:  .totalProb.
+    // The only part for each entry we actually care about here:  .metadata.probabilities.total.
-    /** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */
-    const predictions = predictionProbs.map((entry) => {
-      return {
-        totalProb: entry
-      } as CorrectionPredictionTuple
-    });
+    const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry));
 
     // Thresholding is performed in log-space.
     // 0.0501 and 0.0499 are offset on each side of 0.05, the last value in the array defined above.
@@ -33,8 +38,8 @@ describe('correction-search: shouldStopSearchingEarly', () => {
     //
     // Can technically run the method with an empty array, but the actual scenario would have
     // at least one prediction present in the "found predictions" array.
-    assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [{ totalProb: Math.exp(-1) } as CorrectionPredictionTuple]));
-    assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [{ totalProb: Math.exp(-1) } as CorrectionPredictionTuple]));
+    assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockIntermediatePrediction(Math.exp(-1))]));
+    assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockIntermediatePrediction(Math.exp(-1))]));
   });
 
   it('stops checking corrections earlier when enough predictions have been found', () => {
@@ -43,11 +48,7 @@ describe('correction-search: shouldStopSearchingEarly', () => {
 
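-    // The only part for each entry we actually care about here:  .totalProb.
-    /** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */
+    // The only part for each entry we actually care about here:  .metadata.probabilities.total.
+    /** @type {import('#./predict-helpers.js').IntermediateCompositedPrediction[]} */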
-    const predictions = predictionProbs.map((entry) => {
-      return {
-        totalProb: entry
-      } as CorrectionPredictionTuple
-    });
+    const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry));
 
     const baseCost = 1;
 
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts
index d32326e8436..b55886bb42f 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/auto-correct.tests.ts
@@ -1,6 +1,6 @@
 import { assert } from 'chai';
 
-import { AUTOSELECT_PROPORTION_THRESHOLD, CorrectionPredictionTuple, predictionAutoSelect, SuggestionSimilarity, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index";
+import { AUTOSELECT_PROPORTION_THRESHOLD, IntermediateCompositedPrediction, predictionAutoSelect, SuggestionSimilarity, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index";
 /*
   * Preconditions:
   * - there should always be a 'keep' option.  Now, whether or not that option
@@ -9,7 +9,7 @@ import { AUTOSELECT_PROPORTION_THRESHOLD, CorrectionPredictionTuple, predictionA
   */
 describe('predictionAutoSelect', () => {
   it(`does not throw when no suggestions are available`, () => {
-    const predictions: CorrectionPredictionTuple[] = [];
+    const predictions: IntermediateCompositedPrediction[] = [];
     const originalPredictions = [].concat(predictions);
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
 
@@ -17,14 +17,10 @@ describe('predictionAutoSelect', () => {
   });
 
   it(`selects solitary 'keep' suggestion that does match the model`, () => {
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       {
-        correction: {
-          sample: 'apple',
-          p: 1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             tag: 'keep',
             transform: {  // can be null / "mocked out"
               insert: 'e',
@@ -33,9 +29,16 @@ describe('predictionAutoSelect', () => {
             matchesModel: true,
             displayAs: 'apple'
           },
-          p: 1
+          correction: 'apple',
         },
-        totalProb: 1
+        metadata: {
+          probabilities: {
+            prediction: 1,
+            correction: 1,
+            total: 1
+          },
+          autoSelectable: true
+        }
       }
     ];
 
@@ -43,19 +46,15 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepOrderedMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.isOk(autoselected);
   });
 
   it(`does not select suggestions if the root correction has no letters`, () => {
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       {
-        correction: {
-          sample: '5',
-          p: 1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             tag: 'keep',
             transform: {
               insert: '5',
@@ -64,17 +63,20 @@ describe('predictionAutoSelect', () => {
             matchesModel: false,
             displayAs: '5'
           },
-          p: 0.01
+          correction: '5'
         },
-        totalProb: 0.01
+        metadata: {
+          probabilities: {
+            prediction: 0.01,
+            correction: 1,
+            total: 0.01
+          },
+          autoSelectable: false
+        }
       },
       {
-        correction: {
-          sample: '5',
-          p: 1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {
               insert: '5th',
               deleteLeft: 0
@@ -82,9 +84,16 @@ describe('predictionAutoSelect', () => {
             matchesModel: true,
             displayAs: '5th'
           },
-          p: 0.8
+          correction: '5'
         },
-        totalProb: 0.8
+        metadata: {
+          probabilities: {
+            prediction: 0.8,
+            correction: 1,
+            total: 0.8
+          },
+          autoSelectable: false
+        }
       }
     ];
 
@@ -92,19 +101,15 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepOrderedMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.isNotOk(autoselected);
   });
 
   it(`does not select solitary 'keep' suggestion that doesn't match the model`, () => {
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       {
-        correction: {
-          sample: 'appl',
-          p: 1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             tag: 'keep',
             transform: { // can be null / "mocked out"
               insert: 'l',
@@ -113,9 +118,16 @@ describe('predictionAutoSelect', () => {
             matchesModel: false,
             displayAs: '"appl"'
           },
-          p: 1
+          correction: 'appl'
         },
-        totalProb: 1
+        metadata: {
+          probabilities: {
+            prediction: 1,
+            correction: 1,
+            total: 1
+          },
+          autoSelectable: true
+        }
       }
     ];
 
@@ -123,18 +135,14 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepOrderedMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.isNotOk(autoselected);
   });
 
   it(`selects 'keep' suggestion that does match the model over any alternatives`, () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 'n',
@@ -143,65 +151,81 @@ describe('predictionAutoSelect', () => {
           matchesModel: true,
           displayAs: 'thin'
         },
-        p: .05
+        correction: 'thin'
       },
-      totalProb: .04
+      metadata: {
+        probabilities: {
+          prediction: .05,
+          correction: .8,
+          total: .05 * .8
+        },
+        autoSelectable: true
+      }
     }
 
-    const highestNonKeepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const highestNonKeepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'nk',
             deleteLeft: 0
           },
           displayAs: 'think'
         },
-        p: .55
+        correction: 'thin'
       },
-      totalProb: .44
+      metadata: {
+        probabilities: {
+          prediction: .55,
+          correction: .8,
+          total: .55 * .8
+        },
+        autoSelectable: true
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       highestNonKeepSuggestion,
       {
-        correction: {
-          sample: 'thin',
-          p: .8
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ng',
               deleteLeft: 0
             },
             displayAs: 'thing'
           },
-          p: .4
+          correction: 'thin'
         },
-        totalProb: .32
+        metadata: {
+          probabilities: {
+            prediction: .4,
+            correction: .8,
+            total: .4 * .8
+          },
+          autoSelectable: true
+        }
       },
       {
-        correction: {
-          sample: 'thic',
-          p: .2
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ck',
               deleteLeft: 0
             },
             displayAs: 'thick'
           },
-          p: 1
+          correction: 'thic'
         },
-        totalProb: .2
+        metadata: {
+          probabilities: {
+            prediction: 1,
+            correction: .2,
+            total: 1 * .2
+          },
+          autoSelectable: true
+        }
       }
     ];
 
@@ -209,18 +233,14 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.equal(autoselected, keepSuggestion);
   });
 
   it(`selects solitary non-'keep' suggestion when 'keep' does not match model`, () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 'n',
@@ -229,40 +249,50 @@ describe('predictionAutoSelect', () => {
           displayAs: '"thin"',
           matchesModel: false
         },
-        p: .05
+        correction: 'thin'
       },
-      totalProb: .04
+      metadata: {
+        probabilities: {
+          prediction: .05,
+          correction: .8,
+          total: .8 * .05
+        },
+        autoSelectable: true
+      }
     }
 
     // To 'win', a suggestion (currently) needs at least twice the probability of the sum of all alternatives.
     // This threshold may be subject to change.
     //
     // Refer to AUTOSELECT_PROPORTION_THRESHOLD in predict-helpers.ts.
-    const onlyNonKeepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const onlyNonKeepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'nk',
             deleteLeft: 0
           },
           displayAs: 'think'
         },
-        p: .01
+        correction: 'thin'
       },
-      totalProb: .008
+      metadata: {
+        probabilities: {
+          prediction: .01,
+          correction: .8,
+          total: .01 * .8
+        },
+        autoSelectable: true
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       onlyNonKeepSuggestion
     ];
 
-    const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0);
-    assert.isBelow(onlyNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
+    const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0);
+    assert.isBelow(onlyNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
 
     predictions.sort(tupleDisplayOrderSort);
 
@@ -270,18 +300,14 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepOrderedMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.equal(autoselected, onlyNonKeepSuggestion);
   });
 
   it(`does not select non-'keep' without sufficient winning probability`, () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 'n',
@@ -290,74 +316,90 @@ describe('predictionAutoSelect', () => {
           displayAs: '"thin"',
           matchesModel: false
         },
-        p: .05
+        correction: 'thin'
       },
-      totalProb: .04
+      metadata: {
+        probabilities: {
+          prediction: .05,
+          correction: .8,
+          total: .05 * .8
+        },
+        autoSelectable: true
+      }
     }
 
     // To 'win', a suggestion (currently) needs at least twice the probability of the sum of all alternatives.
     // This threshold may be subject to change.
     //
     // Refer to AUTOSELECT_PROPORTION_THRESHOLD in predict-helpers.ts.
-    const highestNonKeepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const highestNonKeepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'nk',
             deleteLeft: 0
           },
           displayAs: 'think'
         },
-        p: .55
+        correction: 'thin'
       },
-      totalProb: .44
+      metadata: {
+        probabilities: {
+          prediction: .55,
+          correction: .8,
+          total: .55 * .8
+        },
+        autoSelectable: true
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       highestNonKeepSuggestion,
       {
-        correction: {
-          sample: 'thin',
-          p: .8
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ng',
               deleteLeft: 0
             },
             displayAs: 'thing'
           },
-          p: .4
+          correction: 'thin'
         },
-        totalProb: .32
+        metadata: {
+          probabilities: {
+            prediction: .4,
+            correction: .8,
+            total: .4 * .8
+          },
+          autoSelectable: true
+        }
       },
       {
-        correction: {
-          sample: 'thic',
-          p: .2
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ck',
               deleteLeft: 0
             },
             displayAs: 'thick'
           },
-          p: 1
+          correction: 'thic'
         },
-        totalProb: .2
+        metadata: {
+          probabilities: {
+            prediction: 1,
+            correction: .2,
+            total: 1 * .2
+          },
+          autoSelectable: true
+        }
       }
     ];
 
-    const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0);
-    assert.isBelow(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
+    const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0);
+    assert.isBelow(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
 
     predictions.sort(tupleDisplayOrderSort);
 
@@ -365,18 +407,14 @@ describe('predictionAutoSelect', () => {
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepOrderedMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.isNotOk(autoselected);
   });
 
   it(`does select non-'keep' with sufficient winning probability`, () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .8
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 'n',
@@ -385,87 +423,99 @@ describe('predictionAutoSelect', () => {
           displayAs: '"thin"',
           matchesModel: false
         },
-        p: .05
+        correction: 'thin'
       },
-      totalProb: .04
+      metadata: {
+        probabilities: {
+          prediction: .05,
+          correction: .8,
+          total: .05 * .8
+        },
+        autoSelectable: true
+      }
     }
 
-    const highestNonKeepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thin',
-        p: .9
-      },
-      prediction: {
-        sample: {
+    const highestNonKeepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'nk',
             deleteLeft: 0
           },
           displayAs: 'think'
         },
-        p: .75
+        correction: 'thin'
       },
-      totalProb: .675
+      metadata: {
+        probabilities: {
+          prediction: .75,
+          correction: .9,
+          total: .75 * .9
+        },
+        autoSelectable: true
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       highestNonKeepSuggestion,
       {
-        correction: {
-          sample: 'thin',
-          p: .9
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ng',
               deleteLeft: 0
             },
             displayAs: 'thing'
           },
-          p: .2
+          correction: 'thin'
         },
-        totalProb: .18
+        metadata: {
+          probabilities: {
+            prediction: .2,
+            correction: .9,
+            total: .2 * .9
+          },
+          autoSelectable: true
+        }
       },
       {
-        correction: {
-          sample: 'thic',
-          p: .1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'ck',
               deleteLeft: 0
             },
             displayAs: 'thick'
           },
-          p: 1
+          correction: 'thic'
         },
-        totalProb: .1
+        metadata: {
+          probabilities: {
+            prediction: 1,
+            correction: .1,
+            total: 1 * .1
+          },
+          autoSelectable: true
+        }
       }
     ];
 
-    const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0);
-    assert.isAbove(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
+    const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0);
+    assert.isAbove(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
 
     const originalPredictions = [].concat(predictions);
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.equal(autoselected, highestNonKeepSuggestion);
   });
 
   it('ignores non key-matched suggestions when key-matched suggestions exist', () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'cant',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 't',
@@ -474,51 +524,64 @@ describe('predictionAutoSelect', () => {
           displayAs: '"cant"',
           matchesModel: false
         },
-        p: 1
+        correction: 'cant'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: true,
+        matchLevel: SuggestionSimilarity.exact
+      }
     }
 
-    const expectedSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'cant',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: '\'t',
             deleteLeft: 0
           },
           displayAs: "can't"
         },
-        p: .2
+        correction: 'cant'
       },
-      totalProb: .2,
-      matchLevel: SuggestionSimilarity.sameKey
+      metadata: {
+        probabilities: {
+          prediction: .2,
+          correction: 1,
+          total: .2 * 1
+        },
+        autoSelectable: true,
+        matchLevel: SuggestionSimilarity.sameKey
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       expectedSuggestion,
       {
-        correction: {
-          sample: 'cant',
-          p: 1
-        },
-        prediction: {
-          sample: {
+        components: {
+          prediction: {
             transform: {  // can be null / "mocked out"
               insert: 'teen',
               deleteLeft: 0
             },
             displayAs: 'canteen'
           },
-          p: .8
+          correction: 'cant'
         },
-        totalProb: .8,
-        matchLevel: SuggestionSimilarity.none
+        metadata: {
+          probabilities: {
+            prediction: .8,
+            correction: 1,
+            total: .8 * 1
+          },
+          autoSelectable: true,
+          matchLevel: SuggestionSimilarity.none
+        }
       }
     ];
 
@@ -527,20 +590,16 @@ describe('predictionAutoSelect', () => {
 
     assert.sameDeepMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.equal(autoselected, expectedSuggestion);
   });
 
   // The idea:  avoid "over-correcting" when a potential correction has a
   // super-high-frequency word.
   it('does not auto-select suggestion if its root correction is not most likely', () => {
-    const keepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thi',
-        p: .7
-      },
-      prediction: {
-        sample: {
+    const keepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           tag: 'keep',
           transform: {  // can be null / "mocked out"
             insert: 'i',
@@ -549,61 +608,74 @@ describe('predictionAutoSelect', () => {
           displayAs: '"thi"',
           matchesModel: false
         },
-        p: .05
+        correction: 'thi'
       },
-      totalProb: .035
+      metadata: {
+        probabilities: {
+          prediction: .05,
+          correction: .7,
+          total: .05 * .7
+        },
+        autoSelectable: true
+      }
     }
 
-    const highestCorrectionSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'thi',
-        p: .7
-      },
-      prediction: {
-        sample: {
+    const highestCorrectionSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'in',
             deleteLeft: 0
           },
           displayAs: 'thin'
         },
-        p: .1
+        correction: 'thi',
       },
-      totalProb: .07
+      metadata: {
+        probabilities: {
+          prediction: .1,
+          correction: .7,
+          total: .1 * .7
+        },
+        autoSelectable: true
+      }
     };
 
-    const highestNonKeepSuggestion: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'the',
-        p: .3
-      },
-      prediction: {
-        sample: {
+    const highestNonKeepSuggestion: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {  // can be null / "mocked out"
             insert: 'e',
             deleteLeft: 0
           },
           displayAs: 'the'
         },
-        p: 1
+        correction: 'the'
       },
-      totalProb: .3
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: .3,
+          total: 1 * .3
+        },
+        autoSelectable: true
+      }
     };
 
-    const predictions: CorrectionPredictionTuple[] = [
+    const predictions: IntermediateCompositedPrediction[] = [
       keepSuggestion,
       highestNonKeepSuggestion,
       highestCorrectionSuggestion
     ];
 
-    const totalProb = predictions.reduce((accum, current) => accum + current.totalProb, 0);
-    assert.isAbove(highestNonKeepSuggestion.totalProb, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
+    const totalProb = predictions.reduce((accum, current) => accum + current.metadata.probabilities.total, 0);
+    assert.isAbove(highestNonKeepSuggestion.metadata.probabilities.total, totalProb * AUTOSELECT_PROPORTION_THRESHOLD, 'test setup is no longer valid');
 
     const originalPredictions = [].concat(predictions);
     assert.doesNotThrow(() => predictionAutoSelect(predictions));
     assert.sameDeepMembers(predictions, originalPredictions);
 
-    const autoselected = predictions.find((entry) => entry.prediction.sample.autoAccept);
+    const autoselected = predictions.find((entry) => entry.components.prediction.autoAccept);
     assert.isNotOk(autoselected);
   });
 
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
index 40fd2dafc6f..a86048b33d4 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts
@@ -13,7 +13,7 @@ import { LexicalModelTypes } from "@keymanapp/common-types";
 import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { applyTransform } from '@keymanapp/models-templates';
 
-import { CorrectionPredictionTuple, createDefaultKeep, models, SuggestionSimilarity } from "@keymanapp/lm-worker/test-index";
+import { IntermediateCompositedPrediction, createDefaultKeep, models, SuggestionSimilarity } from "@keymanapp/lm-worker/test-index";
 
 import CasingFunction = LexicalModelTypes.CasingFunction;
 import Context = LexicalModelTypes.Context;
@@ -109,13 +109,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'iphone',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: 'iphone',
             deleteLeft: 5
@@ -124,10 +120,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: 'iphone'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
@@ -150,13 +153,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'iphone',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: 'iphone',
             deleteLeft: 7
@@ -165,10 +164,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: 'iphone'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
@@ -191,13 +197,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'iphone',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: 'iphone',
             deleteLeft: 8
@@ -206,10 +208,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: 'iphone'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
@@ -232,13 +241,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'and',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: 'and',
             deleteLeft: 3
@@ -247,10 +252,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: 'and'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
@@ -273,13 +285,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: 'iphones',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: 'iphones',
             deleteLeft: 7
@@ -288,10 +296,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: 'iphones'
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
@@ -314,13 +329,9 @@ describe('createDefaultKeep', () => {
       p: 1
     };
 
-    const expectedKeep: CorrectionPredictionTuple = {
-      correction: {
-        sample: '',
-        p: 1
-      },
-      prediction: {
-        sample: {
+    const expectedKeep: IntermediateCompositedPrediction = {
+      components: {
+        prediction: {
           transform: {
             insert: '',
             deleteLeft: 0
@@ -329,10 +340,17 @@ describe('createDefaultKeep', () => {
           matchesModel: false,
           tag: 'keep'
         },
-        p: 1
+        correction: ''
       },
-      totalProb: 1,
-      matchLevel: SuggestionSimilarity.exact
+      metadata: {
+        probabilities: {
+          prediction: 1,
+          correction: 1,
+          total: 1 * 1
+        },
+        autoSelectable: false,
+        matchLevel: SuggestionSimilarity.exact
+      }
     };
 
     const tuple = createDefaultKeep(testModelWithCasing, applyTransform(trueInput.sample, context), trueInput);
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
index a99187defa5..8234c6ba2a9 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
@@ -71,7 +71,7 @@ const DUMMY_MODEL_CONFIG = {
   languageUsesCasing: true
 };
 
-describe('predictFromCorrections', () => {
+describe('correctAndEnumerateWithoutTraversals', () => {
   it('handles a single correction prefixing multiple entries - no transform ID', () => {
     const context: Context = {
       left: 'It',
@@ -113,14 +113,15 @@ describe('predictFromCorrections', () => {
     });
 
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
-    predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its'));
-    predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6));
+
+    predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its'));
+    predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6));
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions);
+    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions);
 
-    assert.approximately(predictions[0].totalProb, 0.18 * 0.6, 0.00001);
-    assert.approximately(predictions[1].totalProb, 0.02 * 0.6, 0.00001);
+    assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001);
+    assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001);
   });
 
   it('handles a single correction prefixing multiple entries - with transform ID', () => {
@@ -165,19 +166,20 @@ describe('predictFromCorrections', () => {
     });
 
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
-    predictions.forEach((entry) => assert.equal(entry.correction.sample, 'Its'));
-    predictions.forEach((entry) => assert.equal(entry.correction.p, 0.6));
+
+    predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its'));
+    predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6));
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameOrderedMembers(predictions.map((entry) => entry.prediction.sample.displayAs), ["it's", "its"]);
-    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions.map((entry) => {
+    assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["it's", "its"]);
+    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((entry) => {
       entry = deepCopy(entry);
       entry.transformId = 314159;
       return entry;
     }));
 
-    assert.approximately(predictions[0].totalProb, 0.18 * 0.6, 0.00001);
-    assert.approximately(predictions[1].totalProb, 0.02 * 0.6, 0.00001);
+    assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001);
+    assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001);
   });
 
   it('handles multiple corrections at once', () => {
@@ -250,12 +252,12 @@ describe('predictFromCorrections', () => {
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameOrderedMembers(predictions.map((entry) => entry.prediction.sample.displayAs), ["is", "it's", "isn't", "its"]);
-    assert.sameDeepMembers(predictions.map((entry) => entry.prediction.sample), dummied_suggestions.flatMap((entry) => entry));
+    assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["is", "it's", "isn't", "its"]);
+    assert.sameDeepMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.flatMap((entry) => entry));
 
-    assert.approximately(predictions[0].totalProb, 0.4 * 0.4, 0.00001);
-    assert.approximately(predictions[1].totalProb, 0.18 * 0.6, 0.00001);
-    assert.approximately(predictions[2].totalProb, 0.4 * 0.2, 0.00001);
-    assert.approximately(predictions[3].totalProb, 0.02 * 0.6, 0.00001);
+    assert.approximately(predictions[0].metadata.probabilities.total, 0.4 * 0.4, 0.00001);
+    assert.approximately(predictions[1].metadata.probabilities.total, 0.18 * 0.6, 0.00001);
+    assert.approximately(predictions[2].metadata.probabilities.total, 0.4 * 0.2, 0.00001);
+    assert.approximately(predictions[3].metadata.probabilities.total, 0.02 * 0.6, 0.00001);
   });
 });
\ No newline at end of file
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts
index eea66d8ad0a..d1aa6df257e 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-deduplication.tests.ts
@@ -4,7 +4,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { deepCopy } from '@keymanapp/web-utils';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { CorrectionPredictionTuple, dedupeSuggestions, models } from "@keymanapp/lm-worker/test-index";
+import { IntermediateCompositedPrediction, dedupeSuggestions, models } from "@keymanapp/lm-worker/test-index";
 
 import Context = LexicalModelTypes.Context;
 import DummyModel = models.DummyModel;
@@ -24,77 +24,89 @@ const testModel = new DummyModel({
  * @returns
  */
 const build_its_is_set = () => {
-  const its: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const its: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 0
         },
         displayAs: 'its'
       },
-      p: 0.2
+      correction: 'its'
     },
-    totalProb: 0.16
-    // matchLevel does not yet exist.
+    metadata: {
+      probabilities: {
+        prediction: .2,
+        correction: .8,
+        total: .2 * .8
+      },
+      autoSelectable: true
+      // matchLevel does not yet exist.
+    }
   };
 
-  const it_is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const it_is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: '\'s',
           deleteLeft: 0
         },
         displayAs: 'it\'s'
       },
-      p: 0.8
+      correction: 'its'
     },
-    totalProb: 0.64
+    metadata: {
+      probabilities: {
+        prediction: .8,
+        correction: .8,
+        total: .8 * .8
+      },
+      autoSelectable: true
+    }
   };
 
-  const is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 1
         },
         displayAs: 'is'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
-  const is_not: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is_not: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 'sn\'t',
           deleteLeft: 1
         },
         displayAs: 'isn\'t'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
   return {
@@ -145,7 +157,7 @@ describe('dedupeSuggestions', () => {
     // There's no mathematically safe way to combine the components if the
     // underlying correction sources differ between duplicated suggestions,
     // though it's mathematically safe to combine their product.
-    expected.forEach((entry) => entry.totalProb *= (entry.prediction.sample.transform.insert == '\'s') ? 3 : 2);
+    expected.forEach((entry) => entry.metadata.probabilities.total *= (entry.components.prediction.transform.insert == '\'s') ? 3 : 2);
 
     assert.deepEqual(deduplicated, expected);
   });
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts
index 4e63055a101..c1c2ecacc5c 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-finalization.tests.ts
@@ -5,7 +5,7 @@ import { deepCopy } from '@keymanapp/web-utils';
 import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { CorrectionPredictionTuple, finalizeSuggestions, models } from "@keymanapp/lm-worker/test-index";
+import { IntermediateCompositedPrediction, finalizeSuggestions, models } from "@keymanapp/lm-worker/test-index";
 
 import DummyModel = models.DummyModel;
 import Outcome = LexicalModelTypes.Outcome;
@@ -39,6 +39,7 @@ const testModelWithoutSpacing = new DummyModel({
   }
 });
 
+
 /**
  * Builds a fresh copy of test values useful for suggestion-similarity
  * testing.
@@ -47,78 +48,89 @@ const testModelWithoutSpacing = new DummyModel({
  */
 const build_its_is_set = (verbose?: string) => {
   const verboseFlag = (verbose == 'verbose' ? true : false);
-
-  const its: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const its: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 0
         },
         displayAs: 'its'
       },
-      p: 0.2
+      correction: 'its'
     },
-    totalProb: 0.16
-    // matchLevel does not yet exist.
+    metadata: {
+      probabilities: {
+        prediction: .2,
+        correction: .8,
+        total: .2 * .8
+      },
+      autoSelectable: true
+      // matchLevel does not yet exist.
+    }
   };
 
-  const it_is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const it_is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: '\'s',
           deleteLeft: 0
         },
         displayAs: 'it\'s'
       },
-      p: 0.8
+      correction: 'its'
     },
-    totalProb: 0.64
+    metadata: {
+      probabilities: {
+        prediction: .8,
+        correction: .8,
+        total: .8 * .8
+      },
+      autoSelectable: true
+    }
   };
 
-  const is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 1
         },
         displayAs: 'is'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
-  const is_not: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is_not: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 'sn\'t',
           deleteLeft: 1
         },
         displayAs: 'isn\'t'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
   const baseDefinitions = {
@@ -132,13 +144,13 @@ const build_its_is_set = (verbose?: string) => {
   const expected = unfinalized.map((entry) => {
 
     const mapped: Outcome<Suggestion & { 'correction-p'?: number, 'lexical-p'?: number }> = {
-      ...deepCopy(entry.prediction.sample),
-      p: entry.totalProb
+      ...deepCopy(entry.components.prediction),
+      p: entry.metadata.probabilities.total
     };
 
     if(verboseFlag) {
-      mapped['correction-p'] = entry.correction.p;
-      mapped['lexical-p'] = entry.prediction.p;
+      mapped['correction-p'] = entry.metadata.probabilities.correction;
+      mapped['lexical-p'] = entry.metadata.probabilities.prediction;
     }
 
     return mapped;
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
index 72911d11281..e4cbfc81b1d 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
@@ -5,7 +5,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { deepCopy } from '@keymanapp/web-utils';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { CorrectionPredictionTuple, models, processSimilarity, SuggestionSimilarity, toAnnotatedSuggestion } from "@keymanapp/lm-worker/test-index";
+import { IntermediateCompositedPrediction, models, processSimilarity, SuggestionSimilarity, toAnnotatedSuggestion } from "@keymanapp/lm-worker/test-index";
 
 import CasingFunction = LexicalModelTypes.CasingFunction;
 import Context = LexicalModelTypes.Context;
@@ -109,77 +109,89 @@ const testModelWithCasing = new DummyModel({
  * @returns
  */
 const build_its_is_set = () => {
-  const its: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const its: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 0
         },
         displayAs: 'its'
       },
-      p: 0.2
+      correction: 'its'
     },
-    totalProb: 0.16
-    // matchLevel does not yet exist.
+    metadata: {
+      probabilities: {
+        prediction: .2,
+        correction: .8,
+        total: .2 * .8
+      },
+      autoSelectable: true
+      // matchLevel does not yet exist.
+    }
   };
 
-  const it_is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'its',
-      p: 0.8
-    },
-    prediction: {
-      sample: {
+  const it_is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: '\'s',
           deleteLeft: 0
         },
         displayAs: 'it\'s'
       },
-      p: 0.8
+      correction: 'its'
     },
-    totalProb: 0.64
+    metadata: {
+      probabilities: {
+        prediction: .8,
+        correction: .8,
+        total: .8 * .8
+      },
+      autoSelectable: true
+    }
   };
 
-  const is: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 's',
           deleteLeft: 1
         },
         displayAs: 'is'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
-  const is_not: CorrectionPredictionTuple = {
-    correction: {
-      sample: 'is',
-      p: 0.2
-    },
-    prediction: {
-      sample: {
+  const is_not: IntermediateCompositedPrediction = {
+    components: {
+      prediction: {
         transform: {
           insert: 'sn\'t',
           deleteLeft: 1
         },
         displayAs: 'isn\'t'
       },
-      p: 0.5
+      correction: 'is'
     },
-    totalProb: 0.1
+    metadata: {
+      probabilities: {
+        prediction: .5,
+        correction: .2,
+        total: .5 * .2
+      },
+      autoSelectable: true
+    }
   };
 
   return {
@@ -210,32 +222,22 @@ describe('processSimilarity', () => {
     const testSet = build_its_is_set();
     const distribution = [...Object.values(testSet)];
 
-    const expectation: CorrectionPredictionTuple[] = [
-      {
-        ...testSet.its,
-        matchLevel: SuggestionSimilarity.exact
-      }, {
-        ...testSet.it_is,
-        matchLevel: SuggestionSimilarity.sameKey
-      }, {
-        ...testSet.is,
-        matchLevel: SuggestionSimilarity.none
-      }, {
-        ...testSet.is_not,
-        matchLevel: SuggestionSimilarity.none
-      }
-    ];
+    const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
+    expectation[0].metadata.matchLevel = SuggestionSimilarity.exact;    // its
+    expectation[1].metadata.matchLevel = SuggestionSimilarity.sameKey;  // it_is
+    expectation[2].metadata.matchLevel = SuggestionSimilarity.none;     // is
+    expectation[3].metadata.matchLevel = SuggestionSimilarity.none;     // is_not
 
     const its = testSet.its;
     const original_its = deepCopy(its);
-    const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.prediction.sample, 'keep', QuoteBehavior.noQuotes);
+    const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_its.matchesModel = true;
 
     processSimilarity(testModelWithCasing, distribution, context, trueInput);
 
     assert.sameDeepMembers(distribution, expectation);
-    assert.equal(its.prediction.sample.tag, 'keep');
-    assert.deepEqual(its.prediction.sample, keep_its);
+    assert.equal(its.components.prediction.tag, 'keep');
+    assert.deepEqual(its.components.prediction, keep_its);
   });
 
   it(`selects contraction as 'more similar' than same-keyed non-contraction when context is contraction`, () => {
@@ -257,32 +259,22 @@ describe('processSimilarity', () => {
     const testSet = build_its_is_set();
     const distribution = [...Object.values(testSet)];
 
-    const expectation: CorrectionPredictionTuple[] = [
-      {
-        ...testSet.its,
-        matchLevel: SuggestionSimilarity.sameKey
-      }, {
-        ...testSet.it_is,
-        matchLevel: SuggestionSimilarity.exact
-      }, {
-        ...testSet.is,
-        matchLevel: SuggestionSimilarity.none
-      }, {
-        ...testSet.is_not,
-        matchLevel: SuggestionSimilarity.none
-      }
-    ];
+    const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
+    expectation[0].metadata.matchLevel = SuggestionSimilarity.sameKey;  // its
+    expectation[1].metadata.matchLevel = SuggestionSimilarity.exact;    // it_is
+    expectation[2].metadata.matchLevel = SuggestionSimilarity.none;     // is
+    expectation[3].metadata.matchLevel = SuggestionSimilarity.none;     // is_not
 
     const it_is = testSet.it_is;
     const original_it_is = deepCopy(it_is);
-    const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.prediction.sample, 'keep', QuoteBehavior.noQuotes);
+    const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_it_is.matchesModel = true;
 
     processSimilarity(testModelWithCasing, distribution, context, trueInput);
 
     assert.sameDeepMembers(distribution, expectation);
-    assert.equal(it_is.prediction.sample.tag, 'keep');
-    assert.deepEqual(it_is.prediction.sample, keep_it_is);
+    assert.equal(it_is.components.prediction.tag, 'keep');
+    assert.deepEqual(it_is.components.prediction, keep_it_is);
   });
 
   describe('with casing', () => {
@@ -314,34 +306,22 @@ describe('processSimilarity', () => {
 
       // Have the predictions replace existing context parts with the lowercased equivalents.
       Object.values(testSet).forEach((entry) => {
-        const transform = entry.prediction.sample.transform;
+        const transform = entry.components.prediction.transform;
         transform.insert = transform.deleteLeft == 0 ? `it${transform.insert}` : `i${transform.insert}`;
         transform.deleteLeft = 2;
       });
 
       const distribution = [...Object.values(testSet)];
 
-      const expectation: CorrectionPredictionTuple[] = [
-        {
-          ...testSet.its,
-          matchLevel: SuggestionSimilarity.sameKey
-        }, {
-          ...testSet.it_is,
-          // case mismatch, detectable because we have access to a lowercasing/uppercasing function.
-          matchLevel: SuggestionSimilarity.sameText
-        }, {
-          ...testSet.is,
-          matchLevel: SuggestionSimilarity.none
-        }, {
-          ...testSet.is_not,
-          matchLevel: SuggestionSimilarity.none
-        }
-      ];
-
+      const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
+      expectation[0].metadata.matchLevel = SuggestionSimilarity.sameKey;   // its
+      expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText;  // it_is
+      expectation[2].metadata.matchLevel = SuggestionSimilarity.none;      // is
+      expectation[3].metadata.matchLevel = SuggestionSimilarity.none;      // is_not
       processSimilarity(testModelWithCasing, distribution, context, trueInput);
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
-      const keep = distribution.find((entry) => entry.prediction.sample.tag == 'keep');
+      const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
       assert.isNotOk(keep);
       assert.sameDeepMembers(distribution, expectation);
     });
@@ -368,34 +348,20 @@ describe('processSimilarity', () => {
 
       // Have the predictions replace existing context parts with the lowercased equivalents.
       Object.values(testSet).forEach((entry) => {
-        const transform = entry.prediction.sample.transform;
+        const transform = entry.components.prediction.transform;
         transform.insert = transform.deleteLeft == 0 ? `it${transform.insert}` : `i${transform.insert}`;
         transform.deleteLeft = 2;
       });
 
       const distribution = [...Object.values(testSet)];
 
-      const expectation: CorrectionPredictionTuple[] = [
-        {
-          ...testSet.its,
-          matchLevel: SuggestionSimilarity.none
-        }, {
-          ...testSet.it_is,
-          // case mismatch, detectable because we have access to a lowercasing/uppercasing function.
-          matchLevel: SuggestionSimilarity.none
-        }, {
-          ...testSet.is,
-          matchLevel: SuggestionSimilarity.none
-        }, {
-          ...testSet.is_not,
-          matchLevel: SuggestionSimilarity.none
-        }
-      ];
+      const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
 
+      expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none);
       processSimilarity(testModelWithoutCasing, distribution, context, trueInput);
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
-      const keep = distribution.find((entry) => entry.prediction.sample.tag == 'keep');
+      const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
       assert.isNotOk(keep);
       assert.sameDeepMembers(distribution, expectation);
     });
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts
index 9b9ab2c3121..4bac59cafc4 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-custom-punctuation.tests.ts
@@ -81,6 +81,20 @@ describe('Custom Punctuation', function () {
             open: "'",
             close: "'"
           }
+        },
+        // Some of the suggestions above wordbreak differently from what might be
+        // expected, so override the wordbreaker:  a trailing space is its own span
+        // and all text before it is one span (text must agree with start/end/length).
+        wordbreaker: (text) => {
+          const textLen = text.length;
+          if(text.charAt(textLen - 1) == " ") {
+            return [
+              {text: text.substring(0, textLen-1), start: 0, end: textLen-1, length: textLen-1},
+              {text: text.substring(textLen-1), start: textLen-1, end: textLen, length: 1}
+            ];
+          } else {
+            return [{text, start: 0, end: textLen, length: textLen}];
+          }
         }
       });
 
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
index 928b6e75c47..8c20df9626e 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
@@ -869,6 +869,9 @@ describe('ModelCompositor', function() {
         deleteLeft: 1
       }
 
+      // Future adjustment:  add the 'baseSuggestion' to DummyModel so that it actually
+      // returns the suggestion again.
+      //  `new models.DummyModel(..., futureSuggestions: [[baseSuggestion]])`
       let model = new models.DummyModel({punctuation: englishPunctuation});
       let compositor = new ModelCompositor(model, true);
 
@@ -883,6 +886,7 @@ describe('ModelCompositor', function() {
 
       // As this test is a bit... 'hard-wired', we only get the 'keep' suggestion.
       // It should still be accurate, though.
+      // Can be fixed via the "Future adjustment" noted above.
       assert.equal(suggestions.length, 1);
 
       let expectedTransform = {

From 61fc801f1480e805a7555b3c6aba2eeae9986ca8 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Tue, 12 May 2026 14:15:17 -0500
Subject: [PATCH 12/16] change(web): support multi-token suggestion similarity

Build-bot: skip build:web
Test-bot: skip
---
 .../src/main/model-compositor.ts              |  2 +-
 .../worker-thread/src/main/predict-helpers.ts | 36 +++++++++----------
 .../suggestion-similarity.tests.ts            |  8 ++---
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
index 08e0f8b9f89..1d4893aab49 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
@@ -174,7 +174,7 @@ export class ModelCompositor {
     const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context);
 
     // Needs "casing" to be applied first.
-    const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, transformDistribution[0]);
+    const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext);
 
     // If no existing suggestion directly matches the user-visible version of
     // the token, also add a 'keep' suggestion (with `.matchesModel = false`)
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 177eef0a694..176c80e09f0 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -874,35 +874,33 @@ export function dedupeSuggestions(
 export function processSimilarity(
   lexicalModel: LexicalModel,
   suggestionDistribution: IntermediateCompositedPrediction[],
-  context: Context,
-  trueInput: ProbabilityMass<Transform>
+  baseContext: Context,
+  finalContext: Context
 ): boolean {
-  const { sample: inputTransform } = trueInput;
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
-  const postContext = models.applyTransform(inputTransform, context);
-  const truePrefix = wordbreak(postContext);
-
   const keyed = (text: string) => lexicalModel.toKey ? lexicalModel.toKey(text) : text;
   const keyCased = (text: string) => lexicalModel.applyCasing ? lexicalModel.applyCasing('lower', text) : text;
-  const keyedPrefix = keyed(truePrefix);
-  const lowercasedPrefix = keyCased(truePrefix);
+  const keyedTarget = keyed(finalContext.left);
+  const lowercasedTarget = keyCased(finalContext.left);
 
   let keepOption: Outcome<Keep>;
 
-  for(let tuple of suggestionDistribution) {
-    // Don't set it unnecessarily; this can have side-effects in some automated tests.
-    if(inputTransform.id !== undefined) {
-      tuple.components.prediction.transformId = inputTransform.id;
-    }
+  // If there are no suggestions found, we can't validate that the underlying
+  // correction was an empty token.
+  let allCorrectionsEmpty: boolean = suggestionDistribution.length > 0
+    ? true
+    : wordbreak(finalContext) == '';
 
-  const predictedWord = wordbreak(models.applyTransform(tuple.components.prediction.transform, context));
+  for(let tuple of suggestionDistribution) {
+    const appliedContext = models.applyTransform(tuple.components.prediction.transform, baseContext);
+    allCorrectionsEmpty &&= tuple.components.correction == '';
 
     // Is the suggestion an exact match (or, "similar enough") to the
     // actually-typed context?  If so, we wish to note this fact and to
     // prioritize such a suggestion over suggestions that are not.
-    if(keyed(tuple.components.correction) == keyedPrefix) {
-      if(predictedWord == truePrefix) {
+    if(keyed(tuple.components.correction) == keyedTarget) {
+      if(appliedContext.left == finalContext.left) {
         // Exact match:  it's a perfect 'keep' suggestion.
         tuple.metadata.matchLevel = SuggestionSimilarity.exact;
         keepOption = toAnnotatedSuggestion(lexicalModel, tuple.components.prediction, 'keep',  models.QuoteBehavior.noQuotes);
@@ -914,10 +912,10 @@ export function processSimilarity(
         keepOption.matchesModel = true;
         Object.assign(tuple.components.prediction, keepOption);
         keepOption = tuple.components.prediction as Outcome<Keep>;
-      } else if(keyCased(predictedWord) == lowercasedPrefix) {
+      } else if(keyCased(appliedContext.left) == lowercasedTarget) {
         // Case-insensitive match.  No diacritic differences; the ONLY difference is casing.
         tuple.metadata.matchLevel = SuggestionSimilarity.sameText;
-      } else if(keyed(predictedWord) == keyedPrefix) {
+      } else if(keyed(appliedContext.left) == keyedTarget) {
         // Diacritic-insensitive / exact-key match.
         tuple.metadata.matchLevel = SuggestionSimilarity.sameKey;
       } else {
@@ -932,7 +930,7 @@ export function processSimilarity(
   //
   // No actual 'keep' needed if the current context token is empty, so we say we
   // have a 'keep' for that case, even though there isn't really one.
-  return !!(keepOption || truePrefix == '');
+  return !!(keepOption || allCorrectionsEmpty);
 }
 
 /**
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
index e4cbfc81b1d..a485fecd053 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts
@@ -233,7 +233,7 @@ describe('processSimilarity', () => {
     const keep_its = toAnnotatedSuggestion(testModelWithCasing, original_its.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_its.matchesModel = true;
 
-    processSimilarity(testModelWithCasing, distribution, context, trueInput);
+    processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
     assert.sameDeepMembers(distribution, expectation);
     assert.equal(its.components.prediction.tag, 'keep');
@@ -270,7 +270,7 @@ describe('processSimilarity', () => {
     const keep_it_is = toAnnotatedSuggestion(testModelWithCasing, original_it_is.components.prediction, 'keep', QuoteBehavior.noQuotes);
     keep_it_is.matchesModel = true;
 
-    processSimilarity(testModelWithCasing, distribution, context, trueInput);
+    processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
     assert.sameDeepMembers(distribution, expectation);
     assert.equal(it_is.components.prediction.tag, 'keep');
@@ -318,7 +318,7 @@ describe('processSimilarity', () => {
       expectation[1].metadata.matchLevel = SuggestionSimilarity.sameText;  // it_is
       expectation[2].metadata.matchLevel = SuggestionSimilarity.none;      // is
       expectation[3].metadata.matchLevel = SuggestionSimilarity.none;      // is_not
-      processSimilarity(testModelWithCasing, distribution, context, trueInput);
+      processSimilarity(testModelWithCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
       const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');
@@ -358,7 +358,7 @@ describe('processSimilarity', () => {
       const expectation: IntermediateCompositedPrediction[] = [...Object.values(testSet)];
 
       expectation.forEach((entry) => entry.metadata.matchLevel = SuggestionSimilarity.none);
-      processSimilarity(testModelWithoutCasing, distribution, context, trueInput);
+      processSimilarity(testModelWithoutCasing, distribution, context, models.applyTransform(trueInput.sample, context));
 
       // Because we mucked with the casing here, there is no perfect 'keep' match.
       const keep = distribution.find((entry) => entry.components.prediction.tag == 'keep');

From ff53eed4a2295242f018127ef286bcf2d291a356 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Mon, 4 May 2026 14:39:18 -0500
Subject: [PATCH 13/16] change(web): add tokenized prediction intermediate type
 for whitespace correction support

Converts early uses of CompositedPredictionData to TokenizedPredictionData to facilitate important token-based aspects of whitespace correction support, such as case-handling.

Build-bot: skip build:web
Test-bot: skip
---
 .../src/main/model-compositor.ts              |  35 +--
 .../worker-thread/src/main/predict-helpers.ts | 206 +++++++++++++-----
 2 files changed, 158 insertions(+), 83 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
index 1d4893aab49..71c7704c144 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts
@@ -1,15 +1,13 @@
 import * as models from '@keymanapp/models-templates';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { applySuggestionCasing, correctAndEnumerate, createDefaultKeep, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js';
-import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
-import TransformUtils from './transformUtils.js';
-
 import * as correction from './correction/index.js'
+import { applySuggestionCasing, compositeIntermediatePredictions, correctAndEnumerate, createDefaultKeep, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js';
+import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
+
 import { ContextTracker } from './correction/context-tracker.js';
 import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/distance-modeler.js';
 
-import CasingForm = LexicalModelTypes.CasingForm;
 import Configuration = LexicalModelTypes.Configuration;
 import Context = LexicalModelTypes.Context;
 import Distribution = LexicalModelTypes.Distribution;
@@ -125,24 +123,6 @@ export class ModelCompositor {
     const transformId = inputTransform.id;
     this.initContextTracker(context, transformId);
 
-    const allowBksp = TransformUtils.isBackspace(inputTransform);
-    const allowWhitespace = TransformUtils.isWhitespace(inputTransform);
-
-    const postContext = models.applyTransform(inputTransform, context);
-
-    // TODO:  It would be best for the correctAndEnumerate method to return the
-    // suggestion's prefix, as it already has lots of logic oriented to this.
-    // The context-tracker used there with v14+ models can determine this more
-    // robustly.
-    const truePrefix = this.wordbreak(postContext);
-    // Only use of `truePrefix`.
-    const basePrefix = (allowBksp || allowWhitespace) ? truePrefix : this.wordbreak(context);
-
-    // Used to restore whitespaces if operations would remove them.
-    const currentCasing: CasingForm = lexicalModel.languageUsesCasing
-      ? detectCurrentCasing(lexicalModel, postContext)
-      : null;
-
     // Section 1:  determine 'prediction roots' - enumerate corrections from most to least likely,
     // searching for results that yield viable predictions from the model.
 
@@ -160,9 +140,9 @@ export class ModelCompositor {
     // Properly capitalizes the suggestions based on the existing context casing state.
     // This may result in duplicates if multiple casing options exist within the
     // lexicon for a word.  (Example:  "Apple" the company vs "apple" the fruit.)
-    for(let tuple of rawPredictions) {
-      if(currentCasing && currentCasing != 'lower') {
-        applySuggestionCasing(tuple.components.prediction, basePrefix, this.lexicalModel, currentCasing);
+    if(lexicalModel.languageUsesCasing) {
+      for(let tuple of rawPredictions) {
+        tuple.components.forEach((component) => applySuggestionCasing(component, this.lexicalModel));
       }
     }
 
@@ -171,9 +151,10 @@ export class ModelCompositor {
 
     // We want to dedupe before trimming the list so that we can present a full set
     // of viable distinct suggestions if available.
-    const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context);
+    const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, compositeIntermediatePredictions(rawPredictions), context);
 
     // Needs "casing" to be applied first.
+    const postContext = postContextState?.context ?? models.applyTransform(inputTransform, context);
     const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext);
 
     // If no existing suggestion directly matches the user-visible version of
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 176c80e09f0..567166264fc 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -4,7 +4,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
 import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbreakers';
 
 import TransformUtils from './transformUtils.js';
-import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
+import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
 import { ContextTokenization, mapWhitespacedTokenization } from './correction/context-tokenization.js';
 import { ContextTracker } from './correction/context-tracker.js';
 import { ContextToken } from './correction/context-token.js';
@@ -76,6 +76,24 @@ export const CORRECTION_SEARCH_THRESHOLDS = {
   REPLACEMENT_SEARCH_THRESHOLD: 4 as const // e^-4 = 0.0183156388.  Allows "80%" of an extra edit.
 }
 
+export interface TokenizedPredictionData {
+  /**
+   * The potential Suggestion
+   */
+  prediction: Suggestion,
+  /**
+   * The correction upon which the Suggestion is based
+   */
+  correction: string,
+  /**
+   * The ContextToken underlying the correction/prediction.
+   *
+   * May be undefined, especially for models that do not leverage the
+   * LexiconTraversal pattern.
+   */
+  source: ContextToken // useful for getting the unkeyed, original version of the text (in model-compositor, where casing is applied)
+}
+
 export interface CompositedPredictionData {
   /**
    * The potential Suggestion (or Keep)
@@ -138,6 +156,19 @@ export interface PredictionMetadata {
   preservationTransform?: Transform;
 }
 
+export interface IntermediateTokenizedPrediction {
+  /**
+   * Contains the tokenized components to be used to construct a full
+   * predictive-text Suggestion, as well as data about the source for each
+   * component.
+   */
+  components: TokenizedPredictionData[];
+  /**
+   * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context.
+   */
+  metadata: PredictionMetadata;
+}
+
 export interface IntermediateCompositedPrediction {
   /**
    * Contains the fully composited predictive-text Suggestion and its underlying correction string.
@@ -149,7 +180,7 @@ export interface IntermediateCompositedPrediction {
   metadata: PredictionMetadata;
 }
 
-type IntermediatePrediction = IntermediateCompositedPrediction;
+type IntermediatePrediction = IntermediateCompositedPrediction | IntermediateTokenizedPrediction;
 
 /**
  * An enum to be used when categorizing the level of similarity between
@@ -363,7 +394,7 @@ export function determineSuggestionRange(
 export function buildAndMapPredictions(
   transition: ContextTransition,
   tokenizationCorrection: TokenizationResultMapping,
-): IntermediateCompositedPrediction[] {
+): IntermediateTokenizedPrediction[] {
   const model = transition.final.model;
   const tokenization = tokenizationCorrection.matchingSpace.tokenization;
 
@@ -451,21 +482,28 @@ export function buildAndMapPredictions(
     // rather than predicting (and possibly extending) tokens not adjacent to the caret.
     //
     // Also, fall back to the actual correction string should prediction not be valid here.
-    return i == correctionTransforms.length - 1 ? predictions : [predictions[0]];
+    const predictionsToReturn = i == correctionTransforms.length - 1 ? predictions : [predictions[0]];
+
+    return predictionsToReturn.map((prediction) => {
+      return {
+        prediction,
+        correction: correctionTransform.insert
+      };
+    });
   });
 
   // Constructs a common prefix for all but the final token's component.
   const predictionPrefix = predictionComponents
     .slice(0, predictionComponents.length-1)
-    .reduce((accum, curr) => models.buildMergedTransform(accum, curr[0].sample.transform), { insert: '', deleteLeft: 0 });
+    .map((arr) => arr[0]);
   const prefixProb = predictionComponents
     .slice(0, predictionComponents.length-1)
-    .reduce((accum, curr) => accum * curr[0].p, 1)
+    .reduce((accum, curr) => accum * curr[0].prediction.p, 1)
 
-  const completePredictionTuples: IntermediateCompositedPrediction[] = predictionComponents[predictionComponents.length-1].map((prediction) => {
-    const predictionCost = prediction.p * prefixProb;
+  const completePredictionTuples: IntermediateTokenizedPrediction[] = predictionComponents[predictionComponents.length-1].map((tuple) => {
+    const predictionCost = tuple.prediction.p * prefixProb;
 
-    return {
+    const returnVal: IntermediateTokenizedPrediction = {
       // Will need to do this differently.  We want to have each component
       // individualized b/c casing. Case should be maintained for prior tokens
       // and managed independently for each.
@@ -476,23 +514,15 @@ export function buildAndMapPredictions(
       // applySuggestionCasing applies onto suggestions, so we'll want to build
       // the FULL suggestion AFTER applying casing changes (to each token's
       // suggestion component).
-      components: {
-        prediction: {
-          transformId: transition.transitionId,
-          transform: models.buildMergedTransform(predictionPrefix, prediction.sample.transform),
-          displayAs: models.buildMergedTransform(predictionPrefix, prediction.sample.transform).insert // should composite the displayAs strings instead...
-        },
-        correction: correctionTransforms[correctionTransforms.length-1].insert
-      },
+      components: [],
       metadata: {
         probabilities: {
           prediction: predictionCost,
           correction: correctionCost,
           total: predictionCost * correctionCost
         },
-        matchLevel: SuggestionSimilarity.none,
         autoSelectable: tokenizationCorrection.matchingSpace.modelsCorrectables,
-
+        matchLevel: SuggestionSimilarity.none,
         // Long-term, we shouldn't have `.preservationTransform` here.
         //
         // Needed for now until the search actually operates based on
@@ -501,6 +531,21 @@ export function buildAndMapPredictions(
         preservationTransform: tokenization.taillessTrueKeystroke
       }
     }
+
+    // Iteratively add the components into the return value here.
+    const orderedTokens = tokenizationCorrection.matchingSpace.orderedTokens;
+    const reportTokenizedPrediction = (tuple: typeof predictionPrefix[0], index: number) => {
+      returnVal.components.push({
+        prediction: tuple.prediction.sample,
+        correction: tuple.correction,
+        source: orderedTokens[index]
+      });
+    };
+    // Also gets the (changing) tail portion.
+    predictionPrefix.forEach((tuple, index) => reportTokenizedPrediction(tuple, index));
+    reportTokenizedPrediction(tuple, orderedTokens.length - 1);
+
+    return returnVal;
   });
 
   return completePredictionTuples;
@@ -565,7 +610,7 @@ export async function correctAndEnumerate(
   /**
    * The suggestions generated based on the user's input state.
    */
-  rawPredictions: IntermediateCompositedPrediction[];
+  rawPredictions: IntermediateTokenizedPrediction[];
 
   /**
    * The id of a prior ContextTransition event that triggered a Suggestion found
@@ -617,7 +662,7 @@ export async function correctAndEnumerate(
   const searchModules = tokenizations.map(t => t.tail.searchModule);
 
   // Only run the correction search when corrections are enabled.
-  let rawPredictions: IntermediateCompositedPrediction[] = [];
+  let rawPredictions: IntermediateTokenizedPrediction[] = [];
   let bestCorrectionCost: number;
   for await(const match of getBestTokenMatches(searchModules, timer)) {
     // Corrections obtained:  now to predict from them!
@@ -666,7 +711,7 @@ export async function correctAndEnumerate(
 export function shouldStopSearchingEarly(
   bestCorrectionCost: number,
   currentCorrectionCost: number,
-  rawPredictions: IntermediateCompositedPrediction[]
+  rawPredictions: IntermediateTokenizedPrediction[]
 ) {
   if(currentCorrectionCost >= bestCorrectionCost + CORRECTION_SEARCH_THRESHOLDS.MAX_SEARCH_THRESHOLD) {
     return true;
@@ -707,9 +752,8 @@ export function correctAndEnumerateWithoutTraversals(
   lexicalModel: LexicalModel,
   corrections: ProbabilityMass<Transform>[],
   context: Context
-): IntermediateCompositedPrediction[] {
-  let returnedPredictions: IntermediateCompositedPrediction[] = [];
-
+): IntermediateTokenizedPrediction[] {
+  let returnedPredictions: IntermediateTokenizedPrediction[] = [];
   const wordbreak = determineModelWordbreaker(lexicalModel);
   const tokenizer = determineModelTokenizer(lexicalModel);
 
@@ -720,13 +764,28 @@ export function correctAndEnumerateWithoutTraversals(
     // support, though.
 
     const tokenizedCorrection = mapWhitespacedTokenization(tokenization.left.map((t) => { return {exampleInput: t.text} }), lexicalModel, correction.sample).tokenizedTransform;
-    const deleteLeft = [...tokenizedCorrection.values()].reduce((total, curr) => total + curr.deleteLeft, 0);
+    const deleteLeft = tokenization.left.length > 1 ? 0 : tokenization.left.reduce((prev, curr) => prev + KMWString.length(curr.text), 0);
+
+    const intermediateTokens: TokenizedPredictionData[] = [];
+    [...tokenizedCorrection.entries()].forEach((entry, index) => {
+      let dl = index == 0 ? deleteLeft: 0;
+      let text: string;
+
+      if(index != 0) {
+        text = entry[1].insert;
+      } else {
+        text = wordbreak(models.applyTransform(entry[1], context));
+      }
 
-    const tokenizedCorrectionEntries = [...tokenizedCorrection.entries()];
-    const preservationTransform = tokenizedCorrectionEntries.slice(0, -1).map((e) => e[1]).reduce((accum, curr) => {
-      return models.buildMergedTransform(accum, {...curr, deleteLeft: 0});
-    }, { insert: '', deleteLeft: 0, id: correction.sample.id});
-    preservationTransform.deleteLeft = deleteLeft;
+      intermediateTokens.push({
+        prediction: {
+          transform: { insert: text, deleteLeft: dl },
+          displayAs: text
+        },
+        correction: text,
+        source: null
+      })
+    });
 
     // Step 2:  predict based on the final token.
     const emptyContext: Context = {
@@ -735,32 +794,28 @@ export function correctAndEnumerateWithoutTraversals(
       endOfBuffer: true
     };
 
-    const tailCorrection = tokenizedCorrectionEntries[tokenizedCorrectionEntries.length-1][1];
+    const tailCorrection = { insert: intermediateTokens[intermediateTokens.length-1].correction, deleteLeft: 0};
     let predictions = lexicalModel.predict(tailCorrection, emptyContext);
 
     // Step 3: create the intermediate prediction data entries for each generated prediction
     let predictionSet = predictions.map((pair: ProbabilityMass<Suggestion>) => {
+
+      // Overwrite the last entry with the prediction.
+      const components = [...intermediateTokens];
+
+      components[components.length - 1] = {
+        ...components[components.length - 1],
+        prediction: pair.sample
+      };
+
       // Let's not rely on the model to copy transform IDs.
       // Only bother is there IS an ID to copy.
       if(correction.sample.id !== undefined) {
-        pair.sample.transformId = correction.sample.id;
-      }
-
-      let correctionText: string;
-      if(tokenizedCorrectionEntries.length != 1) {
-        correctionText = correction.sample.insert;
-        // deleteLeft: 0; it's pre-applied within preservationTransform.
-      } else {
-        // Use the deleteLeft & tokenize.
-        const postContext = models.applyTransform(correction.sample, context);
-        correctionText = wordbreak(postContext);
+        components.forEach((c) => c.prediction.transformId = correction.sample.id);
       }
 
-      let tuple: IntermediateCompositedPrediction = {
-        components: {
-          prediction: pair.sample,
-          correction: correctionText
-        },
+      let tuple: IntermediateTokenizedPrediction = {
+        components,
         metadata: {
           probabilities: {
             prediction: pair.p,
@@ -768,8 +823,7 @@ export function correctAndEnumerateWithoutTraversals(
             total: pair.p * correction.p
           },
           autoSelectable: correctionValidForAutoSelect(tailCorrection.insert),
-          matchLevel: SuggestionSimilarity.none,
-          preservationTransform
+          matchLevel: SuggestionSimilarity.none
         }
       };
       return tuple;
@@ -789,20 +843,60 @@ export function correctAndEnumerateWithoutTraversals(
  * @param lexicalModel
  * @param casingForm
  */
-export function applySuggestionCasing(suggestion: Suggestion, baseWord: string, lexicalModel: LexicalModel, casingForm: CasingForm) {
-  // Step 1:  does the suggestion replace the whole word?  If not, we should extend the suggestion to do so.
-  let unchangedLength  = KMWString.length(baseWord) - suggestion.transform.deleteLeft;
+export function applySuggestionCasing(predictionToken: TokenizedPredictionData, lexicalModel: LexicalModel) {
+  const suggestion = predictionToken.prediction;
+
+  // Step 0:  our pattern for generating predictions and corrections already
+  // enforces them to encompass the whole word.
 
-  if(unchangedLength > 0) {
-    suggestion.transform.deleteLeft += unchangedLength;
-    suggestion.transform.insert = KMWString.substr(baseWord, 0, unchangedLength) + suggestion.transform.insert;
+  // Step 1:  detect the original token's casing
+  let casingForm: CasingForm;
+
+  // If we are using the context-tracking engine (when traversals are enabled),
+  // we just leverage the context token's exampleInput to determine casing.
+  //
+  // If it's not available, the correction entry reflects a word-broken piece of
+  // the original context, with its original casing - so we use that instead.
+  let casingRoot = predictionToken.source ? predictionToken.source.exampleInput : predictionToken.correction;
+  if(!casingRoot) {
+    // There's no text in place to verify casing expectations; just leave it
+    // unchanged.
+    return;
   }
 
+  casingForm = detectCurrentCasing(lexicalModel, {
+    left: casingRoot,
+    startOfBuffer: true,
+    endOfBuffer: true
+  });
+
   // Step 2: Now that the transform affects the whole word, we may safely apply casing rules.
   suggestion.transform.insert = lexicalModel.applyCasing(casingForm, suggestion.transform.insert);
   suggestion.displayAs = lexicalModel.applyCasing(casingForm, suggestion.displayAs);
 }
 
+export function compositeIntermediatePredictions(predictions: IntermediateTokenizedPrediction[]): IntermediateCompositedPrediction[] {
+  return predictions.map((predictionData) => {
+    const components = predictionData.components;
+
+    return {
+      components: components.reduce((total, current) => {
+        const mergedTransform = models.buildMergedTransform(total.prediction.transform, current.prediction.transform);
+        const mergedDisplayAs = total.prediction.displayAs + current.prediction.displayAs
+
+        return {
+          prediction: {...total.prediction, transform: mergedTransform, displayAs: mergedDisplayAs},
+          correction: total.correction + current.correction
+        }
+      }, {
+        prediction: {...components[0].prediction, transform: { insert: '', deleteLeft: 0 }, displayAs: ''},
+        correction: ''
+      }),
+      metadata: predictionData.metadata
+    };
+  });
+}
+
 /**
  * Given an array of suggestions output from the correction and model-lookup processes,
  * this function checks for any duplicate suggestions and merges them.

From d79d54afc33bbbb4edcb3bd91ab444c609e40afc Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Mon, 11 May 2026 16:24:59 -0500
Subject: [PATCH 14/16] fix(web): adjusts existing unit tests to match new
 intermediate-prediction-data format

---
 .../early-correction-search-stopping.tests.ts |  14 +-
 .../predict-from-corrections.tests.ts         |  17 +-
 .../worker-thread/suggestion-casing.tests.ts  | 208 ++++++++++--------
 3 files changed, 131 insertions(+), 108 deletions(-)

diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
index 430d9c6c7e0..9595f15527a 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts
@@ -1,15 +1,15 @@
 import { assert } from 'chai';
 
-import { CORRECTION_SEARCH_THRESHOLDS, IntermediateCompositedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index";
+import { CORRECTION_SEARCH_THRESHOLDS, IntermediateTokenizedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index";
 
-function mockIntermediatePrediction(value: number) {
+function mockTokenizedPrediction(value: number) {
   return {
     metadata: {
       probabilities: {
         total: value
       }
     }
-  } as IntermediateCompositedPrediction
+  } as IntermediateTokenizedPrediction
 }
 
 describe('correction-search: shouldStopSearchingEarly', () => {
@@ -22,7 +22,7 @@ describe('correction-search: shouldStopSearchingEarly', () => {
     assert.equal(predictionProbs.length, ModelCompositor.MAX_SUGGESTIONS, "test setup no longer valid");
 
     // The only part for each entry we actually care about here:  .totalProb.
-    const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry));
+    const predictions = predictionProbs.map((entry) => mockTokenizedPrediction(entry));
 
     // Thresholding is performed in log-space.
     // 0.0501 and 0.0499 are offset on each side of 0.05, the last value in the array defined above.
@@ -38,8 +38,8 @@ describe('correction-search: shouldStopSearchingEarly', () => {
     //
     // Can technically run the method with an empty array, but the actual scenario would have
     // at least one prediction present in the "found predictions" array.
-    assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockIntermediatePrediction(Math.exp(-1))]));
-    assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockIntermediatePrediction(Math.exp(-1))]));
+    assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockTokenizedPrediction(Math.exp(-1))]));
+    assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockTokenizedPrediction(Math.exp(-1))]));
   });
 
   it('stops checking corrections earlier when enough predictions have been found', () => {
@@ -48,7 +48,7 @@ describe('correction-search: shouldStopSearchingEarly', () => {
 
     // The only part for each entry we actually care about here:  .totalProb.
     /** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */
-    const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry));
+    const predictions = predictionProbs.map((entry) => mockTokenizedPrediction(entry));
 
     const baseCost = 1;
 
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
index 8234c6ba2a9..d18a2d92e43 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-corrections.tests.ts
@@ -114,11 +114,12 @@ describe('correctAndEnumerateWithoutTraversals', () => {
 
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
 
-    predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its'));
+    predictions.forEach((entry) => assert.equal(entry.components.length, 1));
+    predictions.forEach((entry) => assert.equal(entry.components[0].correction, 'Its'));
     predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6));
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions);
+    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components[0].prediction), dummied_suggestions);
 
     assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001);
     assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001);
@@ -167,12 +168,13 @@ describe('correctAndEnumerateWithoutTraversals', () => {
 
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
 
-    predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its'));
+    predictions.forEach((entry) => assert.equal(entry.components.length, 1));
+    predictions.forEach((entry) => assert.equal(entry.components[0].correction, 'Its'));
     predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6));
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["it's", "its"]);
-    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((entry) => {
+    assert.sameOrderedMembers(predictions.map((entry) => entry.components[0].prediction.displayAs), ["it's", "its"]);
+    assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components[0].prediction), dummied_suggestions.map((entry) => {
       entry = deepCopy(entry);
       entry.transformId = 314159;
       return entry;
@@ -252,8 +254,9 @@ describe('correctAndEnumerateWithoutTraversals', () => {
     const predictions = correctAndEnumerateWithoutTraversals(model, correctionDistribution, context);
     predictions.sort(tupleDisplayOrderSort);
 
-    assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["is", "it's", "isn't", "its"]);
-    assert.sameDeepMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.flatMap((entry) => entry));
+    predictions.forEach((entry) => assert.equal(entry.components.length, 1));
+    assert.sameOrderedMembers(predictions.map((entry) => entry.components[0].prediction.displayAs), ["is", "it's", "isn't", "its"]);
+    assert.sameDeepMembers(predictions.map((entry) => entry.components[0].prediction), dummied_suggestions.flatMap((entry) => entry));
 
     assert.approximately(predictions[0].metadata.probabilities.total, 0.4 * 0.4, 0.00001);
     assert.approximately(predictions[1].metadata.probabilities.total, 0.18 * 0.6, 0.00001);
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts
index dd586eab646..7de4abac395 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts
@@ -13,7 +13,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs';
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
-import { applySuggestionCasing, models } from '@keymanapp/lm-worker/test-index';
+import { TokenizedPredictionData, applySuggestionCasing, models } from '@keymanapp/lm-worker/test-index';
 
 import CasingFunction = LexicalModelTypes.CasingFunction;
 import TrieModel = models.TrieModel;
@@ -45,117 +45,137 @@ describe('applySuggestionCasing', function() {
   );
 
   it('properly cases suggestions with no suggestion root', function() {
-    let suggestion = {
-      transform: {
-        insert: 'the',
-        deleteLeft: 0
+    let suggestion: TokenizedPredictionData[] = [{
+      prediction: {
+        transform: {
+          insert: 'the',
+          deleteLeft: 0
+        },
+        displayAs: 'the'
       },
-      displayAs: 'the'
-    };
-
-    applySuggestionCasing(suggestion, '', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'The');
-    assert.equal(suggestion.transform.insert, 'The');
-
-    suggestion = {
-      transform: {
-        insert: 'thE',
-        deleteLeft: 0
-      },
-      displayAs: 'thE'
-    };
-
-    applySuggestionCasing(suggestion, '', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'ThE');
-    assert.equal(suggestion.transform.insert, 'ThE');
-
-    suggestion = {
-      transform: {
-        insert: 'the',
-        deleteLeft: 0
+      correction: '',
+      source: null
+    }];
+
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'the');
+    assert.equal(suggestion[0].prediction.transform.insert, 'the');
+
+    suggestion = [{
+      prediction: {
+        transform: {
+          insert: 'ThE',
+          deleteLeft: 0
+        },
+        displayAs: 'ThE'
       },
-      displayAs: 'the'
-    };
+      correction: '',
+      source: null
+    }];
 
-    applySuggestionCasing(suggestion, '', plainCasedModel, 'upper');
-    assert.equal(suggestion.displayAs, 'THE');
-    assert.equal(suggestion.transform.insert, 'THE');
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'ThE');
+    assert.equal(suggestion[0].prediction.transform.insert, 'ThE');
   });
 
   it('properly cases suggestions that fully replace the suggestion root', function() {
-    let suggestion = {
-      transform: {
-        insert: 'therefore',
-        deleteLeft: 3
+    let suggestion: TokenizedPredictionData[] = [{
+      prediction: {
+        transform: {
+          insert: 'therefore',
+          deleteLeft: 3
+        },
+        displayAs: 'therefore'
       },
-      displayAs: 'therefore'
-    };
-
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'Therefore');
-    assert.equal(suggestion.transform.insert, 'Therefore');
-
-    suggestion = {
-      transform: {
-        insert: 'thereFore',
-        deleteLeft: 3
+      correction: 'The',
+      source: null
+    }];
+
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'Therefore');
+    assert.equal(suggestion[0].prediction.transform.insert, 'Therefore');
+
+    suggestion = [{
+      prediction: {
+        transform: {
+          insert: 'thereFore',
+          deleteLeft: 3
+        },
+        displayAs: 'thereFore'
       },
-      displayAs: 'thereFore'
-    };
-
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'ThereFore');
-    assert.equal(suggestion.transform.insert, 'ThereFore');
-
-    suggestion = {
-      transform: {
-        insert: 'therefore',
-        deleteLeft: 3
+      correction: 'The',
+      source: null
+    }];
+
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'ThereFore');
+    assert.equal(suggestion[0].prediction.transform.insert, 'ThereFore');
+
+    suggestion = [{
+      prediction: {
+        transform: {
+          insert: 'therefore',
+          deleteLeft: 3
+        },
+        displayAs: 'therefore'
       },
-      displayAs: 'therefore'
-    };
+      correction: 'THE',
+      source: null
+    }];
 
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'upper');
-    assert.equal(suggestion.displayAs, 'THEREFORE');
-    assert.equal(suggestion.transform.insert, 'THEREFORE');
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'THEREFORE');
+    assert.equal(suggestion[0].prediction.transform.insert, 'THEREFORE');
   });
 
   it('properly cases suggestions that do not fully replace the suggestion root', function() {
-    let suggestion = {
-      transform: {
-        insert: 'erefore',
-        deleteLeft: 1
+    let suggestion: TokenizedPredictionData[] = [{
+      prediction: {
+        transform: {
+          insert: 'therefore',
+          deleteLeft: 3
+        },
+        displayAs: 'therefore'
       },
-      displayAs: 'therefore'
-    };
+      correction: 'The',
+      source: null
+    }];
 
     // When integrated, the 'the' string comes from a wordbreak operation on the current context.
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'Therefore');
-    assert.equal(suggestion.transform.insert, 'Therefore');
-
-    suggestion = {
-      transform: {
-        insert: 'ereFore',
-        deleteLeft: 1
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'Therefore');
+    assert.equal(suggestion[0].prediction.transform.insert, 'Therefore');
+
+    suggestion = [{
+      prediction: {
+        transform: {
+          insert: 'thereFore',
+          deleteLeft: 3
+        },
+        displayAs: 'thereFore'
       },
-      displayAs: 'thereFore'
-    };
-
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial');
-    assert.equal(suggestion.displayAs, 'ThereFore');
-    assert.equal(suggestion.transform.insert, 'ThereFore');
-
-    suggestion = {
-      transform: {
-        insert: 'erefore',
-        deleteLeft: 1
+      correction: 'The',
+      source: null
+    }];
+
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'ThereFore');
+    assert.equal(suggestion[0].prediction.transform.insert, 'ThereFore');
+
+    suggestion = [{
+      prediction: {
+        transform: {
+          insert: 'therefore',
+          deleteLeft: 3
+        },
+        displayAs: 'therefore'
       },
-      displayAs: 'therefore'
-    };
+      correction: 'THE',
+      source: null
+    }];
 
-    applySuggestionCasing(suggestion, 'the', plainCasedModel, 'upper');
-    assert.equal(suggestion.displayAs, 'THEREFORE');
-    assert.equal(suggestion.transform.insert, 'THEREFORE');
+    applySuggestionCasing(suggestion[0], plainCasedModel);
+    assert.equal(suggestion[0].prediction.displayAs, 'THEREFORE');
+    assert.equal(suggestion[0].prediction.transform.insert, 'THEREFORE');
   });
 });
\ No newline at end of file

From b763609f46ce8af4a741a16f62150d6d3ec56210 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Wed, 13 May 2026 14:18:17 -0500
Subject: [PATCH 15/16] fix(web): apply original casing-application logic on a
 per-token basis

---
 .../worker-thread/src/main/predict-helpers.ts               | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 567166264fc..5f825e4b9d9 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -871,8 +871,10 @@ export function applySuggestionCasing(predictionToken: TokenizedPredictionData,
   });
 
   // Step 2: Now that the transform affects the whole word, we may safely apply casing rules.
-  suggestion.transform.insert = lexicalModel.applyCasing(casingForm, suggestion.transform.insert);
-  suggestion.displayAs = lexicalModel.applyCasing(casingForm, suggestion.displayAs);
+  if(casingForm && casingForm != 'lower') {
+    suggestion.transform.insert = lexicalModel.applyCasing(casingForm, suggestion.transform.insert);
+    suggestion.displayAs = lexicalModel.applyCasing(casingForm, suggestion.displayAs);
+  }
 }
 
 export function compositeIntermediatePredictions(predictions: IntermediateTokenizedPrediction[]): IntermediateCompositedPrediction[] {

From 1e505feac056c6b73e1ed12b5b51f427959417cb Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Wed, 13 May 2026 15:13:42 -0500
Subject: [PATCH 16/16] change(web): adjust TokenizationCorrector spec

Build-bot: skip build:web
Test-bot: skip
---
 .../main/correction/tokenization-corrector.ts | 65 ++++++++++++++-----
 .../correction/tokenization-result-mapping.ts | 20 ++++--
 .../worker-thread/src/main/predict-helpers.ts | 18 +++--
 .../tokenization-corrector.tests.ts           | 52 +++++----------
 4 files changed, 92 insertions(+), 63 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
index 885cdb0ed2b..93267f5e86b 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
@@ -14,9 +14,10 @@ import { ContextToken } from "./context-token.js";
 import { CorrectionSearchable, PathResult } from "./correction-searchable.js";
 import { ContextTokenization } from "./context-tokenization.js";
 import { QuotientNodeFinalizer } from "./quotient-node-finalizer.js";
-import { TokenizationResultMapping } from "./tokenization-result-mapping.js";
+import { TokenizationResult, TokenizationResultMapping } from "./tokenization-result-mapping.js";
 import { EDIT_DISTANCE_COST_SCALE } from "./distance-modeler.js";
 import { MAX_EDIT_THRESHOLD_FACTOR } from "./search-quotient-spur.js";
+import { TokenResultMapping } from "./token-result-mapping.js";
 
 // PathResult needs to be generic:
 // - a result for correcting a single Token - "TokenResult"?
@@ -46,7 +47,7 @@ export type TokenResult = {
  * all correctable tokens, generating corrections for the full represented
  * range.
  */
-export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray<TokenResult>, TokenizationResultMapping> {
+export class TokenizationCorrector implements CorrectionSearchable<TokenizationResult, TokenizationResultMapping> {
   public readonly tokenization: ContextTokenization;
   private readonly tailCorrectionLength: number;
 
@@ -56,6 +57,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
   private _predictable?: QuotientNodeFinalizer;
   private _generatedTokenResults: Map<number, TokenResult>;
   private _previousResults: TokenizationResultMapping[] = [];
+  private _correctableCodepointLength: number = 0;
 
   // fully private
   public readonly modelsCorrectables: boolean;
@@ -65,6 +67,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
   private lastTotalCost: number;
   private handleHasBeenCalled: boolean = false;
   private predictableMatchFound: boolean = false;
+  private matchableTokenCount = 0;
 
   get currentCost(): number {
     const correctable = this.selectionQueue.peek();
@@ -175,16 +178,23 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
     this.tokenLookupMap = new Map();
     let modelsCorrectables = false;
 
+    // 0 index:  the first index in range to be modeled, as split off from the main tokenization.
     orderedTokens.forEach((token, index) => {
       // New issue:  this mangles the space IDs!  We almost certainly need some
       // sort of proper map to the source token.
       const searchModule = new QuotientNodeFinalizer(token.searchModule, index == orderedTokens.length - 1);
       this.tokenLookupMap.set(searchModule.spaceId, token);
-      const passesFilter = filterClosure(token);
+      // Index within the token subset being examined.
+      const passesFilter = filterClosure(token, index);
       modelsCorrectables ||= passesFilter;
       if(!passesFilter) {
         this._uncorrectables.push(searchModule);
-      } else if(index == tailCorrectionLength - 1) {
+        return;
+      }
+
+      this.matchableTokenCount++;
+      this._correctableCodepointLength += searchModule.codepointLength;
+      if(index == tailCorrectionLength - 1) {
         // The sole assignment case for this field.  It may only be assigned for
         // the final token, and only if its text is of a form considered
         // correctable by the filter.
@@ -249,6 +259,10 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
     return new TokenizationResultMapping(results, this);
   }
 
+  private get matchedTokenCount() {
+    return [...this._generatedTokenResults.values()].filter((r) => r instanceof TokenResultMapping).length;
+  }
+
   // The actual method used to iteratively search for tokenization-level corrections.
   handleNextNode(): PathResult<TokenizationResultMapping> {
     // Notable states:
@@ -272,11 +286,17 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
         this.handleHasBeenCalled = true;
         const results = this.collateResults();
         this._previousResults.push(results);
-        return {
-          'type': 'complete',
-          cost: this.lastTotalCost,
-          mapping: results
-        };
+
+        // If no generated result is a TokenResultMapping (matchedTokenCount == 0), there is nothing to predict from; report 'none'.
+        if(this.matchedTokenCount > 0) {
+          return {
+            'type': 'complete',
+            cost: this.lastTotalCost,
+            mapping: results
+          };
+        } else {
+          return { type: 'none' };
+        }
       }
     }
 
@@ -284,7 +304,6 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
 
     const correctableToUpdate = this.selectionQueue.dequeue();
     const tokenResult = correctableToUpdate?.handleNextNode();
-
     const delistCorrectable = () => {
       if(correctableToUpdate != this._predictable) {
         // Lock the 'correctable' token now that either a valid correction for
@@ -298,8 +317,12 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
     }
 
     const correctionIsThePredictable = correctableToUpdate == this._predictable;
+
+    // TODO:  adjust this._correctableCodepointLength when converting a token from
+    // correctable/predictable to uncorrectable!
     if(tokenResult.type == 'none') {
       if(!correctionIsThePredictable || !this.predictableMatchFound) {
+        this._correctableCodepointLength -= correctableToUpdate.codepointLength;
         // Transition the node from 'correctable' to 'uncorrectable' - we were
         // unable to find valid corrections for it.
         const lockedResult = correctableToUpdate.bestExample;
@@ -359,13 +382,14 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
       this.selectionQueue.enqueue(this._predictable);
     }
 
-    const correctionResults = this.collateResults();
-    if(correctionResults.matchedResult.findIndex((c) => c == undefined) != -1) {
+    // If any token still lacks a generated result, report an intermediate step (`some`, so a falsy key like 0 counts).
+    if([...this.tokenLookupMap.keys()].some((k) => !this._generatedTokenResults.has(k))) {
       return {
         type: 'intermediate',
         cost: tokenizationCost
       };
     }
+    const correctionResults = this.collateResults();
 
     // Determine the proper return type and construct the proper return object accordingly.
     //
@@ -373,11 +397,18 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
     // that indicates no further predictions may be found.
     if(tokenResult.type != 'none' || !correctionIsThePredictable || !this.predictableMatchFound) {
       this._previousResults.push(correctionResults);
-      return {
-        type: 'complete',
-        cost: tokenizationCost,
-        mapping: correctionResults
-      };
+
+      if(this.matchedTokenCount > 0) {
+        return {
+          type: 'complete',
+          cost: tokenizationCost,
+          mapping: correctionResults
+        };
+      } else {
+        return {
+          type: 'none'
+        }
+      }
     } else {
       return {
         type: 'none'
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-result-mapping.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-result-mapping.ts
index 32e0fb48fce..c6fd8db93f3 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-result-mapping.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-result-mapping.ts
@@ -1,13 +1,23 @@
 import { CorrectionResultMapping } from "./correction-result-mapping.js";
 import { TokenizationCorrector, TokenResult } from './tokenization-corrector.js';
 
-export class TokenizationResultMapping implements CorrectionResultMapping<ReadonlyArray<TokenResult>> {
+export interface TokenizationResult {
+  tokenCorrections: ReadonlyArray<TokenResult>,
+  totalEditCount: number,
+  totalEditableCodepoints: number
+}
+
+export class TokenizationResultMapping implements CorrectionResultMapping<TokenizationResult> {
   readonly matchingSpace: TokenizationCorrector;
-  readonly matchedResult: ReadonlyArray<TokenResult>;
+  readonly matchedResult: TokenizationResult;
 
   constructor(tokenization: TokenResult[], corrector: TokenizationCorrector) {
     this.matchingSpace = corrector;
-    this.matchedResult = tokenization;
+    this.matchedResult = {
+      tokenCorrections: tokenization,
+      totalEditCount: tokenization.reduce((accum, curr) => accum + curr.knownCost, 0),
+      totalEditableCodepoints: 0 // TODO(review): placeholder; presumably meant to be sourced from `corrector` - confirm.
+    }
   }
 
   get spaceId(): number {
@@ -22,7 +32,7 @@ export class TokenizationResultMapping implements CorrectionResultMapping<Readon
   //  * `totalCost`.)
   //  */
   // get knownCost(): number {
-  //   return this.node.editCount;
+  //   return this.matchedResult.tokenCorrections.reduce((accum, curr) => accum + curr.knownCost, 0);
   // }
 
   // /**
@@ -40,6 +50,6 @@ export class TokenizationResultMapping implements CorrectionResultMapping<Readon
    * to the resulting output.
    */
   get totalCost(): number {
-    return this.matchedResult.reduce((total, curr) => total + curr.totalCost, 0);
+    return this.matchedResult.tokenCorrections.reduce((total, curr) => total + curr.totalCost, 0);
   }
 }
\ No newline at end of file
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 5f825e4b9d9..171c3d675ff 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -410,7 +410,7 @@ export function buildAndMapPredictions(
     endOfBuffer: false
   };
 
-  const correctionTransforms = tokenizationCorrection.matchedResult.map((correction, i) => {
+  const correctionTransforms = tokenizationCorrection.matchedResult.tokenCorrections.map((correction, i) => {
     return {
       insert: correction.matchString,  // insert correction string
       deleteLeft: i == 0 ? deleteLeft : 0,
@@ -418,7 +418,7 @@ export function buildAndMapPredictions(
     };
   });
 
-  const correctionCost = tokenizationCorrection.matchedResult.map((correction) => {
+  const correctionCost = tokenizationCorrection.matchedResult.tokenCorrections.map((correction) => {
     let rootCost = correction.totalCost;
     /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost
      * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if
@@ -452,7 +452,16 @@ export function buildAndMapPredictions(
   }).reduce((accum, curr) => accum * curr, 1);
 
   const predictionComponents = correctionTransforms.map((correctionTransform, i) => {
-    const predictions = model.predict(correctionTransform, emptyContext);
+    let predictions = model.predict(correctionTransform, emptyContext);
+
+    // For every token except the tail, a prediction must have the same codepoint length as that token.
+    // Predictions whose inserted text differs in length are filtered out.
+    if(i != correctionTransforms.length - 1) {
+      predictions = predictions.filter((p) => {
+        const codepointLength = tokenizationCorrection.matchingSpace.orderedTokens[i].searchModule.codepointLength;
+        return KMWString.length(p.sample.transform.insert) == codepointLength;
+      });
+    }
 
     // Failsafe:  if there are no matching predictions, create a fake prediction
     // matching the original text.
@@ -577,7 +586,8 @@ export function prepareTokenizationSearch(
     return new TokenizationCorrector(tuple.tokenization, mutatedLength, (token, index) => {
       return index >= unaffectedTokenCount  // is a modified token
         && index == mutatedLength - 1       // TEMP: adjacent to the caret (TO BE REMOVED)
-        && correctionValidForAutoSelect(token.exampleInput);  // and is eligible text-correction
+        // and is eligible for text-correction
+        && (token.searchModule.codepointLength == 0 || correctionValidForAutoSelect(token.exampleInput));
     });
   });
 
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts
index a53c3a4a4b7..804cf43d40d 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts
@@ -29,7 +29,8 @@ import {
   SubstitutionQuotientSpur,
   TokenizationCorrector,
   TokenResult,
-  TokenizationResultMapping
+  TokenizationResultMapping,
+  TokenizationResult
 } from '@keymanapp/lm-worker/test-index';
 
 import Distribution = LexicalModelTypes.Distribution;
@@ -302,7 +303,7 @@ describe('TokenizationCorrector', () => {
       assert.equal(searchResult.type, 'complete');
       if(searchResult.type == 'complete') {
         const mapping = searchResult.mapping;
-        const tokenResults = mapping.matchedResult;
+        const tokenResults = mapping.matchedResult.tokenCorrections;
         assert.isNotNaN(searchResult.cost);
         assert.equal(searchResult.cost, searchResult.mapping.totalCost);
         assert.equal(tokenResults.length, 1);
@@ -327,7 +328,7 @@ describe('TokenizationCorrector', () => {
       assert.equal(searchResult.type, 'none');
     });
 
-    it('finds a default correction for a single correctable token without a model match', () => {
+    it('returns no result when a single correctable token lacks a model match', () => {
       const fixture = buildFixture_therefore();
 
       const theref = fixture.theref.tail;
@@ -371,23 +372,6 @@ describe('TokenizationCorrector', () => {
         searchResult = instance.handleNextNode();
       } while(searchResult.type == 'intermediate');
 
-      assert.equal(searchResult.type, 'complete');
-      if(searchResult.type == 'complete') {
-        const mapping = searchResult.mapping;
-        const tokenResults = mapping.matchedResult;
-        assert.isNotNaN(searchResult.cost);
-        assert.equal(searchResult.cost, searchResult.mapping.totalCost);
-        assert.equal(tokenResults.length, 1);
-        assert.sameOrderedMembers(tokenResults.map((r) => r.matchString), ['therefxyz']);
-
-        // Now that an entry has been found, verify the corrector's state.
-        assert.isNotOk(instance.predictableToken); // should become an uncorrectable.
-        assert.isTrue(instance.generatedTokenResults.has(therefxyz));
-        assert.equal(instance.generatedTokenResults.get(therefxyz), tokenResults[0]);
-      }
-
-      // There should be no further possible suggestions.
-      searchResult = instance.handleNextNode();
       assert.equal(searchResult.type, 'none');
     });
 
@@ -411,7 +395,7 @@ describe('TokenizationCorrector', () => {
       let firstResults: ReadonlyArray<TokenResult>;
       if(searchResult.type == 'complete') {
         const mapping = searchResult.mapping;
-        const tokenResults = mapping.matchedResult;
+        const tokenResults = mapping.matchedResult.tokenCorrections;
         firstResults = tokenResults;
         assert.isNotNaN(searchResult.cost);
         assert.equal(searchResult.cost, searchResult.mapping.totalCost);
@@ -434,7 +418,7 @@ describe('TokenizationCorrector', () => {
         searchResult = instance.handleNextNode();
         if(searchResult.type == 'complete') {
           const mapping = searchResult.mapping;
-          const tokenResults = mapping.matchedResult;
+          const tokenResults = mapping.matchedResult.tokenCorrections;
 
           // Verify that the first (bound) token is not altered further.
           // It should receive no further correction attempts.
@@ -445,7 +429,7 @@ describe('TokenizationCorrector', () => {
       } while(searchResult.type != 'none');
     });
 
-    it('immediately returns a single result when the only represented token is uncorrectable', () => {
+    it('immediately returns with no result when the only represented token is uncorrectable', () => {
       const fixture = buildFixture_terminalWhitespace();
 
       const tokenization = fixture.spaceOnly;
@@ -457,13 +441,7 @@ describe('TokenizationCorrector', () => {
       );
 
       const searchResult = instance.handleNextNode();
-      assert.equal(searchResult.type, 'complete');
-      if(searchResult.type == 'complete') {
-        assert.equal(searchResult.mapping.matchedResult[0].matchString, ' ');
-      }
-
-      const nilResult = instance.handleNextNode();
-      assert.equal(nilResult.type, 'none');
+      assert.equal(searchResult.type, 'none');
     });
 
     it('returns a single result when the final token is uncorrectable', () => {
@@ -484,8 +462,8 @@ describe('TokenizationCorrector', () => {
 
       assert.equal(searchResult.type, 'complete');
       if(searchResult.type == 'complete') {
-        assert.equal(searchResult.mapping.matchedResult[0].matchString, 'space');
-        assert.equal(searchResult.mapping.matchedResult[1].matchString, ' ');
+        assert.equal(searchResult.mapping.matchedResult.tokenCorrections[0].matchString, 'space');
+        assert.equal(searchResult.mapping.matchedResult.tokenCorrections[1].matchString, ' ');
       }
 
       const nilResult = instance.handleNextNode();
@@ -502,20 +480,20 @@ describe('TokenizationCorrector', () => {
         let haveSeenSingleTokenCorrection = false;
         let haveSeenThreeTokenCorrection = false;
         for await(let phraseMatch of getBestMatches<
-          ReadonlyArray<TokenResult>,
+          TokenizationResult,
           TokenizationResultMapping,
           TokenizationCorrector
           >(correctors, buildTestTimer())) {
 
-          if(phraseMatch.matchedResult.length == 1) {
+          if(phraseMatch.matchedResult.tokenCorrections.length == 1) {
             if(!haveSeenSingleTokenCorrection) {
-              assert.sameOrderedMembers(phraseMatch.matchedResult.map((t) => t.matchString), ['theref' /* -ore */]);
+              assert.sameOrderedMembers(phraseMatch.matchedResult.tokenCorrections.map((t) => t.matchString), ['theref' /* -ore */]);
             }
 
             haveSeenSingleTokenCorrection = true;
-          } else if(phraseMatch.matchedResult.length == 3) {
+          } else if(phraseMatch.matchedResult.tokenCorrections.length == 3) {
             if(!haveSeenThreeTokenCorrection) {
-              assert.sameOrderedMembers(phraseMatch.matchedResult.map((t) => t.matchString), ['the', ' ', 'ef' /* -fort */]);
+              assert.sameOrderedMembers(phraseMatch.matchedResult.tokenCorrections.map((t) => t.matchString), ['the', ' ', 'ef' /* -fort */]);
             }
             haveSeenThreeTokenCorrection = true;
           }