Skip to content

Commit f38eaad

Browse files
committed
pdf-server: import baseline from widget fieldValue, not field-dict
page.getAnnotations().fieldValue is what AnnotationLayer actually renders. getFieldObjects().value reads the field-dict /V, which can be out of sync in an internally inconsistent PDF (e.g. after a pdf-lib setText silently failed on a comb field, leaving the field-dict stale while the widget still shows the older value from the raw bytes). Example from your Form.pdf: the ID widget shows 'eeeee', but the field-dict says 'eew2e'. The panel was importing 'eew2e', which did not match what's on screen. Fix: capture a.fieldValue during the page.getAnnotations() scan (we're already iterating there for widget IDs) and prefer it over the field-dict value, falling back to the field-dict value if the widget doesn't expose one. normaliseFieldValue() handles the format differences (choice widgets give arrays; field-dict gives strings).
1 parent c649d88 commit f38eaad

1 file changed

Lines changed: 54 additions & 28 deletions

File tree

examples/pdf-server/src/mcp-app.ts

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3374,22 +3374,34 @@ function restoreAnnotations(): void {
33743374
// =============================================================================
33753375

33763376
/**
3377-
* Extract a meaningful value from a getFieldObjects() field array.
3378-
* Returns null for empty/unfilled/button fields so they don't clutter
3379-
* the panel or count as baseline edits.
3377+
* Normalise a raw form field value into our string|boolean model.
3378+
* Returns null for empty/unfilled/button values so they don't clutter the
3379+
* panel or count as baseline.
3380+
*
3381+
* `type` is from getFieldObjects() (which knows field types); `raw` is
3382+
* preferably from page.getAnnotations().fieldValue (which is what the
3383+
* widget actually renders). A PDF can have the field-dict /V out of sync
3384+
* with the widget — AnnotationLayer trusts the widget, so we must too.
33803385
*/
3381-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
3382-
function importFieldValue(fieldArr: any[]): string | boolean | null {
3383-
// For radio groups, getFieldObjects returns a parent entry with
3384-
// value=undefined plus child entries — find the first with a real value.
3385-
const f = fieldArr.find((x) => x.value != null) ?? fieldArr[0];
3386-
if (!f || f.type === "button") return null;
3387-
const v = f.value;
3388-
if (v == null || v === "" || v === "Off") return null;
3389-
if (f.type === "checkbox") return true;
3390-
if (f.type === "radiobutton") return String(v);
3391-
if (Array.isArray(v)) return v.join(", ");
3392-
return String(v);
3386+
function normaliseFieldValue(
3387+
type: string | undefined,
3388+
raw: unknown,
3389+
): string | boolean | null {
3390+
if (type === "button") return null;
3391+
// Checkbox/radio: fieldValue is the export string (e.g. "Yes"), "Off" = unset
3392+
if (type === "checkbox") {
3393+
return raw != null && raw !== "" && raw !== "Off" ? true : null;
3394+
}
3395+
if (type === "radiobutton") {
3396+
return raw != null && raw !== "" && raw !== "Off" ? String(raw) : null;
3397+
}
3398+
// Text/choice: fieldValue may be a string or an array of selections
3399+
if (Array.isArray(raw)) {
3400+
const joined = raw.filter(Boolean).join(", ");
3401+
return joined || null;
3402+
}
3403+
if (raw == null || raw === "") return null;
3404+
return String(raw);
33933405
}
33943406

33953407
/**
@@ -3422,8 +3434,11 @@ async function buildFieldNameMap(
34223434
}
34233435

34243436
// Scan every page's widget annotations to collect the CORRECT storage keys,
3425-
// plus labels, pages, and positions (which getFieldObjects() lacks anyway).
3437+
// plus labels, pages, positions, AND fieldValue (what the widget renders
3438+
// — which can differ from getFieldObjects().value if the PDF is internally
3439+
// inconsistent, e.g. after a pdf-lib setText silently failed).
34263440
const fieldPositions: Array<{ name: string; page: number; y: number }> = [];
3441+
const widgetFieldValues = new Map<string, unknown>();
34273442
for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
34283443
let annotations;
34293444
try {
@@ -3451,6 +3466,11 @@ async function buildFieldNameMap(
34513466
if (a.rect) {
34523467
fieldPositions.push({ name: a.fieldName, page: pageNum, y: a.rect[3] });
34533468
}
3469+
// Capture the value the widget will actually render. First widget wins
3470+
// (radio groups share the field's /V so they all match anyway).
3471+
if (!widgetFieldValues.has(a.fieldName) && a.fieldValue !== undefined) {
3472+
widgetFieldValues.set(a.fieldName, a.fieldValue);
3473+
}
34543474
}
34553475
}
34563476

@@ -3465,22 +3485,31 @@ async function buildFieldNameMap(
34653485
}
34663486
}
34673487

3468-
// Import baseline values from getFieldObjects() (the only place .value lives)
3469-
// AND remap its field-dict IDs to widget IDs.
3488+
// Import baseline values AND remap cachedFieldObjects to widget IDs.
3489+
//
3490+
// Baseline: prefer the widget's fieldValue (what AnnotationLayer renders)
3491+
// over getFieldObjects().value. A PDF can have the field-dict /V out of
3492+
// sync with the widget — if we import the field-dict value, the panel
3493+
// disagrees with what's on screen.
34703494
//
3471-
// Why remap: pdf.js _bindResetFormAction (the PDF's in-document Reset
3472-
// button) iterates this structure, using .id to key storage and find DOM
3473-
// elements via [data-element-id=...]. Both use WIDGET ids — pdf.js Reset
3474-
// only works when field-dict id == widget id. pdf-lib's save splits
3475-
// merged field+widget objects, breaking that assumption. We rebuild with
3476-
// widget ids so Reset keeps working.
3495+
// Remap: pdf.js _bindResetFormAction (the PDF's in-document Reset button)
3496+
// iterates this structure, using .id to key storage and find DOM elements
3497+
// via [data-element-id=...]. Both use WIDGET ids. pdf-lib's save splits
3498+
// merged field+widget objects, so we rebuild with widget ids.
34773499
if (cachedFieldObjects) {
34783500
const remapped: Record<string, any[]> = {};
34793501
for (const [name, fieldArr] of Object.entries(cachedFieldObjects)) {
34803502
const widgetIds = fieldNameToIds.get(name);
34813503
if (!widgetIds) continue; // no widget → not rendered anyway
34823504

3483-
const v = importFieldValue(fieldArr);
3505+
// Type comes from getFieldObjects (widget annot data doesn't have it).
3506+
// Value comes from the widget annotation (fall back to field-dict if
3507+
// the widget didn't expose one).
3508+
const type = fieldArr.find((f) => f.type)?.type;
3509+
const raw = widgetFieldValues.has(name)
3510+
? widgetFieldValues.get(name)
3511+
: fieldArr.find((f) => f.value != null)?.value;
3512+
const v = normaliseFieldValue(type, raw);
34843513
if (v !== null) {
34853514
pdfBaselineFormValues.set(name, v);
34863515
// Seed current state from baseline so the panel shows it. A
@@ -3492,9 +3521,6 @@ async function buildFieldNameMap(
34923521
// Skip parent entries with no concrete id (radio groups: the /T tree
34933522
// has a parent with the export value, plus one child per widget).
34943523
const concrete = fieldArr.filter((f) => f.id && f.type);
3495-
// Remap: one entry per widget, copying type/defaultValue/exportValues
3496-
// from the corresponding field entry (or the first concrete one if
3497-
// counts differ — e.g. text fields have 1 field entry but 1 widget).
34983524
remapped[name] = widgetIds.map((wid, i) => ({
34993525
...(concrete[i] ?? concrete[0] ?? fieldArr[0]),
35003526
id: wid,

0 commit comments

Comments
 (0)