firefly-operationOS · ancongui · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project uses **CalVer `YY.M.PP`** (PEP 440 may normalise patch numbers
 for the Python wheel — e.g. `26.06.00` → `26.6.0`).
 
+## [26.6.5] - 2026-06-12
+
+### Fixed
+
+- **Classifier-off + no `expected_type` silently produced zero documents.** When
+  `stages.classifier` was off and a file carried no `expected_type`, the segment
+  stayed `unmatched` and the file yielded no document — with no error. When exactly
+  one document type is declared, a single-row file now defaults to that type
+  (mirroring the single-candidate shortcut the classifier itself takes), so the
+  common "one type, no classifier" path just works.
+- **Request-scope LLM transformation returned empty rows.** The transformer's output
+  model wrapped each row under a `values` key, but the prompt instructs the model to
+  emit flat `{field: value}` rows — so the structured output never matched and every
+  consolidated row came back empty. The output row is now a flat dict, matching the
+  prompt, so `result.request_transformations` carries populated rows (e.g. a cap
+  table consolidated across several deeds).
+
 ## [26.6.4] - 2026-06-12
 
 ### Fixed

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "flydocs"
 # CalVer YY.MM.PP -- bumped per release. Note that PEP 440 normalises
 # ``26.05.01`` -> ``26.5.1`` in the built wheel filename.
-version = "26.6.4"
+version = "26.6.5"
 description = "Pure-multimodal Intelligent Document Processing service: structured fields + bounding boxes, validation, authenticity checks, LLM judge, and a business-rule engine. Sync + queue-backed async APIs over fireflyframework-pyfly and -agentic. Part of Firefly OperationOS, platform-agnostic by design."
 readme = "README.md"
 requires-python = ">=3.13"

diff --git a/src/flydocs/core/services/pipeline/orchestrator.py b/src/flydocs/core/services/pipeline/orchestrator.py
@@ -440,6 +440,13 @@ async def _step_load(self, ctx: PipelineContext, _inputs: dict[str, Any]) -> Any
         """
         request: ExtractionRequest = ctx.metadata["request"]
         files: list[_FileSlot] = []
+        # When the classifier is disabled and the caller pinned no ``expected_type``,
+        # default a single-row file to the sole declared document type. Without the
+        # classifier there is no node that assigns a type, so otherwise the segment
+        # stays ``unmatched`` and the file silently yields no document. Mirrors the
+        # single-candidate shortcut the classifier step itself takes.
+        classifier_off = not request.options.stages.classifier
+        sole_doctype = request.document_types[0].id if len(request.document_types) == 1 else None
         # Slot index is monotonic across the expansion of all inputs.
         slot_index = 0
         for file in request.files:
@@ -452,6 +459,8 @@ async def _step_load(self, ctx: PipelineContext, _inputs: dict[str, Any]) -> Any
             multi_row = len(normalised) > 1
             for row in normalised:
                 effective_doctype = file.expected_type if not multi_row else None
+                if effective_doctype is None and not multi_row and classifier_off and sole_doctype:
+                    effective_doctype = sole_doctype
                 slot_filename = (
                     "/".join((*row.derived_from, row.filename)) if row.derived_from else row.filename
                 )

diff --git a/src/flydocs/core/services/transformations/llm_transformer.py b/src/flydocs/core/services/transformations/llm_transformer.py
@@ -45,16 +45,16 @@
 _MAX_OUTPUT_TOKENS = 8192
 
 
-class _TransformRow(BaseModel):
-    """One row returned by the LLM. Free-form key/value dict."""
-
-    values: dict[str, Any] = Field(default_factory=dict)
-
-
 class _TransformOutput(BaseModel):
-    """LLM response envelope."""
+    """LLM response envelope.
+
+    Each row is a flat ``{field_name: value}`` object, exactly as the prompt
+    instructs the model to emit. (A previous shape wrapped each row under a
+    ``values`` key, which the prompt never produced — so every row came back
+    empty. Keeping the row a flat dict here matches the prompt 1:1.)
+    """
 
-    rows: list[_TransformRow] = Field(default_factory=list)
+    rows: list[dict[str, Any]] = Field(default_factory=list)
 
 
 class LlmTransformer:
@@ -191,7 +191,7 @@ def _serialise_row(row: ExtractedField) -> dict[str, Any]:
     return out
 
 
-def _rebuild_rows(llm_rows: list[_TransformRow], template_row: ExtractedField) -> list[ExtractedField]:
+def _rebuild_rows(llm_rows: list[dict[str, Any]], template_row: ExtractedField) -> list[ExtractedField]:
     """Materialise LLM row dicts back into ExtractedField rows.
 
     The template row's metadata (bbox, page) is propagated so the
@@ -206,7 +206,7 @@ def _rebuild_rows(llm_rows: list[_TransformRow], template_row: ExtractedField) -
     materialised: list[ExtractedField] = []
     for i, lr in enumerate(llm_rows):
         sub_fields: list[ExtractedField] = []
-        for name, value in lr.values.items():
+        for name, value in (lr or {}).items():
             tmpl = template_by_name.get(name)
             sub_fields.append(
                 ExtractedField(