2 changes: 2 additions & 0 deletions AGENTS.md
@@ -180,6 +180,8 @@ Intrinsics are specialized LoRA adapters that add task-specific capabilities (RA
 | `rag` | `find_citations(response, documents, context, backend)` | Document sentences supporting the response |
 | `rag` | `check_context_relevance(question, document, context, backend)` | Whether a document is relevant (0–1); only supported for granite-4.0, not granite-4.1 |
 | `rag` | `flag_hallucinated_content(response, documents, context, backend)` | Flag potentially hallucinated sentences |
+| `guardian` | `factuality_detection(context, backend, *, documents=None, model_options=None)` | Determine if the last response is factually incorrect ("yes"/"no") |
+| `guardian` | `factuality_correction(context, backend, *, documents=None, model_options=None)` | Correct the last response to be factually accurate |
 
 ```python
 from mellea.backends.huggingface import LocalHFBackend
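
The two new rows give `guardian` parity with the `rag` intrinsics. Below is a minimal usage sketch assembled from the example files changed in this PR; the import paths, model id, and `Document` constructor are assumptions inferred from this diff, not verified package APIs.

```python
# Minimal sketch of the new guardian intrinsics. Import paths, the model id,
# and the Document constructor are assumptions inferred from this diff.
from mellea.backends.huggingface import LocalHFBackend
from mellea.stdlib.components import Document  # assumed public path
from mellea.stdlib.components.chat import Message  # assumed public path
from mellea.stdlib.components.intrinsic import guardian  # assumed public path
from mellea.stdlib.context import ChatContext  # assumed public path

backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-h-tiny")  # hypothetical model id
document = Document("The Eiffel Tower is in Paris, France.", doc_id="0")  # assumed constructor

ctx = ChatContext()
ctx = ctx.add(Message("user", "Where is the Eiffel Tower?"))
ctx = ctx.add(Message("assistant", "The Eiffel Tower is in Berlin."))

# "yes" if the last assistant response is factually incorrect, else "no"
verdict = guardian.factuality_detection(ctx, backend, documents=[document])

# Corrected response string, grounded in the supplied documents
fixed = guardian.factuality_correction(ctx, backend, documents=[document])
```

Both functions also accept `model_options`, mirroring the pass-through added in `core.py` below.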
9 changes: 3 additions & 6 deletions docs/examples/intrinsics/factuality_correction.py
@@ -86,11 +86,8 @@
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 
-ctx = (
-    ctx.add(document)
-    .add(Message("user", user_text))
-    .add(Message("assistant", response_text))
-)
+ctx = ctx.add(Message("user", user_text))
+ctx = ctx.add(Message("assistant", response_text))
 
-result = guardian.factuality_correction(ctx, backend)
+result = guardian.factuality_correction(ctx, backend, documents=[document])
 print(f"Result of factuality correction: {result}") # corrected response string
9 changes: 3 additions & 6 deletions docs/examples/intrinsics/factuality_detection.py
@@ -29,11 +29,8 @@
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 
-ctx = (
-    ctx.add(document)
-    .add(Message("user", user_text))
-    .add(Message("assistant", response_text))
-)
+ctx = ctx.add(Message("user", user_text))
+ctx = ctx.add(Message("assistant", response_text))
 
-result = guardian.factuality_detection(ctx, backend)
+result = guardian.factuality_detection(ctx, backend, documents=[document])
 print(f"Result of factuality detection: {result}") # string "yes" or "no"
55 changes: 39 additions & 16 deletions mellea/stdlib/components/intrinsic/_util.py
@@ -13,23 +13,25 @@
 from ....stdlib import functional as mfuncs
 from ...components import Document
 from ...context import ChatContext
+from ..chat import Message
 from .intrinsic import Intrinsic
 
 
 def _resolve_question(
     question: str | None, context: ChatContext, backend: Backend | None = None
-) -> tuple[str, ChatContext]:
-    """Return ``(question_text, context_to_use)``.
+) -> tuple[str, ChatContext, list[Document] | None]:
+    """Return ``(question_text, context_to_use, documents)``.
 
-    When *question* is not ``None``, returns it with *context* unchanged.
+    When *question* is not ``None``, returns it with *context* unchanged and no documents.
     When ``None``, extracts the text from the last turn's ``model_input``
-    and rewinds *context* to before that element.
+    and rewinds *context* to before that element. Also extracts documents if the
+    last input is a Message.
 
     Supports ``Message`` (via ``.content``), ``CBlock`` (via ``.value``),
     and generic ``Component`` types (via ``TemplateFormatter.print()``).
     """
     if question is not None:
-        return question, context
+        return question, context, None
     from ....core import CBlock, Component
-    from ..chat import Message
 
@@ -40,8 +42,10 @@ def _resolve_question(
     )
 
     model_input = turn.model_input
+    documents: list[Document] | None = None
     if isinstance(model_input, Message):
         text = model_input.content
+        documents = model_input._docs
     elif isinstance(model_input, CBlock):
         if model_input.value is None:
             raise ValueError(
@@ -65,29 +69,48 @@ def _resolve_question(
     rewound = context.previous_node
     if rewound is None:
         raise ValueError("Cannot rewind context past the root node")
-    return text, rewound  # type: ignore[return-value]
+    return text, rewound, documents  # type: ignore[return-value]
 
 
 def _resolve_response(
     response: str | None, context: ChatContext
-) -> tuple[str, ChatContext]:
-    """Return ``(response_text, context_to_use)``.
+) -> tuple[str, ChatContext, list[Document] | None]:
+    """Return ``(response_text, context_to_use, documents)``.
 
-    When *response* is not ``None``, returns it with *context* unchanged.
-    When ``None``, extracts from the last turn's ``output.value`` and rewinds
-    *context* to before that output.
+    When *response* is not ``None``, returns it with *context* unchanged and no documents.
+    When ``None``, extracts from the last turn's ``output.value`` (generated) or
+    ``model_input.content`` (manually-added Message), then rewinds *context*
+    to before that turn. Also extracts documents if the last message is a Message.
     """
     if response is not None:
-        return response, context
+        return response, context, None
     turn = context.last_turn()
-    if turn is None or turn.output is None:
+    if turn is None:
         raise ValueError("response is None and context has no last turn with output")
-    if turn.output.value is None:
-        raise ValueError("response is None and last turn output has no value")
 
+    documents: list[Document] | None = None
+    # Try generated output first
+    if turn.output is not None:
+        if turn.output.value is None:
+            raise ValueError("response is None and last turn output has no value")
+        response_text = turn.output.value
+    # Fall back to manually-added assistant Message
+    elif (
+        turn.model_input is not None
+        and isinstance(turn.model_input, Message)
+        and turn.model_input.role == "assistant"
+    ):
+        response_text = turn.model_input.content
+        documents = turn.model_input._docs
+    else:
+        raise ValueError(
+            "response is None and context has no last turn with output or assistant message"
+        )
+
     rewound = context.previous_node
     if rewound is None:
         raise ValueError("Cannot rewind context past the root node")
-    return turn.output.value, rewound  # type: ignore[return-value]
+    return response_text, rewound, documents  # type: ignore[return-value]
 
 
 def call_intrinsic(
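
The resolver now returns a three-tuple and can fall back to a manually-added assistant `Message`, pulling attached documents off `model_input._docs`. Here is a hedged sketch of that fallback path; the absolute import paths are inferred from this file's relative imports, the `Document` constructor is assumed, and the helper itself is internal, so the entry point may differ.

```python
# Hedged sketch of the new _resolve_response fallback path. Import paths and
# the Document constructor are assumptions inferred from this diff.
from mellea.stdlib.components import Document
from mellea.stdlib.components.chat import Message
from mellea.stdlib.components.intrinsic._util import _resolve_response
from mellea.stdlib.context import ChatContext

doc = Document("Hamlet was written by William Shakespeare.", doc_id="0")
ctx = ChatContext()
ctx = ctx.add(Message("user", "Who wrote Hamlet?"))
# Manually-added assistant turn: no generated output, documents ride on the Message.
ctx = ctx.add(Message("assistant", "Shakespeare wrote Hamlet.", documents=[doc]))

text, rewound, docs = _resolve_response(None, ctx)
# text    -> "Shakespeare wrote Hamlet."   (taken from model_input.content)
# docs    -> [doc]                         (taken from model_input._docs)
# rewound -> the context rewound to before the assistant turn
```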
78 changes: 44 additions & 34 deletions mellea/stdlib/components/intrinsic/core.py
@@ -9,7 +9,9 @@
 from ._util import _resolve_response, call_intrinsic
 
 
-def check_certainty(context: ChatContext, backend: AdapterMixin) -> float:
+def check_certainty(
+    context: ChatContext, backend: AdapterMixin, model_options: dict | None = None
+) -> float:
     """Estimate the model's certainty about its last response.
 
     Intrinsic function that evaluates how certain the model is about the
@@ -19,11 +21,15 @@ def check_certainty(context: ChatContext, backend: AdapterMixin) -> float:
     Args:
         context: Chat context containing user question and assistant answer.
         backend: Backend instance that supports LoRA/aLoRA adapters.
+        model_options: Optional model options to pass to the backend (e.g.,
+            temperature, max_tokens). Defaults to ``{ModelOption.TEMPERATURE: 0.0}``.
 
     Returns:
         Certainty score as a float (higher = more certain).
     """
-    result_json = call_intrinsic("uncertainty", context, backend)
+    result_json = call_intrinsic(
+        "uncertainty", context, backend, model_options=model_options
+    )
     return result_json["certainty"]
 
 
@@ -37,7 +43,10 @@ def check_certainty(context: ChatContext, backend: AdapterMixin) -> float:
 
 
 def requirement_check(
-    context: ChatContext, backend: AdapterMixin, requirement: str
+    context: ChatContext,
+    backend: AdapterMixin,
+    requirement: str,
+    model_options: dict | None = None,
 ) -> float:
     """Detect if text adheres to provided requirements.
 
@@ -49,13 +58,17 @@ def requirement_check(
         context: Chat context containing user question and assistant answer.
         backend: Backend instance that supports LoRA/aLoRA adapters.
         requirement: Set of requirements to satisfy.
+        model_options: Optional model options to pass to the backend (e.g.,
+            temperature, max_tokens). Defaults to ``{ModelOption.TEMPERATURE: 0.0}``.
 
     Returns:
         Score as a float between 0.0 and 1.0 (higher = more likely satisfied).
     """
     eval_message = f"<requirements>: {requirement}\n{_EVALUATION_PROMPT}"
     context = context.add(Message("user", eval_message))
-    result_json = call_intrinsic("requirement-check", context, backend)
+    result_json = call_intrinsic(
+        "requirement-check", context, backend, model_options=model_options
+    )
     return result_json["requirement_check"]["score"]
 
 
@@ -64,46 +77,43 @@ def find_context_attributions(
     documents: collections.abc.Iterable[str | Document],
     context: ChatContext,
     backend: AdapterMixin,
+    model_options: dict | None = None,
 ) -> list[dict]:
     """Find sentences in conversation history and documents that most influence an LLM's response.
 
     Intrinsic function that finds sentences in prior conversation messages and RAG
     documents that were most important to the LLM in generating each sentence in the
     assistant response.
 
-    :param response: Assistant response. When ``None``, the response is extracted
-        from the last assistant output in ``context``.
-    :param documents: Documents that were used to generate ``response``. Each element
-        may be a ``Document`` or a plain string. Strings are wrapped in ``Document``
-        with an auto-generated ``doc_id`` (``"0"``, ``"1"``, ...); for explicit
-        control, pass ``Document`` objects with ``doc_id`` set. ``Document`` objects
-        without ``doc_id`` trigger a warning because the intrinsic uses ``doc_id`` to
-        identify attribution sources.
-    :param context: Context of the dialog between user and assistant, ending with a
-        user query
-    :param backend: Backend that supports intrinsic adapters
-
-    :return: List of records with the following fields:
-        * ``response_begin``
-        * ``response_end``
-        * ``response_text``
-        * ``attribution_doc_id``
-        * ``attribution_msg_index``
-        * ``attribution_begin``
-        * ``attribution_end``
-        * ``attribution_text``
-        Begin and end offsets are character offsets into their respective UTF-8 strings.
+    Args:
+        response: Assistant response. When ``None``, the response is extracted
+            from the last assistant output in ``context``.
+        documents: Documents that were used to generate ``response``. Each element
+            may be a ``Document`` or a plain string. Strings are wrapped in ``Document``
+            with an auto-generated ``doc_id`` (``"0"``, ``"1"``, ...); for explicit
+            control, pass ``Document`` objects with ``doc_id`` set. ``Document`` objects
+            without ``doc_id`` trigger a warning because the intrinsic uses ``doc_id`` to
+            identify attribution sources.
+        context: Context of the dialog between user and assistant, ending with a
+            user query.
+        backend: Backend that supports intrinsic adapters.
+        model_options: Optional model options to pass to the backend (e.g.,
+            temperature, max_tokens). Defaults to ``{ModelOption.TEMPERATURE: 0.0}``.
+
+    Returns:
+        List of records with the following fields: ``response_begin``,
+        ``response_end``, ``response_text``, ``attribution_doc_id``,
+        ``attribution_msg_index``, ``attribution_begin``, ``attribution_end``,
+        ``attribution_text``. Begin and end offsets are character offsets into
+        their respective UTF-8 strings.
     """
-    response, context = _resolve_response(response, context)
+    response, context, resolved_docs = _resolve_response(response, context)
+    explicit_docs = _coerce_to_documents(documents, auto_doc_id=False)
+    docs_to_use = [*(explicit_docs or []), *(resolved_docs or [])] or None
     result_json = call_intrinsic(
         "context-attribution",
-        context.add(
-            Message(
-                "assistant",
-                response,
-                documents=_coerce_to_documents(documents, auto_doc_id=False),
-            )
-        ),
+        context.add(Message("assistant", response, documents=docs_to_use)),
         backend,
+        model_options=model_options,
     )
     return result_json
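
With `model_options` now threaded through `check_certainty`, `requirement_check`, and `find_context_attributions`, callers can override the temperature-0 default per call. A brief sketch, assuming the `ModelOption` and function import paths and reusing `ctx`/`backend` from the setup sketch above:

```python
# Hedged sketch of the model_options pass-through; import paths are assumptions,
# and ctx/backend come from the earlier setup sketch.
from mellea.backends.types import ModelOption  # assumed path
from mellea.stdlib.components.intrinsic.core import (  # assumed path
    check_certainty,
    find_context_attributions,
    requirement_check,
)

certainty = check_certainty(ctx, backend, model_options={ModelOption.TEMPERATURE: 0.0})

score = requirement_check(
    ctx,
    backend,
    "Respond in one sentence.",
    model_options={ModelOption.TEMPERATURE: 0.0},
)

attributions = find_context_attributions(
    None,  # response=None: resolved from the last assistant turn, documents included
    ["The Eiffel Tower is in Paris, France."],  # plain strings are wrapped in Document
    ctx,
    backend,
    model_options={ModelOption.TEMPERATURE: 0.0},
)
```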