From 5d29c4b2806939a6dd0be40b4e4ae1d57fcde4db Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Tue, 19 May 2026 11:55:46 +0300
Subject: [PATCH 1/6] remove the range option from the get_file tool

---
 graphcore/tools/vfs.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/graphcore/tools/vfs.py b/graphcore/tools/vfs.py
index 296cb2e..57024bc 100644
--- a/graphcore/tools/vfs.py
+++ b/graphcore/tools/vfs.py
@@ -48,18 +48,10 @@ def _make_checker(patt: str | None) -> Callable[[str], bool]:
     match = re.compile(patt)
     return lambda f_name: match.fullmatch(f_name) is None
 
-class FileRange(BaseModel):
-    start_line: int = Field(description="The line to start reading from; lines are numbered starting from 1.")
-    end_line: int = Field(description="The line to read until EXCLUSIVE.")
-
-def _get_file(cont: str | None, range: FileRange | None) -> str:
+def _get_file(cont: str | None) -> str:
     if cont is None:
         return "File not found"
-    if not range:
-        return cont
-    start = range.start_line - 1
-    to_ret = cont.splitlines()[start:range.end_line - 1]
-    return "\n".join(to_ret)
+    return cont
 
 
 
@@ -125,7 +117,6 @@ class _GetFileSchemaBase(BaseModel):
     If the path doesn't exist, this function returns "File not found".
     """
     path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied")
-    range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None)
 
 
 class _ListFileSchemaBase(BaseModel):
@@ -387,11 +378,10 @@ class GetFileSchema(_GetFileSchemaBase):
     def get_file(
         path: str,
         state: Annotated[InputType, InjectedState],
-        range: FileRange | None = None
     ) -> str:
         norm_path = _normalize_and_validate(path)
         cont = _get_content(state, norm_path)
-        return _get_file(cont, range)
+        return _get_file(cont)
 
     @cache
     def list_underlying() -> Sequence[str]:
@@ -489,14 +479,14 @@ class GetFileSchema(_GetFileSchemaBase):
 
     @tool(args_schema=GetFileSchema)
     @handle_path_errors
-    def get_file(path: str, range: FileRange | None = None) -> str:
+    def get_file(path: str) -> str:
         norm_path = _normalize_and_validate(path)
         if not check_allowed(norm_path):
             return "File not found"
         child = base_path / norm_path
         if child.is_file():
             try:
-                return _get_file(child.read_text(), range)
+                return _get_file(child.read_text())
             except Exception:
                 return "File not found"
         return "File not found"

From 36569ea39f5424d212f074911896b406c544e50b Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Tue, 19 May 2026 22:21:25 +0300
Subject: [PATCH 2/6] use token usage as threshold for summarization

---
 graphcore/graph.py   | 43 +++++++++++++++++++++++++++++++++----------
 graphcore/summary.py |  3 +--
 graphcore/utils.py   | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/graphcore/graph.py b/graphcore/graph.py
index e1a6de6..45784e6 100644
--- a/graphcore/graph.py
+++ b/graphcore/graph.py
@@ -13,6 +13,7 @@
 #      You should have received a copy of the GNU General Public License
 #      along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import logging
 from typing import Optional, List, Annotated, Literal, TypeVar, Type, Protocol, cast, Any, Tuple, NotRequired, Iterable, Generic, Callable, Generator, Awaitable, Coroutine
 from typing_extensions import TypedDict
 from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage, AIMessage, RemoveMessage
@@ -29,9 +30,22 @@
 from langgraph.prebuilt.tool_node import ToolInvocationError
 from langchain_anthropic import ChatAnthropic
 from pydantic import BaseModel, ValidationError
-from .utils import cached_invoke, acached_invoke
+from .utils import cached_invoke, acached_invoke, current_prompt_tokens, default_max_prompt_tokens, get_token_usage
 from .summary import SummaryConfig
 
+logger = logging.getLogger(__name__)
+
+
+def _log_usage(msg: BaseMessage) -> None:
+    """Emit a one-line per-call token-usage log record. No-op unless msg is an AIMessage."""
+    if not isinstance(msg, AIMessage):
+        return
+    u = get_token_usage(msg)
+    model = u["model_name"] or "?"
+    logger.info(
+        f"LLM call ({model}): input={u['input_tokens']} output={u['output_tokens']} cache_read={u['cache_read_input_tokens']} cache_write={u['cache_creation_input_tokens']}",
+    )
+
 """
 This provides the framework for building applications which loop with an LLM,
 using tools to refine the LLM output.
@@ -167,13 +181,18 @@ async def impl(
         s: list[AnyMessage]
     ) -> BaseMessage:
         res = await acached_invoke(llm, s)
+        _log_usage(res)
         return res
     return impl
 
 def _sync_llm(
     llm: LLM
 ) -> SyncLLM:
-    return lambda m: cached_invoke(llm, m)
+    def impl(m: list[AnyMessage]) -> BaseMessage:
+        res = cached_invoke(llm, m)
+        _log_usage(res)
+        return res
+    return impl
 
 IN = TypeVar("IN")
 OUT = TypeVar("OUT")
@@ -261,7 +280,7 @@ def to_return(state: StateT) -> PureFunctionGenerator:
         summary_prompt = config.get_summarization_prompt(state)
 
         messages = state["messages"].copy()
-        assert len(messages) >= config.max_messages
+        assert messages, "summarizer invoked with empty message history"
 
         try:
             msg = yield(messages + [HumanMessage(content=summary_prompt, display_tag="summarization")])
@@ -348,7 +367,7 @@ def impl(
             to_ret[k] = v
         return cast(O, to_ret)
     return impl
-        
+
 
 def get_summarizer(
     llm: LLM,
@@ -496,14 +515,14 @@ def with_context(self, t: type[_BContextBind]) -> "Builder[_BStateT, _BContextBi
         to_ret._summary_config = self._summary_config
         to_ret._conversation_handler = self._conversation_handler
         return to_ret
-    
+
     def with_checkpointer(self, checkpointer: Checkpointer) -> "Builder[_BStateT, _BContextT, _BInputT]":
         to_ret : "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
         self._copy_typed_to(to_ret)
         self._copy_untyped_to_(to_ret)
         to_ret._checkpointer = checkpointer
         return to_ret
-    
+
     def inject[OInput: FlowInput|None, OState: MessagesState | None, OCtxt: StateLike | None](
         self,
         f: Callable[["Builder[_BStateT, _BContextT, _BInputT]"], "Builder[OState, OCtxt, OInput]"]
@@ -572,8 +591,8 @@ def with_summary_config(self, config: SummaryConfig[_BStateT]) -> "Builder[_BSta
         to_ret._summary_config = config
         return to_ret
 
-    def with_default_summarizer(self, *, max_messages: int = 20, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
-        return self.with_summary_config(SummaryConfig(max_messages=max_messages, enabled=enabled))
+    def with_default_summarizer(self, *, enabled: bool = True) -> "Builder[_BStateT, _BContextT, _BInputT]":
+        return self.with_summary_config(SummaryConfig(enabled=enabled))
 
     def with_tools(self, l: Iterable[BaseTool | SplitTool]) -> "Builder[_BStateT, _BContextT, _BInputT]":
         to_ret: "Builder[_BStateT, _BContextT, _BInputT]" = Builder()
@@ -638,7 +657,7 @@ def build_async(self) -> Tuple["StateGraph[_BStateT, _BContextT, _BInputT, Any]"
             i=async_initial_node,
             r=async_tool_result_generator,
         )
-    
+
     def compile_async(
         self, *,
         checkpointer: Checkpointer = None
@@ -822,10 +841,14 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]:
     builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE)
 
     if summary_config is not None:
+        model_name = getattr(unbound_llm, "model", "?")
+        threshold = default_max_prompt_tokens(model_name)
+        logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})")
+
         def routing(state: StateT) -> Literal["summarize", "tool_result", "__end__"]:
             if state.get(output_key, None) is not None:
                 return "__end__"
-            elif len(state["messages"]) > summary_config.max_messages:
+            elif current_prompt_tokens(state["messages"]) > threshold:
                 return "summarize"
             else:
                 return "tool_result"
diff --git a/graphcore/summary.py b/graphcore/summary.py
index bfb5182..b71f0e6 100644
--- a/graphcore/summary.py
+++ b/graphcore/summary.py
@@ -22,8 +22,7 @@
 logger = logging.getLogger(__name__)
 
 class SummaryConfig(Generic[StateT]):
-    def __init__(self, max_messages: int = 20, enabled: bool = True):
-        self.max_messages = max_messages
+    def __init__(self, enabled: bool = True):
         self.enabled = enabled
 
     def get_summarization_prompt(self, state: StateT) -> str:
diff --git a/graphcore/utils.py b/graphcore/utils.py
index 2a5da97..e267ef5 100644
--- a/graphcore/utils.py
+++ b/graphcore/utils.py
@@ -136,3 +136,40 @@ def get_token_usage(m: AIMessage) -> TokenUsageDict:
             continue # be cool
         to_ret[k] = to_ret[k] + tok
     return to_ret
+
+
+def current_prompt_tokens(messages: List[AnyMessage]) -> int:
+    """
+    Effective context size of the most recent LLM call, used to decide when to summarize.
+
+    Returns input + cache-read + cache-creation tokens from the latest AIMessage, or 0 if there
+    is none. ToolMessages appended after that AIMessage are not counted (router fires after
+    TOOLS_NODE), and the summarizer's own AIMessage is discarded before reaching state. Both
+    omissions should be small, so set the threshold with enough headroom to absorb them.
+    """
+    for m in reversed(messages):
+        if isinstance(m, AIMessage):
+            usage = get_token_usage(m)
+            return (
+                usage["input_tokens"]
+                + usage["cache_read_input_tokens"]
+                + usage["cache_creation_input_tokens"]
+            )
+    return 0
+
+
+def default_max_prompt_tokens(model_name: str) -> int:
+    """
+    Prompt-token threshold at which to compact history. Conservatively below the model's
+    context window to leave room for output, thinking budget, and the next batch of tool results.
+    Add a new case here when introducing a new model.
+    """
+    match model_name:
+        case "claude-opus-4-6":
+            return 500_000   # 1M context window
+        case "claude-sonnet-4-6":
+            return 500_000   # 1M context window
+        case "claude-opus-4-7":
+            return 500_000   # 1M context window
+        case _:
+            return 100_000   # conservative fallback for unknown models

From 070b80a66a7010ae8f9b80992006f67fe43a25a9 Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Wed, 20 May 2026 09:33:41 +0300
Subject: [PATCH 3/6] Revert "remove the range option from the get_file tool"

This reverts commit 5d29c4b2806939a6dd0be40b4e4ae1d57fcde4db.
---
 graphcore/tools/vfs.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/graphcore/tools/vfs.py b/graphcore/tools/vfs.py
index 57024bc..296cb2e 100644
--- a/graphcore/tools/vfs.py
+++ b/graphcore/tools/vfs.py
@@ -48,10 +48,18 @@ def _make_checker(patt: str | None) -> Callable[[str], bool]:
     match = re.compile(patt)
     return lambda f_name: match.fullmatch(f_name) is None
 
-def _get_file(cont: str | None) -> str:
+class FileRange(BaseModel):
+    start_line: int = Field(description="The line to start reading from; lines are numbered starting from 1.")
+    end_line: int = Field(description="The line to read until EXCLUSIVE.")
+
+def _get_file(cont: str | None, range: FileRange | None) -> str:
     if cont is None:
         return "File not found"
-    return cont
+    if not range:
+        return cont
+    start = range.start_line - 1
+    to_ret = cont.splitlines()[start:range.end_line - 1]
+    return "\n".join(to_ret)
 
 
 
@@ -117,6 +125,7 @@ class _GetFileSchemaBase(BaseModel):
     If the path doesn't exist, this function returns "File not found".
     """
     path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied")
+    range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None)
 
 
 class _ListFileSchemaBase(BaseModel):
@@ -378,10 +387,11 @@ class GetFileSchema(_GetFileSchemaBase):
     def get_file(
         path: str,
         state: Annotated[InputType, InjectedState],
+        range: FileRange | None = None
     ) -> str:
         norm_path = _normalize_and_validate(path)
         cont = _get_content(state, norm_path)
-        return _get_file(cont)
+        return _get_file(cont, range)
 
     @cache
     def list_underlying() -> Sequence[str]:
@@ -479,14 +489,14 @@ class GetFileSchema(_GetFileSchemaBase):
 
     @tool(args_schema=GetFileSchema)
     @handle_path_errors
-    def get_file(path: str) -> str:
+    def get_file(path: str, range: FileRange | None = None) -> str:
         norm_path = _normalize_and_validate(path)
         if not check_allowed(norm_path):
             return "File not found"
         child = base_path / norm_path
         if child.is_file():
             try:
-                return _get_file(child.read_text())
+                return _get_file(child.read_text(), range)
             except Exception:
                 return "File not found"
         return "File not found"

From 67fb324bd74858e53c9d4bf4b4731e5fa42eb0ce Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Wed, 20 May 2026 10:45:08 +0300
Subject: [PATCH 4/6] update get_file's range description

---
 graphcore/tools/vfs.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/graphcore/tools/vfs.py b/graphcore/tools/vfs.py
index 296cb2e..f0b90d2 100644
--- a/graphcore/tools/vfs.py
+++ b/graphcore/tools/vfs.py
@@ -125,7 +125,16 @@ class _GetFileSchemaBase(BaseModel):
     If the path doesn't exist, this function returns "File not found".
     """
     path: str = Field(description="The relative path of the file on the VFS. IMPORTANT: Do NOT include a leading `./` it is implied")
-    range: FileRange | None = Field(description="If set, (start, end) indicates to return lines starting from line `start` (lines are 1 indexed) until `end` (exclusive). If unset, the entire file is returned.", default=None)
+    range: FileRange | None = Field(
+        description=(
+            "Optional line range. By DEFAULT leave this unset to read the entire file — partial reads "
+            "routinely miss surrounding context (imports, related definitions, modifiers) and force "
+            "wasteful re-reads. Only set this for exceptionally large files where you are certain no "
+            "other part will be relevant. When set, (start, end) returns lines from `start` (1-indexed) "
+            "until `end` (exclusive)."
+        ),
+        default=None,
+    )
 
 
 class _ListFileSchemaBase(BaseModel):

From ef139ab6d463c899e481f15c3579633533ee4bca Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Wed, 20 May 2026 10:45:21 +0300
Subject: [PATCH 5/6] address John's code review (model-name default, comment wording)

---
 graphcore/graph.py | 2 +-
 graphcore/utils.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/graphcore/graph.py b/graphcore/graph.py
index 45784e6..3818e6a 100644
--- a/graphcore/graph.py
+++ b/graphcore/graph.py
@@ -841,7 +841,7 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]:
     builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE)
 
     if summary_config is not None:
-        model_name = getattr(unbound_llm, "model", "?")
+        model_name = getattr(unbound_llm, "model", None)
         threshold = default_max_prompt_tokens(model_name)
         logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})")
 
diff --git a/graphcore/utils.py b/graphcore/utils.py
index e267ef5..c0ad446 100644
--- a/graphcore/utils.py
+++ b/graphcore/utils.py
@@ -160,7 +160,7 @@ def current_prompt_tokens(messages: List[AnyMessage]) -> int:
 
 def default_max_prompt_tokens(model_name: str) -> int:
     """
-    Prompt-token threshold at which to compact history. Conservatively below the model's
+    Prompt-token threshold at which to compact history. Keep this conservatively below the model's
     context window to leave room for output, thinking budget, and the next batch of tool results.
     Add a new case here when introducing a new model.
     """
@@ -172,4 +172,4 @@ def default_max_prompt_tokens(model_name: str) -> int:
         case "claude-opus-4-7":
             return 500_000   # 1M context window
         case _:
-            return 100_000   # conservative fallback for unknown models
+            return 100_000   # fallback for unknown models

From 8578a5f4609642d402663845ae133aa6777032a9 Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali@certora.com>
Date: Wed, 20 May 2026 11:26:44 +0300
Subject: [PATCH 6/6] fix pyright: default the model name to "" instead of None

---
 graphcore/graph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphcore/graph.py b/graphcore/graph.py
index 3818e6a..1328a13 100644
--- a/graphcore/graph.py
+++ b/graphcore/graph.py
@@ -841,7 +841,7 @@ def ai_message_router(state: StateT) -> Literal["tools", "no_tools"]:
     builder.add_edge(NO_TOOLS_NODE, TOOL_RESULT_NODE)
 
     if summary_config is not None:
-        model_name = getattr(unbound_llm, "model", None)
+        model_name = getattr(unbound_llm, "model", "")
         threshold = default_max_prompt_tokens(model_name)
         logger.info(f"Summarization threshold: {threshold} prompt tokens (model={model_name})")