Skip to content

Commit 129481f

Browse files
committed
Update max_tokens to 32000, add dynamic patch length generation, and docs
1 parent f9da3a8 commit 129481f

4 files changed

Lines changed: 26 additions & 17 deletions

File tree

src/codestory/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class GlobalConfig:
4949
api_key: str | None = None
5050
api_base: str | None = None
5151
temperature: float = 0
52-
max_tokens: int | None = 4096
52+
max_tokens: int | None = 32000
5353
relevance_filtering: bool = False
5454
relevance_filter_similarity_threshold: float = 0.75
5555
secret_scanner_aggression: Literal["safe", "standard", "strict", "none"] = "safe"

src/codestory/core/semantic_analysis/summarization/chunk_summarizer.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
INITIAL_SUMMARY_USER,
4141
)
4242
from codestory.core.semantic_analysis.summarization.summarizer_utils import (
43-
DEFAULT_PATCH_CUTOFF_CHARS,
4443
generate_annotated_patch,
4544
)
4645

@@ -86,23 +85,20 @@ def __init__(
8685
context_manager: ContextManager,
8786
patch_generator: PatchGenerator,
8887
batching_strategy: Literal["auto", "requests", "prompt"] = "auto",
89-
max_tokens: int = 4096,
90-
patch_cutoff_chars: int = DEFAULT_PATCH_CUTOFF_CHARS,
88+
max_tokens: int = 32000,
9189
):
9290
"""Initialize the ChunkSummarizer.
9391
9492
Args:
9593
codestory_adapter: The CodeStoryAdapter for LLM invocation
9694
batching_strategy: Strategy for batching LLM requests
9795
max_tokens: Maximum tokens per request
98-
patch_cutoff_chars: Maximum characters per patch before truncation
9996
"""
10097
self.model = codestory_adapter
10198
self.context_manager = context_manager
10299
self.patch_generator = patch_generator
103100
self.batching_strategy = batching_strategy
104101
self.max_tokens = max_tokens
105-
self.patch_cutoff_chars = patch_cutoff_chars
106102

107103
def summarize_containers(
108104
self,
@@ -130,7 +126,7 @@ def summarize_containers(
130126
container=container,
131127
context_manager=self.context_manager,
132128
patch_generator=self.patch_generator,
133-
patch_cutoff_chars=self.patch_cutoff_chars,
129+
max_tokens=self.max_tokens,
134130
)
135131
annotated_patches.append(patch)
136132

src/codestory/core/semantic_analysis/summarization/summarizer_utils.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,19 @@
4141
)
4242

4343

44-
DEFAULT_PATCH_CUTOFF_CHARS = 1000
45-
46-
4744
def generate_annotated_patches(
4845
containers: list[AtomicContainer],
4946
context_manager: ContextManager,
5047
patch_generator: PatchGenerator,
51-
patch_cutoff_chars: int = DEFAULT_PATCH_CUTOFF_CHARS,
48+
max_tokens: int | None = None,
5249
) -> list[str]:
5350
"""Generate annotated patches for a list of containers as XML strings.
5451
5552
Args:
5653
containers: List of AtomicContainer objects
5754
context_manager: ContextManager for semantic analysis
5855
patch_generator: PatchGenerator for patch generation
59-
patch_cutoff_chars: Maximum characters per patch before truncation
56+
max_tokens: Maximum tokens for the model to use for dynamic cutoff calculation
6057
6158
Returns:
6259
List of XML-formatted annotated patches, one per container
@@ -77,7 +74,7 @@ def generate_annotated_patches(
7774
container=container,
7875
context_manager=context_manager,
7976
patch_generator=patch_generator,
80-
patch_cutoff_chars=patch_cutoff_chars,
77+
max_tokens=max_tokens,
8178
)
8279
patches.append(patch)
8380
return patches
@@ -87,7 +84,7 @@ def generate_annotated_patch(
8784
container: AtomicContainer,
8885
context_manager: ContextManager,
8986
patch_generator: PatchGenerator,
90-
patch_cutoff_chars: int = DEFAULT_PATCH_CUTOFF_CHARS,
87+
max_tokens: int | None = None,
9188
) -> str:
9289
"""Generate an XML-formatted annotated patch for a single container."""
9390
merged_container = merge_container(container)
@@ -98,7 +95,9 @@ def generate_annotated_patch(
9895
)
9996

10097
return generate_annotated_chunk_patch(
101-
annotated_container, patch_generator, patch_cutoff_chars
98+
annotated_container,
99+
patch_generator,
100+
max_tokens=max_tokens,
102101
)
103102

104103

@@ -115,7 +114,7 @@ def prioritize_longer_fqns(fqns: set[TypedFQN]) -> list[TypedFQN]:
115114
def generate_annotated_chunk_patch(
116115
annotated_container: AnnotatedContainer,
117116
patch_generator: PatchGenerator,
118-
patch_cutoff_chars: int,
117+
max_tokens: int | None = None,
119118
) -> str:
120119
"""Generate an XML-formatted annotated patch with semantic information per file.
121120
@@ -142,6 +141,20 @@ def generate_annotated_chunk_patch(
142141
path = chunk.canonical_path()
143142
groups[path].append((chunk, sig))
144143

144+
# Dynamically calculate patch cutoff if max_tokens is provided
145+
num_paths = len(groups)
146+
patch_cutoff_chars = 1000 # Default fallback
147+
if max_tokens is not None and num_paths > 0:
148+
# Use a safe offset for prompt overhead (system prompt, metadata, etc.)
149+
# An offset of roughly 500-1000 tokens is typically sufficient for this overhead.
150+
token_offset = 1000
151+
# Estimate 3 characters per token (consistent with chunk_summarizer estimation)
152+
chars_per_token = 3
153+
154+
tokens_per_path = (max_tokens // num_paths) - token_offset
155+
# Ensure we have a reasonable minimum per path
# NOTE(review): token_offset is subtracted after dividing by num_paths, so the
# overhead is charged once per file rather than once per request — confirm this
# shouldn't be (max_tokens - token_offset) // num_paths instead.
156+
patch_cutoff_chars = max(tokens_per_path * chars_per_token, 1000)
157+
145158
file_sections = []
146159

147160
# Sort paths for consistency

src/docs/configuration/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Below are the key configuration options available in Codestory CLI:
5858
| `api_key` | API key for the LLM provider | `None` |
5959
| `api_base` | Custom API base URL for the LLM provider (optional) | `None` |
6060
| `temperature` | Temperature for LLM responses (0.0-1.0) | `0` |
61-
| `max_tokens` | Maximum tokens to send per llm request | `4096` |
61+
| `max_tokens` | Maximum tokens to send per LLM request | `32000` |
6262
| `relevance_filtering` | Whether to filter changes by relevance to your intent (`cst commit` only) | `false` |
6363
| `relevance_filter_similarity_threshold` | How similar do changes have to be to your intent to be included | `0.75` |
6464
| `secret_scanner_aggression` | How aggressively to scan for secrets (`safe`, `standard`, `strict`, `none`) | `safe` |

0 commit comments

Comments
 (0)