4141 )
4242
4343
44- DEFAULT_PATCH_CUTOFF_CHARS = 1000
45-
46-
4744def generate_annotated_patches (
4845 containers : list [AtomicContainer ],
4946 context_manager : ContextManager ,
5047 patch_generator : PatchGenerator ,
51- patch_cutoff_chars : int = DEFAULT_PATCH_CUTOFF_CHARS ,
48+ max_tokens : int | None = None ,
5249) -> list [str ]:
5350 """Generate annotated patches for a list of containers as XML strings.
5451
5552 Args:
5653 containers: List of AtomicContainer objects
5754 context_manager: ContextManager for semantic analysis
5855 patch_generator: PatchGenerator for patch generation
59- patch_cutoff_chars : Maximum characters per patch before truncation
56+ max_tokens : Token budget of the model, used to derive the per-file patch cutoff in characters; when None, a fixed 1000-character cutoff is used
6057
6158 Returns:
6259 List of XML-formatted annotated patches, one per container
@@ -77,7 +74,7 @@ def generate_annotated_patches(
7774 container = container ,
7875 context_manager = context_manager ,
7976 patch_generator = patch_generator ,
80- patch_cutoff_chars = patch_cutoff_chars ,
77+ max_tokens = max_tokens ,
8178 )
8279 patches .append (patch )
8380 return patches
@@ -87,7 +84,7 @@ def generate_annotated_patch(
8784 container : AtomicContainer ,
8885 context_manager : ContextManager ,
8986 patch_generator : PatchGenerator ,
90- patch_cutoff_chars : int = DEFAULT_PATCH_CUTOFF_CHARS ,
87+ max_tokens : int | None = None ,
9188) -> str :
9289 """Generate an XML-formatted annotated patch for a single container."""
9390 merged_container = merge_container (container )
@@ -98,7 +95,9 @@ def generate_annotated_patch(
9895 )
9996
10097 return generate_annotated_chunk_patch (
101- annotated_container , patch_generator , patch_cutoff_chars
98+ annotated_container ,
99+ patch_generator ,
100+ max_tokens = max_tokens ,
102101 )
103102
104103
@@ -115,7 +114,7 @@ def prioritize_longer_fqns(fqns: set[TypedFQN]) -> list[TypedFQN]:
115114def generate_annotated_chunk_patch (
116115 annotated_container : AnnotatedContainer ,
117116 patch_generator : PatchGenerator ,
118- patch_cutoff_chars : int ,
117+ max_tokens : int | None = None ,
119118) -> str :
120119 """Generate an XML-formatted annotated patch with semantic information per file.
121120
@@ -142,6 +141,20 @@ def generate_annotated_chunk_patch(
142141 path = chunk .canonical_path ()
143142 groups [path ].append ((chunk , sig ))
144143
144+ # Dynamically calculate patch cutoff if max_tokens is provided
145+ num_paths = len (groups )
146+ patch_cutoff_chars = 1000 # Default fallback
147+ if max_tokens is not None and num_paths > 0 :
148+ # Use a safe offset for prompt overhead (system prompt, metadata, etc.)
149+ # A reserve of 500-1000 tokens was suggested; the upper end of that range is used here.
150+ token_offset = 1000
151+ # Estimate 3 characters per token (consistent with chunk_summarizer estimation)
152+ chars_per_token = 3
153+
154+ tokens_per_path = (max_tokens // num_paths ) - token_offset
155+ # Never drop below the 1000-character fallback, even when the per-path budget is small or negative
156+ patch_cutoff_chars = max (tokens_per_path * chars_per_token , 1000 )
157+
145158 file_sections = []
146159
147160 # Sort paths for consistency
0 commit comments