From ea151d7c78622830a6c9e06f748a343d60b1b680 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:01:11 +0000 Subject: [PATCH] Optimize JavaAnalyzer.find_methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves an **11% runtime improvement** (24.5ms → 22.0ms) through two optimizations, the main one being the elimination of recursive function call overhead: ## Primary Optimization: Iterative Tree Traversal The core improvement replaces recursive calls to `_walk_tree_for_methods` with explicit stack-based iteration. In Python, each recursive call incurs significant overhead from: - Stack frame creation and teardown - Parameter passing (6 parameters per call) - Return address management The profiler data confirms this: the original code spent 24.5% of its time in recursive call setup (39,338 hits at 816.7ns per hit), while the optimized version eliminates this entirely by using a stack data structure. The iterative approach processes nodes in the same depth-first, left-to-right order (by reversing children before pushing them onto the stack) but replaces ~19,726 recursive function calls with simple stack operations. This is particularly effective for Java code analysis, where the AST can have deep nesting (nested classes, methods, etc.). ## Secondary Optimization: Type Declaration Tuple Caching Moving `type_declarations = ("class_declaration", "interface_declaration", "enum_declaration")` from a local variable assigned on every call (19,726 times) to a single instance attribute `self._type_declarations` eliminates 19,726 per-call tuple assignments. The profiler shows this statement accounted for 3.3% of execution time in the original version. 
## Performance Characteristics The optimization excels on test cases with: - **Many methods** (100+ methods): 11.6-14.6% speedup as recursive overhead compounds - **Deep nesting** (nested classes): 8.85% speedup by avoiding deep call stacks - **Large files with filtering**: 11-12% speedup as the stack approach handles conditional logic efficiently - **Mixed interfaces/classes**: 13.2% speedup due to reduced overhead when tracking type context The optimization maintains identical correctness across all test cases, preserving method discovery, filtering behavior, line numbers, class tracking, and return types. --- codeflash/languages/java/parser.py | 66 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/codeflash/languages/java/parser.py b/codeflash/languages/java/parser.py index bdffac44e..ad83fa3b7 100644 --- a/codeflash/languages/java/parser.py +++ b/codeflash/languages/java/parser.py @@ -113,6 +113,9 @@ def __init__(self) -> None: """Initialize the Java analyzer.""" self._parser: Parser | None = None + # Track type context (class, interface, or enum) - single allocation + self._type_declarations = ("class_declaration", "interface_declaration", "enum_declaration") + @property def parser(self) -> Parser: """Get the parser, creating it lazily.""" @@ -165,8 +168,10 @@ def find_methods( tree = self.parse(source_bytes) methods: list[JavaMethodNode] = [] + + root = tree.root_node self._walk_tree_for_methods( - tree.root_node, + root, source_bytes, methods, include_private=include_private, @@ -186,41 +191,42 @@ def _walk_tree_for_methods( current_class: str | None, ) -> None: """Recursively walk the tree to find method definitions.""" - new_class = current_class - - # Track type context (class, interface, or enum) - type_declarations = ("class_declaration", "interface_declaration", "enum_declaration") - if node.type in type_declarations: - name_node = node.child_by_field_name("name") - if name_node: - new_class = 
self.get_node_text(name_node, source_bytes) + # Use an explicit stack to avoid recursion overhead and repeated allocations. + stack: list[tuple[Node, str | None]] = [(node, current_class)] + type_declarations = self._type_declarations # local ref for slightly faster access + + while stack: + node, current_class = stack.pop() + + # Track type context (class, interface, or enum) + new_class = current_class + node_type = node.type + if node_type in type_declarations: + name_node = node.child_by_field_name("name") + if name_node: + new_class = self.get_node_text(name_node, source_bytes) - if node.type == "method_declaration": - method_info = self._extract_method_info(node, source_bytes, current_class) + if node_type == "method_declaration": + method_info = self._extract_method_info(node, source_bytes, current_class) - if method_info: - # Apply filters - should_include = True + if method_info: + # Apply filters + should_include = True - if method_info.is_private and not include_private: - should_include = False + if method_info.is_private and not include_private: + should_include = False - if method_info.is_static and not include_static: - should_include = False + if method_info.is_static and not include_static: + should_include = False - if should_include: - methods.append(method_info) + if should_include: + methods.append(method_info) - # Recurse into children - for child in node.children: - self._walk_tree_for_methods( - child, - source_bytes, - methods, - include_private=include_private, - include_static=include_static, - current_class=new_class if node.type in type_declarations else current_class, - ) + # Push children in reverse order to preserve original left-to-right DFS order + children = node.children + if children: + for child in reversed(children): + stack.append((child, new_class)) def _extract_method_info( self, node: Node, source_bytes: bytes, current_class: str | None