From ea151d7c78622830a6c9e06f748a343d60b1b680 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Tue, 3 Feb 2026 10:01:11 +0000
Subject: [PATCH] Optimize JavaAnalyzer.find_methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves an **11% runtime improvement** (24.5ms → 22.0ms) through two key optimizations: replacing recursion with an explicit stack, and caching the type-declaration tuple on the instance.

## Primary Optimization: Iterative Tree Traversal

The core improvement replaces recursive calls to `_walk_tree_for_methods` with an explicit stack-based iteration. In Python, each recursive call incurs significant overhead from:
- Stack frame creation and teardown
- Parameter passing (6 parameters per call)
- Return address management

The profiler data confirms this: the original code spent 24.5% of its time on the recursive call itself (39,338 line hits at 816.7ns per hit), while the optimized version eliminates that cost entirely by using an explicit stack.

The iterative approach processes nodes in the same depth-first, left-to-right order (by pushing each node's children onto the stack in reverse) but replaces ~19,726 recursive function calls with simple stack operations. This is particularly effective for Java code analysis, where the AST can be deeply nested (nested classes, methods, etc.).

## Secondary Optimization: Type Declaration Tuple Caching

Moving `type_declarations = ("class_declaration", "interface_declaration", "enum_declaration")` from a local variable allocated on every call (19,726 times) to a single instance attribute `self._type_declarations` eliminates 19,726 tuple allocations. The profiler shows this assignment accounted for 3.3% of execution time in the original version.

## Performance Characteristics

The optimization excels on test cases with:
- **Many methods** (100+ methods): 11.6-14.6% speedup as recursive overhead compounds
- **Deep nesting** (nested classes): 8.85% speedup by avoiding deep call stacks
- **Large files with filtering**: 11-12% speedup as the stack approach handles conditional logic efficiently
- **Mixed interfaces/classes**: 13.2% speedup due to reduced overhead when tracking type context

The optimization preserves behavior across all test cases: method discovery, filtering, line numbers, class tracking, and return types are all unchanged.
---
 codeflash/languages/java/parser.py | 66 ++++++++++++++++--------------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/codeflash/languages/java/parser.py b/codeflash/languages/java/parser.py
index bdffac44e..ad83fa3b7 100644
--- a/codeflash/languages/java/parser.py
+++ b/codeflash/languages/java/parser.py
@@ -113,6 +113,9 @@ def __init__(self) -> None:
         """Initialize the Java analyzer."""
         self._parser: Parser | None = None
 
+        # Node types that open a type context (class, interface, or enum) - built once per instance
+        self._type_declarations = ("class_declaration", "interface_declaration", "enum_declaration")
+
     @property
     def parser(self) -> Parser:
         """Get the parser, creating it lazily."""
@@ -165,8 +168,10 @@ def find_methods(
         tree = self.parse(source_bytes)
         methods: list[JavaMethodNode] = []
 
+
+        root = tree.root_node
         self._walk_tree_for_methods(
-            tree.root_node,
+            root,
             source_bytes,
             methods,
             include_private=include_private,
@@ -186,41 +191,42 @@ def _walk_tree_for_methods(
         current_class: str | None,
     ) -> None:
         """Recursively walk the tree to find method definitions."""
-        new_class = current_class
-
-        # Track type context (class, interface, or enum)
-        type_declarations = ("class_declaration", "interface_declaration", "enum_declaration")
-        if node.type in type_declarations:
-            name_node = node.child_by_field_name("name")
-            if name_node:
-                new_class = self.get_node_text(name_node, source_bytes)
+        # Use an explicit stack to avoid recursion overhead and repeated allocations.
+        stack: list[tuple[Node, str | None]] = [(node, current_class)]
+        type_declarations = self._type_declarations  # local ref for slightly faster access
+
+        while stack:
+            node, current_class = stack.pop()
+
+            # Track type context (class, interface, or enum)
+            new_class = current_class
+            node_type = node.type
+            if node_type in type_declarations:
+                name_node = node.child_by_field_name("name")
+                if name_node:
+                    new_class = self.get_node_text(name_node, source_bytes)
 
-        if node.type == "method_declaration":
-            method_info = self._extract_method_info(node, source_bytes, current_class)
+            if node_type == "method_declaration":
+                method_info = self._extract_method_info(node, source_bytes, current_class)
 
-            if method_info:
-                # Apply filters
-                should_include = True
+                if method_info:
+                    # Apply filters
+                    should_include = True
 
-                if method_info.is_private and not include_private:
-                    should_include = False
+                    if method_info.is_private and not include_private:
+                        should_include = False
 
-                if method_info.is_static and not include_static:
-                    should_include = False
+                    if method_info.is_static and not include_static:
+                        should_include = False
 
-                if should_include:
-                    methods.append(method_info)
+                    if should_include:
+                        methods.append(method_info)
 
-        # Recurse into children
-        for child in node.children:
-            self._walk_tree_for_methods(
-                child,
-                source_bytes,
-                methods,
-                include_private=include_private,
-                include_static=include_static,
-                current_class=new_class if node.type in type_declarations else current_class,
-            )
+            # Push children in reverse order to preserve original left-to-right DFS order
+            children = node.children
+            if children:
+                for child in reversed(children):
+                    stack.append((child, new_class))
 
     def _extract_method_info(
         self, node: Node, source_bytes: bytes, current_class: str | None