From e22c1eaf1ab954f6d5d156fac8c3318ab209e177 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 8 Jun 2026 01:29:59 -0600 Subject: [PATCH 1/6] fix(extractor): recognize inline-new expression as receiver type in extractReceiverName When the object of a member call is a `new_expression` (e.g. `new Dog().bark()`) or a parenthesized `new_expression` (e.g. `(new Dog('Rex')).bark()`), `extractReceiverName` now returns the constructor name (e.g. `'Dog'`) directly instead of the raw node text (e.g. `'(new Dog(\'Rex\'))'`). This lets the resolver reach the direct qualified method lookup path (`Dog.bark`) without relying on the text-based regex heuristic that was handling these expressions in `call-resolver.ts`. Closes #1396 --- src/extractors/javascript.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index ad71b05d..d33fee5f 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -2516,6 +2516,21 @@ function extractReceiverName(objNode: TreeSitterNode | null): string | undefined if (!objNode) return undefined; const t = objNode.type; if (t === 'identifier' || t === 'this' || t === 'super') return objNode.text; + // `(new Foo(...)).method()` — extract the constructor name so the resolver can + // look up `Foo.method` directly without relying on a text-based regex heuristic. 
+ if (t === 'new_expression') { + const name = extractNewExprTypeName(objNode); + if (name) return name; + } + if (t === 'parenthesized_expression') { + for (let i = 0; i < objNode.childCount; i++) { + const child = objNode.child(i); + if (child?.type === 'new_expression') { + const name = extractNewExprTypeName(child); + if (name) return name; + } + } + } return objNode.text; } From ca5065b7a3f73e4c7819cdf1a7112958ea3c3f82 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 8 Jun 2026 02:18:57 -0600 Subject: [PATCH 2/6] docs(resolver): update stale inline-new-receiver comment in call-resolver The comment at lines 85-93 of call-resolver.ts described behaviour from before extractReceiverName was taught to handle new_expression and parenthesized_expression(new_expression) nodes. The comment said extractReceiverName returned raw node text for those cases, which is no longer true. Update the comment to reflect that the regex is now a belt-and-suspenders fallback for unhandled AST node types, not the primary handler for inline-new receivers. --- src/domain/graph/builder/call-resolver.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/domain/graph/builder/call-resolver.ts b/src/domain/graph/builder/call-resolver.ts index 20dd885c..88c3dd09 100644 --- a/src/domain/graph/builder/call-resolver.ts +++ b/src/domain/graph/builder/call-resolver.ts @@ -82,12 +82,15 @@ export function resolveByMethodOrGlobal( : (typeEntry as { type?: string }).type : null; - // Handle inline new-expression receivers: `(new Foo).bar()` or `(new Foo()).bar()`. - // extractReceiverName returns the raw node text for non-identifier nodes, so `(new A).t()` - // produces receiver='(new A)'. Extract the constructor name directly. 
- // The regex intentionally restricts to uppercase-initial names ([A-Z_$]) as a heuristic - // to distinguish constructors (PascalCase) from regular functions — avoiding false positives - // on `(new xmlParser()).parse()` style calls which are rare in practice. + // Belt-and-suspenders fallback for inline new-expression receivers that + // extractReceiverName did not normalise (e.g. raw text leaked from an + // unhandled AST node type). extractReceiverName already handles the common + // `new_expression` / `parenthesized_expression(new_expression)` shapes by + // returning the constructor name directly, so this branch is exercised only + // by future node types or constructs that fall through to the raw-text path. + // The uppercase-initial restriction ([A-Z_$]) is a heuristic to distinguish + // constructors (PascalCase) from regular functions and avoids false positives + // on `(new xmlParser()).parse()` style calls. if (!typeName && call.receiver) { const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(call.receiver); if (m?.[1]) typeName = m[1]; From 3074e935be2989b0cb45aa9313af836b28a79881 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 8 Jun 2026 03:16:02 -0600 Subject: [PATCH 3/6] fix(extractor): infer C# var-declared instance types from object_creation_expression initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a local variable is declared as `var service = new UserService(repo)`, the tree-sitter C# grammar represents the type node as `implicit_type` (not `var_keyword`) and places the `object_creation_expression` as a direct child of `variable_declarator` (not nested in an `equals_value_clause`). Previously `handleCSharpVarDecl` returned early on `implicit_type`, leaving the typeMap without an entry for `service`. Calls like `service.AddUser()` therefore had no receiver type and were not resolved. 
Fix (WASM/TS): recognise both `implicit_type` and `var_keyword` as the var-inference signal, then walk the `variable_declarator`'s children for a direct `object_creation_expression` or an `equals_value_clause` containing one, and seed the typeMap with the constructor type at confidence 0.9. Fix (native/Rust): symmetric change to `match_csharp_type_map` plus new `extract_var_init_type` helper, keeping both engines in parity. Result: C# `receiver-typed` recall: 0/4 → 4/4 (100%); aggregate recall: 73.9% → 91.3%. Threshold ratcheted from {precision: 0.5, recall: 0.2} to {precision: 0.9, recall: 0.9}. Fixes #1402 --- .../codegraph-core/src/extractors/csharp.rs | 54 +++++++++++++++---- src/extractors/csharp.ts | 32 +++++++++-- .../resolution/resolution-benchmark.test.ts | 3 +- tests/parsers/csharp.test.ts | 22 ++++++++ 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 444109e6..5db7069b 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -438,6 +438,29 @@ fn extract_csharp_base_types( // ── Type map extraction ───────────────────────────────────────────────────── +fn extract_var_init_type(declarator: &Node, source: &[u8]) -> Option { + for i in 0..declarator.child_count() { + let child = declarator.child(i)?; + if child.kind() == "object_creation_expression" { + if let Some(t) = child.child_by_field_name("type") { + return extract_csharp_type_name(&t, source).map(|s| s.to_string()); + } + } + if child.kind() == "equals_value_clause" { + for j in 0..child.child_count() { + if let Some(expr) = child.child(j) { + if expr.kind() == "object_creation_expression" { + if let Some(t) = expr.child_by_field_name("type") { + return extract_csharp_type_name(&t, source).map(|s| s.to_string()); + } + } + } + } + } + } + None +} + fn extract_csharp_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> 
Option<&'a str> { match type_node.kind() { "identifier" | "qualified_name" => Some(node_text(type_node, source)), @@ -455,18 +478,29 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, "variable_declaration" => { let type_node = node.child_by_field_name("type").or_else(|| node.child(0)); if let Some(type_node) = type_node { - if type_node.kind() != "var_keyword" && type_node.kind() != "implicit_type" { - if let Some(type_name) = extract_csharp_type_name(&type_node, source) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "variable_declarator" { - let name_node = child.child_by_field_name("name") - .or_else(|| child.child(0)); - if let Some(name_node) = name_node { - if name_node.kind() == "identifier" { + let is_var = type_node.kind() == "implicit_type" || type_node.kind() == "var_keyword"; + let explicit_type_name: Option = if is_var { + None + } else { + extract_csharp_type_name(&type_node, source).map(|s| s.to_string()) + }; + if is_var || explicit_type_name.is_some() { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "variable_declarator" { + let name_node = child.child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(name_node) = name_node { + if name_node.kind() == "identifier" { + let type_name = if is_var { + extract_var_init_type(&child, source) + } else { + explicit_type_name.clone() + }; + if let Some(type_name) = type_name { symbols.type_map.push(TypeMapEntry { name: node_text(&name_node, source).to_string(), - type_name: type_name.to_string(), + type_name, confidence: 0.9, }); } diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 52f47cb4..97385bb5 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -332,17 +332,39 @@ function extractCSharpTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void /** Extract type info from a variable_declaration node (local vars with 
explicit types). */ function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { const typeNode = node.childForFieldName('type') || node.child(0); - if (!typeNode || typeNode.type === 'var_keyword') return; - const typeName = extractCSharpTypeName(typeNode); - if (!typeName) return; + if (!typeNode) return; + const isVar = typeNode.type === 'implicit_type' || typeNode.type === 'var_keyword'; + const explicitTypeName = isVar ? null : extractCSharpTypeName(typeNode); + if (!isVar && !explicitTypeName) return; for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (child?.type !== 'variable_declarator') continue; const nameNode = child.childForFieldName('name') || child.child(0); - if (nameNode && nameNode.type === 'identifier' && ctx.typeMap) { - setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); + if (nameNode?.type !== 'identifier' || !ctx.typeMap) continue; + const typeName = isVar ? extractVarInitType(child) : explicitTypeName; + if (typeName) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); + } +} + +/** Extract the constructor type from a `var x = new Foo()` initializer. */ +function extractVarInitType(declarator: TreeSitterNode): string | null { + for (let i = 0; i < declarator.childCount; i++) { + const child = declarator.child(i); + if (child?.type === 'object_creation_expression') { + const tNode = child.childForFieldName('type'); + if (tNode) return extractCSharpTypeName(tNode); + } + if (child?.type === 'equals_value_clause') { + for (let j = 0; j < child.childCount; j++) { + const expr = child.child(j); + if (expr?.type === 'object_creation_expression') { + const tNode = expr.childForFieldName('type'); + if (tNode) return extractCSharpTypeName(tNode); + } + } } } + return null; } /** Extract type info from a parameter node. 
*/ diff --git a/tests/benchmarks/resolution/resolution-benchmark.test.ts b/tests/benchmarks/resolution/resolution-benchmark.test.ts index 1e78c0ee..f67812ba 100644 --- a/tests/benchmarks/resolution/resolution-benchmark.test.ts +++ b/tests/benchmarks/resolution/resolution-benchmark.test.ts @@ -137,7 +137,8 @@ const THRESHOLDS: Record = { python: { precision: 0.7, recall: 0.3 }, go: { precision: 0.7, recall: 0.3 }, java: { precision: 0.7, recall: 0.3 }, - csharp: { precision: 0.5, recall: 0.2 }, + // csharp 0.9: var-declared instance typeMap (implicit_type) lifts receiver-typed from 0/4 → 4/4 recall + csharp: { precision: 0.9, recall: 0.9 }, kotlin: { precision: 0.6, recall: 0.2 }, // Lower bars — resolution still maturing rust: { precision: 0.6, recall: 0.2 }, diff --git a/tests/parsers/csharp.test.ts b/tests/parsers/csharp.test.ts index 8f9f4e0c..20170765 100644 --- a/tests/parsers/csharp.test.ts +++ b/tests/parsers/csharp.test.ts @@ -151,4 +151,26 @@ public class Service : BaseService, IDisposable { expect.objectContaining({ name: 'User.Name', kind: 'property' }), ); }); + + it('populates typeMap for var-declared instances (implicit type)', () => { + const symbols = parseCSharp(`public class Program { + void Run() { + var service = new UserService(); + var repo = new UserRepository(); + service.AddUser(null); + } +}`); + expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 0.9 }); + expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 0.9 }); + }); + + it('populates typeMap for explicitly-typed local variables', () => { + const symbols = parseCSharp(`public class Foo { + void Bar() { + UserService svc = new UserService(); + svc.DoWork(); + } +}`); + expect(symbols.typeMap.get('svc')).toEqual({ type: 'UserService', confidence: 0.9 }); + }); }); From d8a329dc1003b3e2b3a2a11e0e8f0292e156b1b7 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 8 Jun 2026 11:27:55 -0600 Subject: [PATCH 4/6] fix(extractor): use 
let-else continue in extract_var_init_type outer loop Replace `declarator.child(i)?` with `let Some(child) = ... else { continue }` to skip None child slots rather than returning None from the entire function. Matches the skip-on-None behaviour of the inner loop's `if let` and the TypeScript mirror's optional-chaining. --- crates/codegraph-core/src/extractors/csharp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 5db7069b..de6de19b 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -440,7 +440,7 @@ fn extract_csharp_base_types( fn extract_var_init_type(declarator: &Node, source: &[u8]) -> Option { for i in 0..declarator.child_count() { - let child = declarator.child(i)?; + let Some(child) = declarator.child(i) else { continue }; if child.kind() == "object_creation_expression" { if let Some(t) = child.child_by_field_name("type") { return extract_csharp_type_name(&t, source).map(|s| s.to_string()); From 953d815783532674baf4ae3c74ee7c527675a3e8 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 9 Jun 2026 05:31:09 -0600 Subject: [PATCH 5/6] fix(test): handle ECONNRESET and other network errors in embedding regression test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macOS CI runner intermittently fails with ECONNRESET when downloading the HuggingFace model. Broaden the catch in the embedding regression test to treat connection-level errors (ECONNRESET, ETIMEDOUT, ENOTFOUND, ECONNREFUSED) and 'terminated' worker errors the same as HTTP 429 — mark rateLimited=true and skip the dependent tests instead of failing. 
--- tests/search/embedding-regression.test.ts | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/search/embedding-regression.test.ts b/tests/search/embedding-regression.test.ts index 38f2e99e..53ed699d 100644 --- a/tests/search/embedding-regression.test.ts +++ b/tests/search/embedding-regression.test.ts @@ -68,12 +68,21 @@ describe.skipIf(!hasTransformers)('embedding regression (real model)', () => { dbPath = path.join(tmpDir, '.codegraph', 'graph.db'); // Build embeddings with the smallest/fastest model. - // Skip gracefully when HuggingFace rate-limits the model download (HTTP 429). + // Skip gracefully when HuggingFace rate-limits the model download (HTTP 429) + // or when the network is unavailable (ECONNRESET, ETIMEDOUT, ENOTFOUND, etc.). try { await buildEmbeddings(tmpDir, 'minilm', dbPath); } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); - if (msg.includes('429')) { + const code = (err as NodeJS.ErrnoException).code ?? 
''; + const isNetworkError = + msg.includes('429') || + msg.includes('terminated') || + code === 'ECONNRESET' || + code === 'ETIMEDOUT' || + code === 'ENOTFOUND' || + code === 'ECONNREFUSED'; + if (isNetworkError) { rateLimited = true; return; } From be501a1bcffdb712b0740874466f94025210484d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 9 Jun 2026 15:37:45 -0600 Subject: [PATCH 6/6] fix: document single-level paren limit and narrow network-error codes (#1415) --- src/extractors/javascript.ts | 4 ++++ tests/search/embedding-regression.test.ts | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 0e5fab75..edc4ad0a 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -2625,6 +2625,10 @@ function extractReceiverName(objNode: TreeSitterNode | null): string | undefined if (name) return name; } if (t === 'parenthesized_expression') { + // Only one level of parentheses is unwrapped here. Casts inside parens (e.g. + // `(new Dog() as Animal).bark()`) fall through to raw text and are caught by + // the regex fallback in call-resolver.ts. Doubly-nested parens (e.g. + // `((new Dog())).bark()`) also fall through, but that regex accepts only one leading `(` and misses them. for (let i = 0; i < objNode.childCount; i++) { const child = objNode.child(i); if (child?.type === 'new_expression') { diff --git a/tests/search/embedding-regression.test.ts b/tests/search/embedding-regression.test.ts index 53ed699d..6ec5df0c 100644 --- a/tests/search/embedding-regression.test.ts +++ b/tests/search/embedding-regression.test.ts @@ -69,7 +69,8 @@ describe.skipIf(!hasTransformers)('embedding regression (real model)', () => { // Build embeddings with the smallest/fastest model. // Skip gracefully when HuggingFace rate-limits the model download (HTTP 429) - // or when the network is unavailable (ECONNRESET, ETIMEDOUT, ENOTFOUND, etc.). 
+ // or when the network is unavailable (ECONNRESET, ETIMEDOUT, ENOTFOUND, + // ECONNREFUSED, ERR_HTTP2_STREAM_CANCEL, ERR_HTTP2_SESSION_ERROR). try { await buildEmbeddings(tmpDir, 'minilm', dbPath); } catch (err: unknown) { @@ -77,11 +78,12 @@ describe.skipIf(!hasTransformers)('embedding regression (real model)', () => { const code = (err as NodeJS.ErrnoException).code ?? ''; const isNetworkError = msg.includes('429') || - msg.includes('terminated') || code === 'ECONNRESET' || code === 'ETIMEDOUT' || code === 'ENOTFOUND' || - code === 'ECONNREFUSED'; + code === 'ECONNREFUSED' || + code === 'ERR_HTTP2_STREAM_CANCEL' || + code === 'ERR_HTTP2_SESSION_ERROR'; if (isNetworkError) { rateLimited = true; return;