Skip to content

Commit f1a6c59

Browse files
Pavel Sergeev (PabloSergi)
authored and committed
feat: Add C# language support for namespaces, classes, and interfaces, improve inheritance parsing, and update the default LLM.
1 parent 415dc5b commit f1a6c59

17 files changed

Lines changed: 735 additions & 164 deletions

codebase_rag/constants.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class FileAction(StrEnum):
120120
PKG_CONANFILE = "conanfile.txt"
121121

122122
DEFAULT_REGION = "us-central1"
123-
DEFAULT_MODEL = "llama3.2"
123+
DEFAULT_MODEL = "qwen2.5-coder:7b"
124124
DEFAULT_API_KEY = "ollama"
125125

126126
ENV_OPENAI_API_KEY = "OPENAI_API_KEY"
@@ -198,13 +198,18 @@ class GoogleProviderType(StrEnum):
198198
ONEOF_PACKAGE = "package"
199199
ONEOF_FOLDER = "folder"
200200
ONEOF_MODULE = "module"
201+
ONEOF_NAMESPACE = "namespace" # C# namespace
201202
ONEOF_CLASS = "class_node"
202203
ONEOF_FUNCTION = "function"
203204
ONEOF_METHOD = "method"
204205
ONEOF_FILE = "file"
205206
ONEOF_EXTERNAL_PACKAGE = "external_package"
206207
ONEOF_MODULE_IMPLEMENTATION = "module_implementation"
207208
ONEOF_MODULE_INTERFACE = "module_interface"
209+
ONEOF_INTERFACE = "interface_node" # Interface (C#, Java, TypeScript)
210+
ONEOF_ENUM = "enum_node" # Enum type
211+
ONEOF_TYPE_ALIAS = "type_alias" # Type alias
212+
ONEOF_UNION = "union_node" # Union type (C/C++)
208213

209214
# (H) CLI error and info messages
210215
CLI_ERR_OUTPUT_REQUIRES_UPDATE = (
@@ -320,6 +325,7 @@ class NodeLabel(StrEnum):
320325
FOLDER = "Folder"
321326
FILE = "File"
322327
MODULE = "Module"
328+
NAMESPACE = "Namespace" # C# namespace
323329
CLASS = "Class"
324330
FUNCTION = "Function"
325331
METHOD = "Method"
@@ -338,6 +344,7 @@ class NodeLabel(StrEnum):
338344
NodeLabel.FOLDER: UniqueKeyType.PATH,
339345
NodeLabel.FILE: UniqueKeyType.PATH,
340346
NodeLabel.MODULE: UniqueKeyType.QUALIFIED_NAME,
347+
NodeLabel.NAMESPACE: UniqueKeyType.QUALIFIED_NAME, # C# namespace
341348
NodeLabel.CLASS: UniqueKeyType.QUALIFIED_NAME,
342349
NodeLabel.FUNCTION: UniqueKeyType.QUALIFIED_NAME,
343350
NodeLabel.METHOD: UniqueKeyType.QUALIFIED_NAME,
@@ -363,6 +370,7 @@ class RelationshipType(StrEnum):
363370
CONTAINS_FOLDER = "CONTAINS_FOLDER"
364371
CONTAINS_FILE = "CONTAINS_FILE"
365372
CONTAINS_MODULE = "CONTAINS_MODULE"
373+
CONTAINS_NAMESPACE = "CONTAINS_NAMESPACE" # Module declares namespace
366374
DEFINES = "DEFINES"
367375
DEFINES_METHOD = "DEFINES_METHOD"
368376
IMPORTS = "IMPORTS"
@@ -1735,6 +1743,16 @@ class CppNodeType(StrEnum):
17351743
TS_JAVA_ANNOTATION_TYPE_DECLARATION = "annotation_type_declaration"
17361744

17371745
TS_BASE_CLASS_CLAUSE = "base_class_clause"
1746+
TS_CS_BASE_LIST = "base_list" # C# base class/interface list
1747+
TS_CS_USING_DIRECTIVE = "using_directive" # C# using statement
1748+
TS_CS_EQUALS = "=" # C# equals sign for using alias
1749+
TS_QUALIFIED_NAME = "qualified_name" # Dotted name like System.Collections.Generic
1750+
TS_CS_PROPERTY_DECLARATION = "property_declaration" # C# property (get/set accessor)
1751+
TS_CS_INDEXER_DECLARATION = "indexer_declaration" # C# indexer (this[])
1752+
TS_CS_EVENT_FIELD_DECLARATION = "event_field_declaration" # C# event field
1753+
TS_CS_OPERATOR_DECLARATION = "operator_declaration" # C# operator overload
1754+
TS_CS_NAMESPACE_DECLARATION = "namespace_declaration" # C# namespace
1755+
TS_CS_FILE_SCOPED_NAMESPACE = "file_scoped_namespace_declaration" # C# 10+ file-scoped namespace
17381756
TS_TEMPLATE_TYPE = "template_type"
17391757
TS_ACCESS_SPECIFIER = "access_specifier"
17401758
TS_VIRTUAL = "virtual"
@@ -2723,6 +2741,10 @@ class MCPParamName(StrEnum):
27232741
TS_CS_ANONYMOUS_METHOD_EXPRESSION,
27242742
TS_CS_LAMBDA_EXPRESSION,
27252743
TS_METHOD_DECLARATION,
2744+
TS_CS_PROPERTY_DECLARATION, # C# properties (get/set)
2745+
TS_CS_INDEXER_DECLARATION, # C# indexers (this[])
2746+
TS_CS_EVENT_FIELD_DECLARATION, # C# events
2747+
TS_CS_OPERATOR_DECLARATION, # C# operator overloads
27262748
)
27272749
SPEC_CS_CLASS_TYPES = (
27282750
TS_CLASS_DECLARATION,

codebase_rag/cypher_queries.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@
5959
WHERE caller.name = 'main'
6060
RETURN caller.name AS caller, callee.name AS callee, callee.qualified_name AS qualified_name"""
6161

62+
CYPHER_EXAMPLE_CLASS_BY_NAME_SUFFIX = """MATCH (c:Class)
63+
WHERE c.name ENDS WITH 'Controller'
64+
RETURN c.qualified_name AS qualified_name, c.name AS name, labels(c) AS type"""
65+
66+
CYPHER_EXAMPLE_CLASS_BY_NAME_CONTAINS = """MATCH (c:Class)
67+
WHERE toLower(c.name) CONTAINS 'service'
68+
RETURN c.qualified_name AS qualified_name, c.name AS name, labels(c) AS type"""
69+
6270
CYPHER_EXPORT_NODES = """
6371
MATCH (n)
6472
RETURN id(n) as node_id, labels(n) as labels, properties(n) as properties

codebase_rag/graph_updater.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,11 @@ def run(self) -> None:
273273
logger.info(ls.PASS_2_FILES)
274274
self._process_files()
275275

276+
# Create deferred IMPLEMENTS relationships after all interfaces are known
277+
self.factory.definition_processor.process_deferred_implements()
278+
# Create deferred INHERITS relationships for C# after all classes are known
279+
self.factory.definition_processor.process_deferred_inherits()
280+
276281
logger.info(ls.FOUND_FUNCTIONS.format(count=len(self.function_registry)))
277282
logger.info(ls.PASS_3_CALLS)
278283
self._process_function_calls()

codebase_rag/logs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
DEF_PARSE_FAILED = "Failed to parse or ingest {path}: {error}"
3636
DEF_PARSING_DEPENDENCY = " Parsing dependency file: {path}"
3737
DEF_FOUND_DEPENDENCY = " Found dependency: {name} (spec: {spec})"
38+
DEF_FOUND_NAMESPACE = " Found namespace: {name} (qn: {qn})"
3839

3940
# (H) Semantic/embedding logs
4041
SEMANTIC_NOT_AVAILABLE = (
@@ -408,6 +409,9 @@
408409
IMP_CPP_MODULE_IMPL = "C++20 module implementation: {name}"
409410
IMP_CPP_MODULE_IFACE = "C++20 module interface: {name}"
410411
IMP_CPP_PARTITION = "C++20 module partition import: {partition} -> {full}"
412+
IMP_CS_USING = "C# using: {name} -> {namespace}"
413+
IMP_CS_ALIAS = "C# using alias: {alias} -> {namespace}"
414+
IMP_CS_STATIC = "C# using static: {name} -> {namespace}"
411415
IMP_GENERIC = "Generic import parsing for {language}: {node_type}"
412416

413417
# (H) Structure processor logs
@@ -418,6 +422,9 @@
418422
CLASS_CPP_MODULE_INTERFACE = " Found C++ Module Interface: {qn}"
419423
CLASS_CPP_MODULE_IMPL = " Found C++ Module Implementation: {qn}"
420424
CLASS_FOUND_INTERFACE = " Found Interface: {name} (qn: {qn})"
425+
CLASS_FOUND_BASE_CLASS = " Found C# Base Class: {name} (qn: {qn})"
426+
CLASS_DEFERRED_IMPLEMENTS = " Creating deferred IMPLEMENTS: {class_qn} -> {interface_qn}"
427+
CLASS_DEFERRED_INHERITS = " Creating deferred INHERITS: {class_qn} -> {base_class_qn}"
421428
CLASS_FOUND_ENUM = " Found Enum: {name} (qn: {qn})"
422429
CLASS_FOUND_TYPE = " Found Type: {name} (qn: {qn})"
423430
CLASS_FOUND_STRUCT = " Found Struct: {name} (qn: {qn})"

codebase_rag/parsers/class_ingest/mixin.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class ClassIngestMixin:
4040
module_qn_to_file_path: dict[str, Path]
4141
import_processor: ImportProcessor
4242
class_inheritance: dict[str, list[str]]
43+
# Deferred IMPLEMENTS relationships: list of (class_type, class_qn, interface_name, module_qn)
44+
pending_implements: list[tuple[str, str, str, str]]
45+
# Deferred INHERITS relationships: list of (class_type, class_qn, base_class_name, module_qn)
46+
pending_inherits: list[tuple[str, str, str, str]]
4347

4448
@abstractmethod
4549
def _get_docstring(self, node: ASTNode) -> str | None: ...
@@ -160,6 +164,25 @@ def _process_class_node(
160164
self._resolve_to_qn,
161165
self.function_registry,
162166
)
167+
168+
# Collect IMPLEMENTS relationships for deferred processing
169+
# This ensures interfaces are in function_registry before resolution
170+
if class_node.type == cs.TS_CLASS_DECLARATION:
171+
from . import parent_extraction as pe
172+
173+
for interface_name in pe.extract_interface_names_raw(class_node):
174+
self.pending_implements.append(
175+
(node_type, class_qn, interface_name, module_qn)
176+
)
177+
178+
# Collect INHERITS relationships for deferred processing (C# only)
179+
# This ensures parent classes are in function_registry before resolution
180+
if language == cs.SupportedLanguage.CSHARP:
181+
for base_class_name in pe.extract_base_class_names_raw(class_node):
182+
self.pending_inherits.append(
183+
(node_type, class_qn, base_class_name, module_qn)
184+
)
185+
163186
self._ingest_class_methods(class_node, class_qn, language, lang_queries)
164187

165188
def _ingest_rust_impl_methods(
@@ -274,6 +297,40 @@ def process_all_method_overrides(self) -> None:
274297
self.ingestor,
275298
)
276299

300+
def process_deferred_implements(self) -> None:
    """Resolve and emit every queued IMPLEMENTS relationship.

    Each entry of ``self.pending_implements`` is a
    ``(class_type, class_qn, interface_name, module_qn)`` tuple. The
    interface name is resolved to a qualified name here rather than at
    collection time; this method is meant to be called once all files have
    been processed so that the interfaces are already known to the resolver.
    """
    for pending in self.pending_implements:
        class_type, class_qn, interface_name, module_qn = pending
        interface_qn = self._resolve_to_qn(interface_name, module_qn)

        message = logs.CLASS_DEFERRED_IMPLEMENTS.format(
            class_qn=class_qn, interface_qn=interface_qn
        )
        logger.debug(message)

        rel.create_implements_relationship(
            class_type, class_qn, interface_qn, self.ingestor
        )
316+
317+
def process_deferred_inherits(self) -> None:
    """Resolve and emit every queued INHERITS relationship.

    Each entry of ``self.pending_inherits`` is a
    ``(class_type, class_qn, base_class_name, module_qn)`` tuple. The base
    class name is resolved to a qualified name here rather than at
    collection time; this method is meant to be called once all files have
    been processed so that the parent classes are already known to the
    resolver.
    """
    for pending in self.pending_inherits:
        class_type, class_qn, base_class_name, module_qn = pending
        base_class_qn = self._resolve_to_qn(base_class_name, module_qn)

        message = logs.CLASS_DEFERRED_INHERITS.format(
            class_qn=class_qn, base_class_qn=base_class_qn
        )
        logger.debug(message)

        rel.create_inheritance_relationship(
            class_type,
            class_qn,
            base_class_qn,
            self.function_registry,
            self.ingestor,
        )
333+
277334
def _resolve_class_name(self, class_name: str, module_qn: str) -> str | None:
278335
return resolve_class_name(
279336
class_name, module_qn, self.import_processor, self.function_registry

codebase_rag/parsers/class_ingest/parent_extraction.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ def extract_parent_classes(
5252
)
5353
)
5454

55+
# C#: extract base class from base_list (not interfaces)
56+
if class_node.type == cs.TS_CLASS_DECLARATION:
57+
parent_classes.extend(
58+
extract_csharp_base_class(class_node, module_qn, resolve_to_qn)
59+
)
60+
5561
return parent_classes
5662

5763

@@ -297,19 +303,86 @@ def resolve_js_ts_parent_class(
297303
return resolve_to_qn(parent_name, module_qn)
298304

299305

306+
def extract_interface_names_raw(class_node: Node) -> list[str]:
    """Collect the unresolved names of the interfaces a class declares.

    No qualified-name resolution happens here; the names are resolved later,
    when the deferred IMPLEMENTS relationships are created after all files
    have been processed.

    Two syntaxes are recognised:

    - Java: ``type_identifier`` entries of the type list found under the
      ``interfaces`` field.
    - C#: plain identifiers of the first ``base_list`` child whose name looks
      like an interface (``I`` followed by an uppercase letter).
    """
    collected: list[str] = []

    # Java keeps implemented interfaces under a dedicated field.
    java_interfaces = class_node.child_by_field_name(cs.FIELD_INTERFACES)
    if java_interfaces:
        for type_list in java_interfaces.children:
            if type_list.type != cs.TS_TYPE_LIST:
                continue
            for entry in type_list.children:
                if entry.type != cs.TS_TYPE_IDENTIFIER or not entry.text:
                    continue
                if decoded := safe_decode_text(entry):
                    collected.append(decoded)

    # C# mixes the base class and the interfaces in a single base_list, so
    # the naming convention is the only signal available at this point.
    for member in class_node.children:
        if member.type != cs.TS_CS_BASE_LIST:
            continue
        for entry in member.children:
            if entry.type != cs.TS_IDENTIFIER or not entry.text:
                continue
            decoded = safe_decode_text(entry)
            if not decoded:
                continue
            if len(decoded) > 1 and decoded.startswith("I") and decoded[1].isupper():
                collected.append(decoded)
        # Only the first base_list is inspected.
        break

    return collected
336+
337+
338+
def extract_base_class_names_raw(class_node: Node) -> list[str]:
    """Return the unresolved name of a C# class's base class, if any.

    No qualified-name resolution happens here; the name is resolved later,
    when the deferred INHERITS relationships are created after all files
    have been processed.

    The first ``base_list`` child of ``class_node`` is scanned for plain
    identifiers. Names that look like interfaces (``I`` followed by an
    uppercase letter) are skipped, and the first remaining identifier is
    reported as the base class. C# allows a single direct base class, so at
    most one name is returned; later non-``I`` identifiers can only be
    interfaces that ignore the naming convention and must not be reported as
    parents.

    Returns:
        A list with zero or one base class name.
    """
    for child in class_node.children:
        if child.type != cs.TS_CS_BASE_LIST:
            continue
        for entry in child.children:
            if entry.type != cs.TS_IDENTIFIER or not entry.text:
                continue
            name = safe_decode_text(entry)
            if not name:
                continue
            # Skip interfaces (start with 'I' followed by uppercase).
            if len(name) > 1 and name.startswith("I") and name[1].isupper():
                continue
            # Single inheritance: the first candidate is the base class.
            return [name]
        # Only the first base_list is inspected.
        break

    return []
362+
363+
300364
def extract_implemented_interfaces(
    class_node: Node,
    module_qn: str,
    resolve_to_qn: Callable[[str, str], str],
) -> list[str]:
    """Return the resolved names of the interfaces ``class_node`` implements.

    Java interfaces are read from the node's ``interfaces`` field; C#
    interfaces are read from the first ``base_list`` child. Both helpers
    append into the same result list, Java entries first.

    Args:
        class_node: Class declaration node to inspect.
        module_qn: Qualified name of the module, passed through to
            ``resolve_to_qn``.
        resolve_to_qn: Callback mapping ``(name, module_qn)`` to a qualified
            name.

    Returns:
        The qualified names produced by the language-specific helpers.
    """
    resolved: list[str] = []

    # Java: uses 'interfaces' field
    java_interfaces = class_node.child_by_field_name(cs.FIELD_INTERFACES)
    if java_interfaces:
        extract_java_interface_names(
            java_interfaces, resolved, module_qn, resolve_to_qn
        )

    # C#: the first 'base_list' child holds both base class and interfaces
    base_list = next(
        (child for child in class_node.children if child.type == cs.TS_CS_BASE_LIST),
        None,
    )
    if base_list is not None:
        extract_csharp_interface_names(base_list, resolved, module_qn, resolve_to_qn)

    return resolved
314387

315388

@@ -325,3 +398,59 @@ def extract_java_interface_names(
325398
if type_child.type == cs.TS_TYPE_IDENTIFIER and type_child.text:
326399
if interface_name := safe_decode_text(type_child):
327400
interface_list.append(resolve_to_qn(interface_name, module_qn))
401+
402+
403+
def extract_csharp_interface_names(
    base_list_node: Node,
    interface_list: list[str],
    module_qn: str,
    resolve_to_qn: Callable[[str, str], str],
) -> None:
    """Append the resolved interface names of a C# ``base_list`` to a list.

    In C#, base_list contains both base class and interfaces, e.g.
    ``class Foo : BaseClass, IInterface1, IInterface2``. Only plain
    identifiers are considered, and an identifier is treated as an interface
    when it follows the naming convention: ``I`` followed by an uppercase
    letter.

    Args:
        base_list_node: The ``base_list`` node of a class declaration.
        interface_list: Output list; resolved qualified names are appended
            to it in source order.
        module_qn: Qualified name of the module, passed through to
            ``resolve_to_qn``.
        resolve_to_qn: Callback mapping ``(name, module_qn)`` to a qualified
            name. It is invoked exactly once per detected interface.
    """
    for child in base_list_node.children:
        if child.type != cs.TS_IDENTIFIER or not child.text:
            continue
        name = safe_decode_text(child)
        if not name:
            continue
        # C# convention: interfaces start with 'I' followed by uppercase
        if len(name) > 1 and name.startswith("I") and name[1].isupper():
            # Resolve once and reuse the result for both the list and the log.
            interface_qn = resolve_to_qn(name, module_qn)
            interface_list.append(interface_qn)
            logger.debug(
                logs.CLASS_FOUND_INTERFACE.format(name=name, qn=interface_qn)
            )
423+
424+
425+
def extract_csharp_base_class(
    class_node: Node,
    module_qn: str,
    resolve_to_qn: Callable[[str, str], str],
) -> list[str]:
    """Return the resolved base class of a C# class declaration, if any.

    In C#, base_list contains both base class and interfaces, e.g.
    ``class Foo : BaseClass, IInterface1, IInterface2``. The base class is
    taken to be the first plain identifier of the first ``base_list`` child
    that does not look like an interface (``I`` followed by an uppercase
    letter). C# allows a single direct base class, so the scan stops at the
    first match.

    Args:
        class_node: Class declaration node to inspect.
        module_qn: Qualified name of the module, passed through to
            ``resolve_to_qn``.
        resolve_to_qn: Callback mapping ``(name, module_qn)`` to a qualified
            name. It is invoked at most once.

    Returns:
        A list with zero or one qualified base class name.
    """
    for child in class_node.children:
        if child.type != cs.TS_CS_BASE_LIST:
            continue
        for entry in child.children:
            if entry.type != cs.TS_IDENTIFIER or not entry.text:
                continue
            name = safe_decode_text(entry)
            if not name:
                continue
            # Skip interfaces (start with 'I' followed by uppercase).
            if len(name) > 1 and name.startswith("I") and name[1].isupper():
                continue
            # Resolve once and reuse the result for both the log and the return.
            base_class_qn = resolve_to_qn(name, module_qn)
            logger.debug(
                logs.CLASS_FOUND_BASE_CLASS.format(name=name, qn=base_class_qn)
            )
            return [base_class_qn]
        # Only the first base_list is inspected.
        break

    return []

0 commit comments

Comments
 (0)