Skip to content

Commit ab32455

Browse files
committed
tests and config
1 parent 3472479 commit ab32455

37 files changed

+1394
-446
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
- Do not add trivial POCO/unit tests (e.g., pure record equality); focus tests on meaningful flows and integration scenarios.
1919
- When registering graph stores (especially Postgres) use `AddKeyedSingleton<T>(key)` without bespoke lambdas unless strictly required; rely on DI to construct services.
2020
- All integration tests must run against real dependencies via Testcontainers modules (Neo4j, Postgres/AGE, Cosmos, Janus, etc.); do not fall back to Aspire seeding or mock containers.
21+
- Keep integration tests provider-agnostic; avoid adding Postgres-only scenarios when the same flow should apply to every backend.
2122

2223
# Conversations
2324
Any resulting updates to AGENTS.md should go under the "## Rules to follow" section.

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,38 @@ See [`docs/indexing-and-query.md`](docs/indexing-and-query.md) for the full list
164164

165165
---
166166

167+
## Configuration Parity
168+
169+
The .NET configuration surface now mirrors the original Python CLI. `GraphRagConfig` exposes the same sections as `graphrag.config`, including cache providers, NLP-driven graph extraction, claim extraction, graph pruning, and every search mode (local/global/DRIFT/basic). This makes it straightforward to move existing Python configs across without rethinking every knob:
170+
171+
```json
172+
{
173+
"GraphRag": {
174+
"Cache": { "Type": "File", "BaseDir": "cache" },
175+
"ExtractGraphNlp": {
176+
"ConcurrentRequests": 25,
177+
"TextAnalyzer": { "ModelName": "en_core_web_md", "IncludeNamedEntities": true }
178+
},
179+
"ExtractClaims": {
180+
"Enabled": true,
181+
"ModelId": "chat_model",
182+
"Prompt": "prompts/claims.txt",
183+
"MaxGleanings": 2
184+
},
185+
"PruneGraph": { "MinNodeFrequency": 2, "MinEdgeWeightPercentile": 40 },
186+
"EmbedGraph": { "Enabled": false, "Dimensions": 1536 },
187+
"LocalSearch": { "ChatModelId": "chat_model", "EmbeddingModelId": "embedding_model" },
188+
"GlobalSearch": { "MapPrompt": "prompts/global_map.txt", "ReducePrompt": "prompts/global_reduce.txt" },
189+
"DriftSearch": { "Prompt": "prompts/drift.txt", "Concurrency": 32 },
190+
"BasicSearch": { "K": 10 }
191+
}
192+
}
193+
```
194+
195+
`ClaimExtractionConfig.GetResolvedStrategy` mirrors the Python behaviour by loading prompt files from the configured root (and throwing if the file is missing) while still letting you override the entire `Strategy` block when needed.
196+
197+
---
198+
167199
## Community Detection & Graph Analytics
168200

169201
Community creation defaults to the fast label propagation algorithm. Tweak clustering directly through configuration:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
using System.Runtime.CompilerServices;
2+
3+
// Grants the ManagedCode.GraphRag.Tests assembly access to this assembly's internal members.
[assembly: InternalsVisibleTo("ManagedCode.GraphRag.Tests")]

src/ManagedCode.GraphRag.Postgres/PostgresGraphStore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ private static (IReadOnlyDictionary<string, object?> Writes, IReadOnlyCollection
574574
writes[key] = value;
575575
}
576576

577-
IReadOnlyDictionary<string, object?> writeResult = writes.Count == 0 ? EmptyProperties : writes;
577+
var writeResult = writes.Count == 0 ? EmptyProperties : writes;
578578
IReadOnlyCollection<string> removeResult = removes.Count == 0 ? Array.Empty<string>() : removes;
579579
return (writeResult, removeResult);
580580
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
namespace GraphRag.Config;
2+
3+
/// <summary>
/// Settings for the basic search mode. The type only declares data: every property
/// has a public setter, and the initializers below supply the defaults.
/// </summary>
public sealed class BasicSearchConfig
4+
{
5+
/// <summary>Optional prompt setting; <c>null</c> unless assigned. NOTE(review): presumably a prompt file path — confirm against the consumer.</summary>
public string? Prompt { get; set; }
6+
7+
/// <summary>Chat model identifier. Defaults to <c>"default_chat_model"</c>.</summary>
public string ChatModelId { get; set; } = "default_chat_model";
8+
9+
/// <summary>Embedding model identifier. Defaults to <c>"default_embedding_model"</c>.</summary>
public string EmbeddingModelId { get; set; } = "default_embedding_model";
10+
11+
/// <summary>Defaults to 10. NOTE(review): presumably the number of nearest matches to retrieve — confirm.</summary>
public int K { get; set; } = 10;
12+
13+
/// <summary>Defaults to 12,000. NOTE(review): presumably the token budget for the assembled context — confirm.</summary>
public int MaxContextTokens { get; set; } = 12_000;
14+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
namespace GraphRag.Config;
2+
3+
/// <summary>
/// Cache provider settings. The type only declares data; which of the optional
/// properties are required for a given <see cref="CacheType"/> is not decided here.
/// </summary>
public sealed class CacheConfig
4+
{
5+
/// <summary>Selected cache provider. Defaults to <see cref="CacheType.File"/>.</summary>
public CacheType Type { get; set; } = CacheType.File;
6+
7+
/// <summary>Base directory name. Defaults to <c>"cache"</c>.</summary>
public string BaseDir { get; set; } = "cache";
8+
9+
/// <summary>Optional connection string; <c>null</c> unless assigned.</summary>
public string? ConnectionString { get; set; }
10+
11+
/// <summary>Optional container name; <c>null</c> unless assigned.</summary>
public string? ContainerName { get; set; }
12+
13+
/// <summary>Optional URL; <c>null</c> unless assigned. NOTE(review): presumably only relevant to the Blob provider — confirm.</summary>
public string? StorageAccountBlobUrl { get; set; }
14+
15+
/// <summary>Optional URL; <c>null</c> unless assigned. NOTE(review): presumably only relevant to the CosmosDb provider — confirm.</summary>
public string? CosmosDbAccountUrl { get; set; }
16+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
namespace GraphRag.Config;
2+
3+
/// <summary>
/// Claim extraction settings, together with a helper that flattens them into the
/// key/value strategy dictionary.
/// </summary>
public sealed class ClaimExtractionConfig
{
    private const string DefaultDescription = "Any claims or facts that could be relevant to information discovery.";

    /// <summary>Whether claim extraction is switched on. Defaults to <c>false</c>.</summary>
    public bool Enabled { get; set; }

    /// <summary>Model identifier. Defaults to <c>"default_chat_model"</c>.</summary>
    public string ModelId { get; set; } = "default_chat_model";

    /// <summary>
    /// Optional path to the extraction prompt file; a relative path is resolved by
    /// <see cref="GetResolvedStrategy"/>. <c>null</c> unless assigned.
    /// </summary>
    public string? Prompt { get; set; }

    /// <summary>Claim description placed into the resolved strategy.</summary>
    public string Description { get; set; } = DefaultDescription;

    /// <summary>Value emitted as <c>max_gleanings</c>. Defaults to 1.</summary>
    public int MaxGleanings { get; set; } = 1;

    /// <summary>
    /// Explicit strategy override. When it holds at least one entry it is returned
    /// (as a copy) instead of the values derived from the other properties.
    /// </summary>
    public Dictionary<string, object?>? Strategy { get; set; }
        = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Builds the effective strategy dictionary (case-insensitive keys).
    /// </summary>
    /// <param name="rootDirectory">
    /// Directory against which a relative <see cref="Prompt"/> is resolved; the
    /// process's current directory is used when this is null or whitespace.
    /// </param>
    /// <returns>
    /// A copy of <see cref="Strategy"/> when it is non-empty; otherwise a new dictionary
    /// with the keys <c>model_id</c>, <c>extraction_prompt</c>, <c>claim_description</c>
    /// and <c>max_gleanings</c>.
    /// </returns>
    /// <exception cref="FileNotFoundException">
    /// <see cref="Prompt"/> is set but the resolved file does not exist.
    /// </exception>
    public Dictionary<string, object?> GetResolvedStrategy(string? rootDirectory = null)
    {
        if (Strategy is null || Strategy.Count == 0)
        {
            // Read the prompt first so a missing file surfaces before anything is built.
            var promptText = ReadPromptOrNull(rootDirectory);

            var resolved = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);
            resolved["model_id"] = ModelId;
            resolved["extraction_prompt"] = promptText;
            resolved["claim_description"] = Description;
            resolved["max_gleanings"] = MaxGleanings;
            return resolved;
        }

        // Hand back a copy so callers cannot mutate the configured override.
        return new Dictionary<string, object?>(Strategy, StringComparer.OrdinalIgnoreCase);
    }

    // Loads the prompt file's text, or returns null when no prompt is configured.
    private string? ReadPromptOrNull(string? rootDirectory)
    {
        if (Prompt is not { } promptSetting || string.IsNullOrWhiteSpace(promptSetting))
        {
            return null;
        }

        // The base directory is resolved before the rooted check, matching the original order.
        var root = rootDirectory is { } suppliedRoot && !string.IsNullOrWhiteSpace(suppliedRoot)
            ? suppliedRoot
            : Directory.GetCurrentDirectory();

        string promptPath;
        if (Path.IsPathRooted(promptSetting))
        {
            promptPath = promptSetting;
        }
        else
        {
            promptPath = Path.Combine(root, promptSetting);
        }

        if (File.Exists(promptPath))
        {
            return File.ReadAllText(promptPath);
        }

        throw new FileNotFoundException($"Claim extraction prompt '{promptPath}' could not be found.", promptPath);
    }
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
namespace GraphRag.Config;
2+
3+
/// <summary>
/// Settings for the DRIFT search mode. The type only declares data: every property
/// has a public setter, and properties without an initializer start at the CLR
/// default (<c>null</c> for nullable types, <c>0</c> for <see cref="double"/>).
/// </summary>
public sealed class DriftSearchConfig
4+
{
5+
/// <summary>Optional prompt setting; <c>null</c> unless assigned. NOTE(review): presumably a prompt file path — confirm.</summary>
public string? Prompt { get; set; }
6+
7+
/// <summary>Optional reduce-step prompt setting; <c>null</c> unless assigned.</summary>
public string? ReducePrompt { get; set; }
8+
9+
/// <summary>Chat model identifier. Defaults to <c>"default_chat_model"</c>.</summary>
public string ChatModelId { get; set; } = "default_chat_model";
10+
11+
/// <summary>Embedding model identifier. Defaults to <c>"default_embedding_model"</c>.</summary>
public string EmbeddingModelId { get; set; } = "default_embedding_model";
12+
13+
/// <summary>Defaults to 12,000.</summary>
public int DataMaxTokens { get; set; } = 12_000;
14+
15+
/// <summary>Optional; <c>null</c> unless assigned.</summary>
public int? ReduceMaxTokens { get; set; }
16+
17+
/// <summary>No initializer, so it starts at <c>0</c>.</summary>
public double ReduceTemperature { get; set; }
18+
19+
/// <summary>Optional; <c>null</c> unless assigned.</summary>
public int? ReduceMaxCompletionTokens { get; set; }
20+
21+
/// <summary>Defaults to 32. NOTE(review): presumably the number of concurrent requests — confirm.</summary>
public int Concurrency { get; set; } = 32;
22+
23+
/// <summary>Defaults to 20.</summary>
public int DriftFollowupCount { get; set; } = 20;
24+
25+
/// <summary>Defaults to 5.</summary>
public int PrimerFolds { get; set; } = 5;
26+
27+
/// <summary>Defaults to 12,000.</summary>
public int PrimerMaxTokens { get; set; } = 12_000;
28+
29+
/// <summary>Defaults to 3.</summary>
public int Depth { get; set; } = 3;
30+
31+
/// <summary>Defaults to 0.9; together with <see cref="LocalSearchCommunityProportion"/> the defaults sum to 1.0.</summary>
public double LocalSearchTextUnitProportion { get; set; } = 0.9;
32+
33+
/// <summary>Defaults to 0.1.</summary>
public double LocalSearchCommunityProportion { get; set; } = 0.1;
34+
35+
/// <summary>Defaults to 10.</summary>
public int LocalSearchTopKEntities { get; set; } = 10;
36+
37+
/// <summary>Defaults to 10.</summary>
public int LocalSearchTopKRelationships { get; set; } = 10;
38+
39+
/// <summary>Defaults to 12,000.</summary>
public int LocalSearchMaxDataTokens { get; set; } = 12_000;
40+
41+
/// <summary>No initializer, so it starts at <c>0</c>.</summary>
public double LocalSearchTemperature { get; set; }
42+
43+
/// <summary>Defaults to 1.0.</summary>
public double LocalSearchTopP { get; set; } = 1.0;
44+
45+
/// <summary>Defaults to 1.</summary>
public int LocalSearchSampleCount { get; set; } = 1;
46+
47+
/// <summary>Optional; <c>null</c> unless assigned.</summary>
public int? LocalSearchMaxGeneratedTokens { get; set; }
48+
49+
/// <summary>Optional; <c>null</c> unless assigned.</summary>
public int? LocalSearchMaxCompletionTokens { get; set; }
50+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
namespace GraphRag.Config;
2+
3+
/// <summary>
/// Graph embedding settings. The type only declares data; the initializers below
/// supply the defaults.
/// NOTE(review): the walk/window parameters look like random-walk embedding knobs — confirm against the consumer.
/// </summary>
public sealed class EmbedGraphConfig
4+
{
5+
/// <summary>Whether graph embedding is switched on. Defaults to <c>false</c>.</summary>
public bool Enabled { get; set; }
6+
7+
/// <summary>Defaults to 1536.</summary>
public int Dimensions { get; set; } = 1536;
8+
9+
/// <summary>Defaults to 40.</summary>
public int WalkLength { get; set; } = 40;
10+
11+
/// <summary>Defaults to 10.</summary>
public int NumWalks { get; set; } = 10;
12+
13+
/// <summary>Defaults to 2.</summary>
public int WindowSize { get; set; } = 2;
14+
15+
/// <summary>Defaults to 3.</summary>
public int Iterations { get; set; } = 3;
16+
17+
/// <summary>Defaults to 597,832.</summary>
public int RandomSeed { get; set; } = 597_832;
18+
19+
/// <summary>Defaults to <c>true</c>.</summary>
public bool UseLargestConnectedComponent { get; set; } = true;
20+
}

src/ManagedCode.GraphRag/Config/Enums.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@ public enum InputFileType
77
Json
88
}
99

10+
/// <summary>
/// Cache provider kinds. <see cref="File"/> is declared first, so it has the
/// underlying value 0.
/// </summary>
public enum CacheType
11+
{
12+
File,
13+
Memory,
14+
None,
15+
Blob,
16+
CosmosDb
17+
}
18+
1019
public enum StorageType
1120
{
1221
File,
@@ -34,6 +43,12 @@ public enum ChunkStrategyType
3443
Sentence
3544
}
3645

46+
/// <summary>
/// Asynchronous execution modes. <see cref="Threaded"/> is declared first, so it
/// has the underlying value 0.
/// NOTE(review): the member names appear to mirror the Python configuration's async modes — confirm.
/// </summary>
public enum AsyncType
47+
{
48+
Threaded,
49+
AsyncIO
50+
}
51+
3752
public enum SearchMethod
3853
{
3954
Local,

0 commit comments

Comments
 (0)