Skip to content

Commit 69fb03d

Browse files
authored
Merge pull request #24 from royisme/codex/refactor-neo4jknowledgeservice-to-querypipeline
Refactor Neo4j knowledge query pipeline
2 parents b3f01bf + 10296cd commit 69fb03d

8 files changed

Lines changed: 886 additions & 523 deletions

File tree

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,22 @@ The platform combines vector search, graph traversal, and LLM-driven analysis to
8787
- Docker (optional, for containerized deployment)
8888
- Node.js 18+ (for frontend development)
8989

90+
### Querying Knowledge
91+
```python
92+
# Query the knowledge base
93+
response = httpx.post("http://localhost:8000/api/v1/knowledge/query", json={
94+
"question": "How does the authentication system work?",
95+
"mode": "hybrid", # or "graph_only", "vector_only"
96+
"use_tools": False,
97+
"top_k": 5
98+
})
99+
100+
# Search similar documents
101+
response = httpx.post("http://localhost:8000/api/v1/knowledge/search", json={
102+
"query": "user authentication",
103+
"top_k": 10
104+
})
105+
```
90106
### Installation
91107

92108
Clone the repository and install dependencies:

README_CN.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ response = httpx.post("http://localhost:8000/api/v1/documents/directory", json={
150150
# 查询知识库
151151
response = httpx.post("http://localhost:8000/api/v1/knowledge/query", json={
152152
"question": "认证系统是如何工作的?",
153-
"mode": "hybrid" # 或 "graph_only", "vector_only"
153+
"mode": "hybrid", # 或 "graph_only", "vector_only"
154+
"use_tools": False,
155+
"top_k": 5
154156
})
155157

156158
# 搜索相似文档

docs/api/rest.md

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,26 @@ Query the knowledge base using GraphRAG.
135135
```json
136136
{
137137
"question": "How does authentication work in this system?",
138-
"mode": "hybrid"
138+
"mode": "hybrid",
139+
"use_graph": true,
140+
"use_vector": true,
141+
"use_tools": false,
142+
"top_k": 5,
143+
"graph_depth": 2
139144
}
140145
```
141146

142147
**Parameters**:
143148
- `question` (string, required): Question to ask
144149
- `mode` (string, optional): Query mode
145-
- `hybrid` (default): Graph traversal + vector search
146-
- `graph_only`: Only graph relationships
147-
- `vector_only`: Only vector similarity
150+
- `hybrid` (default): Run graph + vector retrieval sequentially
151+
- `graph_only`: Only run graph retrieval
152+
- `vector_only`: Only run vector retrieval
153+
- `use_graph` / `use_vector` (boolean, optional): Explicitly enable or disable graph or vector retrieval, overriding the defaults implied by `mode`
154+
- `use_tools` (boolean, optional): Execute registered workflow tools (default: `false`)
155+
- `top_k` (integer, optional): Number of results to return from vector retrieval (default: the global `top_k` setting)
156+
- `graph_depth` (integer, optional): Override graph traversal depth (default: `2`)
157+
- `tool_kwargs` (object, optional): Extra parameters passed to workflow tools
148158

149159
**Response**:
150160
```json
@@ -153,6 +163,7 @@ Query the knowledge base using GraphRAG.
153163
"answer": "The system uses JWT-based authentication...",
154164
"source_nodes": [
155165
{
166+
"node_id": "node-123",
156167
"text": "JWT implementation details...",
157168
"score": 0.92,
158169
"metadata": {
@@ -161,7 +172,33 @@ Query the knowledge base using GraphRAG.
161172
}
162173
}
163174
],
164-
"mode": "hybrid"
175+
"retrieved_nodes": [...],
176+
"pipeline_steps": [
177+
{
178+
"step": "graph_retrieval",
179+
"node_count": 3,
180+
"config": {
181+
"graph_traversal_depth": 2,
182+
"max_knowledge_sequence": 30
183+
}
184+
},
185+
{
186+
"step": "vector_retrieval",
187+
"node_count": 5,
188+
"config": {
189+
"top_k": 5
190+
}
191+
}
192+
],
193+
"tool_outputs": [],
194+
"query_mode": "hybrid",
195+
"config": {
196+
"graph": true,
197+
"vector": true,
198+
"tools": false,
199+
"top_k": 5,
200+
"graph_depth": 2
201+
}
165202
}
166203
```
167204

docs/architecture/components.md

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -367,9 +367,14 @@ async def lifespan(app: FastAPI):
367367
```python
368368
class Neo4jKnowledgeService:
369369
def __init__(self):
370-
self.graph_store = None # Neo4j graph store
371-
self.knowledge_index = None # LlamaIndex KnowledgeGraphIndex
372-
self.query_engine = None # RAG query engine
370+
self.graph_store = None # Neo4j graph store
371+
self.storage_context = None # Shared storage context
372+
self.knowledge_index = None # KnowledgeGraphIndex
373+
self.vector_index = None # VectorStoreIndex for similarity search
374+
self.response_synthesizer = None # LLM-backed synthesizer
375+
self.query_pipeline = None # Graph/Vector pipeline
376+
self.function_tools = [] # Workflow tools
377+
self.tool_node = None # Optional ToolNode
373378
self._initialized = False
374379
```
375380

@@ -389,7 +394,7 @@ sequenceDiagram
389394
KnowServ->>LlamaIndex: Configure Settings
390395
KnowServ->>LlamaIndex: Create KnowledgeGraphIndex
391396
LlamaIndex-->>KnowServ: Index ready
392-
KnowServ->>KnowServ: Create query engine
397+
KnowServ->>KnowServ: Build QueryPipeline (graph + vector + synth)
393398
KnowServ-->>Client: Initialized
394399
```
395400

@@ -419,24 +424,29 @@ async def add_document(
419424
async def query(
420425
self,
421426
question: str,
422-
top_k: int = 5
427+
*,
428+
mode: str = "hybrid",
429+
use_tools: bool = False
423430
) -> Dict[str, Any]:
424-
"""Query knowledge base with RAG"""
425-
# 1. Use query engine to retrieve relevant context
426-
# 2. Generate answer using LLM with context
427-
response = await asyncio.to_thread(
428-
self.query_engine.query,
429-
question
430-
)
431+
"""Run the QueryPipeline composed of graph/vector retrievers and a synthesizer."""
432+
config = self._resolve_pipeline_config(mode, use_tools=use_tools)
433+
result = await asyncio.to_thread(self.query_pipeline.run, question, config)
431434

432-
# 3. Return answer with source nodes
433435
return {
434436
"success": True,
435-
"answer": str(response),
436-
"sources": [node.metadata for node in response.source_nodes]
437+
"answer": str(result["response"]),
438+
"source_nodes": format_sources(result["source_nodes"]),
439+
"pipeline_steps": result["steps"],
440+
"tool_outputs": result["tool_outputs"]
437441
}
438442
```
439443

444+
**Pipeline Components**:
445+
1. `KnowledgeGraphRAGRetriever` — extracts entities and traverses the property graph.
446+
2. `VectorIndexRetriever` — performs vector similarity search over the Neo4j vector index.
447+
3. `ResponseSynthesizer` — merges retrieved context and generates the final answer.
448+
4. `FunctionTool` / `ToolNode` (optional) — exposes the query as a workflow tool for multi-turn agents.
449+
440450
#### 3. Semantic Search
441451
```python
442452
async def search_similar(

0 commit comments

Comments
 (0)