-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsettings.py
More file actions
122 lines (94 loc) · 6.02 KB
/
settings.py
File metadata and controls
122 lines (94 loc) · 6.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Configuration settings for Codebase RAG.
This module defines all application settings using Pydantic Settings.
Settings can be configured via environment variables or .env file.
"""
from typing import Optional, Literal, Dict, Any

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration for the Code Graph Knowledge Service.

    Every attribute below is a setting with a built-in default. Values can be
    overridden through environment variables or a ``.env`` file (UTF-8) in the
    working directory; unknown keys found there are ignored instead of
    raising a validation error.

    Fields declared with ``alias=...`` are looked up under that alias
    (for example ``NEO4J_USER`` for ``neo4j_username``); see the
    pydantic-settings documentation for the exact name-resolution rules.
    """

    # Application Settings
    app_name: str = "Code Graph Knowledge Service"
    app_version: str = "1.0.0"
    debug: bool = False

    # Server Settings (Two-Port Architecture)
    host: str = Field(default="0.0.0.0", description="Host for all services", alias="HOST")

    # Port configuration
    port: int = Field(default=8123, description="Legacy port (deprecated)", alias="PORT")
    mcp_port: int = Field(
        default=8000,
        description="MCP SSE service port (PRIMARY)",
        alias="MCP_PORT",
    )
    web_ui_port: int = Field(
        default=8080,
        description="Web UI + REST API port (SECONDARY)",
        alias="WEB_UI_PORT",
    )

    # Vector Search Settings (using Neo4j built-in vector index)
    vector_index_name: str = Field(
        default="knowledge_vectors",
        description="Neo4j vector index name",
    )
    vector_dimension: int = Field(default=384, description="Vector embedding dimension")

    # Neo4j Graph Database
    # NOTE(review): the default password looks like a local-development
    # placeholder; confirm it is always overridden in deployed environments.
    neo4j_uri: str = Field(
        default="bolt://localhost:7687",
        description="Neo4j connection URI",
        alias="NEO4J_URI",
    )
    neo4j_username: str = Field(
        default="neo4j",
        description="Neo4j username",
        alias="NEO4J_USER",
    )
    neo4j_password: str = Field(
        default="password",
        description="Neo4j password",
        alias="NEO4J_PASSWORD",
    )
    neo4j_database: str = Field(default="neo4j", description="Neo4j database name")

    # LLM Provider Configuration
    llm_provider: Literal["ollama", "openai", "gemini", "openrouter"] = Field(
        default="ollama",
        description="LLM provider to use",
        alias="LLM_PROVIDER",
    )

    # Ollama LLM Service
    ollama_base_url: str = Field(
        default="http://localhost:11434",
        description="Ollama service URL",
        alias="OLLAMA_HOST",
    )
    ollama_model: str = Field(
        default="llama2",
        description="Ollama model name",
        alias="OLLAMA_MODEL",
    )

    # OpenAI Configuration
    openai_api_key: Optional[str] = Field(
        default=None,
        description="OpenAI API key",
        alias="OPENAI_API_KEY",
    )
    openai_model: str = Field(
        default="gpt-3.5-turbo",
        description="OpenAI model name",
        alias="OPENAI_MODEL",
    )
    openai_base_url: Optional[str] = Field(
        default=None,
        description="OpenAI API base URL",
        alias="OPENAI_BASE_URL",
    )

    # Google Gemini Configuration
    google_api_key: Optional[str] = Field(
        default=None,
        description="Google API key",
        alias="GOOGLE_API_KEY",
    )
    gemini_model: str = Field(
        default="gemini-pro",
        description="Gemini model name",
        alias="GEMINI_MODEL",
    )

    # OpenRouter Configuration
    openrouter_api_key: Optional[str] = Field(
        default=None,
        description="OpenRouter API key",
        alias="OPENROUTER_API_KEY",
    )
    openrouter_base_url: str = Field(
        default="https://openrouter.ai/api/v1",
        description="OpenRouter API base URL",
        alias="OPENROUTER_BASE_URL",
    )
    openrouter_model: Optional[str] = Field(
        default="openai/gpt-3.5-turbo",
        description="OpenRouter model",
        alias="OPENROUTER_MODEL",
    )
    openrouter_max_tokens: int = Field(
        default=2048,
        description="OpenRouter max tokens for completion",
        alias="OPENROUTER_MAX_TOKENS",
    )

    # Embedding Provider Configuration
    embedding_provider: Literal["ollama", "openai", "gemini", "huggingface", "openrouter"] = Field(
        default="ollama",
        description="Embedding provider to use",
        alias="EMBEDDING_PROVIDER",
    )

    # Ollama Embedding
    ollama_embedding_model: str = Field(
        default="nomic-embed-text",
        description="Ollama embedding model",
        alias="OLLAMA_EMBEDDING_MODEL",
    )

    # OpenAI Embedding
    openai_embedding_model: str = Field(
        default="text-embedding-ada-002",
        description="OpenAI embedding model",
        alias="OPENAI_EMBEDDING_MODEL",
    )

    # Gemini Embedding
    gemini_embedding_model: str = Field(
        default="models/embedding-001",
        description="Gemini embedding model",
        alias="GEMINI_EMBEDDING_MODEL",
    )

    # HuggingFace Embedding
    huggingface_embedding_model: str = Field(
        default="BAAI/bge-small-en-v1.5",
        description="HuggingFace embedding model",
        alias="HF_EMBEDDING_MODEL",
    )

    # OpenRouter Embedding
    openrouter_embedding_model: str = Field(
        default="text-embedding-ada-002",
        description="OpenRouter embedding model",
        alias="OPENROUTER_EMBEDDING_MODEL",
    )

    # Model Parameters
    temperature: float = Field(default=0.1, description="LLM temperature")
    max_tokens: int = Field(default=2048, description="Maximum tokens for LLM response")

    # RAG Settings
    chunk_size: int = Field(default=512, description="Text chunk size for processing")
    chunk_overlap: int = Field(default=50, description="Chunk overlap size")
    top_k: int = Field(default=5, description="Top K results for retrieval")

    # Timeout Settings
    connection_timeout: int = Field(default=30, description="Connection timeout in seconds")
    operation_timeout: int = Field(default=120, description="Operation timeout in seconds")
    large_document_timeout: int = Field(
        default=300,
        description="Large document processing timeout in seconds",
    )

    # Document Processing Settings
    max_document_size: int = Field(
        default=10 * 1024 * 1024,
        description="Maximum document size in bytes (10MB)",
    )
    max_payload_size: int = Field(
        default=50 * 1024 * 1024,
        description="Maximum task payload size for storage (50MB)",
    )
    ingestion_pipelines: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict,
        description="Optional ingestion pipeline overrides",
    )

    # API Settings
    # NOTE(review): the wildcard CORS default allows every origin; confirm this
    # is restricted in production deployments.
    cors_origins: list = Field(default=["*"], description="CORS allowed origins")
    api_key: Optional[str] = Field(default=None, description="API authentication key")

    # logging
    log_file: Optional[str] = Field(default="app.log", description="Log file path")
    log_level: str = Field(default="INFO", description="Log level")

    # pydantic v2 style configuration. This replaces the inner ``class Config``,
    # which pydantic v2 still accepts but flags as deprecated.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",  # Ignore extra fields to avoid validation errors
    )
# Global settings instance, created once when this module is first imported.
# Values come from the field defaults, overridden by environment variables
# or the .env file; import this object rather than instantiating Settings again.
settings = Settings()