# Source: codealive-mcp/src/codealive_mcp_server.py at 9ebf5cc221cabedef4757222dc6b83f63b3daf57 (CodeAlive-AI/codealive-mcp, GitHub)
"""
CodeAlive MCP Server - Main entry point.

A Model Context Protocol server for semantic code search and analysis using CodeAlive API.
"""

import argparse
import datetime
import os
import sys
from pathlib import Path

from dotenv import load_dotenv
from fastmcp import FastMCP
from starlette.requests import Request
from starlette.responses import JSONResponse

# Load environment variables from the `.env` file that sits one directory
# above this file's directory. `dotenv_path` is kept at module level because
# main() also reports it in its debug output.
project_dir = Path(__file__).parent.parent
dotenv_path = project_dir / ".env"
load_dotenv(dotenv_path=dotenv_path)

# Add src directory to path for imports: this file's own directory is put at
# the front of sys.path so the `core` and `tools` imports below resolve no
# matter which working directory the script is launched from.
sys.path.insert(0, str(Path(__file__).parent))

# Import core components
from core import codealive_lifespan, setup_debug_logging
from tools import codebase_consultant, get_data_sources, codebase_search, get_overview

# Initialize FastMCP server with lifespan and enhanced system instructions.
#
# The `instructions` text is sent to MCP clients as guidance on how to use
# this server, so every tool name it mentions must match a tool that is
# actually registered further down in this module (`get_data_sources`,
# `codebase_search`, `codebase_consultant`, `get_overview`).
mcp = FastMCP(
    name="CodeAlive MCP Server",
    instructions="""
    This server provides access to the CodeAlive API for AI-powered code assistance and code understanding.

    CodeAlive enables you to:
    - Analyze code repositories and workspaces
    - Search through code using natural language
    - Understand code structure, dependencies, and patterns
    - Answer questions about code implementation details
    - Integrate with local git repositories for seamless code exploration

    When working with a codebase:
    1. First use `get_data_sources` to identify available repositories and workspaces
    2. Then use `codebase_search` to find relevant files and code snippets
    3. Finally, use `codebase_consultant` for in-depth analysis of the code

    For effective code exploration:
    - Start with broad queries to understand the overall structure
    - Use specific function/class names when looking for particular implementations
    - Combine natural language with code patterns in your queries
    - Always use "auto" search mode by default; it intelligently selects the appropriate search depth
    - IMPORTANT: Only use "deep" search mode for very complex conceptual queries as it's resource-intensive
    - Remember that context from previous messages is maintained in the same conversation

    CRITICAL - include_content parameter usage:
    You MUST intelligently determine if searching CURRENT or EXTERNAL repositories:

    - CURRENT repository (user's working directory): include_content=false
      * You have file access → Get paths from search, then use Read tool for latest content
    - EXTERNAL repositories (not in working directory): include_content=true
      * No file access → Must include content in search results

    Use these heuristics to identify CURRENT vs EXTERNAL (combine multiple signals):
    1. Repository/directory name matching (e.g., working in "my-app", repo named "my-app")
    2. Description matching observed codebase (tech stack, architecture, features)
    3. User's language ("this repo", "our code" = CURRENT; "the X service" = EXTERNAL)
    4. URL matching with git remote (when available)
    5. Working context (files you've been reading/editing match this repo)

    When uncertain, use context: Is user asking about their current work or a different service?

    Flexible data source usage:
    - You can use a workspace name as a single data source to search or chat across all its repositories at once
    - Alternatively, you can use specific repository names for more targeted searches
    - For complex queries, you can combine multiple repository names from different workspaces
    - Choose between workspace-level or repository-level access based on the scope of the query

    Repository integration:
    - CodeAlive can connect to repositories you've indexed in the system
    - If a user is working within a git repository that matches a CodeAlive-indexed repository (by URL),
      you can suggest using CodeAlive's search and chat to understand the codebase
    - Data sources include repository URLs to help match with local git repositories

    When analyzing search results:
    - Pay attention to file paths to understand the project structure
    - Look for patterns across multiple matching files
    - Consider the relationships between different code components
    """,
    lifespan=codealive_lifespan
)


# Health check endpoint for AWS ALB, served outside the MCP protocol path
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request: Request) -> JSONResponse:
    """Return a static "healthy" JSON payload stamped with the current UTC time.

    Args:
        request: Incoming HTTP request; its contents are not inspected.

    Returns:
        A JSON response with the keys ``status``, ``timestamp`` and ``service``.
    """
    now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
    payload = {
        "status": "healthy",
        "timestamp": now_utc.isoformat(),
        "service": "codealive-mcp-server",
    }
    return JSONResponse(payload)


# Register tools: each imported tool function is wrapped by its own
# `mcp.tool()` decorator call, in the same order as before.
for _tool_fn in (codebase_consultant, get_data_sources, codebase_search, get_overview):
    mcp.tool()(_tool_fn)
del _tool_fn  # keep the loop variable out of the module namespace


def _log(*parts: object) -> None:
    """Write a human-readable diagnostic line to stderr.

    In STDIO transport, stdout carries the MCP protocol stream, so startup
    messages must not be written there; stderr is used for every transport
    to keep the behavior uniform.
    """
    print(*parts, file=sys.stderr)


def main() -> None:
    """Main entry point for the server.

    Parses command-line options, mirrors them into the environment variables
    read by the rest of the application (``CODEALIVE_API_KEY``,
    ``CODEALIVE_BASE_URL``, ``CODEALIVE_IGNORE_SSL``, ``DEBUG_MODE``,
    ``TRANSPORT_MODE``), validates the configuration for the selected
    transport and then runs the MCP server.

    All diagnostics are written to stderr so that stdout stays clean for the
    protocol stream in STDIO mode.

    Raises:
        SystemExit: with code 2 when the command line is invalid (including
            an unknown ``--transport`` value), or with code 1 when STDIO
            transport is selected and no API key is configured.
    """
    parser = argparse.ArgumentParser(description="CodeAlive MCP Server")
    parser.add_argument("--api-key", help="CodeAlive API Key")
    parser.add_argument("--base-url", help="CodeAlive Base URL")
    # Restrict to the transports handled below; previously an unknown value
    # was reported as HTTP mode but then silently started the STDIO transport.
    parser.add_argument(
        "--transport",
        help="Transport type (stdio or http)",
        choices=("stdio", "http"),
        default="stdio",
    )
    parser.add_argument("--host", help="Host for HTTP transport", default="0.0.0.0")
    parser.add_argument("--port", help="Port for HTTP transport", type=int, default=8000)
    parser.add_argument("--debug", action="store_true", help="Enable debug mode for verbose logging")
    parser.add_argument("--ignore-ssl", action="store_true", help="Ignore SSL certificate validation")

    args = parser.parse_args()

    # Command line arguments take precedence over .env file/environment variables
    if args.api_key:
        os.environ["CODEALIVE_API_KEY"] = args.api_key

    if args.base_url:
        os.environ["CODEALIVE_BASE_URL"] = args.base_url
        _log(f"Using base URL from command line: {args.base_url}")

    # Disable SSL verification if explicitly requested or in debug mode.
    # NOTE(review): --debug implicitly turns off certificate validation;
    # confirm this is intended outside local development.
    if args.ignore_ssl or args.debug:
        os.environ["CODEALIVE_IGNORE_SSL"] = "true"
        if args.ignore_ssl:
            _log("SSL certificate validation disabled by --ignore-ssl flag")
        else:
            _log("SSL certificate validation disabled in debug mode")

    # Debug environment if requested
    if args.debug:
        os.environ["DEBUG_MODE"] = "true"
        setup_debug_logging()
        _log("\nDEBUG MODE ENABLED")
        _log("Environment:")
        _log(f"  - Current working dir: {os.getcwd()}")
        _log(f"  - Script location: {__file__}")
        _log(f"  - Dotenv path: {dotenv_path}")
        _log(f"  - Dotenv file exists: {os.path.exists(dotenv_path)}")

    # Set transport mode for validation
    os.environ["TRANSPORT_MODE"] = args.transport

    api_key = os.environ.get("CODEALIVE_API_KEY", "")
    base_url = os.environ.get("CODEALIVE_BASE_URL", "")

    if args.transport == "stdio":
        # STDIO mode: require API key in environment
        if not api_key:
            _log("ERROR: STDIO mode requires CODEALIVE_API_KEY environment variable.")
            _log("Please set this in your .env file or environment.")
            sys.exit(1)
        # Only the last four characters are shown, and only for keys longer
        # than four characters, so the full key is never echoed.
        _log(f"STDIO mode: Using API key from environment (ends with: ...{api_key[-4:] if len(api_key) > 4 else '****'})")
    else:
        # HTTP mode: API keys should be provided via Authorization headers
        if api_key:
            _log("WARNING: HTTP mode detected CODEALIVE_API_KEY in environment.")
            _log("In production, API keys should be provided via Authorization: Bearer headers.")
            _log("Environment variable will be ignored in HTTP mode.")
        _log("HTTP mode: API keys will be extracted from Authorization: Bearer headers")

    if not base_url:
        _log("WARNING: CODEALIVE_BASE_URL environment variable is not set, using default.")
        _log("CodeAlive will connect to the production API at https://app.codealive.ai")

    # Run the server with the selected transport
    if args.transport == "http":
        # Use /api path to avoid conflicts with health endpoint
        mcp.run(transport="http", host=args.host, port=args.port, path="/api", stateless_http=True)
    else:
        mcp.run(transport="stdio")


# Start the server only when this file is executed as a script; importing the
# module (e.g. to reuse `mcp`) does not call main().
if __name__ == "__main__":
    main()