-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi.py
More file actions
185 lines (153 loc) · 6.72 KB
/
api.py
File metadata and controls
185 lines (153 loc) · 6.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
import os
import logging
import vector_db
# Process-wide logging: timestamped, INFO-level messages.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO
)
logger = logging.getLogger(__name__)

app = FastAPI(title="RazorpayX AI Support API", version="1.0.0")

# Setup CORS
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# maximally permissive — confirm this is intended before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- DTOs ---
class ChatMessage(BaseModel):
    """One prior turn of the conversation, supplied by the client."""
    role: str  # "user" for the end user; any other value is rendered as "Assistant" when building the prompt
    content: str  # the message text
class ChatRequest(BaseModel):
    """Request body for POST /api/v1/chat."""
    query_text: str = Field(..., description="The user's question or query")
    role: str = Field(default="EXTERNAL", description="User role, e.g., INTERNAL or EXTERNAL")
    # "All Products" is a sentinel meaning "no product filter" (see chat_endpoint).
    product: Optional[str] = Field(default="All Products", description="Product filter")
    chat_history: List[ChatMessage] = Field(default_factory=list, description="Previous chat context")
class ChatResponse(BaseModel):
    """Response body for POST /api/v1/chat."""
    answer: str  # the assistant's answer (or a fallback/warning message)
# --- Core Logic from app.py ---
def get_best_available_llm():
    """Pick the first working LLM backend, based on configured API keys.

    Order of preference: Gemini 2.5 Pro, then Gemini 2.5 Flash (used as a
    fallback, e.g. after a 429 rate limit on Pro), then Azure OpenAI.
    Each Gemini candidate is verified with a cheap probe call before being
    returned.

    Returns:
        tuple: (provider, client, model_name) where provider is "gemini"
        (client is a google-genai Client) or "azure_openai" (client is a
        LangChain AzureChatOpenAI), or (None, None, None) when no backend
        is configured or reachable.
    """
    gemini_key = os.environ.get("GEMINI_API_KEY")
    azure_key = os.environ.get("AZURE_OPENAI_API_KEY")
    azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

    if gemini_key:
        client = None
        try:
            from google import genai
            client = genai.Client(api_key=gemini_key)
            # Probe call: confirms the Pro model is actually reachable.
            test_response = client.models.generate_content(
                model="gemini-2.5-pro",
                contents="test"
            )
            if test_response.text:
                return ("gemini", client, "gemini-2.5-pro")
        except Exception as e:
            if "429" in str(e):
                logger.warning("Gemini Pro rate limit reached (429). Falling back to Flash.")
            else:
                logger.warning(f"Gemini 2.5 Pro failed: {e}")
            # BUG FIX: only attempt the Flash fallback when the client was
            # actually constructed. Previously, a failure inside
            # genai.Client(...) left `client` unbound and the fallback died
            # with a misleading NameError instead of a clean provider error.
            if client is not None:
                try:
                    test_response = client.models.generate_content(
                        model="gemini-2.5-flash",
                        contents="test"
                    )
                    if test_response.text:
                        return ("gemini", client, "gemini-2.5-flash")
                except Exception as e2:
                    logger.error(f"Gemini 2.5 Flash failed: {e2}")

    if azure_key and azure_endpoint:
        try:
            from langchain_openai import AzureChatOpenAI
            llm = AzureChatOpenAI(
                azure_endpoint=azure_endpoint,
                api_key=azure_key,
                azure_deployment="hackon-fy26q3-gpt5",
                api_version="2025-01-01-preview"
            )
            # BUG FIX: log previously claimed "GPT-4o"; corrected to name the
            # model actually returned/used.
            logger.warning("Google services failed. Switched to Azure OpenAI gpt-5-chat.")
            return ("azure_openai", llm, "gpt-5-chat")
        except Exception as e:
            logger.error(f"Azure OpenAI failed: {e}")

    # No backend configured, or every candidate failed.
    return (None, None, None)
def generate_response(llm_type, client, model, query, context, chat_history):
    """Generate a grounded answer using the available LLM backend.

    Args:
        llm_type: "gemini" or "azure_openai"; any other value raises.
        client: provider handle from get_best_available_llm() — a
            google-genai Client for Gemini, an AzureChatOpenAI for Azure.
        model: Gemini model name (unused by the Azure branch, whose
            deployment is bound to the client).
        query: the user's question.
        context: retrieved knowledge-base text used to ground the answer.
        chat_history: messages exposing .role/.content; only the last 6
            are folded into the prompt.

    Returns:
        str: the model's answer, or a canned retry message when the
        provider returns empty text.

    Raises:
        Exception: when llm_type is unsupported, or wrapping any provider
            failure (original error attached as __cause__).
    """
    # BUG FIX (clarity): fail fast on an unsupported backend. Previously this
    # raise sat at the very end of the function, after the try block — it read
    # as unreachable and only fired after pointlessly building both prompts.
    # The exception message is unchanged, so callers observe the same error.
    if llm_type not in ("gemini", "azure_openai"):
        raise Exception("No LLM available. Please configure API keys.")

    # Fold the most recent turns of history into plain text for the prompt.
    history_text = ""
    if chat_history:
        for msg in chat_history[-6:]:
            role = "User" if msg.role == "user" else "Assistant"
            history_text += f"{role}: {msg.content}\n"

    system_prompt = """You are RazorpayX AI Support Assistant. You MUST follow these rules strictly:
1. ONLY answer based on the provided context from the knowledge base. Do NOT use any external knowledge or make up information.
2. If the context contains relevant information, provide a clear and helpful answer based ONLY on that context.
3. If the context does NOT contain relevant information to answer the question, respond with: "I don't have information about that in my knowledge base. Please contact RazorpayX support for assistance."
4. NEVER hallucinate or generate answers from general knowledge. Only use what's in the provided context.
5. Be concise, accurate, and professional.
6. If you're unsure, say so rather than guessing."""

    user_prompt = f"""Chat History:
{history_text}
Context from Knowledge Base:
{context}
User Question: {query}
Please provide a helpful answer based on the context above."""

    try:
        if llm_type == "gemini":
            from google.genai import types
            response = client.models.generate_content(
                model=model,
                contents=[
                    types.Content(role="user", parts=[types.Part(text=user_prompt)])
                ],
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                ),
            )
            return response.text or "I couldn't generate a response. Please try again."
        else:  # azure_openai — guaranteed by the guard above
            from langchain_core.messages import HumanMessage, SystemMessage
            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt)
            ]
            response = client.invoke(messages)
            return response.content or "I couldn't generate a response. Please try again."
    except Exception as e:
        # Chain the cause so tracebacks still show the provider's error.
        raise Exception(f"Error generating response: {str(e)}") from e
# --- Endpoints ---
@app.post("/api/v1/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a support question: retrieve KB context, then ask the LLM."""
    try:
        # "All Products" is the UI's "no filter" sentinel.
        product_filter = None if request.product == "All Products" else request.product

        # Pull up to 5 supporting documents from ChromaDB.
        results = vector_db.query_with_filters(
            query_text=request.query_text,
            role=request.role,
            product=product_filter,
            n_results=5
        )

        joined = ""
        if results and results.get("documents") and results["documents"][0]:
            joined = "\n\n---\n\n".join(results["documents"][0][:5])
        context = joined or "No relevant information found in the knowledge base."

        # Pick whichever LLM backend is configured and reachable.
        llm_type, llm_client, model_name = get_best_available_llm()
        if llm_type is None:
            # No model available: surface the raw retrieval results instead.
            return ChatResponse(answer=f"⚠️ No AI model is configured. Please add GEMINI_API_KEY or AZURE_OPENAI_API_KEY to use the chat feature.\n\nIn the meantime, here's what I found in the knowledge base:\n\n{context}")

        answer = generate_response(
            llm_type,
            llm_client,
            model_name,
            request.query_text,
            context,
            request.chat_history
        )
        return ChatResponse(answer=answer)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))