-
-
Notifications
You must be signed in to change notification settings - Fork 784
Expand file tree
/
Copy pathindex.js
More file actions
208 lines (184 loc) · 6.81 KB
/
index.js
File metadata and controls
208 lines (184 loc) · 6.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
// --------------------------------------
// 1. Import necessary modules
// --------------------------------------
import express from "express";
import bodyParser from "body-parser";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { BraveSearch } from "@langchain/community/tools/brave_search";
import OpenAI from "openai";
import cheerio from "cheerio";
import dotenv from "dotenv";
dotenv.config();
// --------------------------------------
// 2. Initialize Express
// --------------------------------------
// TCP port the HTTP server listens on.
const port = 3005;

// Express application; incoming JSON request bodies are parsed by body-parser
// before any route handler runs.
const app = express();
app.use(bodyParser.json());
// --------------------------------------
// 3. Initialize OpenAI-compatible chat client (Groq endpoint) + Embeddings
// --------------------------------------
// Embedding model client, built with the library defaults (no explicit options).
// NOTE(review): presumably picks up its credentials from the environment — confirm.
const embeddings = new OpenAIEmbeddings();

// Chat-completions client. The OpenAI SDK is pointed at Groq's OpenAI-compatible
// endpoint and authenticated with GROQ_API_KEY from the environment.
const openai = new OpenAI({
  apiKey: process.env.GROQ_API_KEY,
  baseURL: "https://api.groq.com/openai/v1",
});
// --------------------------------------
// 4. Helper: Rephrase user input
// --------------------------------------
/**
 * Ask the chat model for a concise, search-engine-friendly version of the input.
 *
 * This is best-effort: if the request fails, or the model returns no usable
 * text (missing choice, null content, or only whitespace), the original input
 * is returned unchanged so the caller always has a query to search with.
 *
 * @param {string} input - The user's original message.
 * @returns {Promise<string>} The trimmed rephrased query, or `input` as a fallback.
 */
async function rephraseInput(input) {
  try {
    const response = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content:
            "You are a rephraser. Always reply with a concise version of the input, optimized for a search engine query.",
        },
        { role: "user", content: input },
      ],
    });
    // The completion payload is external data: the choice list may be empty and
    // the message content may be null, so read it defensively.
    const rephrased = response.choices?.[0]?.message?.content?.trim();
    return rephrased ? rephrased : input;
  } catch (err) {
    console.error("Error rephrasing input:", err);
    return input;
  }
}
// --------------------------------------
// 5. Helper: Extract main text from a web page
// --------------------------------------
/**
 * Reduce an HTML document to the plain text of its <body>.
 *
 * Script, style, head, nav, footer, iframe and img elements are dropped before
 * the text is read; every run of whitespace is then collapsed to one space and
 * the result is trimmed.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} link - URL the HTML came from (currently unused).
 * @returns {string} Whitespace-normalized body text.
 */
function extractMainContent(html, link) {
  const whitespaceRun = /\s+/g;
  const $ = cheerio.load(html);

  // Strip elements that carry no readable article text.
  const noise = $("script, style, head, nav, footer, iframe, img");
  noise.remove();

  const bodyText = $("body").text();
  const singleSpaced = bodyText.replace(whitespaceRun, " ");
  return singleSpaced.trim();
}
// --------------------------------------
// 6. Helper: Generate Follow-Up Questions
// --------------------------------------
/**
 * Ask the chat model for follow-up questions about a piece of text.
 *
 * The model is asked for a JSON array, but chat models often wrap the array in
 * a Markdown code fence, surrounding prose, or an enclosing object. The
 * outermost `[...]` span of the reply is therefore parsed rather than the raw
 * reply, and the result is kept only if it is an array containing at least one
 * non-empty string. In every other case (request failure, unparsable reply,
 * wrong shape) a fixed list of generic questions is returned, so this function
 * never rejects.
 *
 * @param {string} answer - Text to generate follow-up questions for.
 * @returns {Promise<string[]>} Questions from the model, or the generic fallback list.
 */
async function generateFollowUpQuestions(answer) {
  const fallbackQuestions = [
    "Can you explain more?",
    "Why is that important?",
    "Tell me more about this topic.",
  ];
  try {
    const groqResponse = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content:
            "Generate 3 relevant follow-up questions based on the provided text. Return them as a JSON array.",
        },
        {
          role: "user",
          content: `Generate 3 follow-up questions for: ${answer}`,
        },
      ],
    });
    const raw = groqResponse.choices?.[0]?.message?.content ?? "";

    // Isolate the outermost bracketed span; fall back to the raw reply when
    // there is none so JSON.parse reports the problem via the catch below.
    const start = raw.indexOf("[");
    const end = raw.lastIndexOf("]");
    const candidate = start !== -1 && end > start ? raw.slice(start, end + 1) : raw;

    const parsed = JSON.parse(candidate);
    const questions = Array.isArray(parsed)
      ? parsed.filter((item) => typeof item === "string" && item.trim() !== "")
      : [];
    if (questions.length > 0) {
      return questions;
    }

    console.error(
      "Error generating follow-up questions:",
      new Error("Model reply did not contain a JSON array of questions")
    );
    return fallbackQuestions;
  } catch (err) {
    console.error("Error generating follow-up questions:", err);
    return fallbackQuestions;
  }
}
// --------------------------------------
// 7. Helper: Custom Domain Knowledge
// --------------------------------------
/**
 * Look up a canned answer for a small set of hard-coded topics.
 *
 * Matching is case-insensitive and on whole words (with an optional plural
 * "s"), so "cats" and "Cat" match the `cat` entry while words that merely
 * contain the letters, such as "location" or "education", do not. Topics are
 * checked in declaration order and the first match wins.
 *
 * @param {string} message - The user's message.
 * @returns {string|null} The canned answer, or null when no topic matches or
 *   `message` is not a string.
 */
function getCustomKnowledge(message) {
  if (typeof message !== "string") {
    return null;
  }
  const text = message.toLowerCase();
  const facts = {
    "pm of india": "🇮🇳 The current Prime Minister of India is **Narendra Modi**, serving since May 2014.",
    dog: "🐶 Dogs are loyal domestic animals known as human’s best friends.",
    cat: "🐱 Cats are independent and curious animals, loved for their agility and affection.",
    tiger: "🐯 Tigers are the largest wild cats and apex predators found mostly in Asia.",
    space: "🚀 Space is a vast expanse beyond Earth’s atmosphere, filled with stars, galaxies, and planets.",
    ocean: "🌊 Oceans cover over 70% of Earth’s surface and are home to millions of species.",
  };
  for (const [topic, fact] of Object.entries(facts)) {
    // Topics contain only letters and spaces, so they can be embedded in a
    // pattern without escaping. No `g` flag, so `test` carries no state.
    const wholeWord = new RegExp(`\\b${topic}s?\\b`);
    if (wholeWord.test(text)) {
      return fact;
    }
  }
  return null;
}
// --------------------------------------
// 8. Main POST Route
// --------------------------------------
// Upper bound on how long a single result page may take to download before it
// is abandoned, so one unresponsive site cannot stall the whole request.
const PAGE_FETCH_TIMEOUT_MS = 10000;

/**
 * POST / — answer a user query.
 *
 * Request body (JSON):
 *   - message {string}                    required, non-empty
 *   - textChunkSize {number}              default 800
 *   - textChunkOverlap {number}           default 200
 *   - numberOfSimilarityResults {number}  default 2
 *   - numberOfPagesToScan {number}        default 4
 *
 * Flow: return a canned answer if the message hits a hard-coded topic;
 * otherwise rephrase the message, run a Brave web search, download each result
 * page, split its text into chunks, embed them into an in-memory vector store,
 * pick the chunks most similar to the message, and ask the chat model to
 * summarize them.
 *
 * Responses:
 *   - 200 { answer, sources, followUpQuestions, responseTime } — `responseTime`
 *     is omitted for canned answers; `sources` holds one array of matched
 *     documents per scanned page (empty when that page failed).
 *   - 400 { error, details } when `message` is missing or not a non-empty string.
 *   - 500 { error, details } on any other failure.
 */
app.post("/", async (req, res) => {
  const startTime = Date.now();
  // `?? {}` keeps this destructuring from throwing when no body was parsed;
  // it runs outside the try/catch, so a throw here would leave the request
  // without any response.
  const {
    message,
    textChunkSize = 800,
    textChunkOverlap = 200,
    numberOfSimilarityResults = 2,
    numberOfPagesToScan = 4,
  } = req.body ?? {};
  console.log("\n📩 New query:", message);

  // Reject unusable input as a client error instead of letting it surface as
  // a 500 further down.
  if (typeof message !== "string" || message.trim() === "") {
    return res.status(400).json({
      error: "Bad Request",
      details: "`message` must be a non-empty string.",
    });
  }

  try {
    // Check for quick domain knowledge
    const predefined = getCustomKnowledge(message);
    if (predefined) {
      return res.json({
        answer: predefined,
        sources: [],
        followUpQuestions: await generateFollowUpQuestions(predefined),
      });
    }

    // Rephrase query
    const rephrasedMessage = await rephraseInput(message);
    console.log("🔁 Rephrased:", rephrasedMessage);

    // Initialize Brave Search
    const loader = new BraveSearch({ apiKey: process.env.BRAVE_SEARCH_API_KEY });
    const docs = await loader.call(rephrasedMessage, { count: numberOfPagesToScan });
    const normalized = JSON.parse(docs)
      .filter((d) => d.title && d.link)
      .slice(0, numberOfPagesToScan);
    console.log(`🔍 Found ${normalized.length} relevant web pages.`);

    // Fetch, chunk, and vectorize. Pages are processed in parallel; a failure
    // on one page is logged and contributes an empty list rather than failing
    // the whole request.
    const sources = await Promise.all(
      normalized.map(async ({ title, link }) => {
        try {
          const response = await fetch(link, {
            signal: AbortSignal.timeout(PAGE_FETCH_TIMEOUT_MS),
          });
          // Don't feed 4xx/5xx error pages to the summarizer as if they were
          // the article.
          if (!response.ok) {
            throw new Error(`Request failed with HTTP status ${response.status}`);
          }
          const html = await response.text();
          const content = extractMainContent(html, link);
          const splitter = new RecursiveCharacterTextSplitter({
            chunkSize: textChunkSize,
            chunkOverlap: textChunkOverlap,
          });
          const chunks = await splitter.splitText(content);
          if (chunks.length === 0) {
            // No text survived extraction: nothing to embed or search.
            return [];
          }
          const store = await MemoryVectorStore.fromTexts(chunks, { link, title }, embeddings);
          // Similarity is measured against the user's original wording, not
          // the rephrased search query.
          return await store.similaritySearch(message, numberOfSimilarityResults);
        } catch (err) {
          console.error("Error processing link:", link, err);
          return [];
        }
      })
    );

    // Prepare LLM summary
    const chat = await openai.chat.completions.create({
      model: "mixtral-8x7b-32768",
      messages: [
        {
          role: "system",
          content: `You are an intelligent assistant. Respond with an informative and structured summary for the query: "${message}" using context below.`,
        },
        { role: "user", content: `Sources: ${JSON.stringify(sources)}` },
      ],
    });
    const finalAnswer = chat.choices[0].message.content;
    const followUpQuestions = await generateFollowUpQuestions(finalAnswer);

    // Return response
    res.json({
      answer: finalAnswer,
      sources,
      followUpQuestions,
      responseTime: `${(Date.now() - startTime) / 1000}s`,
    });
    console.log("✅ Response sent in", (Date.now() - startTime) / 1000, "seconds");
  } catch (error) {
    console.error("❌ Error in processing:", error);
    res.status(500).json({ error: "Internal Server Error", details: error.message });
  }
});
// --------------------------------------
// 9. Start Server
// --------------------------------------
// Start accepting connections on `port`; the callback logs the local URL.
app.listen(port, function onListening() {
  console.log(`🚀 Server is running on http://localhost:${port}`);
});