-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
143 lines (116 loc) · 5.2 KB
/
app.py
File metadata and controls
143 lines (116 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import streamlit as st
from dotenv import load_dotenv
from utils.loader import extract_content_from_url
from utils.chunker import chunk_text
from utils.vector_store import create_vector_store, query_vector_store
import google.generativeai as genai
# Load environment variables (expects GEMINI_API_KEY in a local .env file).
load_dotenv()
# Configure Gemini. NOTE(review): os.getenv returns None when the key is
# unset, so a missing key only surfaces on the first API call — consider
# failing fast here with an explicit check.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-2.5-flash')
# Streamlit app configuration — set_page_config must be the first
# Streamlit command executed in the script.
st.set_page_config(
page_title="Personal Research Assistant",
page_icon="🔍",
layout="wide"
)
def generate_summary(text):
    """Ask Gemini for a concise executive summary of *text*.

    Returns the model's plain-text response (aimed at under 300 words).
    """
    summary_prompt = f"""
Please provide a concise executive summary of the following text.
Focus on the main points, key findings, and overall significance.
Keep it under 300 words.
Text:
{text}
"""
    # Single-shot generation; no chat history is kept.
    return model.generate_content(summary_prompt).text
def generate_key_insights(text):
    """Ask Gemini to distill *text* into a short bulleted list of insights.

    Returns the model's plain-text response (a 5-7 point bulleted list).
    """
    insights_prompt = f"""
Extract the most important insights from the following text.
Present them as a bulleted list of 5-7 key points.
Each point should be concise but meaningful.
Text:
{text}
"""
    # Single-shot generation; no chat history is kept.
    return model.generate_content(insights_prompt).text
def answer_question(text, question):
    """Answer *question* using only the supplied *text* as context.

    The prompt instructs the model to admit when the answer is absent
    rather than fabricate one. Returns the model's plain-text response.
    """
    qa_prompt = f"""
Based on the following text, answer the question as accurately as possible.
If the answer isn't in the text, say "I couldn't find the answer in the provided content."
Text:
{text}
Question: {question}
Answer:
"""
    # Single-shot generation; no chat history is kept.
    return model.generate_content(qa_prompt).text
def main():
    """Streamlit entry point: extract an article from a URL and surface insights.

    Renders a URL input, then shows the article's summary, key insights,
    and either semantic search (long articles) or plain Q&A (short ones).

    Fix over the original: Streamlit reruns the whole script on every
    widget interaction (e.g. submitting a question), which previously
    re-fetched the article, re-called Gemini for the summary and insights,
    and rebuilt the vector store each time. Results are now cached in
    ``st.session_state`` keyed by URL (and by chunking settings for the
    vector store), so those expensive calls happen once per article.
    """
    st.title("🔍 Personal Research Assistant")
    st.markdown("Extract insights from any blog or article URL")

    # Sidebar for settings
    with st.sidebar:
        st.header("Settings")
        chunk_size = st.slider("Chunk size (for long articles)", 500, 2000, 1000)
        chunk_overlap = st.slider("Chunk overlap", 0, 500, 200)
        st.markdown("---")
        st.markdown("Powered by Goose3, Gemini 3, and LangChain")

    # Main content area
    url = st.text_input("Enter article/blog URL:", placeholder="https://example.com/article")
    if not url:
        return

    try:
        # Per-session cache: url -> {article, summary, insights, ...}.
        cache = st.session_state.setdefault("_article_cache", {})
        if url not in cache:
            with st.spinner("Extracting and processing content..."):
                article = extract_content_from_url(url)
                if not article.text:
                    # Not cached, so a later rerun retries the extraction.
                    st.error("Could not extract meaningful content from this URL.")
                    return
                cache[url] = {
                    "article": article,
                    "summary": generate_summary(article.text),
                    "insights": generate_key_insights(article.text),
                }
        entry = cache[url]
        article = entry["article"]

        # Display basic info
        st.subheader(article.title)
        if article.meta_description:
            st.caption(article.meta_description)

        # Summary and insights side by side (served from the cache).
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Executive Summary")
            st.write(entry["summary"])
        with col2:
            st.subheader("Key Insights")
            st.markdown(entry["insights"])

        if len(article.text.split()) > 1000:
            # Long article: chunk + embed once per (url, chunking settings).
            st.markdown("---")
            st.subheader("Semantic Search (for long articles)")
            vs_key = (chunk_size, chunk_overlap)
            if entry.get("vs_key") != vs_key:
                chunks = chunk_text(
                    article.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap
                )
                entry["vector_store"] = create_vector_store(chunks)
                entry["vs_key"] = vs_key
            vector_store = entry["vector_store"]

            query = st.text_input("Ask a question about the article:")
            if query:
                relevant_chunks = query_vector_store(vector_store, query, k=3)
                context = "\n\n".join(chunk.page_content for chunk in relevant_chunks)
                answer = answer_question(context, query)
                st.markdown("**Answer:**")
                st.write(answer)
                with st.expander("See relevant passages"):
                    for chunk in relevant_chunks:
                        st.markdown(f"```\n{chunk.page_content}\n```")
                st.markdown("---")
        else:
            # Short article: answer directly over the full text.
            st.markdown("---")
            st.subheader("Q&A")
            question = st.text_input("Ask a question about the article:")
            if question:
                answer = answer_question(article.text, question)
                st.markdown("**Answer:**")
                st.write(answer)
    except Exception as e:
        # Broad catch is deliberate: surface any failure (network, model,
        # parsing) in the UI instead of a raw Streamlit traceback.
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()