-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
123 lines (95 loc) · 3.32 KB
/
main.py
File metadata and controls
123 lines (95 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
Main entry point for RAG System.
Provides CLI utilities for initializing Pinecone index and processing documents.
"""
import argparse
import sys
from pathlib import Path
from src.config import Config
from src.rag import DocumentProcessor, PineconeManager
from src.utils.helpers import setup_logger
# Module-level logger for this CLI; not referenced by the functions shown in this file.
logger = setup_logger(__name__)
def initialize_index():
    """
    Create the Pinecone index and print its statistics.

    Validates the configuration, asks a ``PineconeManager`` to create the
    index, and prints whatever ``get_index_stats()`` returns. Any exception
    raised along the way is printed and the process exits with status 1.
    """
    try:
        print("\n🔧 Initializing Pinecone index...")
        Config.validate()

        manager = PineconeManager()
        manager.create_index()

        # Stats are fetched before the success message is printed, so a
        # failure while reading them is reported through the error path only.
        index_stats = manager.get_index_stats()
        print("\n✅ Index initialized successfully!")
        print(f"Index stats: {index_stats}")
    except Exception as exc:
        # Top-level CLI boundary: report the problem and signal failure.
        print(f"❌ Error initializing index: {exc}")
        sys.exit(1)
def process_documents(file_path: str, namespace: str = ""):
    """
    Ingest a single file, or every supported file under a directory.

    A regular file is handed to ``DocumentProcessor.process_file`` as is.
    A directory is searched recursively for ``.txt``, ``.pdf`` and ``.docx``
    files (suffix compared case-insensitively), which are passed together to
    ``DocumentProcessor.process_multiple_files``.

    The process exits with status 1 when the path is neither a file nor a
    directory, or when any exception is raised during processing. A directory
    with no supported files only prints a warning and returns.

    Args:
        file_path: Path to file or directory
        namespace: Pinecone namespace (optional)
    """
    try:
        print(f"\n📄 Processing documents from: {file_path}")
        Config.validate()

        doc_processor = DocumentProcessor()
        target = Path(file_path)

        # Single file: process it directly and finish.
        if target.is_file():
            chunk_count = doc_processor.process_file(
                str(target), target.name, namespace
            )
            print(f"✅ Processed {target.name} ({chunk_count} chunks)")
            return

        # Neither a file nor a directory: nothing to process.
        # SystemExit is not an Exception subclass, so the handler below
        # does not intercept this exit.
        if not target.is_dir():
            print(f"❌ Path not found: {file_path}")
            sys.exit(1)

        # Directory: gather (full path, file name) pairs for supported files.
        allowed_suffixes = [".txt", ".pdf", ".docx"]
        discovered = []
        for entry in target.rglob("*"):
            if entry.is_file() and entry.suffix.lower() in allowed_suffixes:
                discovered.append((str(entry), entry.name))

        if not discovered:
            print(f"⚠️ No supported files found in {file_path}")
            return

        total_chunks = doc_processor.process_multiple_files(discovered, namespace)
        print(f"✅ Processed {len(discovered)} files ({total_chunks} total chunks)")
    except Exception as exc:
        # Top-level CLI boundary: report the problem and signal failure.
        print(f"❌ Error processing documents: {exc}")
        sys.exit(1)
def main(argv=None):
    """
    Main CLI entry point.

    Builds the argument parser with two sub-commands, ``init`` and
    ``process``, and dispatches to ``initialize_index`` or
    ``process_documents``. When no sub-command is given, the help text is
    printed and the function returns without doing anything else.

    Args:
        argv: Optional list of command-line arguments, excluding the program
            name. When ``None`` (the default), argparse reads ``sys.argv[1:]``,
            so existing ``main()`` callers behave exactly as before.
    """
    parser = argparse.ArgumentParser(
        description="RAG System CLI - Initialize and manage RAG documents"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Initialize index command. It takes no arguments of its own, so the
    # returned sub-parser does not need to be kept.
    subparsers.add_parser(
        "init",
        help="Initialize Pinecone index",
    )

    # Process documents command
    process_parser = subparsers.add_parser(
        "process",
        help="Process documents for RAG",
    )
    process_parser.add_argument(
        "file_path",
        help="Path to file or directory to process",
    )
    process_parser.add_argument(
        "--namespace",
        default="",
        help="Pinecone namespace for organization",
    )

    args = parser.parse_args(argv)

    # Sub-commands are optional in argparse by default; show usage when none
    # was supplied.
    if not args.command:
        parser.print_help()
        return

    if args.command == "init":
        initialize_index()
    elif args.command == "process":
        process_documents(args.file_path, args.namespace)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()