-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcheck_test_examples.py
More file actions
69 lines (55 loc) · 2.3 KB
/
check_test_examples.py
File metadata and controls
69 lines (55 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
import sys
from pathlib import Path
# Add the src directory to sys.path so that packages located under it can be
# imported when this script is run directly (presumably this is where the
# `chroma_db` package imported below lives -- confirm against the repo layout).
# The script location is resolved first so the inserted entry is an absolute,
# symlink-free path that stays valid even if the working directory changes.
TESTGEN_AUTOMATION_ROOT = Path(__file__).resolve().parent
TESTGEN_AUTOMATION_SRC_DIR = TESTGEN_AUTOMATION_ROOT / "src"
if str(TESTGEN_AUTOMATION_SRC_DIR) not in sys.path:
    # Insert at the front so this directory wins over other sys.path entries.
    sys.path.insert(0, str(TESTGEN_AUTOMATION_SRC_DIR))
from chroma_db.chroma_client import get_chroma_client
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import torch
print("Checking test examples collection...")

# Initialize embeddings.
# Use the "mps" device when torch reports it as available, otherwise fall
# back to the CPU.
DEVICE_FOR_EMBEDDINGS = "mps" if torch.backends.mps.is_available() else "cpu"

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    # Hand the selected device to the underlying model explicitly; otherwise
    # DEVICE_FOR_EMBEDDINGS has no effect on where the model is loaded.
    model_kwargs={"device": DEVICE_FOR_EMBEDDINGS},
    encode_kwargs={"normalize_embeddings": True},
)
# Get ChromaDB client
chroma_client = get_chroma_client()

# Check if test examples collection exists
test_examples_collection_name = "test_examples_collection"
collections = chroma_client.list_collections()
# Depending on the installed chromadb release, list_collections() yields
# either Collection objects (with a `.name` attribute) or the collection
# names themselves as str-like values (NOTE(review): believed to be the
# 0.6.x behaviour -- confirm against the pinned chromadb version).
# Accept both shapes so the membership test below keeps working.
collection_names = [
    col if isinstance(col, str) else col.name
    for col in collections
]
print(f"Available collections: {collection_names}")
# Report on the test examples collection: whether it exists, how many
# documents it holds, and a preview of one stored document.
if test_examples_collection_name in collection_names:
    print(f"\n--- Found test examples collection: {test_examples_collection_name} ---")

    # Get the collection through the LangChain Chroma wrapper, reusing the
    # client and embedding function created earlier in this script.
    test_examples_vectorstore = Chroma(
        client=chroma_client,
        collection_name=test_examples_collection_name,
        embedding_function=embeddings,
    )

    # Count documents.
    # NOTE: `_collection` is a private attribute of the wrapper; it is used
    # here for `count()` / `get()` and may change between library versions.
    count = test_examples_vectorstore._collection.count()
    print(f"Total documents in test examples collection: {count}")

    if count > 0:
        # Get a sample document
        sample = test_examples_vectorstore._collection.get(limit=1)
        print("\n--- Sample document metadata ---")
        if sample['metadatas']:
            print(f"Metadata: {sample['metadatas'][0]}")
        # Skip the preview when the entry has no stored text: slicing None
        # would raise a TypeError.
        if sample['documents'] and sample['documents'][0] is not None:
            doc_content = sample['documents'][0]
            print(f"Document preview: {doc_content[:200]}...")
    else:
        print("Test examples collection is empty!")
else:
    print(f"\n--- Test examples collection '{test_examples_collection_name}' NOT FOUND ---")
    print("This is why RAG is hanging - the collection doesn't exist!")
    print("You need to index test examples first using the index_test_examples.py script.")