-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_index.py
More file actions
executable file
·60 lines (50 loc) · 1.83 KB
/
check_index.py
File metadata and controls
executable file
·60 lines (50 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
"""
Quick script to check what's saved in Elasticsearch
"""
from elasticsearch_indexer import ElasticsearchIndexer
def main(index_name='rag_chunks', max_docs=100):
    """Print a summary of the documents stored in an Elasticsearch index.

    Connects through ``ElasticsearchIndexer``, prints the document count
    reported by ``get_index_stats``, then runs a ``match_all`` search for up
    to ``max_docs`` documents and prints each one's chunk id, type, page and
    a content preview truncated to 150 characters.

    Args:
        index_name: Name of the index to inspect. Defaults to ``'rag_chunks'``.
        max_docs: Maximum number of documents to request. Defaults to 100.

    Returns:
        None. Results and errors are written to stdout; on any failure a
        message is printed and the function returns early.
    """
    # Fields requested from each hit; shared by both search call styles below.
    source_fields = [
        'chunk_id', 'type', 'page', 'content',
        'embedding_model', 'embedding_dim', 'metadata', 'document_info',
    ]
    preview_limit = 150

    try:
        indexer = ElasticsearchIndexer()
    except Exception as e:
        print(f"Error connecting to Elasticsearch: {e}")
        return

    # Get index stats
    stats = indexer.get_index_stats(index_name)
    if 'error' in stats:
        print(f"Error: {stats['error']}")
        return

    print(f"Index: {index_name}")
    print(f"Total documents: {stats.get('document_count', 0)}")
    print("=" * 80)

    # Get all documents (capped at max_docs)
    try:
        # Keyword-argument call style (Elasticsearch 8.x API).
        response = indexer.client.search(
            index=index_name,
            query={'match_all': {}},
            size=max_docs,
            _source=source_fields,
        )
    except Exception as first_err:
        # Best-effort fallback for older clients that expect the request
        # wrapped in ``body``. The broad catch is deliberate: this is a
        # diagnostic script and any failure should trigger one retry.
        try:
            response = indexer.client.search(
                index=index_name,
                body={'query': {'match_all': {}}, 'size': max_docs},
                _source=source_fields,
            )
        except Exception as second_err:
            print(f"Error retrieving documents: {second_err}")
            # Also surface the first failure so the root cause is not hidden.
            print(f"First attempt failed with: {first_err}")
            return

    hits = response['hits']['hits']
    print(f"\nRetrieved {len(hits)} documents:\n")

    for i, hit in enumerate(hits, 1):
        doc = hit.get('_source') or {}
        print(f"[{i}] Chunk ID: {doc.get('chunk_id')} | Type: {doc.get('type')} | Page: {doc.get('page')}")

        # A stored null (or non-string) content must not break the preview:
        # dict.get's default only applies when the key is absent.
        content = doc.get('content')
        content = '' if content is None else str(content)
        if len(content) > preview_limit:
            content = content[:preview_limit] + '...'
        print(f" Content: {content}")
        print()
# Script entry point: run the index check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()