-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfix-embedding-dimensions.js
More file actions
149 lines (123 loc) · 5.85 KB
/
fix-embedding-dimensions.js
File metadata and controls
149 lines (123 loc) · 5.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env node
/**
* Fix embedding dimension mismatches in SPARQL store
* Finds embeddings with incompatible dimensions and either fixes or removes them
*/
import dotenv from 'dotenv';
dotenv.config();
import { initializeServices } from './src/mcp/lib/initialization.js';
/**
 * Determine the embedding dimension that stored vectors are expected to have.
 *
 * Reads the `llmProviders` config entry, keeps the providers that list the
 * `embedding` capability and takes the one with the lowest `priority`
 * (a missing or falsy priority sorts as 999, as in the original script).
 *
 * @param {{ get: (key: string) => any }} config - Config object returned by initializeServices().
 * @returns {number} The positive integer target dimension.
 * @throws {Error} If no embedding-capable provider is configured, or its
 *   `embeddingDimension` is not a positive integer.
 */
function resolveTargetDimension(config) {
  const llmProviders = config.get('llmProviders') || [];
  const activeEmbeddingProvider = llmProviders
    .filter((provider) => provider.capabilities?.includes('embedding'))
    .sort((a, b) => (a.priority || 999) - (b.priority || 999))[0];

  if (!activeEmbeddingProvider) {
    throw new Error('No LLM provider with the "embedding" capability is configured');
  }

  const targetDimension = activeEmbeddingProvider.embeddingDimension;
  // Without a usable target every stored vector would be classified as invalid
  // and rewritten, so refuse to continue rather than risk damaging the store.
  if (!Number.isInteger(targetDimension) || targetDimension <= 0) {
    throw new Error(
      `Embedding provider has an invalid embeddingDimension: ${targetDimension}`
    );
  }
  return targetDimension;
}

/**
 * Split SPARQL result bindings into valid embeddings (counted) and invalid ones (collected).
 *
 * A binding is valid when its `vectorContent` parses as a JSON array whose
 * length equals `targetDimension`. Unparseable content and non-array JSON are
 * recorded with `currentDimension: 0`; unparseable content gets `embedding: null`.
 *
 * @param {Array<object>} bindings - Rows with `entity`, `embeddingNode` and `vectorContent` terms.
 * @param {number} targetDimension - Expected vector length.
 * @returns {{ validCount: number, invalidEmbeddings: Array<object> }}
 */
function classifyEmbeddings(bindings, targetDimension) {
  let validCount = 0;
  const invalidEmbeddings = [];

  for (const binding of bindings) {
    const entity = binding.entity.value;
    const embeddingNode = binding.embeddingNode.value;
    const vectorContent = binding.vectorContent.value;

    let embedding;
    try {
      embedding = JSON.parse(vectorContent);
    } catch {
      console.log(`❌ ${entity}: Invalid JSON embedding`);
      invalidEmbeddings.push({ entity, embeddingNode, currentDimension: 0, embedding: null });
      continue;
    }

    const dimension = Array.isArray(embedding) ? embedding.length : 0;
    if (dimension === targetDimension) {
      validCount++;
      console.log(`✅ ${entity}: ${dimension}D (valid)`);
    } else {
      console.log(`❌ ${entity}: ${dimension}D (invalid, expected ${targetDimension}D)`);
      invalidEmbeddings.push({ entity, embeddingNode, currentDimension: dimension, embedding });
    }
  }

  return { validCount, invalidEmbeddings };
}

/**
 * Escape text for use inside a double-quoted SPARQL string literal.
 * Backslashes are escaped first so the quote escapes added afterwards stay intact.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeSparqlLiteral(text) {
  return text.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
}

/**
 * Build the update that replaces every `ragno:vectorContent` value of a node with one new vector.
 *
 * @param {string} embeddingNode - IRI of the embedding node.
 * @param {Array} vector - The adjusted embedding to store (serialised as JSON).
 * @returns {string} SPARQL DELETE/INSERT request.
 */
function buildVectorUpdateQuery(embeddingNode, vector) {
  const literal = escapeSparqlLiteral(JSON.stringify(vector));
  return `
    PREFIX ragno: <http://purl.org/stuff/ragno/>
    DELETE {
      <${embeddingNode}> ragno:vectorContent ?oldContent .
    }
    INSERT {
      <${embeddingNode}> ragno:vectorContent "${literal}" .
    }
    WHERE {
      <${embeddingNode}> ragno:vectorContent ?oldContent .
    }
  `;
}

/**
 * Build the update that detaches an embedding node from its entity and removes the node's triples.
 *
 * @param {string} entity - IRI of the entity owning the embedding.
 * @param {string} embeddingNode - IRI of the embedding node to remove.
 * @returns {string} SPARQL DELETE request.
 */
function buildEmbeddingDeleteQuery(entity, embeddingNode) {
  return `
    PREFIX ragno: <http://purl.org/stuff/ragno/>
    DELETE {
      <${entity}> ragno:hasEmbedding <${embeddingNode}> .
      <${embeddingNode}> ?p ?o .
    }
    WHERE {
      <${entity}> ragno:hasEmbedding <${embeddingNode}> .
      <${embeddingNode}> ?p ?o .
    }
  `;
}

/**
 * Find stored embeddings whose dimension differs from the configured target and repair them.
 *
 * Steps:
 *  1. Initialise services and resolve the target dimension from config.
 *  2. Query every `ragno:hasEmbedding` / `ragno:vectorContent` pair in the store.
 *  3. Array embeddings of the wrong length are passed through
 *     `storage.vectors.adjustEmbeddingLength` and written back; anything that
 *     is not a JSON array is removed together with its embedding node.
 *  4. Ask the storage layer to reload memory and report the index size.
 *
 * The function never rejects: any failure is logged and the process exits with
 * status 1. The memory manager is disposed on both the success and failure paths.
 *
 * @returns {Promise<void>}
 */
async function fixEmbeddingDimensions() {
  console.log('🔧 Starting embedding dimension fix...');

  let memoryManager;
  let failed = false;

  try {
    const services = await initializeServices();
    memoryManager = services.memoryManager;
    const storage = memoryManager.storage;

    // Validate the target before touching the store.
    const targetDimension = resolveTargetDimension(services.config);
    console.log(`🎯 Target embedding dimension: ${targetDimension}`);

    // Query to find all embeddings and their serialised vectors
    const checkQuery = `
      PREFIX semem: <http://purl.org/stuff/semem/>
      PREFIX ragno: <http://purl.org/stuff/ragno/>
      SELECT ?entity ?embeddingNode ?vectorContent WHERE {
        ?entity ragno:hasEmbedding ?embeddingNode .
        ?embeddingNode ragno:vectorContent ?vectorContent .
      }
    `;

    console.log('🔍 Analyzing stored embeddings...');
    const results = await storage.executeSparqlQuery(checkQuery);
    const bindings = results?.results?.bindings;
    if (!Array.isArray(bindings)) {
      // Treating a malformed response as "no embeddings" would hide a real problem.
      throw new Error('Unexpected SPARQL response: results.bindings is not an array');
    }

    const { validCount, invalidEmbeddings } = classifyEmbeddings(bindings, targetDimension);
    console.log(`\n📊 Summary: ${validCount} valid, ${invalidEmbeddings.length} invalid embeddings`);

    if (invalidEmbeddings.length > 0) {
      console.log('\n🔧 Fixing invalid embeddings...');

      // Updates run one at a time so the log output stays in order.
      for (const invalid of invalidEmbeddings) {
        console.log(`\n🔄 Processing ${invalid.entity}...`);

        if (Array.isArray(invalid.embedding)) {
          // Try to adjust the embedding dimension
          const adjustedEmbedding = storage.vectors.adjustEmbeddingLength(
            invalid.embedding,
            targetDimension
          );
          console.log(` 📏 Adjusted from ${invalid.currentDimension}D to ${adjustedEmbedding.length}D`);

          await storage.executeSparqlUpdate(
            buildVectorUpdateQuery(invalid.embeddingNode, adjustedEmbedding)
          );
          console.log(' ✅ Updated embedding in SPARQL store');
        } else {
          // Nothing salvageable: remove the embedding entirely
          console.log(' 🗑️ Removing invalid embedding...');
          await storage.executeSparqlUpdate(
            buildEmbeddingDeleteQuery(invalid.entity, invalid.embeddingNode)
          );
          console.log(' ✅ Removed invalid embedding from SPARQL store');
        }
      }
    }

    console.log('\n🔄 Reloading memory to rebuild FAISS index...');
    await storage._ensureMemoryLoaded();

    console.log('\n✅ Embedding dimension fix completed!');
    console.log(`📊 FAISS index now contains: ${storage.vectors.getIndexSize()} entries`);
  } catch (error) {
    console.error('❌ Error fixing embedding dimensions:', error);
    failed = true;
  } finally {
    // Release services even when the fix failed part-way through.
    if (memoryManager) {
      try {
        await memoryManager.dispose();
      } catch (disposeError) {
        console.error('❌ Error disposing memory manager:', disposeError);
        failed = true;
      }
    }
  }

  if (failed) {
    process.exit(1);
  }
}
// Run the fix when the script is executed. The returned promise is deliberately
// not awaited: fixEmbeddingDimensions() catches its own errors, logs them and
// terminates the process with exit status 1.
fixEmbeddingDimensions();