Skip to content

Commit 6dbdbfe

Browse files
committed
add some stats and cleanup
1 parent 7def35f commit 6dbdbfe

2 files changed

Lines changed: 10 additions & 6 deletions

File tree

index/scorch/stats.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ type Stats struct {
136136
MaxMemMergeZapTime uint64
137137
TotMemMergeSegments uint64
138138
TotMemorySegmentsAtRoot uint64
139+
140+
TotTrainedSamples uint64
141+
TotTrainTime uint64
139142
}
140143

141144
// atomically populates the returned map

index/scorch/train_vector.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"strings"
2929
"sync"
3030
"sync/atomic"
31+
"time"
3132

3233
"github.com/RoaringBitmap/roaring/v2"
3334
"github.com/blevesearch/bleve/v2/util"
@@ -132,6 +133,7 @@ func (t *vectorTrainer) trainLoop() {
132133
t.parent.asyncTasks.Done()
133134
}()
134135

136+
timeNow := time.Now()
135137
path := filepath.Join(t.parent.path, index.TrainedIndexFileName)
136138
for {
137139
select {
@@ -215,6 +217,8 @@ func (t *vectorTrainer) trainLoop() {
215217
// exit the trainer loop we've ingested the final sample set and training
216218
// is assumed to be complete.
217219
if t.trainingComplete.Load() {
220+
atomic.StoreUint64(&t.parent.stats.TotTrainedSamples, t.trainedSamples)
221+
atomic.StoreUint64(&t.parent.stats.TotTrainTime, uint64(time.Since(timeNow).Milliseconds()))
218222
return
219223
}
220224
}
@@ -302,19 +306,16 @@ func (t *vectorTrainer) train(batch *index.Batch) error {
302306
}
303307

304308
func (t *vectorTrainer) getInternal(key []byte) ([]byte, error) {
305-
// todo: return the total number of vectors that have been processed so far in training
306-
// in cbft use that as a checkpoint to resume training for n-x samples.
307309
switch string(key) {
308310
case string(util.BoltTrainCompleteKey):
309-
t.m.RLock()
310-
defer t.m.RUnlock()
311-
return []byte(fmt.Sprintf("%t", t.centroidIndex != nil)), nil
311+
return []byte(strconv.FormatBool(t.trainingComplete.Load())), nil
312312
}
313313
return nil, nil
314314
}
315315

316316
func (t *vectorTrainer) getCentroidIndex(field string) (interface{}, error) {
317-
// return the coarse quantizer of the centroid index belonging to the field
317+
// return the coarse quantizer of the trained faiss index belonging to the field
318+
// if its not available then zap performs naive merge
318319
t.m.RLock()
319320
defer t.m.RUnlock()
320321
if t.centroidIndex != nil {

0 commit comments

Comments
 (0)