diff --git a/Sources/FluidAudio/Diarizer/Offline/Clustering/KMeansClustering.swift b/Sources/FluidAudio/Diarizer/Offline/Clustering/KMeansClustering.swift index a0615e119..296c03624 100644 --- a/Sources/FluidAudio/Diarizer/Offline/Clustering/KMeansClustering.swift +++ b/Sources/FluidAudio/Diarizer/Offline/Clustering/KMeansClustering.swift @@ -61,7 +61,7 @@ struct KMeansClustering { return (Array(0.. (clusters: [Int], centroids: [[Double]]) { + guard embeddings.count > numClusters, nInit > 1 else { + return clusterWithCentroids( + embeddings: embeddings, numClusters: numClusters, + maxIterations: maxIterations, seed: baseSeed) + } + let normalized = normalizeEmbeddings(embeddings) + var best: (clusters: [Int], centroids: [[Double]])? + var bestInertia = Double.greatestFiniteMagnitude + for i in 0..= 0 && c < result.centroids.count { + inertia += euclideanDistanceSquared(normalized[idx], result.centroids[c]) + } + if inertia < bestInertia { + bestInertia = inertia + best = result + } + } + return best ?? 
clusterWithCentroids( + embeddings: embeddings, numClusters: numClusters, + maxIterations: maxIterations, seed: baseSeed) + } + private static func normalizeEmbeddings(_ embeddings: [[Double]]) -> [[Double]] { embeddings.map { embedding in var norm: Double = 0 diff --git a/Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift b/Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift index dc9232417..1ec518f5f 100644 --- a/Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift +++ b/Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift @@ -704,10 +704,14 @@ struct VBxClustering { "Speaker count \(detectedCount) outside bounds [\(constraints.minSpeakers), \(constraints.maxSpeakers)]; re-clustering to \(targetCount)" ) - let (kmeansClusters, centroids) = KMeansClustering.clusterWithCentroids( + // Pick the minimum-inertia result out of n_init=10 deterministic initializations (sklearn style). A single random initialization + // non-deterministically collapses fragile speakers (observed on ICT Komaki, swinging between ~10% and ~30%). + let (kmeansClusters, centroids) = KMeansClustering.clusterWithCentroidsNInit( embeddings: trainingEmbeddings, numClusters: targetCount, - maxIterations: 100 + maxIterations: 100, + nInit: 10, + baseSeed: 0 ) return VBxOutput(