Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct KMeansClustering {
return (Array(0..<count), embeddings)
}

var rng = SeededRNG(seed: seed ?? UInt64.random(in: 0...UInt64.max))
var rng = SeededRNG(seed: seed ?? 0)
let normalized = normalizeEmbeddings(embeddings)
var centroids = initializeCentroids(from: normalized, k: k, rng: &rng)
var assignments = [Int](repeating: 0, count: count)
Expand Down Expand Up @@ -89,6 +89,45 @@ struct KMeansClustering {
return (assignments, centroids)
}

/// Best-of-N K-Means: performs `nInit` independent runs seeded with `baseSeed`, `baseSeed + 1`, …
/// (wrapping on overflow) and returns the run with the lowest inertia, in the spirit of
/// scikit-learn's `n_init`.
///
/// Rationale recorded by the original author: a single randomly seeded initialization is both
/// non-deterministic and fragile — small or borderline speakers collapse from one run to the
/// next (observed on the ICT 4-speaker case, where the speaker Komaki flipped between kept and
/// collapsed, with a measured swing of ~10% ↔ ~30%). Taking the best of N fixed-seed runs makes
/// re-clustering both reproducible and robust.
///
/// - Parameters:
///   - embeddings: Input vectors, forwarded unchanged to `clusterWithCentroids`.
///   - numClusters: Requested number of clusters.
///   - maxIterations: Iteration cap forwarded to every run.
///   - nInit: Number of seeded restarts; values of 1 or less produce a single run.
///   - baseSeed: Seed of the first run; restart `i` uses `baseSeed &+ UInt64(i)`.
/// - Returns: The assignments and centroids of the lowest-inertia run. On ties the earliest
///   run wins. If no run yields an inertia below `Double.greatestFiniteMagnitude`, one extra
///   run seeded with `baseSeed` is returned instead.
static func clusterWithCentroidsNInit(
    embeddings: [[Double]],
    numClusters: Int,
    maxIterations: Int = 300,
    nInit: Int = 10,
    baseSeed: UInt64 = 0
) -> (clusters: [Int], centroids: [[Double]]) {
    // A single seeded run; used by the degenerate path, every restart, and the final fallback.
    func run(seed: UInt64) -> (clusters: [Int], centroids: [[Double]]) {
        return clusterWithCentroids(
            embeddings: embeddings, numClusters: numClusters,
            maxIterations: maxIterations, seed: seed)
    }

    // Restarts only happen with more points than clusters and more than one requested run;
    // otherwise delegate to a single run.
    let restartsApply = embeddings.count > numClusters && nInit > 1
    if !restartsApply {
        return run(seed: baseSeed)
    }

    let unitEmbeddings = normalizeEmbeddings(embeddings)
    var winner: (clusters: [Int], centroids: [[Double]])?
    var lowestInertia = Double.greatestFiniteMagnitude

    for attempt in 0..<nInit {
        let candidate = run(seed: baseSeed &+ UInt64(attempt))
        let centroidCount = candidate.centroids.count

        // inertia = Σ ‖normalized(embedding) − assignedCentroid‖²
        // (per the original note, the centroids also live in the normalized space).
        var inertia: Double = 0
        for (position, label) in candidate.clusters.enumerated() {
            // Assignments that do not index a centroid contribute nothing to the sum.
            guard label >= 0, label < centroidCount else { continue }
            inertia += euclideanDistanceSquared(unitEmbeddings[position], candidate.centroids[label])
        }

        // Strict comparison: an equal-inertia later run never replaces an earlier one.
        if inertia < lowestInertia {
            lowestInertia = inertia
            winner = candidate
        }
    }

    if let chosen = winner {
        return chosen
    }
    return run(seed: baseSeed)
}

private static func normalizeEmbeddings(_ embeddings: [[Double]]) -> [[Double]] {
embeddings.map { embedding in
var norm: Double = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -704,10 +704,14 @@ struct VBxClustering {
"Speaker count \(detectedCount) outside bounds [\(constraints.minSpeakers), \(constraints.maxSpeakers)]; re-clustering to \(targetCount)"
)

let (kmeansClusters, centroids) = KMeansClustering.clusterWithCentroids(
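// Take the lowest-inertia result from n_init=10 deterministic initializations (sklearn style).
// A single random initialization non-deterministically collapses fragile speakers (demonstrated on ICT / Komaki: a ~10% ↔ ~30% swing).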
let (kmeansClusters, centroids) = KMeansClustering.clusterWithCentroidsNInit(
embeddings: trainingEmbeddings,
numClusters: targetCount,
maxIterations: 100
maxIterations: 100,
nInit: 10,
baseSeed: 0
)

return VBxOutput(
Expand Down