-
-
Notifications
You must be signed in to change notification settings - Fork 42
Decode gates, quality telemetry, adaptive debounce #688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d5ca06e
8f6d0d2
01efabe
24ac022
98ca097
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,11 +20,18 @@ extension SuggestionCoordinator { | |
| return | ||
| } | ||
|
|
||
| // The debounce window adapts to the last generation latency: snappier when the model is | ||
| // fast, calmer when it is slow (fewer doomed generations to cancel). The configured value | ||
| // is the fallback until a first latency exists. | ||
| let debounceMilliseconds = DebouncePolicy.milliseconds( | ||
| lastGenerationLatencyMilliseconds: latestLatencyMilliseconds, | ||
| fallback: settingsSnapshot.debounceMilliseconds | ||
| ) | ||
| // The debounce clock starts at the keystroke, not here. The host-publish poll has already | ||
| // consumed real wall time waiting for the host to publish the keystroke to AX, and that | ||
| // wait collapses bursts just as well as sleeping does. Stacking the full debounce on top | ||
| // of the publish wait was pure added latency, so only the unconsumed remainder is slept. | ||
| let remainingDelay = max(0, settingsSnapshot.debounceMilliseconds - consumedDelayMilliseconds) | ||
| let remainingDelay = max(0, debounceMilliseconds - consumedDelayMilliseconds) | ||
|
|
||
| // Task cancellation in Swift is cooperative, so we also use an explicit work id. | ||
| // That gives us strict "latest request wins" semantics even if an old task wakes up late. | ||
|
|
@@ -42,7 +49,7 @@ extension SuggestionCoordinator { | |
| logStage( | ||
| "debouncing", | ||
| workID: workID, | ||
| message: "Debouncing (\(settingsSnapshot.debounceMilliseconds)ms window) before generating." | ||
| message: "Debouncing (\(debounceMilliseconds)ms window, \(remainingDelay)ms remaining) before generating." | ||
| ) | ||
| } | ||
|
|
||
|
|
@@ -496,6 +503,10 @@ extension SuggestionCoordinator { | |
| guard liveContext.generation == result.generation else { | ||
|
|
||
| latestRawModelOutput = SuggestionDebugLogger.debugPreview(result.rawText) | ||
| // Lifecycle discards are counted under their own reasons so `generated` always equals | ||
| // `shown` plus the suppression histogram; without this, every drop here silently | ||
|
Comment on lines
503
to
+507
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The stale-drop, selected-text, and stale-accept-echo gates unconditionally call |
||
| // inflated the generated count against the others. | ||
| qualityMetricsStore.recordSuppressed(reason: "discardedStaleContext") | ||
| logStage( | ||
| "stale-drop", | ||
| workID: workID, | ||
|
|
@@ -514,6 +525,11 @@ extension SuggestionCoordinator { | |
| clearSuggestion() | ||
| hideOverlay(reason: "Overlay hidden because the model returned an empty continuation.") | ||
| state = .idle | ||
| // The router already counted engine-attributed suppressions (normalizer, confidence | ||
| // floor); only the unattributed "model produced nothing" case needs a ledger entry. | ||
| if result.suppressionReason == nil { | ||
| qualityMetricsStore.recordSuppressed(reason: "emptyUnattributed") | ||
| } | ||
| logStage( | ||
| "empty-result", | ||
| workID: workID, | ||
|
|
@@ -529,6 +545,7 @@ extension SuggestionCoordinator { | |
| clearSuggestion(clearDiagnostics: true) | ||
| hideOverlay(reason: "Overlay hidden because text is selected.") | ||
| state = .idle | ||
| qualityMetricsStore.recordSuppressed(reason: "discardedSelection") | ||
| logStage( | ||
| "selected-text", | ||
| workID: workID, | ||
|
|
@@ -553,6 +570,7 @@ extension SuggestionCoordinator { | |
| clearSuggestion(clearDiagnostics: false) | ||
| hideOverlay(reason: "Overlay hidden because the regeneration only echoed the just-accepted text before the host published it.") | ||
| state = .idle | ||
| qualityMetricsStore.recordSuppressed(reason: "discardedAcceptEcho") | ||
| logStage( | ||
| "stale-accept-echo", | ||
| workID: workID, | ||
|
|
@@ -576,6 +594,8 @@ extension SuggestionCoordinator { | |
| clearSuggestion() | ||
| hideOverlay(reason: "Overlay hidden because the completion failed the seam guard.") | ||
| state = .idle | ||
| let seamReason = if case .seamMisspelling = seamVerdict { "seamMisspelling" } else { "seamJunkPunctuationRun" } | ||
| qualityMetricsStore.recordSuppressed(reason: seamReason) | ||
| logStage( | ||
| "seam-suppressed", | ||
| workID: workID, | ||
|
|
@@ -589,6 +609,9 @@ extension SuggestionCoordinator { | |
|
|
||
| latestLatencyMilliseconds = Int(result.latency * 1000) | ||
| latestGenerationNumber = liveContext.generation | ||
| // One shown event per suggestion: this is the only place a fresh generation becomes | ||
| // visible (re-presentations after partial accepts reuse the same session). | ||
| qualityMetricsStore.recordShown() | ||
| let session = interactionState.startSession( | ||
| fullText: result.text, | ||
| liveContext: liveContext, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| import Combine | ||
| import Foundation | ||
|
|
||
| /// Local, always-on counters that answer "is suggestion quality improving for real use": how many | ||
| /// completions were generated, how many were shown, why the withheld ones were withheld, and how | ||
| /// many shown suggestions the user actually accepted. | ||
| /// | ||
| /// Latency tracking (`PerformanceMetricsStore`) stays opt-in because it records per-request rows; | ||
| /// these are lifetime counters with zero content, so they run unconditionally and survive restarts. | ||
| /// Acceptance rate (accepted / shown) is the closest thing to ground truth the app can measure on | ||
| /// device, and the suppression histogram tells the difference between "the model produced nothing" | ||
| /// and "a specific guard fired", which otherwise only exists scattered through debug-only JSONL. | ||
| @MainActor | ||
| final class SuggestionQualityMetricsStore: ObservableObject { | ||
| struct Counters: Codable, Equatable { | ||
| var generated = 0 | ||
| var shown = 0 | ||
| /// Sessions the user accepted at least once. Counted per suggestion, not per Tab press, | ||
| /// so word-by-word acceptance of one suggestion is one acceptance. | ||
| var acceptedSuggestions = 0 | ||
| /// Keyed by `CompletionSuppressionReason` raw values plus coordinator-level reasons | ||
| /// (the seam guard verdicts). String-keyed so new reasons never need a schema migration. | ||
| var suppressedByReason: [String: Int] = [:] | ||
| var firstRecordedAt: Date? | ||
|
|
||
| var suppressedTotal: Int { suppressedByReason.values.reduce(0, +) } | ||
|
|
||
| var acceptanceRate: Double? { | ||
| guard shown > 0 else { return nil } | ||
| return Double(acceptedSuggestions) / Double(shown) | ||
| } | ||
| } | ||
|
|
||
| @Published private(set) var counters: Counters | ||
|
|
||
| private let userDefaults: UserDefaults | ||
| private static let defaultsKey = "cotabbyQualityMetricsCounters" | ||
|
|
||
| /// Stored-property @MainActor classes deallocated inside app-hosted tests double-free without | ||
| /// an explicitly nonisolated deinit (the isolated-deinit runtime path over-releases). Same | ||
| /// workaround as the other main-actor stores exercised by tests. | ||
| nonisolated deinit {} | ||
|
|
||
| init(userDefaults: UserDefaults = .standard) { | ||
| self.userDefaults = userDefaults | ||
| if let data = userDefaults.data(forKey: Self.defaultsKey), | ||
| let decoded = try? JSONDecoder().decode(Counters.self, from: data) { | ||
| counters = decoded | ||
| } else { | ||
| counters = Counters() | ||
| } | ||
| } | ||
|
|
||
| func recordGenerated() { | ||
| mutate { $0.generated += 1 } | ||
| } | ||
|
|
||
| func recordShown() { | ||
| mutate { $0.shown += 1 } | ||
| } | ||
|
|
||
| func recordAcceptedSuggestion() { | ||
| mutate { $0.acceptedSuggestions += 1 } | ||
| } | ||
|
|
||
| func recordSuppressed(reason: String) { | ||
| mutate { $0.suppressedByReason[reason, default: 0] += 1 } | ||
| } | ||
|
|
||
| func reset() { | ||
| counters = Counters() | ||
| userDefaults.removeObject(forKey: Self.defaultsKey) | ||
| } | ||
|
|
||
| private func mutate(_ change: (inout Counters) -> Void) { | ||
| var updated = counters | ||
| change(&updated) | ||
| if updated.firstRecordedAt == nil { | ||
| updated.firstRecordedAt = Date() | ||
| } | ||
| counters = updated | ||
| if let data = try? JSONEncoder().encode(updated) { | ||
| userDefaults.set(data, forKey: Self.defaultsKey) | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
acceptedSuggestionswithout a matchingshownincrementrecordSuggestionAcceptedIfFirstChunkonly guards onconsumedCharacterCount == 0; it fires for any session kind, including.correction(typoWord:).presentCorrectionnever callsrecordShown(), so every accepted correction incrementsacceptedSuggestionswithout a correspondingshownentry. SinceacceptanceRate = acceptedSuggestions / shown, a session where the user accepts even one correction while the shown count is in single digits produces a rate above 100%, making the metric actively misleading.Add
guard case .continuation = session.kind else { return }before therecordAcceptedSuggestion()call to restrict the counter to generated completions only.