diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj
index a3fbc84e..6ff8ed51 100644
--- a/Cotabby.xcodeproj/project.pbxproj
+++ b/Cotabby.xcodeproj/project.pbxproj
@@ -70,6 +70,7 @@
 		1681C0F22323FB1156579D99 /* AGPL-3.0.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6F0EE728C0B1A7AD6B19CD0C /* AGPL-3.0.txt */; };
 		175C4FA56C29DEE58C2D4D7E /* SuggestionSettingsModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 86460C747AA883FDE756BDBA /* SuggestionSettingsModel.swift */; };
 		18382D1919D90E3C1EE143C2 /* AppSurfaceClassifierTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C451E144D220D5C63372A8C0 /* AppSurfaceClassifierTests.swift */; };
+		18680D0D66469A2954A50B6C /* SuggestionQualityMetricsStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 81718CA62FBC775A6CEBCED1 /* SuggestionQualityMetricsStore.swift */; };
 		1899BC5A35DC96B4D04B18A5 /* es.txt in Resources */ = {isa = PBXBuildFile; fileRef = 0B6816DF5D33863F966240B4 /* es.txt */; };
 		19386985A3A91D0843092086 /* AboutPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3FA53BBC3D81503C1D17477 /* AboutPaneView.swift */; };
 		19CA1BF8B508E0E219EF4485 /* SuggestionEngineModelsTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 470A7DAE3D6A2C873B395AE3 /* SuggestionEngineModelsTests.swift */; };
@@ -248,6 +249,7 @@
 		55EDBFF489D4C31276E2A67F /* PermissionHostApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */; };
 		5614E22EAA5F5C37A9E4F7B6 /* LlamaRuntimeManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = A52D0B550E00EF173A5D157E /* LlamaRuntimeManager.swift */; };
 		56611BA0087710277140E9E6 /* DisplayCoordinateConverterTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C1C5DE0F3FF63545000E2453 /* DisplayCoordinateConverterTests.swift */; };
+		5687320132AD97B4086260DF /* SuggestionQualityMetricsStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 81718CA62FBC775A6CEBCED1 /* SuggestionQualityMetricsStore.swift */; };
 		576B3FF30FB457EF04F9A715 /* SuggestionTextColorCodec.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1CE61E74928C221B8BB261C6 /* SuggestionTextColorCodec.swift */; };
 		586B36CD813E1432D0AB1380 /* DecodeStopPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = D12ABBCE23A946C22894945B /* DecodeStopPolicy.swift */; };
 		58AC3193D846FDE88513377D /* BundledRuntimeLocatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18D990E515E1AE4F312F4E95 /* BundledRuntimeLocatorTests.swift */; };
@@ -297,10 +299,12 @@
 		66D9E37B12A9265D4733E72E /* LlamaRuntimeCore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 944065A858D9BC936CB12B23 /* LlamaRuntimeCore.swift */; };
 		68DA5F93B7185B4F5E6DB4C3 /* it.txt in Resources */ = {isa = PBXBuildFile; fileRef = 0397F1DACB094A0F6A66BC0E /* it.txt */; };
 		6955C3A4D7AB3EEF7FA7C469 /* InputSuppressionController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2D1F9CEBAB0F330F8E7B61D8 /* InputSuppressionController.swift */; };
+		695E431AC3FF79769E2C5EEF /* SuggestionQualityMetricsStoreTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B4CC566AC1DE33FD0CD30E1E /* SuggestionQualityMetricsStoreTests.swift */; };
 		6A4E62EC9B7B970695F87136 /* TextDirectionDetector.swift in Sources */ = {isa = PBXBuildFile; fileRef = 328847A0F494360033366791 /* TextDirectionDetector.swift */; };
 		6A8454A989104AE150308BCF /* it-100k.txt in Resources */ = {isa = PBXBuildFile; fileRef = 2D8AA55C2B730110E8598F91 /* it-100k.txt */; };
 		6AE0B46FB52D189D94E1F79A /* WordCountFormatterTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1E0513E3B23937B099A3CFF2 /* WordCountFormatterTests.swift */; };
 		6BE0C8F9D054A2C0D9018001 /* ConfidenceSuppressionPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1BD71ECC2AE4821B643E0935 /* ConfidenceSuppressionPolicy.swift */; };
+		6C59B369AAC6948C53E41654 /* DebouncePolicyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = EB23CDF7CAA1DEAD606B46B3 /* DebouncePolicyTests.swift */; };
 		6CBEF02FCDFCF406E378C27C /* SuggestionInteractionStateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CB1D4F2681FAF59014AE115 /* SuggestionInteractionStateTests.swift */; };
 		6D0E79CF3C1A8CE53046FCE5 /* AXTextGeometryResolverTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C046CB4F3CB4BFE9391DB5DE /* AXTextGeometryResolverTests.swift */; };
 		6D57E3CDF56127422311C065 /* TerminalAppDetector.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F4C4A7EAF886E0CC945BFEF /* TerminalAppDetector.swift */; };
@@ -484,6 +488,7 @@
 		B782EC08B7516791BDB21172 /* FieldStyleCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = B7FBF2B766E728F25899B64E /* FieldStyleCache.swift */; };
 		B7A98BC225304E4DFED9E622 /* OnboardingTemplateRecommender.swift in Sources */ = {isa = PBXBuildFile; fileRef = FA878B447441BB4F3E327CC8 /* OnboardingTemplateRecommender.swift */; };
 		B816C6191738AB616F2E8D2D /* SuggestionCoordinatorTestSupport.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C174D8294858BF9DF3D361D /* SuggestionCoordinatorTestSupport.swift */; };
+		B849D68E0474CECAE809881C /* DebouncePolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6DC17643448271DE5DE61A89 /* DebouncePolicy.swift */; };
 		B93AB7E845086F6FBB068369 /* SuggestionRequestFactoryTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = EE94342B888A5A2CCF66BC93 /* SuggestionRequestFactoryTests.swift */; };
 		B9623395B31459D9D45B1320 /* CurrentWordExtractor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 247561C626843957CFB4B632 /* CurrentWordExtractor.swift */; };
 		B9F400BCC20757DA5DB0B5F9 /* FoundationModelSuggestionEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5664E34B23FBDF69292FEF43 /* FoundationModelSuggestionEngine.swift */; };
@@ -503,6 +508,7 @@
 		BFCA7FAFDAEBF586AB615567 /* ClipboardRelevanceFilterTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90B0D133AB77A2503FB08827 /* ClipboardRelevanceFilterTests.swift */; };
 		C0537A515AED443F6C61DB2A /* MenuBarSections.swift in Sources */ = {isa = PBXBuildFile; fileRef = 83A810F9D28A18BA6F2066C7 /* MenuBarSections.swift */; };
 		C0B833234748E82D3382631A /* emoji.json in Resources */ = {isa = PBXBuildFile; fileRef = C379D77029D6E88C8C1B9AF7 /* emoji.json */; };
+		C0F757D74758B76DA2962BC5 /* LlamaDecodeGateDefaultsTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B3E5554AAC5D0007CCC61A7 /* LlamaDecodeGateDefaultsTests.swift */; };
 		C0FE11D76BDF01A5470C554D /* FocusCapabilityFlickerGate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A44BEC8C23FF227731DD0CD /* FocusCapabilityFlickerGate.swift */; };
 		C149EAED2CF6F8B6274053E0 /* AppSurfaceClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 94B0830FBE4F2E239F670DBA /* AppSurfaceClassifier.swift */; };
 		C178E35A9A713BD4D9943E62 /* TypoCaseTransfer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 08CE63B8725EBD71A4C024E1 /* TypoCaseTransfer.swift */; };
@@ -582,6 +588,7 @@
 		E311B80968761E90FBA19A8A /* TypoGate.swift in Sources */ = {isa = PBXBuildFile; fileRef = B8412FE2BAC406421248A03B /* TypoGate.swift */; };
 		E313639E71AE1374D2B9A956 /* SuggestionWorkController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6B2D97BAA3618A7D0357AC44 /* SuggestionWorkController.swift */; };
 		E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */; };
+		E3C0326597083762BA6D76CA /* DebouncePolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6DC17643448271DE5DE61A89 /* DebouncePolicy.swift */; };
 		E3CAAEFAAB5BB24CEE16445B /* LLMIOFileHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */; };
 		E4382BEA8A8551612E5966B9 /* BaseCompletionPromptRenderer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 85EF79E6144D6C6AD062B569 /* BaseCompletionPromptRenderer.swift */; };
 		E46F50AEDA8FE13B02E3FA8D /* AXHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = AC70775535A3428991025AB8 /* AXHelper.swift */; };
@@ -730,6 +737,7 @@
 		29CDC8BE5312B9BEFD9B22CB /* SurfaceContextComposerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SurfaceContextComposerTests.swift; sourceTree = "<group>"; };
 		29ED42C4BDD0C521101AF95E /* DeviceInfo.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeviceInfo.swift; sourceTree = "<group>"; };
 		2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsAttentionEvaluator.swift; sourceTree = "<group>"; };
+		2B3E5554AAC5D0007CCC61A7 /* LlamaDecodeGateDefaultsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaDecodeGateDefaultsTests.swift; sourceTree = "<group>"; };
 		2B7A28471B8526C2693FFF65 /* AcknowledgementsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AcknowledgementsView.swift; sourceTree = "<group>"; };
 		2BC293F6125E2B14DCF05AD9 /* SettingsAttentionEvaluatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsAttentionEvaluatorTests.swift; sourceTree = "<group>"; };
 		2D1F9CEBAB0F330F8E7B61D8 /* InputSuppressionController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputSuppressionController.swift; sourceTree = "<group>"; };
@@ -812,6 +820,7 @@
 		6CF1FBAABEF545B620AF8D78 /* ru-100k.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "ru-100k.txt"; sourceTree = "<group>"; };
 		6D4C1EF008B9DFA753D561D3 /* LlamaEvalScoringTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaEvalScoringTests.swift; sourceTree = "<group>"; };
 		6DB982BF30B3601F57277776 /* fr-100k.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "fr-100k.txt"; sourceTree = "<group>"; };
+		6DC17643448271DE5DE61A89 /* DebouncePolicy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DebouncePolicy.swift; sourceTree = "<group>"; };
 		6DC693E00430F46E41CB56E6 /* RequestID.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RequestID.swift; sourceTree = "<group>"; };
 		6E3B1232C4BE8072A5183F9C /* SymSpell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SymSpell.swift; sourceTree = "<group>"; };
 		6E3EC87078D3A4C21DB3252C /* RandomMacroEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RandomMacroEvaluator.swift; sourceTree = "<group>"; };
@@ -840,6 +849,7 @@
 		7F4C4A7EAF886E0CC945BFEF /* TerminalAppDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TerminalAppDetector.swift; sourceTree = "<group>"; };
 		807148A920E003DEF8BA6092 /* SystemMetricsStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SystemMetricsStore.swift; sourceTree = "<group>"; };
 		815F2ABAF6AB75DA3AFBBCEF /* WordCountFormatter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WordCountFormatter.swift; sourceTree = "<group>"; };
+		81718CA62FBC775A6CEBCED1 /* SuggestionQualityMetricsStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionQualityMetricsStore.swift; sourceTree = "<group>"; };
 		82E7794DF60664B1FA8F6E7B /* UnitConversionEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnitConversionEvaluator.swift; sourceTree = "<group>"; };
 		82F7F7355967725162DF2D1B /* CustomRulesEditor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomRulesEditor.swift; sourceTree = "<group>"; };
 		83A810F9D28A18BA6F2066C7 /* MenuBarSections.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MenuBarSections.swift; sourceTree = "<group>"; };
@@ -929,6 +939,7 @@
 		B41F06FEF208B30ECCF23A6F /* MacroModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MacroModels.swift; sourceTree = "<group>"; };
 		B424E2AC97C99D335B0D5751 /* SuggestionTextNormalizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionTextNormalizer.swift; sourceTree = "<group>"; };
 		B4B4A2E2DD6733658EC05BD8 /* DownloadFileRescuer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadFileRescuer.swift; sourceTree = "<group>"; };
+		B4CC566AC1DE33FD0CD30E1E /* SuggestionQualityMetricsStoreTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionQualityMetricsStoreTests.swift; sourceTree = "<group>"; };
 		B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionHostApp.swift; sourceTree = "<group>"; };
 		B6D36DB66629CF22C1783945 /* CompletionSeamGuardTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CompletionSeamGuardTests.swift; sourceTree = "<group>"; };
 		B6D42CD456B4B3C988B148A6 /* FocusTrackingModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTrackingModel.swift; sourceTree = "<group>"; };
@@ -1018,6 +1029,7 @@
 		E68BE6A22BA0D42C8DD9868C /* SelfCaptureGate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SelfCaptureGate.swift; sourceTree = "<group>"; };
 		E7F42112F14026E6253BB865 /* PermissionAndContextModelTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionAndContextModelTests.swift; sourceTree = "<group>"; };
 		EAAE6B395FAB604DF059280A /* KeyCodeLabels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeyCodeLabels.swift; sourceTree = "<group>"; };
+		EB23CDF7CAA1DEAD606B46B3 /* DebouncePolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DebouncePolicyTests.swift; sourceTree = "<group>"; };
 		EB630F9814388203DD1CA2EC /* ShortcutsPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ShortcutsPaneView.swift; sourceTree = "<group>"; };
 		EC04832FBD5311352F35241B /* SuggestionCaretLayoutRepairTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionCaretLayoutRepairTests.swift; sourceTree = "<group>"; };
 		EC4A3C4BC38793EB11F484F1 /* CompositionInputModeClassifierTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CompositionInputModeClassifierTests.swift; sourceTree = "<group>"; };
@@ -1304,6 +1316,7 @@
 				F4D9DF8723AF32C058BFACDE /* SpellingDictionaryCatalog.swift */,
 				ADBE3E6CC585C1683787C877 /* SuggestionEngineModels.swift */,
 				386C98FFCF76EC1C8C7E82BB /* SuggestionModels.swift */,
+				81718CA62FBC775A6CEBCED1 /* SuggestionQualityMetricsStore.swift */,
 				D93563FDA25DFC0038E5F887 /* SuggestionSettingsData.swift */,
 				86460C747AA883FDE756BDBA /* SuggestionSettingsModel.swift */,
 				DEB16474A67CE1D210B944C9 /* SuggestionSubsystemContracts.swift */,
@@ -1372,6 +1385,7 @@
 				1C4751DFE9DA372FBC40BA30 /* CurrentWordExtractorTests.swift */,
 				AD752451330486FE270018B0 /* CustomRulesTests.swift */,
 				313EDBA60565836F32CEEC10 /* DateMacroEvaluatorTests.swift */,
+				EB23CDF7CAA1DEAD606B46B3 /* DebouncePolicyTests.swift */,
 				B3B09064903B760D6DF2DF7D /* DecodeStopPolicyTests.swift */,
 				8F20A19A24D20E16D25ADCDA /* DeepGeometryWalkThrottleTests.swift */,
 				C49F67B3EEB2F2A577A54085 /* DeviceInfoTests.swift */,
@@ -1414,6 +1428,7 @@
 				2960080A726E51198225147A /* InsertionStrategySelectorTests.swift */,
 				2930EC34057319130393696B /* KeyCodeLabelsTests.swift */,
 				4793D4EA5D36D7E5CC216C27 /* LanguageSupportTests.swift */,
+				2B3E5554AAC5D0007CCC61A7 /* LlamaDecodeGateDefaultsTests.swift */,
 				906011A6C9D66EEBAF3B5CC0 /* LlamaEvalScoring.swift */,
 				6D4C1EF008B9DFA753D561D3 /* LlamaEvalScoringTests.swift */,
 				0CA88BB29BC8727878C99E95 /* LlamaPromptCacheHintTrackerTests.swift */,
@@ -1466,6 +1481,7 @@
 				45A896811745673061AF3612 /* SuggestionFocusFreshnessTests.swift */,
 				8CB1D4F2681FAF59014AE115 /* SuggestionInteractionStateTests.swift */,
 				CDB25ABC4FFB0E63477CDCB0 /* SuggestionOverlayStabilityGateTests.swift */,
+				B4CC566AC1DE33FD0CD30E1E /* SuggestionQualityMetricsStoreTests.swift */,
 				EE94342B888A5A2CCF66BC93 /* SuggestionRequestFactoryTests.swift */,
 				9C8F07AC52C7A482F5FE34C5 /* SuggestionSessionReconcilerTests.swift */,
 				00BB95A341A8B5F4A1725640 /* SuggestionSettingsModelTests.swift */,
@@ -1624,6 +1640,7 @@
 				9CC2D6472ACD377FD73A5801 /* ControlTokenMarkers.swift */,
 				C7B2D34A6F3AC9DFD61350F7 /* CotabbyDebugOptions.swift */,
 				247561C626843957CFB4B632 /* CurrentWordExtractor.swift */,
+				6DC17643448271DE5DE61A89 /* DebouncePolicy.swift */,
 				D12ABBCE23A946C22894945B /* DecodeStopPolicy.swift */,
 				29ED42C4BDD0C521101AF95E /* DeviceInfo.swift */,
 				74BD1D4DB27D5D96D1E06096 /* DisplayCoordinateConverter.swift */,
@@ -1938,6 +1955,7 @@
 				55D4E6FB63E3475749E61EB3 /* CustomRulesCatalog.swift in Sources */,
 				F7237FDB0665465F1C7EDCDE /* CustomRulesEditor.swift in Sources */,
 				E0DAB3CDE782C330AF21FC0D /* DateMacroEvaluator.swift in Sources */,
+				E3C0326597083762BA6D76CA /* DebouncePolicy.swift in Sources */,
 				FA25762161F068F59BEC86EB /* DecodeStopPolicy.swift in Sources */,
 				400E1A5145FC8C5BA2FAED0A /* DeepGeometryWalkThrottle.swift in Sources */,
 				CF39EB76C3ECF8F764C1B4FB /* DeviceInfo.swift in Sources */,
@@ -2081,6 +2099,7 @@
 				7DEFC57991AB0C5379AD9CBF /* SuggestionModels.swift in Sources */,
 				EE2C9177CE615298595215A8 /* SuggestionOverlayPresenter.swift in Sources */,
 				DFF3AA49E0770DE3CFBC24C1 /* SuggestionOverlayStabilityGate.swift in Sources */,
+				18680D0D66469A2954A50B6C /* SuggestionQualityMetricsStore.swift in Sources */,
 				B691B8378FD73E186A72450C /* SuggestionRequestFactory.swift in Sources */,
 				532283A7651F7E66635F4281 /* SuggestionSessionReconciler.swift in Sources */,
 				C8CA6DACEAA83336551D4EFA /* SuggestionSettingsData.swift in Sources */,
@@ -2167,6 +2186,7 @@
 				0431AE1DBEE36C90C7F39C19 /* CustomRulesCatalog.swift in Sources */,
 				4B4DDB569CAD806F765224DE /* CustomRulesEditor.swift in Sources */,
 				DAD77998F793468D4D64B705 /* DateMacroEvaluator.swift in Sources */,
+				B849D68E0474CECAE809881C /* DebouncePolicy.swift in Sources */,
 				586B36CD813E1432D0AB1380 /* DecodeStopPolicy.swift in Sources */,
 				261FA692D19C48E53D6999BC /* DeepGeometryWalkThrottle.swift in Sources */,
 				1450746C690B3D98203B71EC /* DeviceInfo.swift in Sources */,
@@ -2310,6 +2330,7 @@
 				0AF568AB234033BA2DE4CAA7 /* SuggestionModels.swift in Sources */,
 				02DA43985CDAE6859014F14F /* SuggestionOverlayPresenter.swift in Sources */,
 				0F3267956257401F39386773 /* SuggestionOverlayStabilityGate.swift in Sources */,
+				5687320132AD97B4086260DF /* SuggestionQualityMetricsStore.swift in Sources */,
 				46F341472191BC451B6BF6B5 /* SuggestionRequestFactory.swift in Sources */,
 				CA5B2D226FBAA5419E78F14F /* SuggestionSessionReconciler.swift in Sources */,
 				7EEE6AEBFBD419FFE7C544BA /* SuggestionSettingsData.swift in Sources */,
@@ -2378,6 +2399,7 @@
 				64599CD334AAD79266224689 /* CurrentWordExtractorTests.swift in Sources */,
 				91D1F16B8C5DA281D4B7F699 /* CustomRulesTests.swift in Sources */,
 				4CCF29A7EA1B7D37841C135D /* DateMacroEvaluatorTests.swift in Sources */,
+				6C59B369AAC6948C53E41654 /* DebouncePolicyTests.swift in Sources */,
 				79B0AEA0D2FC6A865E9303F9 /* DecodeStopPolicyTests.swift in Sources */,
 				664A5D62A723EB204ADEF2F9 /* DeepGeometryWalkThrottleTests.swift in Sources */,
 				43DED8ABEFF9894ED54097A9 /* DeviceInfoTests.swift in Sources */,
@@ -2421,6 +2443,7 @@
 				F66F0D982EBAF5A3E99C5342 /* KeyCodeLabelsTests.swift in Sources */,
 				475FB7450EEC3C1B16E66CC4 /* LLMIOFileHandlerTests.swift in Sources */,
 				E912D4617AE1376061DF1F00 /* LanguageSupportTests.swift in Sources */,
+				C0F757D74758B76DA2962BC5 /* LlamaDecodeGateDefaultsTests.swift in Sources */,
 				3D82280EFF7F7E9F3FFF45ED /* LlamaEvalScoring.swift in Sources */,
 				3D56E9B3AA378400E2C081E3 /* LlamaEvalScoringTests.swift in Sources */,
 				E38801433B99E65BD7E45A0E /* LlamaPromptCacheHintTrackerTests.swift in Sources */,
@@ -2472,6 +2495,7 @@
 				5CED06E89FBEF557DCD6C684 /* SuggestionFocusFreshnessTests.swift in Sources */,
 				6CBEF02FCDFCF406E378C27C /* SuggestionInteractionStateTests.swift in Sources */,
 				4C6D8ED0A7B45D2EADF06DA5 /* SuggestionOverlayStabilityGateTests.swift in Sources */,
+				695E431AC3FF79769E2C5EEF /* SuggestionQualityMetricsStoreTests.swift in Sources */,
 				B93AB7E845086F6FBB068369 /* SuggestionRequestFactoryTests.swift in Sources */,
 				7E9413CE7C999C4612348248 /* SuggestionSessionReconcilerTests.swift in Sources */,
 				7C6D42EAD04C8144538B132A /* SuggestionSettingsModelTests.swift in Sources */,
diff --git a/Cotabby/App/Coordinators/SettingsCoordinator.swift b/Cotabby/App/Coordinators/SettingsCoordinator.swift
index 96a8132e..092b54a4 100644
--- a/Cotabby/App/Coordinators/SettingsCoordinator.swift
+++ b/Cotabby/App/Coordinators/SettingsCoordinator.swift
@@ -20,6 +20,7 @@ final class SettingsCoordinator: NSObject, NSWindowDelegate {
     private let modelDownloadManager: ModelDownloadManager
     private let huggingFaceSearchService: HuggingFaceSearchService
     private let performanceMetricsStore: PerformanceMetricsStore
+    private let qualityMetricsStore: SuggestionQualityMetricsStore
     private let systemMetricsStore: SystemMetricsStore
     private let onShowWelcome: () -> Void
     private let clearEmojiHistory: () -> Void
@@ -36,6 +37,7 @@ final class SettingsCoordinator: NSObject, NSWindowDelegate {
         modelDownloadManager: ModelDownloadManager,
         huggingFaceSearchService: HuggingFaceSearchService,
         performanceMetricsStore: PerformanceMetricsStore,
+        qualityMetricsStore: SuggestionQualityMetricsStore,
         systemMetricsStore: SystemMetricsStore,
         onShowWelcome: @escaping () -> Void,
         clearEmojiHistory: @escaping () -> Void
@@ -49,6 +51,7 @@ final class SettingsCoordinator: NSObject, NSWindowDelegate {
         self.modelDownloadManager = modelDownloadManager
         self.huggingFaceSearchService = huggingFaceSearchService
         self.performanceMetricsStore = performanceMetricsStore
+        self.qualityMetricsStore = qualityMetricsStore
         self.systemMetricsStore = systemMetricsStore
         self.onShowWelcome = onShowWelcome
         self.clearEmojiHistory = clearEmojiHistory
@@ -76,6 +79,7 @@ final class SettingsCoordinator: NSObject, NSWindowDelegate {
                     modelDownloadManager: modelDownloadManager,
                     huggingFaceSearchService: huggingFaceSearchService,
                     performanceMetricsStore: performanceMetricsStore,
+                    qualityMetricsStore: qualityMetricsStore,
                     systemMetricsStore: systemMetricsStore,
                     onShowWelcome: onShowWelcome,
                     clearEmojiHistory: clearEmojiHistory
diff --git a/Cotabby/App/Coordinators/SuggestionCoordinator+Acceptance.swift b/Cotabby/App/Coordinators/SuggestionCoordinator+Acceptance.swift
index f5290b64..ba27b091 100644
--- a/Cotabby/App/Coordinators/SuggestionCoordinator+Acceptance.swift
+++ b/Cotabby/App/Coordinators/SuggestionCoordinator+Acceptance.swift
@@ -122,6 +122,7 @@ extension SuggestionCoordinator {
 
         deferAcceptanceBookkeeping { [weak self] in
             self?.recordAcceptedWords(from: acceptedChunk)
+            self?.recordSuggestionAcceptedIfFirstChunk(of: sessionForAcceptance)
         }
 
         cancelPredictionWork()
@@ -563,6 +564,14 @@ extension SuggestionCoordinator {
         }
     }
 
+    /// Counts the session's suggestion as accepted in the quality counters only while the session's
+    /// `consumedCharacterCount` is 0, i.e. on the first accepted chunk; later word-by-word chunks
+    /// add nothing, so the acceptance rate stays suggestions-accepted over suggestions-shown.
+    private func recordSuggestionAcceptedIfFirstChunk(of session: ActiveSuggestionSession) {
+        guard session.consumedCharacterCount == 0 else { return }
+        qualityMetricsStore.recordAcceptedSuggestion()
+    }
+
     /// Updates the global productivity counter from text accepted via Tab.
     func recordAcceptedWords(from acceptedChunk: String) {
         let acceptedWordCount = SuggestionSessionReconciler.acceptedWordCount(in: acceptedChunk)
diff --git a/Cotabby/App/Coordinators/SuggestionCoordinator+Prediction.swift b/Cotabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
index 33b009b4..5b0ae7aa 100644
--- a/Cotabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
+++ b/Cotabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
@@ -20,11 +20,18 @@ extension SuggestionCoordinator {
             return
         }
 
+        // The debounce window adapts to the last generation latency: snappier when the model is
+        // fast, calmer when it is slow (fewer doomed generations to cancel). The configured value
+        // is the fallback until a first latency exists.
+        let debounceMilliseconds = DebouncePolicy.milliseconds(
+            lastGenerationLatencyMilliseconds: latestLatencyMilliseconds,
+            fallback: settingsSnapshot.debounceMilliseconds
+        )
         // The debounce clock starts at the keystroke, not here. The host-publish poll has already
         // consumed real wall time waiting for the host to publish the keystroke to AX, and that
         // wait collapses bursts just as well as sleeping does. Stacking the full debounce on top
         // of the publish wait was pure added latency, so only the unconsumed remainder is slept.
-        let remainingDelay = max(0, settingsSnapshot.debounceMilliseconds - consumedDelayMilliseconds)
+        let remainingDelay = max(0, debounceMilliseconds - consumedDelayMilliseconds)
 
         // Task cancellation in Swift is cooperative, so we also use an explicit work id.
         // That gives us strict "latest request wins" semantics even if an old task wakes up late.
@@ -42,7 +49,7 @@ extension SuggestionCoordinator {
         logStage(
             "debouncing",
             workID: workID,
-            message: "Debouncing (\(settingsSnapshot.debounceMilliseconds)ms window) before generating."
+            message: "Debouncing (\(debounceMilliseconds)ms window, \(remainingDelay)ms remaining) before generating."
         )
     }
 
@@ -496,6 +503,10 @@ extension SuggestionCoordinator {
         guard liveContext.generation == result.generation else {
 
             latestRawModelOutput = SuggestionDebugLogger.debugPreview(result.rawText)
+            // Lifecycle discards are counted under their own reasons so `generated` always equals
+            // `shown` plus the suppression histogram; without this, every drop here silently
+            // inflated the generated count against the others.
+            qualityMetricsStore.recordSuppressed(reason: "discardedStaleContext")
             logStage(
                 "stale-drop",
                 workID: workID,
@@ -514,6 +525,11 @@ extension SuggestionCoordinator {
             clearSuggestion()
             hideOverlay(reason: "Overlay hidden because the model returned an empty continuation.")
             state = .idle
+            // The router already counted engine-attributed suppressions (normalizer, confidence
+            // floor); only the unattributed "model produced nothing" case needs a ledger entry.
+            if result.suppressionReason == nil {
+                qualityMetricsStore.recordSuppressed(reason: "emptyUnattributed")
+            }
             logStage(
                 "empty-result",
                 workID: workID,
@@ -529,6 +545,7 @@ extension SuggestionCoordinator {
             clearSuggestion(clearDiagnostics: true)
             hideOverlay(reason: "Overlay hidden because text is selected.")
             state = .idle
+            qualityMetricsStore.recordSuppressed(reason: "discardedSelection")
             logStage(
                 "selected-text",
                 workID: workID,
@@ -553,6 +570,7 @@ extension SuggestionCoordinator {
             clearSuggestion(clearDiagnostics: false)
             hideOverlay(reason: "Overlay hidden because the regeneration only echoed the just-accepted text before the host published it.")
             state = .idle
+            qualityMetricsStore.recordSuppressed(reason: "discardedAcceptEcho")
             logStage(
                 "stale-accept-echo",
                 workID: workID,
@@ -576,6 +594,8 @@ extension SuggestionCoordinator {
             clearSuggestion()
             hideOverlay(reason: "Overlay hidden because the completion failed the seam guard.")
             state = .idle
+            let seamReason = if case .seamMisspelling = seamVerdict { "seamMisspelling" } else { "seamJunkPunctuationRun" }
+            qualityMetricsStore.recordSuppressed(reason: seamReason)
             logStage(
                 "seam-suppressed",
                 workID: workID,
@@ -589,6 +609,9 @@ extension SuggestionCoordinator {
 
         latestLatencyMilliseconds = Int(result.latency * 1000)
         latestGenerationNumber = liveContext.generation
+        // One shown event per suggestion: this is the only place a fresh generation becomes
+        // visible (re-presentations after partial accepts reuse the same session).
+        qualityMetricsStore.recordShown()
         let session = interactionState.startSession(
             fullText: result.text,
             liveContext: liveContext,
diff --git a/Cotabby/App/Coordinators/SuggestionCoordinator.swift b/Cotabby/App/Coordinators/SuggestionCoordinator.swift
index 61516984..18733160 100644
--- a/Cotabby/App/Coordinators/SuggestionCoordinator.swift
+++ b/Cotabby/App/Coordinators/SuggestionCoordinator.swift
@@ -54,6 +54,9 @@ final class SuggestionCoordinator: ObservableObject {
     /// `CotabbyAppEnvironment`) so the underlying `NSSpellChecker` document tag persists across the
     /// coordinator's lifetime instead of churning per keystroke.
     let spellChecker: CurrentWordSpellChecker
+    /// Always-on quality counters (shown / suppressed / accepted). The router counts generation
+    /// outcomes; the coordinator owns the display-time and acceptance events only it can see.
+    let qualityMetricsStore: SuggestionQualityMetricsStore
     /// Frequency-ranked correction source (SymSpell). Used first for the correction word, with
     /// `spellChecker` as the fallback while its index is still loading or when it has no suggestion.
     let symSpellCorrector: SymSpellCorrector
@@ -163,6 +166,7 @@ final class SuggestionCoordinator: ObservableObject {
         spellChecker: CurrentWordSpellChecker,
         symSpellCorrector: SymSpellCorrector,
         spellingLanguageResolver: SpellingLanguageResolver = SpellingLanguageResolver(),
+        qualityMetricsStore: SuggestionQualityMetricsStore,
         userDefaults: UserDefaults = .standard
     ) {
         let storedTotalTabAcceptedWordCount = userDefaults.integer(
@@ -184,6 +188,7 @@ final class SuggestionCoordinator: ObservableObject {
         self.spellChecker = spellChecker
         self.symSpellCorrector = symSpellCorrector
         self.spellingLanguageResolver = spellingLanguageResolver
+        self.qualityMetricsStore = qualityMetricsStore
         self.userDefaults = userDefaults
         settingsSnapshot = suggestionSettings.snapshot
         // These collaborators isolate "how overlay/logging works" from "when the coordinator
diff --git a/Cotabby/App/Core/CotabbyAppEnvironment.swift b/Cotabby/App/Core/CotabbyAppEnvironment.swift
index 9eed835a..06d4d389 100644
--- a/Cotabby/App/Core/CotabbyAppEnvironment.swift
+++ b/Cotabby/App/Core/CotabbyAppEnvironment.swift
@@ -34,6 +34,7 @@ final class CotabbyAppEnvironment {
     let welcomeCoordinator: WelcomeCoordinator
     let huggingFaceSearchService: HuggingFaceSearchService
     let performanceMetricsStore: PerformanceMetricsStore
+    let qualityMetricsStore: SuggestionQualityMetricsStore
     let settingsCoordinator: SettingsCoordinator
     let activationIndicatorController: ActivationIndicatorController
     let focusDebugOverlayController: FocusDebugOverlayController?
@@ -113,6 +114,9 @@ final class CotabbyAppEnvironment {
         )
         let huggingFaceSearchService = HuggingFaceSearchService()
         let performanceMetricsStore = PerformanceMetricsStore()
+        // Always-on quality counters (generated / shown / suppressed-by-reason / accepted).
+        // Counters only, no content, so unlike latency tracking there is no opt-in gate.
+        let qualityMetricsStore = SuggestionQualityMetricsStore()
         // Live CPU/RAM graph backing for the Performance pane. Holds no state until the pane asks it
         // to start sampling, so constructing it eagerly here costs nothing.
         let systemMetricsStore = SystemMetricsStore()
@@ -157,6 +161,7 @@ final class CotabbyAppEnvironment {
             foundationModelEngine: foundationModelEngine,
             llamaEngine: LlamaSuggestionEngine(runtimeManager: runtimeManager),
             performanceMetricsStore: performanceMetricsStore,
+            qualityMetricsStore: qualityMetricsStore,
             llamaModelNameProvider: { [weak runtimeManager] in
                 runtimeManager?.currentModelFilename
             }
@@ -176,6 +181,7 @@ final class CotabbyAppEnvironment {
             modelDownloadManager: modelDownloadManager,
             huggingFaceSearchService: huggingFaceSearchService,
             performanceMetricsStore: performanceMetricsStore,
+            qualityMetricsStore: qualityMetricsStore,
             systemMetricsStore: systemMetricsStore,
             onShowWelcome: { [weak welcomeCoordinator] in
                 welcomeCoordinator?.showWelcome()
@@ -213,7 +219,8 @@ final class CotabbyAppEnvironment {
             configuration: configuration,
             spellChecker: spellChecker,
             symSpellCorrector: symSpellCorrector,
-            spellingLanguageResolver: SpellingLanguageResolver()
+            spellingLanguageResolver: SpellingLanguageResolver(),
+            qualityMetricsStore: qualityMetricsStore
         )
 
         // The emoji picker is a sibling to the suggestion coordinator. It reuses the input monitor,
@@ -276,6 +283,7 @@ final class CotabbyAppEnvironment {
         self.welcomeCoordinator = welcomeCoordinator
         self.huggingFaceSearchService = huggingFaceSearchService
         self.performanceMetricsStore = performanceMetricsStore
+        self.qualityMetricsStore = qualityMetricsStore
         self.settingsCoordinator = settingsCoordinator
         self.activationIndicatorController = activationIndicatorController
         self.focusDebugOverlayController = FocusDebugOverlayController.isEnabled
diff --git a/Cotabby/Models/LlamaRuntimeModels.swift b/Cotabby/Models/LlamaRuntimeModels.swift
index 954c44d0..1755e434 100644
--- a/Cotabby/Models/LlamaRuntimeModels.swift
+++ b/Cotabby/Models/LlamaRuntimeModels.swift
@@ -197,6 +197,28 @@ struct LlamaGenerationOptions: Equatable, Sendable {
     /// degenerate instant stops (e.g. a lone leading period). Lives here so length presets can tune
     /// the floor without reaching into `DecodeStopPolicy`; the default preserves prior behavior.
     var sentenceStopMinimumTokens: Int = 2
+
+    /// Stop decoding the moment the raw distribution's most-likely next token is end-of-generation,
+    /// even when the stochastic sampler drew something else. The model's top choice being "stop"
+    /// is the strongest anti-rambling signal available per token, and the engine computes it while
+    /// the logits row is hot, so honoring it costs nothing here.
+    var stopAtArgmaxEOG: Bool = true
+}
+
+/// One generation's text plus the confidence signals the caller needs for suppression accounting.
+/// Returned instead of a bare string so a confidence-suppressed completion is attributed to the
+/// real reason rather than reading as "the model produced nothing".
+struct LlamaGenerationOutput: Equatable, Sendable {
+    let text: String
+    /// Mean log-probability per generated token. Nil when confidence gating was off (the engine
+    /// then skips per-token logprob work altogether) or when no token was generated.
+    let averageLogprob: Double?
+    /// Set when the completion was withheld because `averageLogprob` fell below the floor.
+    let suppressedByLowConfidence: Bool
+
+    static func text(_ text: String) -> Self {
+        Self(text: text, averageLogprob: nil, suppressedByLowConfidence: false)
+    }
 }
 
 /// The concrete runtime assets selected during bootstrap after checking available model files.
diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift
index a0f55c88..dfc44b15 100644
--- a/Cotabby/Models/SuggestionModels.swift
+++ b/Cotabby/Models/SuggestionModels.swift
@@ -449,6 +449,25 @@ struct SuggestionResult: Equatable, Sendable {
     let rawText: String
     let text: String
     let latency: TimeInterval
+    /// Raw value of the `CompletionSuppressionReason` that emptied `text`, when one applies.
+    /// Carried as a string so the coordinator's quality accounting never needs the normalizer
+    /// type, and so engine-specific reasons can ride along without enum churn. The explicit
+    /// initializer default keeps existing call sites compiling unchanged.
+    let suppressionReason: String?
+
+    /// Hand-written memberwise initializer; `suppressionReason` defaults to nil so call sites
+    /// that predate the field compile unchanged.
+    /// - Parameter suppressionReason: raw value of the reason that emptied `text`, if any.
+    init(
+        generation: UInt64, rawText: String, text: String,
+        latency: TimeInterval, suppressionReason: String? = nil
+    ) {
+        self.suppressionReason = suppressionReason
+        self.latency = latency
+        self.text = text
+        self.rawText = rawText
+        self.generation = generation
+    }
 }
 
 /// Represents one active inline-completion session after the model has produced a suggestion.
diff --git a/Cotabby/Models/SuggestionQualityMetricsStore.swift b/Cotabby/Models/SuggestionQualityMetricsStore.swift
new file mode 100644
index 00000000..12d81612
--- /dev/null
+++ b/Cotabby/Models/SuggestionQualityMetricsStore.swift
@@ -0,0 +1,86 @@
+import Combine
+import Foundation
+
+/// Local, always-on counters that answer "is suggestion quality improving for real use": how many
+/// completions were generated, how many were shown, why the withheld ones were withheld, and how
+/// many shown suggestions the user actually accepted.
+///
+/// Latency tracking (`PerformanceMetricsStore`) stays opt-in because it records per-request rows;
+/// these are lifetime counters with zero content, so they run unconditionally and survive restarts.
+/// Acceptance rate (accepted / shown) is the closest thing to ground truth the app can measure on
+/// device, and the suppression histogram tells the difference between "the model produced nothing"
+/// and "a specific guard fired", which otherwise only exists scattered through debug-only JSONL.
+@MainActor
+final class SuggestionQualityMetricsStore: ObservableObject {
+    /// Snapshot of every lifetime counter; encoded to JSON as a whole when persisted.
+    struct Counters: Codable, Equatable {
+        var generated: Int = 0
+        var shown: Int = 0
+        /// Suggestions accepted at least once. One suggestion accepted word-by-word over several
+        /// Tab presses still counts as a single acceptance.
+        var acceptedSuggestions: Int = 0
+        /// Withheld-completion histogram. Keys are `CompletionSuppressionReason` raw values plus
+        /// coordinator-level reasons; plain strings let new reasons appear without a migration.
+        var suppressedByReason: [String: Int] = [:]
+        var firstRecordedAt: Date?
+
+        /// Sum over every bucket of the suppression histogram.
+        var suppressedTotal: Int { suppressedByReason.reduce(0) { $0 + $1.value } }
+
+        /// accepted / shown, or nil while nothing has been shown (avoids dividing by zero).
+        var acceptanceRate: Double? {
+            shown > 0 ? Double(acceptedSuggestions) / Double(shown) : nil
+        }
+    }
+
+    private static let defaultsKey = "cotabbyQualityMetricsCounters"
+    private let userDefaults: UserDefaults
+
+    /// Current counters; every recorded event replaces the whole value with one assignment.
+    @Published private(set) var counters: Counters
+
+    /// Deliberately empty and nonisolated. Main-actor classes with stored properties double-free
+    /// when deallocated inside app-hosted tests unless deinit is explicitly nonisolated (the
+    /// isolated-deinit runtime path over-releases); the other tested main-actor stores do the same.
+    nonisolated deinit {}
+
+    /// Restores persisted counters from `userDefaults`, starting from zero when the blob is
+    /// missing or fails to decode.
+    init(userDefaults: UserDefaults = .standard) {
+        self.userDefaults = userDefaults
+        let persisted = userDefaults.data(forKey: Self.defaultsKey)
+        let restored = persisted.flatMap { try? JSONDecoder().decode(Counters.self, from: $0) }
+        counters = restored ?? Counters()
+    }
+
+    func recordGenerated() { update { $0.generated += 1 } }
+
+    func recordShown() { update { $0.shown += 1 } }
+
+    func recordAcceptedSuggestion() { update { $0.acceptedSuggestions += 1 } }
+
+    /// Adds one to the histogram bucket for `reason`, creating the bucket on first use.
+    func recordSuppressed(reason: String) {
+        update { snapshot in
+            snapshot.suppressedByReason[reason, default: 0] += 1
+        }
+    }
+
+    /// Zeroes the in-memory counters and deletes the persisted blob.
+    func reset() {
+        counters = Counters()
+        userDefaults.removeObject(forKey: Self.defaultsKey)
+    }
+
+    /// Applies `edit` to a copy, stamps `firstRecordedAt` while it is still nil, then publishes
+    /// the copy with a single assignment and writes it to `userDefaults` as JSON.
+    private func update(_ edit: (inout Counters) -> Void) {
+        var next = counters
+        edit(&next)
+        next.firstRecordedAt = next.firstRecordedAt ?? Date()
+        counters = next
+        // Best-effort persistence: an encoding failure leaves the stored blob untouched.
+        guard let encoded = try? JSONEncoder().encode(next) else { return }
+        userDefaults.set(encoded, forKey: Self.defaultsKey)
+    }
+}
diff --git a/Cotabby/Models/SuggestionSubsystemContracts.swift b/Cotabby/Models/SuggestionSubsystemContracts.swift
index 77ece468..79655f7c 100644
--- a/Cotabby/Models/SuggestionSubsystemContracts.swift
+++ b/Cotabby/Models/SuggestionSubsystemContracts.swift
@@ -127,16 +127,21 @@ extension SuggestionGenerating {
 /// a fake runtime instead of loading a real model. `LlamaRuntimeManager` is the production conformer.
 @MainActor
 protocol LlamaRuntimeGenerating: AnyObject {
-    func generate(prompt: String, cachedPrefixBytes: Int?, options: LlamaGenerationOptions) async throws -> String
+    func generate(
+        prompt: String,
+        cachedPrefixBytes: Int?,
+        options: LlamaGenerationOptions
+    ) async throws -> LlamaGenerationOutput
     /// Streaming variant: `onPartialRawText` receives the cumulative raw completion after each
     /// sampled token, called from the decode thread (hence `@Sendable`); callers own hopping to
-    /// their actor. The returned string is still the authoritative final completion.
+    /// their actor. The returned output's text is still the authoritative final completion, and
+    /// its confidence fields describe the whole generation (partials are pre-gate by nature).
     func generate(
         prompt: String,
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions,
         onPartialRawText: (@Sendable (String) -> Void)?
-    ) async throws -> String
+    ) async throws -> LlamaGenerationOutput
     func resetPromptCache()
     /// Decodes `prompt` into the native prompt cache without sampling any tokens, so the next
     /// `generate` whose prompt extends this one only decodes the typed delta. Best-effort warmup:
@@ -157,7 +162,7 @@ extension LlamaRuntimeGenerating {
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions,
         onPartialRawText: (@Sendable (String) -> Void)?
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         try await generate(prompt: prompt, cachedPrefixBytes: cachedPrefixBytes, options: options)
     }
 }
diff --git a/Cotabby/Services/Runtime/FoundationModelSuggestionEngine.swift b/Cotabby/Services/Runtime/FoundationModelSuggestionEngine.swift
index 570a6dc9..fae711e0 100644
--- a/Cotabby/Services/Runtime/FoundationModelSuggestionEngine.swift
+++ b/Cotabby/Services/Runtime/FoundationModelSuggestionEngine.swift
@@ -175,7 +175,8 @@ final class FoundationModelSuggestionEngine {
                 generation: request.generation,
                 rawText: rawSuggestion,
                 text: normalizedSuggestion,
-                latency: latency
+                latency: latency,
+                suppressionReason: normalization.suppression?.rawValue
             )
         } catch is CancellationError {
             CotabbyLogger.suggestion.debug("Foundation model generation cancelled", metadata: baseMetadata)
diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
index f9313d6c..492a25c2 100644
--- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
+++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -140,7 +140,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
         cachedPrefixBytes: Int? = nil,
         options: LlamaGenerationOptions,
         onPartialRawText: ((String) -> Void)? = nil
-    ) throws -> String {
+    ) throws -> LlamaGenerationOutput {
         let preparation = try preparedPrompt(prompt: prompt, cachedPrefixBytes: cachedPrefixBytes, options: options, kind: "generate")
 
         lifecycleCondition.lock()
@@ -199,7 +199,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             engine.destroySequence(sequenceID)
             autocompleteSequenceID = -1
         }
-        return decode.text
+        return decode.output
     }
 
     /// Decodes `prompt` into the autocomplete KV cache without sampling, so the next `generate`
@@ -364,7 +364,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
         sequenceID: Int32,
         options: LlamaGenerationOptions,
         onPartialRawText: ((String) -> Void)? = nil
-    ) -> (text: String, engineCancelled: Bool) {
+    ) -> (output: LlamaGenerationOutput, engineCancelled: Bool) {
         var generatedText = ""
         var tokensGenerated = 0
         var sumLogprob = 0.0
@@ -392,6 +392,14 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
                 stopReason = "eos"
                 break
             }
+            // The raw distribution's most-likely token is end-of-generation: the model wants to
+            // stop here even though the stochastic sampler drew something else. Finalize with the
+            // text accumulated so far and discard the sampled-but-unwanted token; this is the
+            // anti-rambling stop the sentence classifier cannot express (lists, fragments, code).
+            if options.stopAtArgmaxEOG, result.argmax_is_eog {
+                stopReason = "argmax_eog"
+                break
+            }
 
             let piece = Self.extractPiece(result)
             generatedText += piece
@@ -428,10 +436,25 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
             ]
         )
 
+        // The average is only meaningful when the engine actually computed per-token logprobs,
+        // which is keyed on the floor being enabled (see setComputeLogprob at sequence setup).
+        let averageLogprob: Double? = options.confidenceFloor > -.infinity && tokensGenerated > 0
+            ? sumLogprob / Double(tokensGenerated)
+            : nil
         if Self.shouldSuppress(sumLogprob: sumLogprob, tokensGenerated: tokensGenerated, options: options) {
-            return ("", engineCancelled)
+            let suppressed = LlamaGenerationOutput(
+                text: "",
+                averageLogprob: averageLogprob,
+                suppressedByLowConfidence: true
+            )
+            return (suppressed, engineCancelled)
         }
-        return (generatedText, engineCancelled)
+        let output = LlamaGenerationOutput(
+            text: generatedText,
+            averageLogprob: averageLogprob,
+            suppressedByLowConfidence: false
+        )
+        return (output, engineCancelled)
     }
 
     /// Low-confidence gate for the sampled decoder: drop completions the model itself was unsure
diff --git a/Cotabby/Services/Runtime/LlamaRuntimeManager.swift b/Cotabby/Services/Runtime/LlamaRuntimeManager.swift
index 889522ca..af02d8e5 100644
--- a/Cotabby/Services/Runtime/LlamaRuntimeManager.swift
+++ b/Cotabby/Services/Runtime/LlamaRuntimeManager.swift
@@ -100,7 +100,7 @@ final class LlamaRuntimeManager: ObservableObject {
         prompt: String,
         cachedPrefixBytes: Int? = nil,
         options: LlamaGenerationOptions
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         try await generate(
             prompt: prompt,
             cachedPrefixBytes: cachedPrefixBytes,
@@ -116,7 +116,7 @@ final class LlamaRuntimeManager: ObservableObject {
         cachedPrefixBytes: Int? = nil,
         options: LlamaGenerationOptions,
         onPartialRawText: (@Sendable (String) -> Void)?
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         _ = try await preparedRuntime()
 
         let core = self.core
diff --git a/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift b/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift
index 4092bbb9..b090ddc9 100644
--- a/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift
+++ b/Cotabby/Services/Runtime/LlamaSuggestionEngine.swift
@@ -20,6 +20,30 @@ final class LlamaSuggestionEngine {
         self.runtimeManager = runtimeManager
     }
 
+    /// Shipped confidence floor (mean per-token log-probability). -infinity disables the gate AND
+    /// the per-token logprob computation behind it; any other value turns both on. -1.5 came from
+    /// an eval sweep over {off, -4, -3, -2.5, -2, -1.5, -1, -0.5, -0.3}: floors at or below -2
+    /// never fire on this model at temperature 0.1, -1 and tighter buy precision at a brutal
+    /// coverage cost, and -1.5 is the unique point where the composite quality score rose
+    /// (0.734 to 0.744), wrong-shows fell 27% relative, and not a single must-show case was lost.
+    static let defaultConfidenceFloor: Double = -1.5
+    /// `defaults write` escape hatches for dogfooding and field diagnosis without a rebuild.
+    static let confidenceFloorOverrideKey = "cotabbyConfidenceFloorOverride"
+    static let argmaxStopDisabledKey = "cotabbyArgmaxStopDisabled"
+
+    static func resolvedConfidenceFloor(_ defaults: UserDefaults = .standard) -> Double {
+        // Non-numeric or NaN overrides fall back to the shipped floor rather than acting as floor 0.0.
+        let raw = defaults.object(forKey: confidenceFloorOverrideKey)
+        let value = (raw as? NSNumber)?.doubleValue ?? (raw as? String).flatMap { Double($0) }
+        return value.flatMap { $0.isNaN ? nil : $0 } ?? defaultConfidenceFloor
+    }
+
+    /// Mirrors `resolvedConfidenceFloor`: injectable defaults so the disable toggle is testable
+    /// against an isolated suite instead of process-global state.
+    static func resolvedStopAtArgmaxEOG(_ defaults: UserDefaults = .standard) -> Bool {
+        defaults.bool(forKey: argmaxStopDisabledKey) == false
+    }
+
     /// Prefills the prompt KV for the field the user just focused, so the first real suggestion
     /// there only decodes the typed delta instead of the whole cold prompt.
     ///
@@ -89,9 +113,9 @@ final class LlamaSuggestionEngine {
                 ]) { _, new in new }
             )
             let options = Self.makeGenerationOptions(for: request)
-            let rawSuggestion: String
+            let output: LlamaGenerationOutput
             if let onPartial {
-                rawSuggestion = try await runtimeManager.generate(
+                output = try await runtimeManager.generate(
                     prompt: request.prompt,
                     cachedPrefixBytes: cachedPrefixBytes,
                     options: options,
@@ -115,7 +139,7 @@ final class LlamaSuggestionEngine {
                     }
                 )
             } else {
-                rawSuggestion = try await runtimeManager.generate(
+                output = try await runtimeManager.generate(
                     prompt: request.prompt,
                     cachedPrefixBytes: cachedPrefixBytes,
                     options: options
@@ -124,7 +148,15 @@ final class LlamaSuggestionEngine {
             try Task.checkCancellation()
 
             promptCacheHintTracker.recordSuccessfulRequest(request)
-            let normalization = SuggestionTextNormalizer.normalizeDetailed(rawSuggestion, for: request)
+            let rawSuggestion = output.text
+            // A confidence-suppressed completion never reaches the normalizer (the runtime already
+            // withheld the text); attribute the real reason instead of "the model produced nothing".
+            // Streamed partials are pre-gate by nature: a completion the floor later withholds can
+            // briefly paint and then clear, which is the same contract as any other final-result
+            // suppression under streaming.
+            let normalization = output.suppressedByLowConfidence
+                ? SuggestionNormalizationResult(text: "", suppression: .lowConfidence)
+                : SuggestionTextNormalizer.normalizeDetailed(rawSuggestion, for: request)
             let normalizedSuggestion = normalization.text
             let latency = Date().timeIntervalSince(startTime)
             let rawChars = rawSuggestion.count
@@ -133,12 +165,14 @@ final class LlamaSuggestionEngine {
             // `suppression_reason` distinguishes an empty ghost text caused by the model producing
             // nothing from one a filter dropped — the join key for judging decode quality on device.
             let suppressionReason = normalization.suppression?.rawValue ?? "none"
+            let averageLogprobDescription = output.averageLogprob.map { String(format: "%.3f", $0) } ?? "off"
             CotabbyLogger.suggestion.debug(
                 "Llama generated",
                 metadata: baseMetadata.merging([
                     "raw_chars": .stringConvertible(rawChars),
                     "normalized_chars": .stringConvertible(normalizedChars),
                     "suppression_reason": .string(suppressionReason),
+                    "avg_logprob": .string(averageLogprobDescription),
                     "latency_ms": .stringConvertible(latencyMs)
                 ]) { _, new in new }
             )
@@ -152,6 +186,7 @@ final class LlamaSuggestionEngine {
                     "raw_chars": .stringConvertible(rawChars),
                     "normalized_chars": .stringConvertible(normalizedChars),
                     "suppression_reason": .string(suppressionReason),
+                    "avg_logprob": .string(averageLogprobDescription),
                     "latency_ms": .stringConvertible(latencyMs),
                     "cache_hint_bytes": .string(hintDesc),
                     "max_tokens": .stringConvertible(request.maxPredictionTokens)
@@ -161,7 +196,8 @@ final class LlamaSuggestionEngine {
                 generation: request.generation,
                 rawText: rawSuggestion,
                 text: normalizedSuggestion,
-                latency: latency
+                latency: latency,
+                suppressionReason: normalization.suppression?.rawValue
             )
         } catch is CancellationError {
             CotabbyLogger.suggestion.debug("Llama generation cancelled", metadata: baseMetadata)
@@ -233,7 +269,9 @@ final class LlamaSuggestionEngine {
             forceWordContinuation: MidWordContinuationPolicy.shouldForceContinuation(
                 precedingText: request.context.precedingText,
                 trailingText: request.context.trailingText
-            )
+            ),
+            confidenceFloor: resolvedConfidenceFloor(),
+            stopAtArgmaxEOG: resolvedStopAtArgmaxEOG()
         )
     }
 }
diff --git a/Cotabby/Services/Runtime/SuggestionEngineRouter.swift b/Cotabby/Services/Runtime/SuggestionEngineRouter.swift
index 65b0cfbe..8d217f6e 100644
--- a/Cotabby/Services/Runtime/SuggestionEngineRouter.swift
+++ b/Cotabby/Services/Runtime/SuggestionEngineRouter.swift
@@ -11,6 +11,7 @@ final class SuggestionEngineRouter {
     private let foundationModelEngine: any SuggestionGenerating
     private let llamaEngine: any SuggestionGenerating
     private let performanceMetricsStore: PerformanceMetricsStore
+    private let qualityMetricsStore: SuggestionQualityMetricsStore
     /// Closure that returns the currently selected llama model filename (e.g. `Qwen3-0.6B-Q8_0.gguf`).
     /// A closure instead of a direct `LlamaRuntimeManager` reference keeps the router from depending
     /// on the concrete runtime type — useful for tests that want to fake the model label.
@@ -21,12 +22,14 @@ final class SuggestionEngineRouter {
         foundationModelEngine: any SuggestionGenerating,
         llamaEngine: any SuggestionGenerating,
         performanceMetricsStore: PerformanceMetricsStore,
+        qualityMetricsStore: SuggestionQualityMetricsStore,
         llamaModelNameProvider: @escaping @MainActor () -> String?
     ) {
         self.suggestionSettings = suggestionSettings
         self.foundationModelEngine = foundationModelEngine
         self.llamaEngine = llamaEngine
         self.performanceMetricsStore = performanceMetricsStore
+        self.qualityMetricsStore = qualityMetricsStore
         self.llamaModelNameProvider = llamaModelNameProvider
     }
 
@@ -48,6 +51,7 @@ final class SuggestionEngineRouter {
             do {
                 let result = try await foundationModelEngine.generateSuggestion(for: request, onPartial: onPartial)
                 recordPerformanceMetric(modelName: "Apple Intelligence", latency: result.latency)
+                recordQualityOutcome(result)
                 return result
             } catch SuggestionClientError.unsupportedLanguageOrLocale(let message) {
                 CotabbyLogger.suggestion.info(
@@ -67,10 +71,23 @@ final class SuggestionEngineRouter {
             CotabbyLogger.suggestion.debug("Routing to open-source llama engine", metadata: metadata)
             let result = try await llamaEngine.generateSuggestion(for: request, onPartial: onPartial)
             recordPerformanceMetric(modelName: llamaModelNameProvider() ?? "Llama", latency: result.latency)
+            recordQualityOutcome(result)
             return result
         }
     }
 
+    /// Counts every finished generation plus the engine-attributed suppression reason when the
+    /// pipeline emptied the text. The router is the single point that sees every finished result
+    /// regardless of engine or fallback, which keeps these counters complete by construction.
+    /// Display-time outcomes (shown, seam-guard suppressions, acceptance) are recorded by the
+    /// coordinator, the only layer that knows them.
+    private func recordQualityOutcome(_ result: SuggestionResult) {
+        qualityMetricsStore.recordGenerated()
+        // No attributed reason on the result means there is no suppression bucket to bump.
+        guard let withheldReason = result.suppressionReason else { return }
+        qualityMetricsStore.recordSuppressed(reason: withheldReason)
+    }
+
     /// Persists one (timestamp, model, latency) triple into the rolling ring buffer when the
     /// Performance pane toggle is on. The router is the right home for this seam because it is
     /// the single point that sees a finished `SuggestionResult` and knows which engine produced
@@ -121,6 +138,7 @@ final class SuggestionEngineRouter {
         do {
             let result = try await llamaEngine.generateSuggestion(for: request, onPartial: onPartial)
             recordPerformanceMetric(modelName: llamaModelNameProvider() ?? "Llama", latency: result.latency)
+            recordQualityOutcome(result)
             return result
         } catch SuggestionClientError.cancelled {
             throw SuggestionClientError.cancelled
diff --git a/Cotabby/Support/DebouncePolicy.swift b/Cotabby/Support/DebouncePolicy.swift
new file mode 100644
index 00000000..6d2af60a
--- /dev/null
+++ b/Cotabby/Support/DebouncePolicy.swift
@@ -0,0 +1,21 @@
+import Foundation
+
+/// Chooses the prediction debounce from the last observed generation latency.
+///
+/// A fixed debounce serves two masters badly: on fast hardware it adds avoidable delay before
+/// every suggestion, and on slow hardware it lets keystrokes pile doomed generations onto a model
+/// that cannot keep up (each cancel still costs a decode setup and teardown). Keying the debounce
+/// to the most recent generation latency makes fast machines snappier and slow machines calmer,
+/// with no configuration. The configured value remains the fallback until a first latency exists.
+nonisolated enum DebouncePolicy {
+    static func milliseconds(lastGenerationLatencyMilliseconds: Int?, fallback: Int) -> Int {
+        // No usable measurement yet (nil, zero, or negative): keep the configured debounce.
+        guard let latency = lastGenerationLatencyMilliseconds, latency >= 1 else {
+            return fallback
+        }
+        // Tiers: up to 70 ms -> 15, up to 140 ms -> 25, anything slower -> 55.
+        if latency <= 70 { return 15 }
+        if latency <= 140 { return 25 }
+        return 55
+    }
+}
diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift
index eafd1422..8a90df82 100644
--- a/Cotabby/Support/SuggestionTextNormalizer.swift
+++ b/Cotabby/Support/SuggestionTextNormalizer.swift
@@ -24,6 +24,11 @@ enum CompletionSuppressionReason: String, Sendable, Equatable {
     case echoesPrecedingText
     /// Printable characters survived but carried control/replacement glyphs the safety gate rejects.
     case unsafeToInsert
+    /// The runtime withheld the completion because its mean per-token log-probability fell below
+    /// the confidence floor: the model itself was unsure, and showing nothing beats a guess.
+    /// Attributed by the engine (the runtime reports it on `LlamaGenerationOutput`), not by the
+    /// normalizer, which never sees the withheld text.
+    case lowConfidence
 }
 
 /// Outcome of normalizing one raw completion: the ghost text, plus the attributable reason when that
diff --git a/Cotabby/UI/Settings/Panes/PerformancePaneView.swift b/Cotabby/UI/Settings/Panes/PerformancePaneView.swift
index b001167f..13d99ba1 100644
--- a/Cotabby/UI/Settings/Panes/PerformancePaneView.swift
+++ b/Cotabby/UI/Settings/Panes/PerformancePaneView.swift
@@ -11,12 +11,15 @@ import SwiftUI
 struct PerformancePaneView: View {
     @ObservedObject var suggestionSettings: SuggestionSettingsModel
     @ObservedObject var performanceMetricsStore: PerformanceMetricsStore
+    @ObservedObject var qualityMetricsStore: SuggestionQualityMetricsStore
     @ObservedObject var systemMetricsStore: SystemMetricsStore
 
     var body: some View {
         SettingsPaneScaffold {
             liveResourceSection
 
+            suggestionQualitySection
+
             Section("Tracking") {
                 Toggle(isOn: trackingEnabledBinding) {
                     SettingsRowLabel(
@@ -59,6 +62,71 @@ struct PerformancePaneView: View {
         .onDisappear { systemMetricsStore.endSampling() }
     }
 
+    // MARK: - Suggestion quality counters
+
+    /// Lifetime suggestion counters: shown, accepted, and generated totals, plus a "Withheld" row
+    /// when there are suppression reasons to list. Always on (counters carry no content), unlike
+    /// the per-request latency log below, which records timestamps and stays opt-in.
+    private var suggestionQualitySection: some View {
+        let counters = qualityMetricsStore.counters
+        let withheldBreakdown = topSuppressionReasons
+        let hasActivity = counters.shown > 0 || counters.generated > 0
+        return Section {
+            qualityCounterRow(label: "Suggestions shown", value: "\(counters.shown)")
+            qualityCounterRow(label: "Accepted", value: acceptedLabel)
+            qualityCounterRow(label: "Generations", value: "\(counters.generated)")
+            if !withheldBreakdown.isEmpty {
+                qualityCounterRow(
+                    label: "Withheld (\(counters.suppressedTotal))", value: withheldBreakdown
+                )
+            }
+        } header: {
+            HStack {
+                Text(qualityHeaderLabel)
+                Spacer()
+                if hasActivity {
+                    Button("Reset") { qualityMetricsStore.reset() }
+                        .buttonStyle(.borderless)
+                        .controlSize(.small)
+                }
+            }
+        }
+    }
+
+    /// Lays out one counter as a row: the label leads and the value, in secondary style and
+    /// trailing-aligned, sits past a spacer.
+    private func qualityCounterRow(label: String, value: String) -> some View {
+        HStack(alignment: .firstTextBaseline) {
+            Text(label)
+            Spacer()
+            Text(value).foregroundStyle(.secondary).multilineTextAlignment(.trailing)
+        }
+    }
+
+    /// Accepted count; gains a rounded whole-number percentage when an acceptance rate exists.
+    private var acceptedLabel: String {
+        let counters = qualityMetricsStore.counters
+        let total = counters.acceptedSuggestions
+        guard let share = counters.acceptanceRate else { return "\(total)" }
+        return "\(total) (\(Int((share * 100).rounded()))%)"
+    }
+
+    /// Up to four suppression reasons as "reason count" pairs, highest count first; ties by name.
+    private var topSuppressionReasons: String {
+        let ranked = qualityMetricsStore.counters.suppressedByReason.sorted { first, second in
+            first.value != second.value ? first.value > second.value : first.key < second.key
+        }
+        return ranked.prefix(4).map { "\($0.key) \($0.value)" }.joined(separator: ", ")
+    }
+
+    /// Title for the quality section. When the counters carry a `firstRecordedAt` date, it is
+    /// appended as an abbreviated date with the time omitted.
+    private var qualityHeaderLabel: String {
+        guard let start = qualityMetricsStore.counters.firstRecordedAt else { return "Suggestion Quality" }
+        let day = start.formatted(date: .abbreviated, time: .omitted)
+        return "Suggestion Quality (since \(day))"
+    }
+
     // MARK: - Live resource graphs
 
     private var liveResourceSection: some View {
diff --git a/Cotabby/UI/Settings/SettingsContainerView.swift b/Cotabby/UI/Settings/SettingsContainerView.swift
index 9300aacc..0059d760 100644
--- a/Cotabby/UI/Settings/SettingsContainerView.swift
+++ b/Cotabby/UI/Settings/SettingsContainerView.swift
@@ -21,6 +21,7 @@ struct SettingsContainerView: View {
     @ObservedObject var modelDownloadManager: ModelDownloadManager
     @ObservedObject var huggingFaceSearchService: HuggingFaceSearchService
     @ObservedObject var performanceMetricsStore: PerformanceMetricsStore
+    @ObservedObject var qualityMetricsStore: SuggestionQualityMetricsStore
     @ObservedObject var systemMetricsStore: SystemMetricsStore
 
     let onShowWelcome: () -> Void
@@ -130,6 +131,7 @@ struct SettingsContainerView: View {
             PerformancePaneView(
                 suggestionSettings: suggestionSettings,
                 performanceMetricsStore: performanceMetricsStore,
+                qualityMetricsStore: qualityMetricsStore,
                 systemMetricsStore: systemMetricsStore
             )
         case .about:
diff --git a/Cotabby/UI/Settings/SettingsIndex.swift b/Cotabby/UI/Settings/SettingsIndex.swift
index ef13b543..9865d002 100644
--- a/Cotabby/UI/Settings/SettingsIndex.swift
+++ b/Cotabby/UI/Settings/SettingsIndex.swift
@@ -71,6 +71,7 @@ enum SettingsItem: String, CaseIterable, Identifiable {
     case screenRecording
     // Performance
     case performanceTracking
+    case suggestionQualityStats
     case resourceUsage
     case recentRequests
     // About
@@ -137,6 +138,7 @@ enum SettingsItem: String, CaseIterable, Identifiable {
         case .inputMonitoring: return "Input Monitoring"
         case .screenRecording: return "Screen Recording"
         case .performanceTracking: return "Enable Performance Tracking"
+        case .suggestionQualityStats: return "Suggestion Quality"
         case .resourceUsage: return "Live Resource Usage"
         case .recentRequests: return "Recent Requests"
         case .checkForUpdates: return "Check for Updates"
@@ -202,6 +204,7 @@ enum SettingsItem: String, CaseIterable, Identifiable {
         case .inputMonitoring: return "keyboard"
         case .screenRecording: return "camera.viewfinder"
         case .performanceTracking: return "stopwatch"
+        case .suggestionQualityStats: return "checkmark.seal"
         case .resourceUsage: return "chart.line.uptrend.xyaxis"
         case .recentRequests: return "list.bullet.clipboard"
         case .checkForUpdates: return "arrow.triangle.2.circlepath"
@@ -238,7 +241,7 @@ enum SettingsItem: String, CaseIterable, Identifiable {
             return .apps
         case .accessibility, .inputMonitoring, .screenRecording:
             return .permissions
-        case .performanceTracking, .resourceUsage, .recentRequests:
+        case .performanceTracking, .suggestionQualityStats, .resourceUsage, .recentRequests:
             return .performance
         case .checkForUpdates, .support, .githubRepository, .wiki,
              .acknowledgements, .uninstall:
@@ -409,6 +412,9 @@ enum SettingsItem: String, CaseIterable, Identifiable {
         case .performanceTracking:
             return ["performance", "tracking", "latency", "metrics", "timing",
                     "telemetry", "analytics", "diagnostics", "measure"]
+        case .suggestionQualityStats:
+            return ["quality", "acceptance", "accepted", "shown", "suppressed", "withheld",
+                    "rate", "stats", "counters", "suggestions"]
         case .resourceUsage:
             return ["cpu", "memory", "ram", "usage", "resource", "graph", "chart",
                     "live", "load", "monitor"]
diff --git a/CotabbyTests/DebouncePolicyTests.swift b/CotabbyTests/DebouncePolicyTests.swift
new file mode 100644
index 00000000..bcb1afbc
--- /dev/null
+++ b/CotabbyTests/DebouncePolicyTests.swift
@@ -0,0 +1,38 @@
+import XCTest
+@testable import Cotabby
+
+/// Pins the debounce `DebouncePolicy.milliseconds` returns for sample latencies on both sides
+/// of each threshold, always with a fallback of 20.
+final class DebouncePolicyTests: XCTestCase {
+    /// A `nil` latency and a latency of 0 both yield the fallback.
+    func testNoLatencyDataUsesFallback() {
+        let forMissing = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: nil, fallback: 20)
+        let forZero = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 0, fallback: 20)
+        XCTAssertEqual(forMissing, 20)
+        XCTAssertEqual(forZero, 20)
+    }
+
+    /// Latencies of 45 and 70 both yield 15.
+    func testFastGenerationsGetTheShortDebounce() {
+        let forTypical = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 45, fallback: 20)
+        let forUpperEdge = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 70, fallback: 20)
+        XCTAssertEqual(forTypical, 15)
+        XCTAssertEqual(forUpperEdge, 15)
+    }
+
+    /// Latencies of 71 and 140 both yield 25.
+    func testMediumGenerationsGetTheMiddleDebounce() {
+        let forLowerEdge = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 71, fallback: 20)
+        let forUpperEdge = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 140, fallback: 20)
+        XCTAssertEqual(forLowerEdge, 25)
+        XCTAssertEqual(forUpperEdge, 25)
+    }
+
+    /// Latencies of 141 and 900 both yield 55.
+    func testSlowGenerationsBackOff() {
+        let forLowerEdge = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 141, fallback: 20)
+        let forFarAbove = DebouncePolicy.milliseconds(lastGenerationLatencyMilliseconds: 900, fallback: 20)
+        XCTAssertEqual(forLowerEdge, 55)
+        XCTAssertEqual(forFarAbove, 55)
+    }
+}
diff --git a/CotabbyTests/LlamaDecodeGateDefaultsTests.swift b/CotabbyTests/LlamaDecodeGateDefaultsTests.swift
new file mode 100644
index 00000000..90ba2da3
--- /dev/null
+++ b/CotabbyTests/LlamaDecodeGateDefaultsTests.swift
@@ -0,0 +1,46 @@
+import Foundation
+import XCTest
+@testable import Cotabby
+
+/// Exercises the `defaults write` escape hatches for the decode gates against an isolated suite:
+/// the confidence floor and the argmax-EOG stop must be adjustable in the field without a
+/// rebuild, and the tests must not touch process-global defaults from the test host.
+@MainActor
+final class LlamaDecodeGateDefaultsTests: XCTestCase {
+    private let suiteName = "LlamaDecodeGateDefaultsTests"
+    private var defaults: UserDefaults!
+
+    override func setUp() {
+        super.setUp()
+        defaults = UserDefaults(suiteName: suiteName)
+        defaults.removePersistentDomain(forName: suiteName)
+    }
+
+    override func tearDown() {
+        defaults.removePersistentDomain(forName: suiteName)
+        defaults = nil
+        super.tearDown()
+    }
+
+    func test_confidenceFloor_defaultsToShippedValue() {
+        let resolved = LlamaSuggestionEngine.resolvedConfidenceFloor(defaults)
+        XCTAssertEqual(resolved, LlamaSuggestionEngine.defaultConfidenceFloor)
+    }
+
+    func test_confidenceFloor_overrideWins_includingDisable() {
+        let overrideKey = LlamaSuggestionEngine.confidenceFloorOverrideKey
+        defaults.set(-0.8, forKey: overrideKey)
+        XCTAssertEqual(LlamaSuggestionEngine.resolvedConfidenceFloor(defaults), -0.8)
+
+        // Negative infinity is the "disable" override from the test name; it must resolve as-is.
+        defaults.set(-Double.infinity, forKey: overrideKey)
+        XCTAssertEqual(LlamaSuggestionEngine.resolvedConfidenceFloor(defaults), -.infinity)
+    }
+
+    func test_argmaxStop_onByDefault_andDisableToggleWorks() {
+        XCTAssertTrue(LlamaSuggestionEngine.resolvedStopAtArgmaxEOG(defaults))
+
+        defaults.set(true, forKey: LlamaSuggestionEngine.argmaxStopDisabledKey)
+        XCTAssertFalse(LlamaSuggestionEngine.resolvedStopAtArgmaxEOG(defaults))
+    }
+}
diff --git a/CotabbyTests/LlamaSuggestionEngineCancellationTests.swift b/CotabbyTests/LlamaSuggestionEngineCancellationTests.swift
index 336c6ed0..ba7dcd41 100644
--- a/CotabbyTests/LlamaSuggestionEngineCancellationTests.swift
+++ b/CotabbyTests/LlamaSuggestionEngineCancellationTests.swift
@@ -53,7 +53,7 @@ final class LlamaSuggestionEngineCancellationTests: XCTestCase {
 
     func test_successfulGeneration_doesNotResetCache() async throws {
         let runtime = FakeLlamaRuntime()
-        runtime.generateResult = .success("world")
+        runtime.generateResult = .success(.text("world"))
         let engine = LlamaSuggestionEngine(runtimeManager: runtime)
 
         let result = try await engine.generateSuggestion(for: makeRequest(prompt: "hello "))
@@ -62,6 +62,24 @@ final class LlamaSuggestionEngineCancellationTests: XCTestCase {
         XCTAssertEqual(runtime.resetCount, 0)
     }
 
+    func test_lowConfidenceSuppression_isAttributedAsLowConfidence() async throws {
+        // Arrange: the fake runtime hands back empty text flagged as withheld for low confidence.
+        let withheld = LlamaGenerationOutput(text: "", averageLogprob: -5.2, suppressedByLowConfidence: true)
+        let fakeRuntime = FakeLlamaRuntime()
+        fakeRuntime.generateResult = .success(withheld)
+        let engine = LlamaSuggestionEngine(runtimeManager: fakeRuntime)
+
+        let suggestion = try await engine.generateSuggestion(for: makeRequest(prompt: "hello "))
+
+        // Assert: empty text, the low-confidence reason attached, no reset counted on the fake.
+        XCTAssertEqual(suggestion.text, "")
+        XCTAssertEqual(
+            suggestion.suppressionReason, CompletionSuppressionReason.lowConfidence.rawValue,
+            "a runtime-withheld completion must not read as 'the model produced nothing'"
+        )
+        XCTAssertEqual(fakeRuntime.resetCount, 0)
+    }
+
     func test_suggestionClientError_resetsCache_andRethrowsSameError() async {
         // A `SuggestionClientError` crossing the runtime boundary is a genuine failure, so it must
         // reset the cache but keep its original case and message for the coordinator's diagnostics.
@@ -168,14 +186,14 @@ private struct UnexpectedRuntimeBoom: LocalizedError {
 /// so the engine's failure routing can be exercised without loading a real model.
 @MainActor
 private final class FakeLlamaRuntime: LlamaRuntimeGenerating {
-    var generateResult: Result<String, Error> = .success("")
+    var generateResult: Result<LlamaGenerationOutput, Error> = .success(.text(""))
     private(set) var resetCount = 0
 
     func generate(
         prompt: String,
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         try generateResult.get()
     }
 
diff --git a/CotabbyTests/LlamaSuggestionEnginePrewarmTests.swift b/CotabbyTests/LlamaSuggestionEnginePrewarmTests.swift
index f66c4f57..c351779f 100644
--- a/CotabbyTests/LlamaSuggestionEnginePrewarmTests.swift
+++ b/CotabbyTests/LlamaSuggestionEnginePrewarmTests.swift
@@ -106,7 +106,7 @@ final class LlamaSuggestionEnginePrewarmTests: XCTestCase {
 @MainActor
 private final class RecordingPrewarmRuntime: LlamaRuntimeGenerating {
     var prefillError: Error?
-    var generateResult: Result<String, Error> = .success("ok")
+    var generateResult: Result<LlamaGenerationOutput, Error> = .success(.text("ok"))
     private(set) var prefillPrompts: [String] = []
     private(set) var generateCachedPrefixBytes: [Int?] = []
 
@@ -114,7 +114,7 @@ private final class RecordingPrewarmRuntime: LlamaRuntimeGenerating {
         prompt: String,
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         generateCachedPrefixBytes.append(cachedPrefixBytes)
         return try generateResult.get()
     }
diff --git a/CotabbyTests/LlamaSuggestionEngineStreamingTests.swift b/CotabbyTests/LlamaSuggestionEngineStreamingTests.swift
index fc138f70..4e28451e 100644
--- a/CotabbyTests/LlamaSuggestionEngineStreamingTests.swift
+++ b/CotabbyTests/LlamaSuggestionEngineStreamingTests.swift
@@ -107,8 +107,8 @@ private final class StreamingFakeRuntime: LlamaRuntimeGenerating {
         prompt: String,
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions
-    ) async throws -> String {
-        finalText
+    ) async throws -> LlamaGenerationOutput {
+        .text(finalText)
     }
 
     func generate(
@@ -116,12 +116,12 @@ private final class StreamingFakeRuntime: LlamaRuntimeGenerating {
         cachedPrefixBytes: Int?,
         options: LlamaGenerationOptions,
         onPartialRawText: (@Sendable (String) -> Void)?
-    ) async throws -> String {
+    ) async throws -> LlamaGenerationOutput {
         streamingCallCount += 1
         for partial in partialRawTexts {
             onPartialRawText?(partial)
         }
-        return finalText
+        return .text(finalText)
     }
 
     func resetPromptCache() {}
diff --git a/CotabbyTests/PromptPolicyTests.swift b/CotabbyTests/PromptPolicyTests.swift
index ef23a211..bfdb5f5a 100644
--- a/CotabbyTests/PromptPolicyTests.swift
+++ b/CotabbyTests/PromptPolicyTests.swift
@@ -308,6 +308,7 @@ final class SuggestionEngineRouterTests: XCTestCase {
             foundationModelEngine: appleEngine,
             llamaEngine: openSourceEngine,
             performanceMetricsStore: PerformanceMetricsStore(userDefaults: makeUserDefaults()),
+            qualityMetricsStore: SuggestionQualityMetricsStore(userDefaults: makeUserDefaults()),
             llamaModelNameProvider: { nil }
         )
 
diff --git a/CotabbyTests/SuggestionCoordinatorAcceptanceTests.swift b/CotabbyTests/SuggestionCoordinatorAcceptanceTests.swift
index efaafafa..3b701689 100644
--- a/CotabbyTests/SuggestionCoordinatorAcceptanceTests.swift
+++ b/CotabbyTests/SuggestionCoordinatorAcceptanceTests.swift
@@ -312,6 +312,9 @@ final class SuggestionCoordinatorAcceptanceTests: XCTestCase {
             configuration: .standard,
             spellChecker: CurrentWordSpellChecker(),
             symSpellCorrector: SymSpellCorrector(preloadLanguage: nil),
+            qualityMetricsStore: SuggestionQualityMetricsStore(
+                userDefaults: UserDefaults(suiteName: "CotabbyTests.quality.\(UUID().uuidString)") ?? .standard
+            ),
             userDefaults: UserDefaults(suiteName: "CotabbyTests.\(UUID().uuidString)") ?? .standard
         )
         Self.retainedCoordinators.append(coordinator)
diff --git a/CotabbyTests/SuggestionCoordinatorTestSupport.swift b/CotabbyTests/SuggestionCoordinatorTestSupport.swift
index 37a13b57..42953014 100644
--- a/CotabbyTests/SuggestionCoordinatorTestSupport.swift
+++ b/CotabbyTests/SuggestionCoordinatorTestSupport.swift
@@ -245,6 +245,9 @@ func makeCoordinatorRig(
         configuration: .standard,
         spellChecker: CurrentWordSpellChecker(),
         symSpellCorrector: SymSpellCorrector(preloadLanguage: nil),
+        qualityMetricsStore: SuggestionQualityMetricsStore(
+            userDefaults: UserDefaults(suiteName: "CotabbyTests.rig.quality.\(UUID().uuidString)") ?? .standard
+        ),
         userDefaults: UserDefaults(suiteName: "CotabbyTests.rig.\(UUID().uuidString)") ?? .standard
     )
     return CoordinatorRig(
diff --git a/CotabbyTests/SuggestionEngineRouterTests.swift b/CotabbyTests/SuggestionEngineRouterTests.swift
index 75e4f395..156d8fa9 100644
--- a/CotabbyTests/SuggestionEngineRouterTests.swift
+++ b/CotabbyTests/SuggestionEngineRouterTests.swift
@@ -67,6 +67,7 @@ final class SuggestionEngineRouterRoutingTests: XCTestCase {
             foundationModelEngine: foundation,
             llamaEngine: llama,
             performanceMetricsStore: metrics,
+            qualityMetricsStore: SuggestionQualityMetricsStore(userDefaults: defaults),
             llamaModelNameProvider: { llamaModelName }
         )
         Self.retained.append(contentsOf: [router, settings, metrics] as [AnyObject])
diff --git a/CotabbyTests/SuggestionQualityMetricsStoreTests.swift b/CotabbyTests/SuggestionQualityMetricsStoreTests.swift
new file mode 100644
index 00000000..112aa8e8
--- /dev/null
+++ b/CotabbyTests/SuggestionQualityMetricsStoreTests.swift
@@ -0,0 +1,59 @@
+import XCTest
+@testable import Cotabby
+
+@MainActor
+final class SuggestionQualityMetricsStoreTests: XCTestCase {
+    /// Defaults under a UUID-named suite per call; falls back to `.standard` if creation fails.
+    private func freshDefaults() -> UserDefaults {
+        UserDefaults(suiteName: "CotabbyTests.qualityMetrics.\(UUID().uuidString)") ?? .standard
+    }
+
+    func testCountersAccumulate() {
+        let store = SuggestionQualityMetricsStore(userDefaults: freshDefaults())
+        for _ in 0..<2 { store.recordGenerated() }
+        store.recordShown()
+        store.recordAcceptedSuggestion()
+        for _ in 0..<2 { store.recordSuppressed(reason: "lowConfidence") }
+        store.recordSuppressed(reason: "seamMisspelling")
+
+        let counters = store.counters
+        XCTAssertEqual(counters.generated, 2)
+        XCTAssertEqual(counters.shown, 1)
+        XCTAssertEqual(counters.acceptedSuggestions, 1)
+        XCTAssertEqual(counters.suppressedByReason["lowConfidence"], 2)
+        XCTAssertEqual(counters.suppressedByReason["seamMisspelling"], 1)
+        XCTAssertEqual(counters.suppressedTotal, 3)
+        XCTAssertNotNil(counters.firstRecordedAt)
+    }
+
+    func testAcceptanceRate() {
+        let store = SuggestionQualityMetricsStore(userDefaults: freshDefaults())
+        XCTAssertNil(store.counters.acceptanceRate, "no rate without shown suggestions")
+
+        // One acceptance out of four shown suggestions.
+        for _ in 0..<4 { store.recordShown() }
+        store.recordAcceptedSuggestion()
+        XCTAssertEqual(store.counters.acceptanceRate ?? 0, 0.25, accuracy: 0.0001)
+    }
+
+    func testPersistsAcrossInstances() {
+        let sharedDefaults = freshDefaults()
+        let writer = SuggestionQualityMetricsStore(userDefaults: sharedDefaults)
+        writer.recordShown()
+        writer.recordSuppressed(reason: "emptyGeneration")
+
+        // A second store built over the same defaults must see what the first one recorded.
+        let reloaded = SuggestionQualityMetricsStore(userDefaults: sharedDefaults)
+        XCTAssertEqual(reloaded.counters.shown, 1)
+        XCTAssertEqual(reloaded.counters.suppressedByReason["emptyGeneration"], 1)
+    }
+
+    func testResetClearsEverything() {
+        let sharedDefaults = freshDefaults()
+        let store = SuggestionQualityMetricsStore(userDefaults: sharedDefaults)
+        store.recordShown()
+        store.reset()
+        XCTAssertEqual(store.counters, SuggestionQualityMetricsStore.Counters())
+        XCTAssertEqual(SuggestionQualityMetricsStore(userDefaults: sharedDefaults).counters.shown, 0)
+    }
+}