From d24c1e36dfb03bd74af0fa8d12fda56b6da649f4 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 19:03:46 -0700 Subject: [PATCH 1/6] fix(visual): enable Vision language correction for visual-context OCR OCR text conditions the prompt, and the downstream hygiene filters can only drop garbled lines, not repair them. Language correction cuts the garbling at the source; the capture is once per focused field, so the extra Vision work is off every hot path. --- Cotabby/Services/Visual/ScreenTextExtractor.swift | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Cotabby/Services/Visual/ScreenTextExtractor.swift b/Cotabby/Services/Visual/ScreenTextExtractor.swift index 467b725b..2b6b301f 100644 --- a/Cotabby/Services/Visual/ScreenTextExtractor.swift +++ b/Cotabby/Services/Visual/ScreenTextExtractor.swift @@ -127,9 +127,12 @@ struct ScreenTextExtractor: ScreenTextExtracting { } // Accurate OCR is slower, but visual context is only captured once per focused - // field and the result can materially improve autocomplete relevance. + // field and the result can materially improve autocomplete relevance. Language + // correction is on for the same reason: it cuts garbled recognitions at the + // source, which matters because this text conditions the prompt and the + // downstream hygiene filters can only drop junk, not repair it. request.recognitionLevel = .accurate - request.usesLanguageCorrection = false + request.usesLanguageCorrection = true request.minimumTextHeight = 0.008 do { From add09da2d9fb2e43b285846b918d54a5fee95a54 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 19:03:46 -0700 Subject: [PATCH 2/6] feat(prompt): condition suggestions on the writing surface The base model previously received bare prefix text: on Mail, Slack, or Docs it had no idea what surface it was continuing, so completions read generic. 
The prompt preface now states the surface (An email being written in Mail.), the window title (the subject, document name, or channel is the highest-signal cue Accessibility offers), the web domain, and the field placeholder, all sanitized and length-capped. Code editors and terminals are deliberately excluded: app metadata biases small base models toward code and numbers exactly where the text already makes the language obvious. The Foundation Models prompt states the same sanitized facts. Capture is one Accessibility read per field session, cached and frozen for the session so the prompt bytes ahead of the prefix stay stable and llama KV prefix reuse keeps absorbing them; a retitling browser tab cannot thrash the cache. Secure fields are never probed. Classification moves to a shared AppSurfaceClassifier so both engines agree about what kind of app the user is in. New Include App Context toggle (default on, indexed for settings search); everything stays on device. --- Cotabby.xcodeproj/project.pbxproj | 26 ++++ Cotabby/Models/FocusModels.swift | 18 ++- Cotabby/Models/SuggestionEngineModels.swift | 4 + Cotabby/Models/SuggestionModels.swift | 38 ++++- Cotabby/Models/SuggestionSettingsData.swift | 3 + Cotabby/Models/SuggestionSettingsModel.swift | 24 ++- .../Focus/FocusSnapshotResolver.swift | 41 ++++- .../Services/Focus/SurfaceContextCache.swift | 41 +++++ Cotabby/Support/AXHelper.swift | 17 +++ Cotabby/Support/AppSurfaceClassifier.swift | 78 ++++++++++ .../BaseCompletionPromptRenderer.swift | 14 ++ .../FoundationModelPromptRenderer.swift | 70 +++------ .../Support/SuggestionRequestFactory.swift | 20 ++- Cotabby/Support/SuggestionSettingsStore.swift | 12 ++ Cotabby/Support/SurfaceContextComposer.swift | 142 ++++++++++++++++++ .../UI/Settings/Panes/GeneralPaneView.swift | 15 ++ Cotabby/UI/Settings/SettingsIndex.swift | 8 +- CotabbyTests/AppSurfaceClassifierTests.swift | 50 ++++++ CotabbyTests/CotabbyTestFixtures.swift | 22 ++- .../SurfaceContextComposerTests.swift | 
132 ++++++++++++++++ 20 files changed, 708 insertions(+), 67 deletions(-) create mode 100644 Cotabby/Services/Focus/SurfaceContextCache.swift create mode 100644 Cotabby/Support/AppSurfaceClassifier.swift create mode 100644 Cotabby/Support/SurfaceContextComposer.swift create mode 100644 CotabbyTests/AppSurfaceClassifierTests.swift create mode 100644 CotabbyTests/SurfaceContextComposerTests.swift diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj index eca38f79..3e3be9f3 100644 --- a/Cotabby.xcodeproj/project.pbxproj +++ b/Cotabby.xcodeproj/project.pbxproj @@ -69,6 +69,7 @@ 15FA56CEF6FB5FF54C2FBA6F /* PermissionAndContextModelTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E7F42112F14026E6253BB865 /* PermissionAndContextModelTests.swift */; }; 1681C0F22323FB1156579D99 /* AGPL-3.0.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6F0EE728C0B1A7AD6B19CD0C /* AGPL-3.0.txt */; }; 175C4FA56C29DEE58C2D4D7E /* SuggestionSettingsModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 86460C747AA883FDE756BDBA /* SuggestionSettingsModel.swift */; }; + 18382D1919D90E3C1EE143C2 /* AppSurfaceClassifierTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C451E144D220D5C63372A8C0 /* AppSurfaceClassifierTests.swift */; }; 1899BC5A35DC96B4D04B18A5 /* es.txt in Resources */ = {isa = PBXBuildFile; fileRef = 0B6816DF5D33863F966240B4 /* es.txt */; }; 19386985A3A91D0843092086 /* AboutPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3FA53BBC3D81503C1D17477 /* AboutPaneView.swift */; }; 19CA1BF8B508E0E219EF4485 /* SuggestionEngineModelsTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 470A7DAE3D6A2C873B395AE3 /* SuggestionEngineModelsTests.swift */; }; @@ -139,6 +140,7 @@ 2F227738D7834B1A7A81D1D6 /* ModelDownloadManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51020F8CD58338BD643FBF63 /* ModelDownloadManager.swift */; }; 2FC40D4BFDD05C2401D7A5E9 /* SuggestionInserter.swift in Sources */ = {isa = 
PBXBuildFile; fileRef = 0A3D1125B962CBE0269EEDDB /* SuggestionInserter.swift */; }; 303652F15C0FE55595669D81 /* SpellingDictionaryResourceTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D562A73C7C680F2AA65F9F7F /* SpellingDictionaryResourceTests.swift */; }; + 30D4728580451C7D3BDF42E3 /* SurfaceContextCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CF6B7FFBF77B4290F5F2FB8 /* SurfaceContextCache.swift */; }; 30F3F2B6D13CD583136CD787 /* AXHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = AC70775535A3428991025AB8 /* AXHelper.swift */; }; 3112A355E61878A6A6D1FDF8 /* EmojiQueryRun.swift in Sources */ = {isa = PBXBuildFile; fileRef = DDF6A4E9CE93FD53C60E67E3 /* EmojiQueryRun.swift */; }; 3124AD2340D4B58AF48A22F3 /* KeyboardInputSourceMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 534F1297DEF3547D0DE56FB2 /* KeyboardInputSourceMonitor.swift */; }; @@ -274,6 +276,7 @@ 6014B31E2570EFFE45557E33 /* TickMarkSlider.swift in Sources */ = {isa = PBXBuildFile; fileRef = 67586807ACE8EB13C9014535 /* TickMarkSlider.swift */; }; 60636D92D12FED132250D8D2 /* PerformancePaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBD6113A3C1038BECC99245 /* PerformancePaneView.swift */; }; 6106B16C0DBA94EBF838D93E /* PermissionOverlayTracker.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6423D6CC8CC371D2DA899DE /* PermissionOverlayTracker.swift */; }; + 61635150B8004F6CB2FACE65 /* AppSurfaceClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 94B0830FBE4F2E239F670DBA /* AppSurfaceClassifier.swift */; }; 61EC9D635D416115E7C96E0F /* PermissionOverlayWindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 92C6EB9FDA48ADF425A116A9 /* PermissionOverlayWindowController.swift */; }; 62DBCF429B7F464A6B467725 /* OnboardingFeatureShowcase.swift in Sources */ = {isa = PBXBuildFile; fileRef = 926B332E7B4CFEE42C4CAA75 /* OnboardingFeatureShowcase.swift */; }; 62FADA407797998742502DD9 /* CaretLinePositionTests.swift in 
Sources */ = {isa = PBXBuildFile; fileRef = 8EA827D6A2A54DF4BAD56405 /* CaretLinePositionTests.swift */; }; @@ -432,6 +435,7 @@ A614AD79BE724ABA3721613B /* MacroModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = B41F06FEF208B30ECCF23A6F /* MacroModels.swift */; }; A67B3F97DAEBEC7CA9B1215E /* PerDomainDisableSettings.swift in Sources */ = {isa = PBXBuildFile; fileRef = B25C3087D4A9F4DC52FD5A69 /* PerDomainDisableSettings.swift */; }; A6DAD9EB9AE88A319EADAC7B /* WindowScreenshotService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 77B0121E7BB173F8A2B0B108 /* WindowScreenshotService.swift */; }; + A736C52C0D280A35946E37A3 /* SurfaceContextComposer.swift in Sources */ = {isa = PBXBuildFile; fileRef = C602357DDED5D11C8B4567FB /* SurfaceContextComposer.swift */; }; A773D96AC9EC16231633542C /* DownloadOutcomeClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3DE1975F3B5F4A70478DBF41 /* DownloadOutcomeClassifier.swift */; }; A7A1B9BE242959B3EE396D27 /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = 7B0278A63FEEE8DEDB6C50DB /* LaunchAtLogin */; }; A87978083EBE1AC294377F7C /* HuggingFaceSearchService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8F426127917FCB1096134732 /* HuggingFaceSearchService.swift */; }; @@ -489,6 +493,7 @@ BBE22CE4EF43247F8775B25D /* FocusPollBackoff.swift in Sources */ = {isa = PBXBuildFile; fileRef = 09FADF683BE7B3558377FA76 /* FocusPollBackoff.swift */; }; BC0FDF9998CA892F4EE0E2E2 /* SuggestionCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8F961F5DF2A392F6F5F94F8A /* SuggestionCoordinator.swift */; }; BD77B0CFB09BF0B4EDB1B0C6 /* TagChip.swift in Sources */ = {isa = PBXBuildFile; fileRef = FB317C82CE2CBC69056BA4B8 /* TagChip.swift */; }; + BD94A8663D79D9609461F894 /* SurfaceContextCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CF6B7FFBF77B4290F5F2FB8 /* SurfaceContextCache.swift */; }; BE3CB85508055D159C35020A /* LlamaSuggestionEngineCancellationTests.swift in 
Sources */ = {isa = PBXBuildFile; fileRef = AABCC3FD99B1824A81E665F3 /* LlamaSuggestionEngineCancellationTests.swift */; }; BE688EC1957B4AE004063EFE /* TypoGateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C727BF6FF8ACAAED30B0329F /* TypoGateTests.swift */; }; BE79CFA6A7A37F25B2AA4D56 /* LlamaRuntimeCore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 944065A858D9BC936CB12B23 /* LlamaRuntimeCore.swift */; }; @@ -498,10 +503,12 @@ C0537A515AED443F6C61DB2A /* MenuBarSections.swift in Sources */ = {isa = PBXBuildFile; fileRef = 83A810F9D28A18BA6F2066C7 /* MenuBarSections.swift */; }; C0B833234748E82D3382631A /* emoji.json in Resources */ = {isa = PBXBuildFile; fileRef = C379D77029D6E88C8C1B9AF7 /* emoji.json */; }; C0FE11D76BDF01A5470C554D /* FocusCapabilityFlickerGate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A44BEC8C23FF227731DD0CD /* FocusCapabilityFlickerGate.swift */; }; + C149EAED2CF6F8B6274053E0 /* AppSurfaceClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 94B0830FBE4F2E239F670DBA /* AppSurfaceClassifier.swift */; }; C178E35A9A713BD4D9943E62 /* TypoCaseTransfer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 08CE63B8725EBD71A4C024E1 /* TypoCaseTransfer.swift */; }; C29FE24B690BB883744AD248 /* FocusDebugOverlayController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0F5E263AB69029D5E13D5EE8 /* FocusDebugOverlayController.swift */; }; C2C958D6E5F5FE1CCC414BCE /* SuggestionSubsystemContracts.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEB16474A67CE1D210B944C9 /* SuggestionSubsystemContracts.swift */; }; C3B6C8B9DE20A71C65D390DA /* BrowserDomainTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F761083EA5465023D82B5F4 /* BrowserDomainTests.swift */; }; + C423CCD7198ECE27DF260268 /* SurfaceContextComposerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 29CDC8BE5312B9BEFD9B22CB /* SurfaceContextComposerTests.swift */; }; C4805FA1A6CD552E572D7012 /* InMemoryLogging in Frameworks */ = 
{isa = PBXBuildFile; productRef = C42F41C6497D6199DA27D6CD /* InMemoryLogging */; }; C4C6734678797669055988E0 /* AppUpdateManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = AD9573F3504CAE6891DF9B7D /* AppUpdateManager.swift */; }; C56ABA04AE27A9943368035C /* CurrentWordSpellChecker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733BF6287BDE599B02A12271 /* CurrentWordSpellChecker.swift */; }; @@ -632,6 +639,7 @@ FEC24B9C23274B9FA1F0072E /* PromptContextSanitizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */; }; FEF2CF888D8709D1FB0D2B20 /* Logging in Frameworks */ = {isa = PBXBuildFile; productRef = 6F27073D2818C0218C3F4370 /* Logging */; }; FF3F7B74B561EF0807D28FD8 /* SystemMetricsStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 807148A920E003DEF8BA6092 /* SystemMetricsStore.swift */; }; + FF773F168B20502C68239967 /* SurfaceContextComposer.swift in Sources */ = {isa = PBXBuildFile; fileRef = C602357DDED5D11C8B4567FB /* SurfaceContextComposer.swift */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -716,6 +724,7 @@ 2930EC34057319130393696B /* KeyCodeLabelsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeyCodeLabelsTests.swift; sourceTree = "<group>"; }; 2960080A726E51198225147A /* InsertionStrategySelectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InsertionStrategySelectorTests.swift; sourceTree = "<group>"; }; 299BD7B741DA4AAE6A061BAD /* StreamedGhostTextPolicy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamedGhostTextPolicy.swift; sourceTree = "<group>"; }; + 29CDC8BE5312B9BEFD9B22CB /* SurfaceContextComposerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SurfaceContextComposerTests.swift; sourceTree = "<group>"; }; 29ED42C4BDD0C521101AF95E /* DeviceInfo.swift */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.swift; path = DeviceInfo.swift; sourceTree = "<group>"; }; 2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsAttentionEvaluator.swift; sourceTree = "<group>"; }; 2B7A28471B8526C2693FFF65 /* AcknowledgementsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AcknowledgementsView.swift; sourceTree = "<group>"; }; @@ -839,6 +848,7 @@ 89497C35D1825BAE9625EE06 /* ContextPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContextPaneView.swift; sourceTree = "<group>"; }; 8BF8DC1860CCF0DFA3A3DFD7 /* TextLayoutCaretEstimator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextLayoutCaretEstimator.swift; sourceTree = "<group>"; }; 8CB1D4F2681FAF59014AE115 /* SuggestionInteractionStateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionInteractionStateTests.swift; sourceTree = "<group>"; }; + 8CF6B7FFBF77B4290F5F2FB8 /* SurfaceContextCache.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SurfaceContextCache.swift; sourceTree = "<group>"; }; 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LLMIOFileHandler.swift; sourceTree = "<group>"; }; 8EA827D6A2A54DF4BAD56405 /* CaretLinePositionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CaretLinePositionTests.swift; sourceTree = "<group>"; }; 8F20A19A24D20E16D25ADCDA /* DeepGeometryWalkThrottleTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepGeometryWalkThrottleTests.swift; sourceTree = "<group>"; }; @@ -854,6 +864,7 @@ 93AF1246C1C2E296A1162E63 /* CotabbyDebugOptionsTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CotabbyDebugOptionsTests.swift; sourceTree = "<group>"; }; 944065A858D9BC936CB12B23 /* LlamaRuntimeCore.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaRuntimeCore.swift; sourceTree = "<group>"; }; 9458F0820B3161FE9CF1DDAF /* GhostFontSizeStabilizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GhostFontSizeStabilizer.swift; sourceTree = "<group>"; }; + 94B0830FBE4F2E239F670DBA /* AppSurfaceClassifier.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppSurfaceClassifier.swift; sourceTree = "<group>"; }; 960F3FDBF283347594F30494 /* SuggestionSettingsStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionSettingsStore.swift; sourceTree = "<group>"; }; 96495E4147D828C0B1B22765 /* ClipboardContentDistiller.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ClipboardContentDistiller.swift; sourceTree = "<group>"; }; 974A8708D2006767BD76862A /* MacroEngine.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MacroEngine.swift; sourceTree = "<group>"; }; @@ -940,7 +951,9 @@ C375227649689775275AA4B3 /* SuggestionCoordinatorAcceptanceTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionCoordinatorAcceptanceTests.swift; sourceTree = "<group>"; }; C379D77029D6E88C8C1B9AF7 /* emoji.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = emoji.json; sourceTree = "<group>"; }; C3A35FAA742408D002B75920 /* WebContentFieldDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebContentFieldDetector.swift; sourceTree = "<group>"; }; + C451E144D220D5C63372A8C0 /* AppSurfaceClassifierTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppSurfaceClassifierTests.swift; sourceTree = "<group>"; }; C49F67B3EEB2F2A577A54085 /* DeviceInfoTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeviceInfoTests.swift; sourceTree = "<group>"; }; + C602357DDED5D11C8B4567FB /* SurfaceContextComposer.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = SurfaceContextComposer.swift; sourceTree = "<group>"; }; C648EBB10D7F8E0B904DEC91 /* de.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = de.txt; sourceTree = "<group>"; }; C71031E8DB171047318B92FC /* SyntheticReplacePlannerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SyntheticReplacePlannerTests.swift; sourceTree = "<group>"; }; C727BF6FF8ACAAED30B0329F /* TypoGateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TypoGateTests.swift; sourceTree = "<group>"; }; @@ -1198,6 +1211,7 @@ 04E25414C307A20B6F9F20EC /* FocusSnapshotResolver.swift */, 5C9FDF029F7828CAF3FE8850 /* FocusTracker.swift */, F0C3E6241B0F35C7A2C85965 /* StaticTextRunWalkThrottle.swift */, + 8CF6B7FFBF77B4290F5F2FB8 /* SurfaceContextCache.swift */, ); path = Focus; sourceTree = "<group>"; @@ -1330,6 +1344,7 @@ children = ( A3C1F7BD86257CF93639327C /* Fixtures */, A168A7B6A7AD11559B60C56B /* ApplicationBundleMetadataTests.swift */, + C451E144D220D5C63372A8C0 /* AppSurfaceClassifierTests.swift */, 78AFA4586C82E92D7FBF381B /* ArithmeticEvaluatorTests.swift */, 66B53214C3842F78B202D498 /* AXHelperTests.swift */, C046CB4F3CB4BFE9391DB5DE /* AXTextGeometryResolverTests.swift */, @@ -1452,6 +1467,7 @@ 19DB9558F4D3AFB108D71649 /* SuggestionStateHelperTests.swift */, 87C309CD6A454C415D8BEEC7 /* SuggestionTextColorCodecTests.swift */, 9B55A4362AB7F0528C661C4C /* SuggestionTextNormalizerTests.swift */, + 29CDC8BE5312B9BEFD9B22CB /* SurfaceContextComposerTests.swift */, C850141146422A132B2B3516 /* SymSpellCorrectorTests.swift */, B32482EABE9EA979C40C8A8F /* SymSpellTests.swift */, C71031E8DB171047318B92FC /* SyntheticReplacePlannerTests.swift */, @@ -1585,6 +1601,7 @@ children = ( 67C78D77B58388B15AC8B954 /* Macros */, 352AF5B2834FEE1F597394E4 /* ApplicationBundleMetadata.swift */, + 94B0830FBE4F2E239F670DBA /* AppSurfaceClassifier.swift */, AC70775535A3428991025AB8 /* AXHelper.swift */, 
85EF79E6144D6C6AD062B569 /* BaseCompletionPromptRenderer.swift */, B997EC69E1C65B1E18234221 /* BrowserAppDetector.swift */, @@ -1652,6 +1669,7 @@ 960F3FDBF283347594F30494 /* SuggestionSettingsStore.swift */, 1CE61E74928C221B8BB261C6 /* SuggestionTextColorCodec.swift */, B424E2AC97C99D335B0D5751 /* SuggestionTextNormalizer.swift */, + C602357DDED5D11C8B4567FB /* SurfaceContextComposer.swift */, 6E3B1232C4BE8072A5183F9C /* SymSpell.swift */, 27A5D63F390E9B7A7FE343FE /* SystemResourceSampler.swift */, 7F4C4A7EAF886E0CC945BFEF /* TerminalAppDetector.swift */, @@ -1879,6 +1897,7 @@ AF86357E50ADC91838446A9A /* AcknowledgementsView.swift in Sources */, E14AF4034E71EDE34646FEEC /* ActivationIndicatorController.swift in Sources */, 9EB8E3DC796A0C8BFDE8E683 /* AppDelegate.swift in Sources */, + C149EAED2CF6F8B6274053E0 /* AppSurfaceClassifier.swift in Sources */, 7E505E25BD9C45F0DBE2796C /* AppUpdateManager.swift in Sources */, B4D36F5D03E3143CE74582F9 /* AppearancePaneView.swift in Sources */, 1AEA5D46AFB9095716406788 /* ApplicationBundleMetadata.swift in Sources */, @@ -2063,6 +2082,8 @@ 576B3FF30FB457EF04F9A715 /* SuggestionTextColorCodec.swift in Sources */, 90F287ED3B23FB2AB3EF8CCE /* SuggestionTextNormalizer.swift in Sources */, 3682DBB9DCF6C011F382A1B0 /* SuggestionWorkController.swift in Sources */, + BD94A8663D79D9609461F894 /* SurfaceContextCache.swift in Sources */, + FF773F168B20502C68239967 /* SurfaceContextComposer.swift in Sources */, 09FD133AFEFD8C08E7A9969D /* SymSpell.swift in Sources */, 0A08A88B773845EF9A27DF0F /* SymSpellCorrector.swift in Sources */, FF3F7B74B561EF0807D28FD8 /* SystemMetricsStore.swift in Sources */, @@ -2104,6 +2125,7 @@ 0A658BF137DBD0898E40B87F /* AcknowledgementsView.swift in Sources */, 26E0331E9E2F92FAE531BDEE /* ActivationIndicatorController.swift in Sources */, 0A3443AEE6540F11E5E6BF8F /* AppDelegate.swift in Sources */, + 61635150B8004F6CB2FACE65 /* AppSurfaceClassifier.swift in Sources */, C4C6734678797669055988E0 /* 
AppUpdateManager.swift in Sources */, AB5D37BA744546F14C5566E8 /* AppearancePaneView.swift in Sources */, 66C23A7C2FCDE0266FF425F8 /* ApplicationBundleMetadata.swift in Sources */, @@ -2288,6 +2310,8 @@ 53FB56A095BCF0389DAC0A56 /* SuggestionTextColorCodec.swift in Sources */, BB6325CA50F97B18B9725918 /* SuggestionTextNormalizer.swift in Sources */, E313639E71AE1374D2B9A956 /* SuggestionWorkController.swift in Sources */, + 30D4728580451C7D3BDF42E3 /* SurfaceContextCache.swift in Sources */, + A736C52C0D280A35946E37A3 /* SurfaceContextComposer.swift in Sources */, 1DF03DC12D45C22753F683BC /* SymSpell.swift in Sources */, D2B271E0FAE4F65FC2287930 /* SymSpellCorrector.swift in Sources */, 2D93881A6AE7DA50698600A3 /* SystemMetricsStore.swift in Sources */, @@ -2324,6 +2348,7 @@ 8369356B8E7E7E61787E828D /* AXHelperTests.swift in Sources */, 6D0E79CF3C1A8CE53046FCE5 /* AXTextGeometryResolverTests.swift in Sources */, 55E841977534CBFD8B80E95F /* AXTreeDumpWriterTests.swift in Sources */, + 18382D1919D90E3C1EE143C2 /* AppSurfaceClassifierTests.swift in Sources */, A36481222BB5B2A67349D389 /* ApplicationBundleMetadataTests.swift in Sources */, 4D583CB3DA253FB795EE54F9 /* ArithmeticEvaluatorTests.swift in Sources */, F4A01E4F12F0183449BCCBB9 /* BaseCompletionPromptRendererTests.swift in Sources */, @@ -2444,6 +2469,7 @@ CB65A79F164269991FABC32E /* SuggestionStateHelperTests.swift in Sources */, 7AEF46950EF5E2EBCFE4BBD3 /* SuggestionTextColorCodecTests.swift in Sources */, 0C98ECB5BCEBA72C693AC1C9 /* SuggestionTextNormalizerTests.swift in Sources */, + C423CCD7198ECE27DF260268 /* SurfaceContextComposerTests.swift in Sources */, 000EBFCBA8CE49537690613B /* SymSpellCorrectorTests.swift in Sources */, A3AFE27EDE956ADC04C91C94 /* SymSpellTests.swift in Sources */, EF5BAB96DDADABB86F9E02D9 /* SyntheticReplacePlannerTests.swift in Sources */, diff --git a/Cotabby/Models/FocusModels.swift b/Cotabby/Models/FocusModels.swift index 400624ff..b14433a5 100644 --- 
a/Cotabby/Models/FocusModels.swift +++ b/Cotabby/Models/FocusModels.swift @@ -207,6 +207,18 @@ struct FocusedInputSnapshot: Equatable { /// call sites compiling unchanged. let resolvedFieldStyle: ResolvedFieldStyle? + /// The focused window's title, read once per field session (cached by `SurfaceContextCache`) + /// when surface context is enabled. The window title carries the highest-signal surface cue + /// available over Accessibility: the email subject, document name, channel, or page title. + /// Nil when disabled, unavailable, or the field is secure. The initializer default keeps + /// existing call sites compiling unchanged. + let windowTitle: String? + + /// The focused field's placeholder text (`AXPlaceholderValue`), read with the window title and + /// under the same gating. Nil when absent. The initializer default keeps existing call sites + /// compiling unchanged. + let fieldPlaceholder: String? + /// Explicit initializer keeps `focusChangeSequence` immutable while preserving the old /// memberwise-call ergonomics for tests that do not care about focus identity. /// @@ -234,7 +246,9 @@ struct FocusedInputSnapshot: Equatable { isWebContentField: Bool = false, focusChangeSequence: UInt64 = 0, focusedURLString: String? = nil, - resolvedFieldStyle: ResolvedFieldStyle? = nil + resolvedFieldStyle: ResolvedFieldStyle? = nil, + windowTitle: String? = nil, + fieldPlaceholder: String? 
= nil ) { self.applicationName = applicationName self.bundleIdentifier = bundleIdentifier @@ -257,6 +271,8 @@ struct FocusedInputSnapshot: Equatable { self.focusChangeSequence = focusChangeSequence self.focusedURLString = focusedURLString self.resolvedFieldStyle = resolvedFieldStyle + self.windowTitle = windowTitle + self.fieldPlaceholder = fieldPlaceholder } var identity: FocusedInputIdentity { diff --git a/Cotabby/Models/SuggestionEngineModels.swift b/Cotabby/Models/SuggestionEngineModels.swift index 699cc050..49b6b9de 100644 --- a/Cotabby/Models/SuggestionEngineModels.swift +++ b/Cotabby/Models/SuggestionEngineModels.swift @@ -93,6 +93,10 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable { let isUsingCustomWordCountRange: Bool let customWordCountRange: SuggestionWordRange let isClipboardContextEnabled: Bool + /// When true (the default), the request factory may condition prompts on the focused surface + /// (app class, window title, web domain, field placeholder). Travels in the snapshot so + /// generation uses the same value the Settings UI shows. + let isSurfaceContextEnabled: Bool /// User-authored profile data for Cotabby's base-model completion prompt. /// This travels in the snapshot so generation uses the same value the Settings UI shows. let userName: String diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift index 1aa66814..f56f2367 100644 --- a/Cotabby/Models/SuggestionModels.swift +++ b/Cotabby/Models/SuggestionModels.swift @@ -136,6 +136,11 @@ struct SuggestionConfiguration: Equatable, Sendable { let maxPrefixWordsFoundationModel: Int let maxPrefixCharactersFoundationModel: Int let maxSuffixCharacters: Int + /// Estimated-token ceiling for the llama prompt (preface + prefix). 
Derived from the runtime's + /// per-sequence context window minus the output budget and a safety margin, so the renderer + /// truncates against what the model can actually hold instead of a flat character guess that + /// misjudges code, CJK, and punctuation-heavy text. + let llamaPromptTokenBudget: Int /// Shipped first-launch default for the user's saved profile. /// `SuggestionSettingsModel` persists the user's real preference; configuration only provides /// the app's starting value for a fresh install. @@ -160,17 +165,23 @@ struct SuggestionConfiguration: Equatable, Sendable { minP: 0.08, repetitionPenalty: 1.05, randomSeed: nil, - maxPrefixWords: 50, - // Prompt windows should stay small for the local llama path. Sending an entire editor - // buffer hurts latency with little quality gain because Cotabby is only completing the - // immediate local continuation. - maxPrefixCharacters: 1000, + maxPrefixWords: 150, + // The llama prefix window matches the Foundation Models one. The earlier 1000-char/50-word + // cap predates KV prefix reuse: the prefill cost of a larger window is now paid once per + // focused field (and on reuse misses), not per keystroke, while the extra preceding + // sentences carry the topic and voice that multi-paragraph email/docs continuations need. + // The token budget below keeps the total prompt bounded by what the model can hold. + maxPrefixCharacters: 2500, // Apple's on-device model has a 4096-token shared context. Even with instructions plus // visual/clipboard context, there is room to send ~3x the llama window before crowding // the prompt, and the extra surrounding sentences materially help mid-thought completions. maxPrefixWordsFoundationModel: 150, maxPrefixCharactersFoundationModel: 2500, maxSuffixCharacters: 192, + // 2048 (LlamaRuntimeConfiguration.default.contextWindowTokens, the per-sequence KV + // capacity) minus the 50-token output ceiling and a 64-token margin for BOS plus + // estimator error. 
The estimator skews conservative, so real prompts land under this. + llamaPromptTokenBudget: 1934, // Seed the profile settings with lightweight defaults on first launch. defaultUserName: "Jacob", defaultWordCountPreset: .twelveToTwenty, @@ -210,6 +221,12 @@ struct FocusedInputContext: Equatable, Sendable { let isWebContentField: Bool /// The host field's own text font/color, carried through so the overlay can match it. let resolvedFieldStyle: ResolvedFieldStyle? + /// Surface metadata captured once per field session, carried through so the request factory + /// can condition the prompt on what the user is writing in (see `SurfaceContextComposer`). + let windowTitle: String? + let fieldPlaceholder: String? + let focusedURLString: String? + let isIntegratedTerminal: Bool /// Carries the immutable focus-observation identity across debounce/generation boundaries. /// Without this, later visual-context lookups could fall back to `elementIdentifier` alone and /// reintroduce the CFHash collision class this sequence is meant to avoid. @@ -235,6 +252,10 @@ struct FocusedInputContext: Equatable, Sendable { isSecure = snapshot.isSecure isWebContentField = snapshot.isWebContentField resolvedFieldStyle = snapshot.resolvedFieldStyle + windowTitle = snapshot.windowTitle + fieldPlaceholder = snapshot.fieldPlaceholder + focusedURLString = snapshot.focusedURLString + isIntegratedTerminal = snapshot.isIntegratedTerminal focusChangeSequence = snapshot.focusChangeSequence self.generation = generation } @@ -345,6 +366,11 @@ struct SuggestionRequest: Equatable, Sendable { let clipboardContext: String? /// Ephemeral screen context summary injected only when available for the active text field. let visualContextSummary: String? + /// The composed writing-surface description (app class, window title, domain, placeholder), + /// nil when the user disabled surface context or the surface class suppresses it. 
The llama + /// prompt has already folded it in; this field exists so the Foundation Models renderer can + /// state the same sanitized facts in its own prompt shape. + let surfaceContext: SurfaceContext? /// When enabled, the normalizer keeps multiple lines instead of truncating to the first line. let isMultiLineEnabled: Bool /// Correlation ID stamped onto every log line touching this request — coordinator state @@ -373,6 +399,7 @@ struct SuggestionRequest: Equatable, Sendable { languageInstruction: String?, clipboardContext: String?, visualContextSummary: String?, + surfaceContext: SurfaceContext? = nil, isMultiLineEnabled: Bool, requestID: String = "req_unknown" ) { @@ -395,6 +422,7 @@ struct SuggestionRequest: Equatable, Sendable { self.languageInstruction = languageInstruction self.clipboardContext = clipboardContext self.visualContextSummary = visualContextSummary + self.surfaceContext = surfaceContext self.isMultiLineEnabled = isMultiLineEnabled self.requestID = requestID } diff --git a/Cotabby/Models/SuggestionSettingsData.swift b/Cotabby/Models/SuggestionSettingsData.swift index c4f5fbe0..42ffc3ce 100644 --- a/Cotabby/Models/SuggestionSettingsData.swift +++ b/Cotabby/Models/SuggestionSettingsData.swift @@ -34,6 +34,9 @@ struct SuggestionSettingsData: Equatable { var customWordCountLowWords: Int var customWordCountHighWords: Int var isClipboardContextEnabled: Bool + /// When on (the default), prompts may state which app, window title, web domain, and field the + /// user is typing in, so suggestions stay on-topic for the surface. Everything stays on device. + var isSurfaceContextEnabled: Bool var isFastModeEnabled: Bool /// When on, Cotabby checks the user's current word with `NSSpellChecker` and hides the normal /// continuation when the word looks misspelled, so completions don't pile onto a broken word. 
diff --git a/Cotabby/Models/SuggestionSettingsModel.swift b/Cotabby/Models/SuggestionSettingsModel.swift index 4a58151c..a29acdb9 100644 --- a/Cotabby/Models/SuggestionSettingsModel.swift +++ b/Cotabby/Models/SuggestionSettingsModel.swift @@ -57,6 +57,9 @@ final class SuggestionSettingsModel: ObservableObject { @Published private(set) var customWordCountLowWords: Int @Published private(set) var customWordCountHighWords: Int @Published private(set) var isClipboardContextEnabled: Bool + /// When on (the default), prompts may state which app, window, domain, and field the user is + /// typing in. See `SurfaceContextComposer` for what is actually rendered. + @Published private(set) var isSurfaceContextEnabled: Bool @Published private(set) var isFastModeEnabled: Bool /// When on, a misspelled current word hides the normal continuation (see the typo gate). @Published private(set) var suppressCompletionsOnTypo: Bool @@ -162,6 +165,7 @@ final class SuggestionSettingsModel: ObservableObject { customWordCountLowWords = data.customWordCountLowWords customWordCountHighWords = data.customWordCountHighWords isClipboardContextEnabled = data.isClipboardContextEnabled + isSurfaceContextEnabled = data.isSurfaceContextEnabled isFastModeEnabled = data.isFastModeEnabled suppressCompletionsOnTypo = data.suppressCompletionsOnTypo offerTypoCorrections = data.offerTypoCorrections @@ -218,6 +222,7 @@ final class SuggestionSettingsModel: ObservableObject { high: customWordCountHighWords ), isClipboardContextEnabled: isClipboardContextEnabled, + isSurfaceContextEnabled: isSurfaceContextEnabled, userName: userName, customRules: customRules, extendedContext: extendedContext, @@ -368,6 +373,15 @@ final class SuggestionSettingsModel: ObservableObject { store.saveCustomWordCountRange(low: normalized.lowWords, high: normalized.highWords) } + func setSurfaceContextEnabled(_ enabled: Bool) { + guard isSurfaceContextEnabled != enabled else { + return + } + + isSurfaceContextEnabled = enabled + 
store.saveSurfaceContextEnabled(enabled) + } + func setClipboardContextEnabled(_ enabled: Bool) { guard isClipboardContextEnabled != enabled else { return @@ -949,12 +963,13 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { $customWordCountLowWords, $customWordCountHighWords ) - // `extendedContext` shares its outer slot with `suggestInIntegratedTerminals` via a paired - // `CombineLatest` so the new toggle costs no extra top-level slot (the outer is at the cap). + // `extendedContext` shares its outer slot with `suggestInIntegratedTerminals` and + // `isSurfaceContextEnabled` via one grouped `CombineLatest3` so new toggles cost no extra + // top-level slot (the outer is at the cap). return Publishers.CombineLatest4( primary, $acceptanceGranularity, - Publishers.CombineLatest($extendedContext, $suggestInIntegratedTerminals), + Publishers.CombineLatest3($extendedContext, $suggestInIntegratedTerminals, $isSurfaceContextEnabled), customRange ) .map { primaryTuple, granularity, extendedContextTuple, customRangeTuple in @@ -966,7 +981,7 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { let (debounce, focusPoll, multiLine, acceptToggles) = timing let (autoAcceptPunctuation, addSpaceAfterAccept) = acceptToggles let (isCustomActive, customLow, customHigh) = customRangeTuple - let (extendedContext, suggestInIntegratedTerminals) = extendedContextTuple + let (extendedContext, suggestInIntegratedTerminals, surfaceContextEnabled) = extendedContextTuple return SuggestionSettingsSnapshot( isGloballyEnabled: globallyEnabled, disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)), @@ -976,6 +991,7 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { isUsingCustomWordCountRange: isCustomActive, customWordCountRange: SuggestionWordRange.clamped(low: customLow, high: customHigh), isClipboardContextEnabled: clipboardContextEnabled, + isSurfaceContextEnabled: surfaceContextEnabled, userName: userName, customRules: 
customRules, extendedContext: extendedContext, diff --git a/Cotabby/Services/Focus/FocusSnapshotResolver.swift b/Cotabby/Services/Focus/FocusSnapshotResolver.swift index aefb50f6..5fb35929 100644 --- a/Cotabby/Services/Focus/FocusSnapshotResolver.swift +++ b/Cotabby/Services/Focus/FocusSnapshotResolver.swift @@ -45,6 +45,10 @@ struct FocusSnapshotResolver { /// `deepWalkThrottle`: it carries state across the value-typed resolver's non-mutating polls. private let fieldStyleCache = FieldStyleCache() + /// Caches the per-field surface metadata (window title, placeholder, URL) the same way; see + /// `SurfaceContextCache` for why the value is frozen for the whole field session. + private let surfaceContextCache = SurfaceContextCache() + init(geometryResolver: AXTextGeometryResolver? = nil) { self.geometryResolver = geometryResolver ?? AXTextGeometryResolver() } @@ -209,12 +213,33 @@ struct FocusSnapshotResolver { let nsValue = contextWindow.text as NSString let safeSelectionLocation = min(contextWindow.selection.location, nsValue.length) let trailingStart = min(contextWindow.selection.location + contextWindow.selection.length, nsValue.length) - // Per-site disable: read the page URL only when the feature is enabled, so the default - // focus-capture path performs no extra Accessibility round-trips. The read is fail-safe (nil on - // any miss), so the worst case is the per-site gate staying inert. - let focusedURLString = PerDomainDisableSettings.isEnabled() - ? AXHelper.webURL(near: focusedElement) - : nil + // Surface metadata (window title, field placeholder, page URL), captured once per field + // session and frozen for its lifetime: re-reading per poll would add cross-process AX + // round-trips to the hot path, and a retitling window mid-typing would change the prompt + // bytes ahead of the prefix and break llama KV prefix reuse. 
The URL is read for browsers + // (surface conditioning) or whenever per-site disable wants it; navigation recreates the + // focused web element, so a stale URL cannot outlive its page. Secure fields are never + // probed. + let capturedSurface: CapturedSurfaceContext + if resolvedCandidate.isSecure { + capturedSurface = .empty + } else { + let surfaceKey = + "\(application.processIdentifier):\(resolvedCandidate.elementIdentifier):\(focusChangeSequence)" + capturedSurface = surfaceContextCache.capture(forKey: surfaceKey) { + let wantsURL = PerDomainDisableSettings.isEnabled() + || BrowserAppDetector.isBrowser(bundleIdentifier: bundleIdentifier) + return CapturedSurfaceContext( + windowTitle: AXHelper.windowTitle(near: focusedElement), + fieldPlaceholder: AXHelper.stringValue( + for: kAXPlaceholderValueAttribute as CFString, + on: resolvedCandidate.element + ), + urlString: wantsURL ? AXHelper.webURL(near: focusedElement) : nil + ) + } + } + let focusedURLString = capturedSurface.urlString // Resolve the host field's own font/color so ghost text can match it. Cached by element // identity (this is a synchronous AX read and the resolver runs on the focus poll), and // skipped for secure fields, which are never styled or assisted. 
@@ -277,7 +302,9 @@ struct FocusSnapshotResolver { isWebContentField: isWebContentField, focusChangeSequence: focusChangeSequence, focusedURLString: focusedURLString, - resolvedFieldStyle: resolvedFieldStyle + resolvedFieldStyle: resolvedFieldStyle, + windowTitle: capturedSurface.windowTitle, + fieldPlaceholder: capturedSurface.fieldPlaceholder ) if resolvedCandidate.isSecure { diff --git a/Cotabby/Services/Focus/SurfaceContextCache.swift b/Cotabby/Services/Focus/SurfaceContextCache.swift new file mode 100644 index 00000000..4f06ff65 --- /dev/null +++ b/Cotabby/Services/Focus/SurfaceContextCache.swift @@ -0,0 +1,41 @@ +import Foundation + +/// Surface metadata captured once per field session: the window title, the field's placeholder, +/// and (for browsers or the per-site gate) the page URL. +struct CapturedSurfaceContext: Equatable { + let windowTitle: String? + let fieldPlaceholder: String? + let urlString: String? + + static let empty = CapturedSurfaceContext(windowTitle: nil, fieldPlaceholder: nil, urlString: nil) +} + +/// Caches the captured surface metadata per focused field session so the synchronous AX reads +/// (window title, placeholder, URL walk) happen once per field, not on every focus poll. +/// +/// Keyed on process + element + focusChangeSequence: the value is deliberately frozen for the +/// lifetime of one field session even if the window retitles mid-typing (a browser tab updating +/// its title on every keystroke would otherwise change the prompt bytes ahead of the prefix and +/// destroy the llama KV prefix reuse). A genuine focus change bumps the sequence and re-captures. +/// +/// A reference type for the same reason as `FieldStyleCache`: it carries state across the +/// value-typed `FocusSnapshotResolver`'s non-mutating polls. +@MainActor +final class SurfaceContextCache { + private var key: String? 
+ private var captured: CapturedSurfaceContext = .empty + + /// Returns the cached capture when `key` matches the last resolution, otherwise resolves once + /// and caches the result (including all-nil results, so a host exposing nothing is not + /// re-probed every poll). + func capture(forKey key: String, resolve: () -> CapturedSurfaceContext) -> CapturedSurfaceContext { + if key == self.key { + return captured + } + + let resolved = resolve() + self.key = key + captured = resolved + return resolved + } +} diff --git a/Cotabby/Support/AXHelper.swift b/Cotabby/Support/AXHelper.swift index 10bf0705..78921d3a 100644 --- a/Cotabby/Support/AXHelper.swift +++ b/Cotabby/Support/AXHelper.swift @@ -603,6 +603,23 @@ enum AXHelper { return unsafeBitCast(value, to: AXUIElement.self) } + /// Best-effort, fail-safe read of the title of the window containing `element`. Most apps vend + /// `kAXWindowAttribute` directly on any descendant element; when that misses, nil is returned + /// rather than walking the tree, so the read stays a single bounded round-trip on the focus + /// path. Used for surface conditioning (the title carries the email subject, document name, + /// channel, or page title) and cached per field session by the caller. + static func windowTitle(near element: AXUIElement) -> String? { + guard let value = copyAttributeValue(kAXWindowAttribute as CFString, on: element) else { + return nil + } + guard CFGetTypeID(value) == AXUIElementGetTypeID() else { + return nil + } + // Same Core Foundation bridging rule as `parentElement(of:)`. + let window = unsafeBitCast(value, to: AXUIElement.self) + return stringValue(for: kAXTitleAttribute as CFString, on: window) + } + /// Best-effort, fail-safe read of the web page URL near `element`, used only for per-site rules. /// Browsers expose `kAXURLAttribute` on the web area or window rather than the focused field, so /// this walks up a bounded number of ancestors. 
It returns nil on any miss (non-browser focus, an diff --git a/Cotabby/Support/AppSurfaceClassifier.swift b/Cotabby/Support/AppSurfaceClassifier.swift new file mode 100644 index 00000000..30873b84 --- /dev/null +++ b/Cotabby/Support/AppSurfaceClassifier.swift @@ -0,0 +1,78 @@ +import Foundation + +/// The coarse kind of writing surface the focused app presents. Drives prompt conditioning: what +/// surface the model is told it is continuing, and for which surfaces saying anything at all would +/// hurt (app metadata biases small base models toward code/numbers in editors and terminals). +nonisolated enum AppSurfaceClass: Equatable, Sendable { + case codeEditor + case terminal + case email + case chat + case browser + case other +} + +/// Single source of truth for bundle-identifier → surface classification, shared by the +/// Foundation Models tone hints and the base-model surface preface so the two engines never +/// disagree about what kind of app the user is in. +/// +/// The sets are intentionally small: each entry has to earn its place, so they cover the surfaces +/// real users write in most (code editors, email/chat clients, browsers) and everything else falls +/// through to `.other`. Cursor ships under opaque ToDesktop hashes (com.todesktop.) that change +/// between builds, so prefix-matching it is unreliable and it is omitted intentionally. +nonisolated enum AppSurfaceClassifier { + static func classify(bundleIdentifier: String?, isIntegratedTerminal: Bool = false) -> AppSurfaceClass { + // An xterm.js surface inside an editor/browser process is a terminal regardless of the + // host bundle, and terminal beats every other classification: shell prompts and pagers + // must never get app-conditioned prose. + if isIntegratedTerminal { + return .terminal + } + guard let rawIdentifier = bundleIdentifier, !rawIdentifier.isEmpty else { + return .other + } + // TerminalAppDetector matches exact, case-sensitive bundle ids; hand it the original. 
+ // The prefix tables below are lowercase, so everything else compares case-folded. + if TerminalAppDetector.isTerminal(bundleIdentifier: rawIdentifier) { + return .terminal + } + let identifier = rawIdentifier.lowercased() + if codeEditorBundlePrefixes.contains(where: { identifier.hasPrefix($0) }) { + return .codeEditor + } + if emailBundlePrefixes.contains(where: { identifier.hasPrefix($0) }) { + return .email + } + if chatBundlePrefixes.contains(where: { identifier.hasPrefix($0) }) { + return .chat + } + if BrowserAppDetector.isBrowser(bundleIdentifier: identifier) { + return .browser + } + return .other + } + + static let codeEditorBundlePrefixes: [String] = [ + "com.apple.dt.xcode", + "com.microsoft.vscode", + "com.jetbrains.", + "com.sublimetext.", + "com.panic.nova" + ] + + static let emailBundlePrefixes: [String] = [ + "com.apple.mail", + "com.readdle.smartemail", + "com.airmailapp.airmail", + "com.microsoft.outlook" + ] + + static let chatBundlePrefixes: [String] = [ + "com.tinyspeck.slackmacgap", + "com.microsoft.teams", + "com.hnc.discord", + "com.apple.mobilesms", + "ru.keepcoder.telegram", + "net.whatsapp.whatsapp" + ] +} diff --git a/Cotabby/Support/BaseCompletionPromptRenderer.swift b/Cotabby/Support/BaseCompletionPromptRenderer.swift index dd1f5dc9..3c50304d 100644 --- a/Cotabby/Support/BaseCompletionPromptRenderer.swift +++ b/Cotabby/Support/BaseCompletionPromptRenderer.swift @@ -27,12 +27,26 @@ enum BaseCompletionPromptRenderer { languageInstruction: String? = nil, clipboardContext: String? = nil, visualContextSummary: String? = nil, + surfaceContext: SurfaceContext? = nil, contextBudget: Int = defaultContextBudget, tokenBudget: Int? 
= nil ) -> String { let trimmedPrefix = Self.trimmingTrailingWhitespace(prefixText) var sections: [PromptSection] = [] + // The surface description leads the preface: knowing the writing surface (email in Mail, + // a chat in Slack, a document title) is the strongest situational cue a base model gets, + // and the composer already omits it for the app classes where metadata would hurt. The + // value is frozen per field session upstream, so these bytes stay stable across keystrokes + // and the llama KV prefix reuse keeps amortizing them. + if let surface = surfaceContext { + let lines = SurfaceContextComposer.prefaceLines(for: surface) + if !lines.isEmpty { + sections.append( + Self.contextSection("surface", lines.joined(separator: " "), priority: 70, maxChars: 240) + ) + } + } if let persona = Self.personaLine(userName) { sections.append(Self.contextSection("persona", persona, priority: 60, maxChars: 200)) } diff --git a/Cotabby/Support/FoundationModelPromptRenderer.swift b/Cotabby/Support/FoundationModelPromptRenderer.swift index bf362492..420d57af 100644 --- a/Cotabby/Support/FoundationModelPromptRenderer.swift +++ b/Cotabby/Support/FoundationModelPromptRenderer.swift @@ -116,6 +116,19 @@ enum FoundationModelPromptRenderer { sections.append(toneHint) } + // The same sanitized surface facts the llama preface states: the window title (subject, + // document, channel) and the web domain are the strongest on-topic cues available. Apple's + // session cache holds instructions, not the per-request prompt, so per-app variance here + // costs nothing. 
+ if let surface = request.surfaceContext { + if let title = surface.windowTitle { + sections.append("The window is titled \"\(title)\".") + } + if let domain = surface.domain { + sections.append("The user is on \(domain).") + } + } + if let summary = request.visualContextSummary, !summary.isEmpty { sections.append("Screen content:") @@ -161,59 +174,26 @@ enum FoundationModelPromptRenderer { return sections.joined(separator: "\n") } - /// Maps the focused app's bundle identifier to a one-line tone cue or nil if no rule matches. - /// The set is intentionally small: each entry has to earn its tokens, so we cover the - /// surfaces real users complain about (code editors, email/chat clients, browsers) and let - /// everything else fall back to the generic instructions. + /// Maps the focused app's surface class to a one-line tone cue or nil if no rule matches. + /// Classification lives in the shared `AppSurfaceClassifier` so both engines agree about what + /// kind of app the user is in. Terminal emulators and unrecognized apps get no hint: a shell + /// prompt, log pager, or `git commit` buffer is mostly prose, not code, so the no-hint default + /// is safer than a guessed cue. private static func appToneHint(forBundleIdentifier identifier: String) -> String? { - let lower = identifier.lowercased() - if codeEditorBundlePrefixes.contains(where: { lower.hasPrefix($0) }) { + switch AppSurfaceClassifier.classify(bundleIdentifier: identifier) { + case .codeEditor: return "The user is writing code, so the continuation should be code rather than prose." - } - if emailBundlePrefixes.contains(where: { lower.hasPrefix($0) }) { + case .email: return "The user is writing an email, so keep the same register and finish the current thought." - } - if chatBundlePrefixes.contains(where: { lower.hasPrefix($0) }) { + case .chat: return "The user is in a chat app, so keep the continuation short and informal." 
- } - if BrowserAppDetector.isBrowser(bundleIdentifier: lower) { + case .browser: return "The user is typing inside a browser, so keep the continuation concise." + case .terminal, .other: + return nil } - return nil } - // Cursor ships under opaque ToDesktop hashes (com.todesktop.) that change between builds, - // so prefix-matching is unreliable; omitted intentionally. - // Terminal emulators (iTerm2, Apple Terminal, Hyper) are also omitted: a shell prompt, log - // pager, or `git commit` buffer is mostly prose, not code, so the no-hint default is safer - // than a guessed code hint. - private static let codeEditorBundlePrefixes: [String] = [ - "com.apple.dt.xcode", - "com.microsoft.vscode", - "com.jetbrains.", - "com.sublimetext.", - "com.panic.nova" - ] - - private static let emailBundlePrefixes: [String] = [ - "com.apple.mail", - "com.readdle.smartemail", - "com.airmailapp.airmail", - "com.microsoft.outlook" - ] - - private static let chatBundlePrefixes: [String] = [ - "com.tinyspeck.slackmacgap", - "com.microsoft.teams", - "com.hnc.discord", - "com.apple.mobilesms", - "ru.keepcoder.telegram", - "net.whatsapp.whatsapp" - ] - - // Browser detection now lives in the shared `BrowserAppDetector` so the AX recovery paths and - // the prompt tone hint classify apps identically. - /// Diagnostics need to show both payloads Apple receives: the high-priority instructions and /// the shorter request prompt. Keeping this renderer-owned prevents the menu/debug preview from /// accidentally showing the llama prompt while Apple Intelligence is the selected engine. 
diff --git a/Cotabby/Support/SuggestionRequestFactory.swift b/Cotabby/Support/SuggestionRequestFactory.swift index 94038d29..3ede3175 100644 --- a/Cotabby/Support/SuggestionRequestFactory.swift +++ b/Cotabby/Support/SuggestionRequestFactory.swift @@ -66,6 +66,21 @@ enum SuggestionRequestFactory { let boundedVisualContextSummary = activeVisualContextSummary( rawSummary: visualContextSummary ) + // The composed surface description; nil when the user disabled it or the surface class + // suppresses it (code editors, terminals, anonymous generic apps). The composer sanitizes + // titles/placeholders and reduces the URL to a bare domain before anything reaches a prompt. + let surfaceContext = settings.isSurfaceContextEnabled + ? SurfaceContextComposer.compose( + surfaceClass: AppSurfaceClassifier.classify( + bundleIdentifier: context.bundleIdentifier, + isIntegratedTerminal: context.isIntegratedTerminal + ), + applicationName: context.applicationName, + windowTitle: context.windowTitle, + focusedURLString: context.focusedURLString, + fieldPlaceholder: context.fieldPlaceholder + ) + : nil // Cotabby 2 is a base-model continuation product on the Open Source path, so the local // prompt is always the base render: no instruction blob, prefix last, trailing-trimmed. // Custom instructions and persona condition the output rather than being obeyed. 
The @@ -79,7 +94,9 @@ enum SuggestionRequestFactory { extendedContext: activeExtendedContext, languageInstruction: languageInstruction, clipboardContext: boundedClipboardContext, - visualContextSummary: boundedVisualContextSummary + visualContextSummary: boundedVisualContextSummary, + surfaceContext: surfaceContext, + tokenBudget: configuration.llamaPromptTokenBudget ) let request = SuggestionRequest( @@ -107,6 +124,7 @@ enum SuggestionRequestFactory { languageInstruction: languageInstruction, clipboardContext: boundedClipboardContext, visualContextSummary: boundedVisualContextSummary, + surfaceContext: surfaceContext, isMultiLineEnabled: settings.isMultiLineEnabled, requestID: RequestID.generate() ) diff --git a/Cotabby/Support/SuggestionSettingsStore.swift b/Cotabby/Support/SuggestionSettingsStore.swift index 6f01f1df..726cfc6c 100644 --- a/Cotabby/Support/SuggestionSettingsStore.swift +++ b/Cotabby/Support/SuggestionSettingsStore.swift @@ -82,6 +82,7 @@ struct SuggestionSettingsStore { /// rewrite it to `.fourToSeven` on launch; never re-emitted to UserDefaults. private static let legacyShortPresetRawValue = "3-7" private static let clipboardContextEnabledDefaultsKey = "cotabbyClipboardContextEnabled" + private static let surfaceContextEnabledDefaultsKey = "cotabbySurfaceContextEnabled" private static let fastModeEnabledDefaultsKey = "cotabbyFastModeEnabled" private static let suppressCompletionsOnTypoDefaultsKey = "cotabbySuppressCompletionsOnTypo" private static let offerTypoCorrectionsDefaultsKey = "cotabbyOfferTypoCorrections" @@ -183,6 +184,11 @@ struct SuggestionSettingsStore { ) let resolvedClipboardContextEnabled = userDefaults.object(forKey: Self.clipboardContextEnabledDefaultsKey) as? Bool ?? false + // Defaults to true: knowing the app/window/domain is the difference between on-topic and + // generic suggestions, the data never leaves the device, and the capture is one cached + // Accessibility read per field. 
Users who want fully context-free prompts can switch it off. + let resolvedSurfaceContextEnabled = + userDefaults.object(forKey: Self.surfaceContextEnabledDefaultsKey) as? Bool ?? true // Defaults to false so the visual-context pipeline keeps running for existing users; opting // into fast mode turns it off. let resolvedFastModeEnabled = @@ -351,6 +357,7 @@ struct SuggestionSettingsStore { customWordCountLowWords: resolvedCustomRange.lowWords, customWordCountHighWords: resolvedCustomRange.highWords, isClipboardContextEnabled: resolvedClipboardContextEnabled, + isSurfaceContextEnabled: resolvedSurfaceContextEnabled, isFastModeEnabled: resolvedFastModeEnabled, suppressCompletionsOnTypo: resolvedSuppressCompletionsOnTypo, offerTypoCorrections: resolvedOfferTypoCorrections, @@ -404,6 +411,7 @@ struct SuggestionSettingsStore { saveUsingCustomWordCountRange(data.isUsingCustomWordCountRange) saveCustomWordCountRange(low: data.customWordCountLowWords, high: data.customWordCountHighWords) saveClipboardContextEnabled(data.isClipboardContextEnabled) + saveSurfaceContextEnabled(data.isSurfaceContextEnabled) saveFastModeEnabled(data.isFastModeEnabled) saveSuppressCompletionsOnTypo(data.suppressCompletionsOnTypo) saveOfferTypoCorrections(data.offerTypoCorrections) @@ -543,6 +551,10 @@ struct SuggestionSettingsStore { userDefaults.set(enabled, forKey: Self.clipboardContextEnabledDefaultsKey) } + func saveSurfaceContextEnabled(_ enabled: Bool) { + userDefaults.set(enabled, forKey: Self.surfaceContextEnabledDefaultsKey) + } + func saveFastModeEnabled(_ enabled: Bool) { userDefaults.set(enabled, forKey: Self.fastModeEnabledDefaultsKey) } diff --git a/Cotabby/Support/SurfaceContextComposer.swift b/Cotabby/Support/SurfaceContextComposer.swift new file mode 100644 index 00000000..cd18c194 --- /dev/null +++ b/Cotabby/Support/SurfaceContextComposer.swift @@ -0,0 +1,142 @@ +import Foundation + +/// The surface description that conditions a prompt: what kind of app the user is writing in, 
+/// plus the sanitized window title, web domain, and field placeholder when available. +nonisolated struct SurfaceContext: Equatable, Sendable { + let surfaceClass: AppSurfaceClass + let applicationName: String + let windowTitle: String? + let domain: String? + let fieldPlaceholder: String? +} + +/// Builds the surface description from raw focus-capture metadata and renders it as the short +/// declarative preface the base model conditions on. +/// +/// Two invariants matter here: +/// +/// - **Omission beats noise.** Code editors and terminals get NO surface section at all: app +/// metadata biases a small base model toward code/numbers over prose, which is exactly wrong in +/// the one class of app where the text itself already screams "code". An unrecognized app with +/// nothing else to say is also omitted, preserving the old bare-prefix behavior. +/// - **Declaratives, not instructions.** A base model has no instruction channel; like the persona +/// and style lines around it, the section describes the document ("An email being written in +/// Mail.") rather than commanding the model. +nonisolated enum SurfaceContextComposer { + /// Window titles are capped hard: they exist to carry the subject/document/channel cue, and a + /// runaway title would crowd the budgeted preface. + private static let maxTitleLength = 80 + private static let maxPlaceholderLength = 60 + + static func compose( + surfaceClass: AppSurfaceClass, + applicationName: String, + windowTitle: String?, + focusedURLString: String?, + fieldPlaceholder: String? + ) -> SurfaceContext? 
{ + switch surfaceClass { + case .codeEditor, .terminal: + return nil + case .email, .chat, .browser, .other: + break + } + + let cleanedApplicationName = collapseWhitespace(applicationName) + guard !cleanedApplicationName.isEmpty else { return nil } + let title = sanitizedTitle(windowTitle, applicationName: cleanedApplicationName) + let placeholder = sanitizedPlaceholder(fieldPlaceholder) + let domain = registrableDomain(from: focusedURLString) + + // A generic app with no title, domain, or placeholder has nothing useful to say; keep the + // prompt bare like before rather than stating an app name of unknown signal. + if surfaceClass == .other, title == nil, domain == nil, placeholder == nil { + return nil + } + + return SurfaceContext( + surfaceClass: surfaceClass, + applicationName: cleanedApplicationName, + windowTitle: title, + domain: domain, + fieldPlaceholder: placeholder + ) + } + + /// The conditioning sentences for the base-model preface, ready to join into one section. + static func prefaceLines(for surface: SurfaceContext) -> [String] { + var lines: [String] = [] + switch surface.surfaceClass { + case .email: + lines.append("An email being written in \(surface.applicationName).") + case .chat: + lines.append("A chat message being typed in \(surface.applicationName).") + case .browser: + if let domain = surface.domain { + lines.append("Text being typed on \(domain) in \(surface.applicationName).") + } else { + lines.append("Text being typed in \(surface.applicationName).") + } + case .other: + lines.append("Text being typed in \(surface.applicationName).") + case .codeEditor, .terminal: + // compose() never produces these; returning nothing keeps the invariant obvious here. 
+ return [] + } + if let title = surface.windowTitle { + lines.append("The window is titled \"\(title)\".") + } + if let placeholder = surface.fieldPlaceholder { + lines.append("The text field is labeled \"\(placeholder)\".") + } + return lines + } + + // MARK: - Sanitization + + /// Strips the app-name suffix browsers and many apps append (`Inbox - Google Chrome`, + /// `Notes — Pages`), collapses whitespace, drops control characters and quotes (they would + /// corrupt the quoted prompt line), and caps the length. + static func sanitizedTitle(_ rawTitle: String?, applicationName: String) -> String? { + guard var title = nonEmptyCleaned(rawTitle) else { return nil } + for separator in [" - ", " — ", " – "] { + let suffix = separator + applicationName + if title.lowercased().hasSuffix(suffix.lowercased()) { + title = String(title.dropLast(suffix.count)) + break + } + } + title = title.trimmingCharacters(in: .whitespacesAndNewlines) + guard !title.isEmpty else { return nil } + return String(title.prefix(maxTitleLength)) + } + + private static func sanitizedPlaceholder(_ rawPlaceholder: String?) -> String? { + guard let placeholder = nonEmptyCleaned(rawPlaceholder) else { return nil } + return String(placeholder.prefix(maxPlaceholderLength)) + } + + /// The registrable host of the page URL with a leading `www.` dropped: enough to say which + /// site the user is on without leaking the path or query, which can carry identifiers. + static func registrableDomain(from urlString: String?) -> String? { + guard let urlString, !urlString.isEmpty, + let host = URL(string: urlString)?.host?.lowercased(), !host.isEmpty + else { return nil } + let trimmed = host.hasPrefix("www.") ? String(host.dropFirst(4)) : host + return trimmed.isEmpty ? nil : trimmed + } + + private static func nonEmptyCleaned(_ text: String?) -> String? 
+ guard let text else { return nil } + let cleaned = collapseWhitespace( + String(text.unicodeScalars.filter { scalar in + !CharacterSet.controlCharacters.contains(scalar) && scalar != "\"" + }) + ) + return cleaned.isEmpty ? nil : cleaned + } + + private static func collapseWhitespace(_ text: String) -> String { + text.split(whereSeparator: { $0.isWhitespace }).joined(separator: " ") + } +} diff --git a/Cotabby/UI/Settings/Panes/GeneralPaneView.swift b/Cotabby/UI/Settings/Panes/GeneralPaneView.swift index 3e64bc8a..cbc64c54 100644 --- a/Cotabby/UI/Settings/Panes/GeneralPaneView.swift +++ b/Cotabby/UI/Settings/Panes/GeneralPaneView.swift @@ -54,6 +54,14 @@ struct GeneralPaneView: View { ) } + Toggle(isOn: surfaceContextEnabledBinding) { + SettingsRowLabel( + title: "Include App Context", + description: "Let suggestions know which app and window you are typing in. Everything stays on this Mac.", + systemImage: "macwindow" + ) + } + Toggle(isOn: multiLineEnabledBinding) { SettingsRowLabel( title: "Allow Multi-line Suggestions", @@ -154,6 +162,13 @@ struct GeneralPaneView: View { ) } + private var surfaceContextEnabledBinding: Binding<Bool> { + Binding( + get: { suggestionSettings.isSurfaceContextEnabled }, + set: { suggestionSettings.setSurfaceContextEnabled($0) } + ) + } + private var fastModeEnabledBinding: Binding<Bool> { Binding( get: { suggestionSettings.isFastModeEnabled }, diff --git a/Cotabby/UI/Settings/SettingsIndex.swift b/Cotabby/UI/Settings/SettingsIndex.swift index e20003be..ef13b543 100644 --- a/Cotabby/UI/Settings/SettingsIndex.swift +++ b/Cotabby/UI/Settings/SettingsIndex.swift @@ -14,6 +14,7 @@ enum SettingsItem: String, CaseIterable, Identifiable { case fastMode case openAtLogin case includeClipboardContext + case includeAppContext case allowMultiLine case acceptPunctuation case addSpaceAfterAccept @@ -88,6 +89,7 @@ enum SettingsItem: String, CaseIterable, Identifiable { case .fastMode: return "Fast Mode" case .openAtLogin: return "Open at Login" case
.includeClipboardContext: return "Include Clipboard Context" + case .includeAppContext: return "Include App Context" case .allowMultiLine: return "Allow Multi-line Suggestions" case .acceptPunctuation: return "Accept Punctuation With Word" case .addSpaceAfterAccept: return "Add Space After Accepting" @@ -152,6 +154,7 @@ enum SettingsItem: String, CaseIterable, Identifiable { case .fastMode: return "bolt.fill" case .openAtLogin: return "arrow.right.circle" case .includeClipboardContext: return "doc.on.clipboard" + case .includeAppContext: return "macwindow" case .allowMultiLine: return "text.alignleft" case .acceptPunctuation: return "textformat.abc" case .addSpaceAfterAccept: return "space" @@ -212,7 +215,7 @@ enum SettingsItem: String, CaseIterable, Identifiable { var category: SettingsCategory { switch self { - case .enableGlobally, .fastMode, .openAtLogin, .includeClipboardContext, + case .enableGlobally, .fastMode, .openAtLogin, .includeClipboardContext, .includeAppContext, .allowMultiLine, .acceptPunctuation, .addSpaceAfterAccept, .inlineMacros, .onboarding: return .general case .suggestionDisplay, .showFieldIndicator, .showWordCount, .showKeyHint, @@ -259,6 +262,9 @@ enum SettingsItem: String, CaseIterable, Identifiable { "auto-start", "launch at login", "login items", "open at startup"] case .includeClipboardContext: return ["clipboard", "paste", "copy", "pasteboard", "context"] + case .includeAppContext: + return ["app", "window", "title", "surface", "domain", "site", "context", + "privacy", "metadata", "application"] case .allowMultiLine: return ["multiline", "multi-line", "line", "newline", "wrap", "paragraph", "long", "multiple lines", "line break"] diff --git a/CotabbyTests/AppSurfaceClassifierTests.swift b/CotabbyTests/AppSurfaceClassifierTests.swift new file mode 100644 index 00000000..be1c2854 --- /dev/null +++ b/CotabbyTests/AppSurfaceClassifierTests.swift @@ -0,0 +1,50 @@ +import XCTest +@testable import Cotabby + +/// Pins the shared 
bundle-to-surface classification both prompt renderers depend on, including the +/// precedence rules (integrated terminal beats everything; code editor beats the Electron/browser +/// overlap for VS Code). +final class AppSurfaceClassifierTests: XCTestCase { + func testIntegratedTerminalBeatsEverything() { + XCTAssertEqual( + AppSurfaceClassifier.classify(bundleIdentifier: "com.google.Chrome", isIntegratedTerminal: true), + .terminal + ) + } + + func testTerminalApps() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.apple.Terminal"), .terminal) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.googlecode.iterm2"), .terminal) + } + + func testCodeEditors() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.apple.dt.Xcode"), .codeEditor) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.jetbrains.intellij"), .codeEditor) + } + + func testVSCodeClassifiesAsCodeEditorNotBrowser() { + // VS Code is also in the Electron-editor browser-priming set; code editor must win. 
+ XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.microsoft.VSCode"), .codeEditor) + } + + func testEmailClients() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.apple.mail"), .email) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.microsoft.Outlook"), .email) + } + + func testChatApps() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.tinyspeck.slackmacgap"), .chat) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.hnc.Discord"), .chat) + } + + func testBrowsers() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.apple.Safari"), .browser) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.google.Chrome"), .browser) + } + + func testUnknownAndNil() { + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: "com.example.unknown"), .other) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: nil), .other) + XCTAssertEqual(AppSurfaceClassifier.classify(bundleIdentifier: ""), .other) + } +} diff --git a/CotabbyTests/CotabbyTestFixtures.swift b/CotabbyTests/CotabbyTestFixtures.swift index 553a54f9..09f60508 100644 --- a/CotabbyTests/CotabbyTestFixtures.swift +++ b/CotabbyTests/CotabbyTestFixtures.swift @@ -28,7 +28,10 @@ enum CotabbyTestFixtures { isSecure: Bool = false, isIntegratedTerminal: Bool = false, isWebContentField: Bool = false, - focusChangeSequence: UInt64 = 1 + focusChangeSequence: UInt64 = 1, + focusedURLString: String? = nil, + windowTitle: String? = nil, + fieldPlaceholder: String? = nil ) -> FocusedInputSnapshot { let resolvedSelection = selection ?? 
NSRange(location: (precedingText as NSString).length, length: 0) @@ -52,7 +55,10 @@ enum CotabbyTestFixtures { isSecure: isSecure, isIntegratedTerminal: isIntegratedTerminal, isWebContentField: isWebContentField, - focusChangeSequence: focusChangeSequence + focusChangeSequence: focusChangeSequence, + focusedURLString: focusedURLString, + windowTitle: windowTitle, + fieldPlaceholder: fieldPlaceholder ) } @@ -70,8 +76,12 @@ enum CotabbyTestFixtures { trailingText: String = "", selection: NSRange? = nil, isSecure: Bool = false, + isIntegratedTerminal: Bool = false, isWebContentField: Bool = false, focusChangeSequence: UInt64 = 1, + focusedURLString: String? = nil, + windowTitle: String? = nil, + fieldPlaceholder: String? = nil, generation: UInt64 = 1 ) -> FocusedInputContext { FocusedInputContext( @@ -89,8 +99,12 @@ enum CotabbyTestFixtures { trailingText: trailingText, selection: selection, isSecure: isSecure, + isIntegratedTerminal: isIntegratedTerminal, isWebContentField: isWebContentField, - focusChangeSequence: focusChangeSequence + focusChangeSequence: focusChangeSequence, + focusedURLString: focusedURLString, + windowTitle: windowTitle, + fieldPlaceholder: fieldPlaceholder ), generation: generation ) @@ -230,6 +244,7 @@ enum CotabbyTestFixtures { isUsingCustomWordCountRange: Bool = false, customWordCountRange: SuggestionWordRange = SuggestionWordRange(lowWords: 5, highWords: 15), isClipboardContextEnabled: Bool = true, + isSurfaceContextEnabled: Bool = true, userName: String = "", customRules: [String] = [], extendedContext: String = "", @@ -256,6 +271,7 @@ enum CotabbyTestFixtures { isUsingCustomWordCountRange: isUsingCustomWordCountRange, customWordCountRange: customWordCountRange, isClipboardContextEnabled: isClipboardContextEnabled, + isSurfaceContextEnabled: isSurfaceContextEnabled, userName: userName, customRules: customRules, extendedContext: extendedContext, diff --git a/CotabbyTests/SurfaceContextComposerTests.swift 
b/CotabbyTests/SurfaceContextComposerTests.swift new file mode 100644 index 00000000..2625f6e6 --- /dev/null +++ b/CotabbyTests/SurfaceContextComposerTests.swift @@ -0,0 +1,132 @@ +import XCTest +@testable import Cotabby + +/// Locks in the two invariants of surface conditioning: omission beats noise (code editors, +/// terminals, and anonymous generic apps get no section at all), and everything user-derived +/// (titles, placeholders, URLs) is sanitized before it can reach a prompt. +final class SurfaceContextComposerTests: XCTestCase { + private func compose( + applicationName: String = "Mail", + bundleIdentifier: String? = "com.apple.mail", + isIntegratedTerminal: Bool = false, + windowTitle: String? = nil, + focusedURLString: String? = nil, + fieldPlaceholder: String? = nil + ) -> SurfaceContext? { + SurfaceContextComposer.compose( + surfaceClass: AppSurfaceClassifier.classify( + bundleIdentifier: bundleIdentifier, + isIntegratedTerminal: isIntegratedTerminal + ), + applicationName: applicationName, + windowTitle: windowTitle, + focusedURLString: focusedURLString, + fieldPlaceholder: fieldPlaceholder + ) + } + + // MARK: - Class gating + + func testCodeEditorsGetNoSurfaceContext() { + XCTAssertNil(compose(applicationName: "Xcode", bundleIdentifier: "com.apple.dt.Xcode", windowTitle: "Project.swift")) + } + + func testTerminalsGetNoSurfaceContext() { + XCTAssertNil(compose(applicationName: "Terminal", bundleIdentifier: "com.apple.Terminal", windowTitle: "zsh")) + XCTAssertNil(compose(bundleIdentifier: "com.google.Chrome", isIntegratedTerminal: true, windowTitle: "Cloud Shell")) + } + + func testAnonymousGenericAppIsOmitted() { + // Unknown app, no title, no domain, no placeholder: nothing useful to say. 
+ XCTAssertNil(compose(applicationName: "SomeApp", bundleIdentifier: "com.example.someapp")) + } + + func testGenericAppWithTitleIsIncluded() { + let surface = compose( + applicationName: "Bear", + bundleIdentifier: "net.shinyfrog.bear", + windowTitle: "Travel plans" + ) + XCTAssertEqual(surface?.surfaceClass, .other) + XCTAssertEqual(surface?.windowTitle, "Travel plans") + } + + // MARK: - Preface lines + + func testEmailPreface() throws { + let surface = compose(windowTitle: "Re: Q3 budget review") + XCTAssertEqual( + SurfaceContextComposer.prefaceLines(for: try XCTUnwrap(surface)), + ["An email being written in Mail.", "The window is titled \"Re: Q3 budget review\"."] + ) + } + + func testChatPreface() throws { + let surface = compose( + applicationName: "Slack", + bundleIdentifier: "com.tinyspeck.slackmacgap", + fieldPlaceholder: "Message #design" + ) + XCTAssertEqual( + SurfaceContextComposer.prefaceLines(for: try XCTUnwrap(surface)), + ["A chat message being typed in Slack.", "The text field is labeled \"Message #design\"."] + ) + } + + func testBrowserPrefaceUsesDomain() throws { + let surface = compose( + applicationName: "Google Chrome", + bundleIdentifier: "com.google.Chrome", + focusedURLString: "https://www.notion.so/workspace/page-123" + ) + XCTAssertEqual( + SurfaceContextComposer.prefaceLines(for: try XCTUnwrap(surface)), + ["Text being typed on notion.so in Google Chrome."] + ) + } + + // MARK: - Sanitization + + func testTitleAppNameSuffixIsStripped() { + XCTAssertEqual( + SurfaceContextComposer.sanitizedTitle("Inbox (3) - Google Chrome", applicationName: "Google Chrome"), + "Inbox (3)" + ) + XCTAssertEqual( + SurfaceContextComposer.sanitizedTitle("Notes — Pages", applicationName: "Pages"), + "Notes" + ) + } + + func testTitleIsCappedAndWhitespaceCollapsed() { + let long = String(repeating: "title ", count: 40) + let sanitized = SurfaceContextComposer.sanitizedTitle(long, applicationName: "Mail") + XCTAssertLessThanOrEqual(sanitized?.count ?? 
0, 80) + + XCTAssertEqual( + SurfaceContextComposer.sanitizedTitle(" Re:\n budget review ", applicationName: "Mail"), + "Re: budget review" + ) + } + + func testTitleQuotesAndControlCharactersAreDropped() { + XCTAssertEqual( + SurfaceContextComposer.sanitizedTitle("Say \"hello\"\u{07} there", applicationName: "Mail"), + "Say hello there" + ) + } + + func testEmptyTitleBecomesNil() { + XCTAssertNil(SurfaceContextComposer.sanitizedTitle(" ", applicationName: "Mail")) + XCTAssertNil(SurfaceContextComposer.sanitizedTitle(nil, applicationName: "Mail")) + } + + func testDomainExtractionDropsPathQueryAndWWW() { + XCTAssertEqual( + SurfaceContextComposer.registrableDomain(from: "https://www.mail.google.com/u/0/?compose=new"), + "mail.google.com" + ) + XCTAssertNil(SurfaceContextComposer.registrableDomain(from: nil)) + XCTAssertNil(SurfaceContextComposer.registrableDomain(from: "not a url")) + } +} From fd451b30c38b168941768df6326939adb5132cbe Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 19:03:46 -0700 Subject: [PATCH 3/6] feat(prompt): budget the llama prompt in tokens and widen the prefix window The token-aware section allocator existed but nothing called it; the shipped path budgeted 2400 characters flat, which misjudges code, CJK, and punctuation-heavy text. The factory now passes a budget derived from the runtime's per-sequence context window (2048) minus the output ceiling and a safety margin, with per-section character caps retained as a second bound. With token-true budgeting in place, the llama prefix window rises from 1000 characters / 50 words to Foundation Models parity (2500 / 150). The old cap predates KV prefix reuse: prefill for a larger window is now paid once per focused field rather than per keystroke, and the extra preceding sentences carry the topic and voice that multi-paragraph email and docs continuations need. 
New long-document eval cases show completions correctly referencing content 1500+ characters before the caret at 344-734ms cold start, well inside the existing p95. --- .../BaseCompletionPromptRendererTests.swift | 45 ++++++++++ CotabbyTests/Fixtures/llama-eval-cases.json | 88 ++++++++++++++++++- .../SuggestionRequestFactoryTests.swift | 63 +++++++++++++ 3 files changed, 195 insertions(+), 1 deletion(-) diff --git a/CotabbyTests/BaseCompletionPromptRendererTests.swift b/CotabbyTests/BaseCompletionPromptRendererTests.swift index 891043c4..774fc211 100644 --- a/CotabbyTests/BaseCompletionPromptRendererTests.swift +++ b/CotabbyTests/BaseCompletionPromptRendererTests.swift @@ -93,4 +93,49 @@ final class BaseCompletionPromptRendererTests: XCTestCase { XCTAssertTrue(withContext.contains("Nearby on screen: build is green")) XCTAssertTrue(withContext.hasSuffix("Status:")) } + + func test_surfaceContextLeadsThePrefaceAndPrefixStaysLast() { + let surface = SurfaceContext( + surfaceClass: .email, + applicationName: "Mail", + windowTitle: "Re: Q3 budget review", + domain: nil, + fieldPlaceholder: nil + ) + let prompt = BaseCompletionPromptRenderer.prompt( + prefixText: "Thanks again for", + applicationName: "Mail", + userName: "Jacob", + surfaceContext: surface + ) + XCTAssertTrue(prompt.hasPrefix("An email being written in Mail. The window is titled \"Re: Q3 budget review\".")) + XCTAssertTrue(prompt.contains("Written by Jacob")) + XCTAssertTrue(prompt.hasSuffix("Thanks again for")) + } + + func test_noSurfaceContextMeansPromptIsUnchanged() { + let without = BaseCompletionPromptRenderer.prompt( + prefixText: "Once upon", + applicationName: "Notes", + userName: nil + ) + XCTAssertEqual(without, "Once upon") + } + + func test_tokenBudgetAdmitsAPrefixLargerThanTheOldCharacterBudget() { + // 2500 characters of ordinary prose is ~600 estimated tokens: comfortably inside the + // shipped token budget even though it exceeds the old 2400-character cap. 
The whole + // prefix must survive. + let prefix = String(repeating: "every word counts here ", count: 109) + "and the end" + XCTAssertGreaterThan(prefix.count, 2400) + let prompt = BaseCompletionPromptRenderer.prompt( + prefixText: prefix, + applicationName: "Pages", + userName: "Jacob", + tokenBudget: SuggestionConfiguration.standard.llamaPromptTokenBudget + ) + XCTAssertTrue(prompt.hasSuffix("and the end")) + XCTAssertTrue(prompt.contains("every word counts here"), "the full prefix survives the token budget") + XCTAssertTrue(prompt.contains("Written by Jacob"), "context still fits alongside a large prefix") + } } diff --git a/CotabbyTests/Fixtures/llama-eval-cases.json b/CotabbyTests/Fixtures/llama-eval-cases.json index 88349911..8995de83 100644 --- a/CotabbyTests/Fixtures/llama-eval-cases.json +++ b/CotabbyTests/Fixtures/llama-eval-cases.json @@ -552,7 +552,10 @@ "12", "1", "the", - "that" + "that", + "12:30", + "12:15", + "1:30" ] } }, @@ -2618,5 +2621,88 @@ "<|eot_id|>" ] } + }, + { + "id": "longdoc-email-01", + "tags": [ + "longdoc", + "email", + "en" + ], + "applicationName": "Mail", + "bundleIdentifier": "com.apple.mail", + "precedingText": "Hi all,\n\nQuick update on where the migration stands. Last week we finished moving the billing exports to the new pipeline and validated the output against three months of historical invoices; the totals matched to the cent in every region except Brazil, where a rounding rule in the legacy system produced one-cent differences on about two percent of line items. We decided to carry the legacy rounding forward for now rather than restate old invoices, and we documented the rule in the runbook so nobody rediscovers it the hard way. This week the focus shifts to the reporting dashboards. The hardest part is the revenue recognition view, which still reads from the old warehouse tables; finance signed off on a two-week dual-write period so we can compare results before cutting over. 
Infra capacity is fine, the new jobs run in about forty minutes against the old pipeline's three hours, and the on-call burden has already dropped noticeably since the flaky nightly sync was retired. Hi all,\n\nQuick update on where the migration stands. Last week we finished moving the billing exports to the new pipeline and validated the output against three months of historical invoices; the totals matched to the cent in every region except Brazil, where a rounding rule in the legacy system produced one-cent differences on about two percent of line items. We decided to carry the legacy rounding forward for now rather than restate old invoices, and we documented the rule in the runbook so nobody rediscovers it the hard way. This week the focus shifts to the reporting dashboards. The hardest part is the revenue recognition view, which still reads from the old warehouse tables; finance signed off on a two-week dual-write period so we can compare results before cutting over. Infra capacity is fine, the new jobs run in about forty minutes against the old pipeline's three hours, and the on-call burden has already dropped noticeably since the flaky nightly sync was retired. The one risk I want to flag for next week is ", + "expectation": { + "kind": "positive", + "acceptable": [ + "the", + "that", + "finance", + "the dual-write", + "the reporting", + "the revenue", + "Brazil", + "the cutover", + "the dashboards", + "capacity", + "timing", + "scope" + ] + } + }, + { + "id": "longdoc-prose-02", + "tags": [ + "longdoc", + "prose", + "en" + ], + "applicationName": "Pages", + "bundleIdentifier": "com.apple.iWork.Pages", + "precedingText": "Chapter notes. The valley settlement grew slowly at first, hemmed in by the river on one side and the escarpment on the other, and its earliest houses were built from the gray stone that still shows in the oldest foundations. 
Trade came up the river in flat-bottomed barges, salt and iron heading north, grain and wool heading south, and the toll bridge paid for the first school. When the railway arrived the town doubled in a decade, and the old market square, once the whole of public life, became a quiet courtyard behind the new station hotel. Photographs from that era show awnings over every shopfront and a clock tower that no longer stands; it was struck by lightning twice in one summer and pulled down before it could fall on the post office. Chapter notes. The valley settlement grew slowly at first, hemmed in by the river on one side and the escarpment on the other, and its earliest houses were built from the gray stone that still shows in the oldest foundations. Trade came up the river in flat-bottomed barges, salt and iron heading north, grain and wool heading south, and the toll bridge paid for the first school. When the railway arrived the town doubled in a decade, and the old market square, once the whole of public life, became a quiet courtyard behind the new station hotel. Photographs from that era show awnings over every shopfront and a clock tower that no longer stands; it was struck by lightning twice in one summer and pulled down before it could fall on the post office. Chapter notes. The valley settlement grew slowly at first, hemmed in by the river on one side and the escarpment on the other, and its earliest houses were built from the gray stone that still shows in the oldest foundations. Trade came up the river in flat-bottomed barges, salt and iron heading north, grain and wool heading south, and the toll bridge paid for the first school. When the railway arrived the town doubled in a decade, and the old market square, once the whole of public life, became a quiet courtyard behind the new station hotel. 
Photographs from that era show awnings over every shopfront and a clock tower that no longer stands; it was struck by lightning twice in one summer and pulled down before it could fall on the post office. What the photographs cannot show is ", + "expectation": { + "kind": "positive", + "acceptable": [ + "the", + "how", + "what", + "is", + "the sound", + "the smell", + "the noise", + "the life", + "the people", + "the bustle" + ] + } + }, + { + "id": "longdoc-chat-03", + "tags": [ + "longdoc", + "chat", + "en" + ], + "applicationName": "Slack", + "bundleIdentifier": "com.tinyspeck.slackmacgap", + "isMultiLineEnabled": false, + "precedingText": "[09:12] maya: morning! the staging deploy from last night looks healthy, error rate is flat\n[09:14] tom: nice. did the cache change go out with it?\n[09:15] maya: yes, hit rate is up to 94 percent from 81\n[09:17] tom: that explains the latency graph, p95 dropped almost 200ms overnight\n[09:20] priya: customer success just pinged me, the Acme folks noticed the speedup already\n[09:22] tom: love that. anything left before we promote to prod?\n[09:25] maya: two things: the migration dry run on the replica, and sign-off from security on the new header config\n[09:12] maya: morning! the staging deploy from last night looks healthy, error rate is flat\n[09:14] tom: nice. did the cache change go out with it?\n[09:15] maya: yes, hit rate is up to 94 percent from 81\n[09:17] tom: that explains the latency graph, p95 dropped almost 200ms overnight\n[09:20] priya: customer success just pinged me, the Acme folks noticed the speedup already\n[09:22] tom: love that. anything left before we promote to prod?\n[09:25] maya: two things: the migration dry run on the replica, and sign-off from security on the new header config\n[09:12] maya: morning! the staging deploy from last night looks healthy, error rate is flat\n[09:14] tom: nice. 
did the cache change go out with it?\n[09:15] maya: yes, hit rate is up to 94 percent from 81\n[09:17] tom: that explains the latency graph, p95 dropped almost 200ms overnight\n[09:20] priya: customer success just pinged me, the Acme folks noticed the speedup already\n[09:22] tom: love that. anything left before we promote to prod?\n[09:25] maya: two things: the migration dry run on the replica, and sign-off from security on the new header config\n[09:27] priya: ok, so the plan for tomorrow is ", + "expectation": { + "kind": "positive", + "acceptable": [ + "the migration", + "migration", + "dry run", + "the dry", + "security", + "sign-off", + "to run", + "to do", + "to get", + "to promote", + "1", + "first" + ] + } } ] diff --git a/CotabbyTests/SuggestionRequestFactoryTests.swift b/CotabbyTests/SuggestionRequestFactoryTests.swift index fdc1a11e..623041d8 100644 --- a/CotabbyTests/SuggestionRequestFactoryTests.swift +++ b/CotabbyTests/SuggestionRequestFactoryTests.swift @@ -76,6 +76,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { maxPrefixWordsFoundationModel: 9, maxPrefixCharactersFoundationModel: 96, maxSuffixCharacters: 192, + llamaPromptTokenBudget: 1934, defaultUserName: nil, defaultWordCountPreset: .sevenToTwelve, focusPollIntervalMilliseconds: 50 @@ -113,6 +114,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { maxPrefixWordsFoundationModel: 6, maxPrefixCharactersFoundationModel: 96, maxSuffixCharacters: 192, + llamaPromptTokenBudget: 1934, defaultUserName: nil, defaultWordCountPreset: .sevenToTwelve, focusPollIntervalMilliseconds: 50 @@ -152,6 +154,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { maxPrefixWordsFoundationModel: 150, maxPrefixCharactersFoundationModel: 2500, maxSuffixCharacters: 192, + llamaPromptTokenBudget: 1934, defaultUserName: nil, defaultWordCountPreset: .sevenToTwelve, focusPollIntervalMilliseconds: 50 @@ -292,4 +295,64 @@ final class SuggestionRequestFactoryTests: XCTestCase { 
XCTAssertEqual(clipboardContext.count, 1_200) XCTAssertTrue(clipboardContext.hasSuffix("...")) } + + func test_buildRequest_includesSurfaceContextWhenEnabled() { + let context = CotabbyTestFixtures.focusedInputContext( + applicationName: "Mail", + bundleIdentifier: "com.apple.mail", + precedingText: "Thanks again for", + windowTitle: "Re: Q3 budget - Mail" + ) + + let result = SuggestionRequestFactory.buildRequest( + context: context, + settings: CotabbyTestFixtures.settingsSnapshot(), + configuration: .standard + ) + + XCTAssertEqual(result.request.surfaceContext?.surfaceClass, .email) + XCTAssertTrue(result.request.prompt.contains("An email being written in Mail.")) + XCTAssertTrue( + result.request.prompt.contains("The window is titled \"Re: Q3 budget\"."), + "the app-name suffix is stripped from the title before it reaches the prompt" + ) + XCTAssertTrue(result.request.prompt.hasSuffix("Thanks again for")) + } + + func test_buildRequest_omitsSurfaceContextWhenDisabled() { + let context = CotabbyTestFixtures.focusedInputContext( + applicationName: "Mail", + bundleIdentifier: "com.apple.mail", + precedingText: "Thanks again for", + windowTitle: "Re: Q3 budget" + ) + + let result = SuggestionRequestFactory.buildRequest( + context: context, + settings: CotabbyTestFixtures.settingsSnapshot(isSurfaceContextEnabled: false), + configuration: .standard + ) + + XCTAssertNil(result.request.surfaceContext) + XCTAssertFalse(result.request.prompt.contains("An email being written")) + XCTAssertFalse(result.request.prompt.contains("Re: Q3 budget")) + } + + func test_buildRequest_omitsSurfaceContextForCodeEditors() { + let context = CotabbyTestFixtures.focusedInputContext( + applicationName: "Xcode", + bundleIdentifier: "com.apple.dt.Xcode", + precedingText: "// Returns the", + windowTitle: "Project.swift" + ) + + let result = SuggestionRequestFactory.buildRequest( + context: context, + settings: CotabbyTestFixtures.settingsSnapshot(), + configuration: .standard + ) + + 
XCTAssertNil(result.request.surfaceContext, "app metadata biases base models toward code; editors stay bare") + XCTAssertFalse(result.request.prompt.contains("Project.swift")) + } } From a4964820e632213a77b7af7e1df9af472d5b55f5 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 20:18:00 -0700 Subject: [PATCH 4/6] fix(focus): nonisolated deinit on SurfaceContextCache for app-hosted test teardown Stored-property @MainActor classes deallocated inside app-hosted tests double-free without an explicitly nonisolated deinit; FieldStyleCache carries the same workaround. Surfaced by the live resolver tests once this branch rebased onto them. --- Cotabby/Services/Focus/SurfaceContextCache.swift | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cotabby/Services/Focus/SurfaceContextCache.swift b/Cotabby/Services/Focus/SurfaceContextCache.swift index 4f06ff65..6930eca7 100644 --- a/Cotabby/Services/Focus/SurfaceContextCache.swift +++ b/Cotabby/Services/Focus/SurfaceContextCache.swift @@ -25,6 +25,11 @@ final class SurfaceContextCache { private var key: String? private var captured: CapturedSurfaceContext = .empty + /// Stored-property @MainActor classes deallocated inside app-hosted tests double-free without + /// an explicitly nonisolated deinit (the isolated-deinit runtime path over-releases). Same + /// workaround as the other main-actor stores exercised by tests. + nonisolated deinit {} + /// Returns the cached capture when `key` matches the last resolution, otherwise resolves once /// and caches the result (including all-nil results, so a host exposing nothing is not /// re-probed every poll). 
From 5623a7195a3624d182e05c531d85d2db9c01f486 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 20:22:09 -0700 Subject: [PATCH 5/6] review: anchored case-insensitive title-suffix strip, derived token budget, FM placeholder parity The lowercased hasSuffix paired with an original-string dropLast count could clip the wrong amount for characters that expand under case folding; the strip now uses an anchored backwards case-insensitive range. The 1934 token budget is now derived from LlamaRuntimeConfiguration.default so a context-window change cannot silently desynchronize it, with the output ceiling and safety margin as named constants. The FM prompt now states the field placeholder exactly like the llama preface, and the prefix-window comment states the real latency contract on trim-rejecting catalog models instead of assuming reuse. --- Cotabby/Models/SuggestionModels.swift | 33 ++++++++++++++----- .../FoundationModelPromptRenderer.swift | 5 +++ Cotabby/Support/SurfaceContextComposer.swift | 10 ++++-- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift index f56f2367..a0f55c88 100644 --- a/Cotabby/Models/SuggestionModels.swift +++ b/Cotabby/Models/SuggestionModels.swift @@ -148,6 +148,20 @@ struct SuggestionConfiguration: Equatable, Sendable { let defaultWordCountPreset: SuggestionWordCountPreset let focusPollIntervalMilliseconds: Int + /// Output ceiling reserved out of the llama context window when sizing the prompt budget: + /// the largest realistic per-request token budget (multi-line doubles the 26-token default). + static let llamaPromptOutputCeilingTokens = 50 + /// Margin for BOS plus token-estimator error; the estimator skews conservative, so real + /// prompts land under the derived budget. 
+ static let llamaPromptSafetyMarginTokens = 64 + /// The per-sequence KV capacity minus the output ceiling and safety margin. Computed from + /// `LlamaRuntimeConfiguration.default` so the two constants cannot drift apart silently. + static var derivedLlamaPromptTokenBudget: Int { + Int(LlamaRuntimeConfiguration.default.contextWindowTokens) + - llamaPromptOutputCeilingTokens + - llamaPromptSafetyMarginTokens + } + /// The configuration shipped by the app today. /// These are product defaults, not temporary debug overrides. static let standard = SuggestionConfiguration( @@ -166,11 +180,13 @@ struct SuggestionConfiguration: Equatable, Sendable { repetitionPenalty: 1.05, randomSeed: nil, maxPrefixWords: 150, - // The llama prefix window matches the Foundation Models one. The earlier 1000-char/50-word - // cap predates KV prefix reuse: the prefill cost of a larger window is now paid once per - // focused field (and on reuse misses), not per keystroke, while the extra preceding - // sentences carry the topic and voice that multi-paragraph email/docs continuations need. - // The token budget below keeps the total prompt bounded by what the model can hold. + // The llama prefix window matches the Foundation Models one: the extra preceding sentences + // carry the topic and voice that multi-paragraph email/docs continuations need, and the + // token budget below keeps the total prompt bounded by what the model can hold. Latency + // honesty: where KV prefix reuse works (dense models), the larger window is prefilled once + // per field; the hybrid/SWA catalog models reject partial trims and re-prefill per request, + // so there the wider window costs prefill only when the field actually holds more than the + // old 1000-char cap, i.e. long-document sessions, which is exactly where it buys quality. maxPrefixCharacters: 2500, // Apple's on-device model has a 4096-token shared context. 
Even with instructions plus // visual/clipboard context, there is room to send ~3x the llama window before crowding @@ -178,10 +194,9 @@ struct SuggestionConfiguration: Equatable, Sendable { maxPrefixWordsFoundationModel: 150, maxPrefixCharactersFoundationModel: 2500, maxSuffixCharacters: 192, - // 2048 (LlamaRuntimeConfiguration.default.contextWindowTokens, the per-sequence KV - // capacity) minus the 50-token output ceiling and a 64-token margin for BOS plus - // estimator error. The estimator skews conservative, so real prompts land under this. - llamaPromptTokenBudget: 1934, + // Derived from the runtime constant so a context-window change can never silently + // desynchronize the prompt budget from the KV capacity the model actually has. + llamaPromptTokenBudget: SuggestionConfiguration.derivedLlamaPromptTokenBudget, // Seed the profile settings with lightweight defaults on first launch. defaultUserName: "Jacob", defaultWordCountPreset: .twelveToTwenty, diff --git a/Cotabby/Support/FoundationModelPromptRenderer.swift b/Cotabby/Support/FoundationModelPromptRenderer.swift index 420d57af..b0f17a74 100644 --- a/Cotabby/Support/FoundationModelPromptRenderer.swift +++ b/Cotabby/Support/FoundationModelPromptRenderer.swift @@ -127,6 +127,11 @@ enum FoundationModelPromptRenderer { if let domain = surface.domain { sections.append("The user is on \(domain).") } + // Same fact the llama preface states; dropping it here made the two prompts disagree + // about what the field is for (the placeholder is often the only label a field has). 
+ if let placeholder = surface.fieldPlaceholder { + sections.append("The text field is labeled \"\(placeholder)\".") + } } if let summary = request.visualContextSummary, diff --git a/Cotabby/Support/SurfaceContextComposer.swift b/Cotabby/Support/SurfaceContextComposer.swift index cd18c194..619f8042 100644 --- a/Cotabby/Support/SurfaceContextComposer.swift +++ b/Cotabby/Support/SurfaceContextComposer.swift @@ -101,8 +101,14 @@ nonisolated enum SurfaceContextComposer { guard var title = nonEmptyCleaned(rawTitle) else { return nil } for separator in [" - ", " — ", " – "] { let suffix = separator + applicationName - if title.lowercased().hasSuffix(suffix.lowercased()) { - title = String(title.dropLast(suffix.count)) + // Anchored backwards range search instead of fold-then-count: characters that expand + // under case folding would make a lowercased `hasSuffix` length disagree with the + // original title's character count and clip the wrong amount. + if let range = title.range( + of: suffix, + options: [.caseInsensitive, .anchored, .backwards] + ) { + title = String(title[..<range.lowerBound]) break } } From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Thu, 11 Jun 2026 20:32:28 -0700 Subject: [PATCH 6/6] ci: retrigger checks after force-push synchronize was dropped