Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Cotabby.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion Cotabby/Models/FocusModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@ struct FocusedInputSnapshot: Equatable {
/// call sites compiling unchanged.
let resolvedFieldStyle: ResolvedFieldStyle?

/// The focused window's title, read once per field session (cached by `SurfaceContextCache`)
/// when surface context is enabled. The window title carries the highest-signal surface cue
/// available over Accessibility: the email subject, document name, channel, or page title.
/// Nil when disabled, unavailable, or the field is secure. The initializer default keeps
/// existing call sites compiling unchanged.
let windowTitle: String?

/// The focused field's placeholder text (`AXPlaceholderValue`), read with the window title and
/// under the same gating. Nil when absent. The initializer default keeps existing call sites
/// compiling unchanged.
let fieldPlaceholder: String?

/// Explicit initializer keeps `focusChangeSequence` immutable while preserving the old
/// memberwise-call ergonomics for tests that do not care about focus identity.
///
Expand Down Expand Up @@ -234,7 +246,9 @@ struct FocusedInputSnapshot: Equatable {
isWebContentField: Bool = false,
focusChangeSequence: UInt64 = 0,
focusedURLString: String? = nil,
resolvedFieldStyle: ResolvedFieldStyle? = nil
resolvedFieldStyle: ResolvedFieldStyle? = nil,
windowTitle: String? = nil,
fieldPlaceholder: String? = nil
) {
self.applicationName = applicationName
self.bundleIdentifier = bundleIdentifier
Expand All @@ -257,6 +271,8 @@ struct FocusedInputSnapshot: Equatable {
self.focusChangeSequence = focusChangeSequence
self.focusedURLString = focusedURLString
self.resolvedFieldStyle = resolvedFieldStyle
self.windowTitle = windowTitle
self.fieldPlaceholder = fieldPlaceholder
}

var identity: FocusedInputIdentity {
Expand Down
4 changes: 4 additions & 0 deletions Cotabby/Models/SuggestionEngineModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable {
let isUsingCustomWordCountRange: Bool
let customWordCountRange: SuggestionWordRange
let isClipboardContextEnabled: Bool
/// When true (the default), the request factory may condition prompts on the focused surface
/// (app class, window title, web domain, field placeholder). Travels in the snapshot so
/// generation uses the same value the Settings UI shows.
let isSurfaceContextEnabled: Bool
/// User-authored profile data for Cotabby's base-model completion prompt.
/// This travels in the snapshot so generation uses the same value the Settings UI shows.
let userName: String
Expand Down
53 changes: 48 additions & 5 deletions Cotabby/Models/SuggestionModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,32 @@ struct SuggestionConfiguration: Equatable, Sendable {
let maxPrefixWordsFoundationModel: Int
let maxPrefixCharactersFoundationModel: Int
let maxSuffixCharacters: Int
/// Estimated-token ceiling for the llama prompt (preface + prefix). Derived from the runtime's
/// per-sequence context window minus the output budget and a safety margin, so the renderer
/// truncates against what the model can actually hold instead of a flat character guess that
/// misjudges code, CJK, and punctuation-heavy text.
let llamaPromptTokenBudget: Int
/// Shipped first-launch default for the user's saved profile.
/// `SuggestionSettingsModel` persists the user's real preference; configuration only provides
/// the app's starting value for a fresh install.
let defaultUserName: String?
let defaultWordCountPreset: SuggestionWordCountPreset
let focusPollIntervalMilliseconds: Int

/// Output ceiling reserved out of the llama context window when sizing the prompt budget:
/// the largest realistic per-request token budget (multi-line doubles the 26-token default).
// NOTE(review): doubling a 26-token default yields 52, which is slightly above this 50-token
// ceiling — confirm the safety margin below is intended to absorb the difference.
static let llamaPromptOutputCeilingTokens = 50
/// Margin for BOS plus token-estimator error; the estimator skews conservative, so real
/// prompts land under the derived budget.
static let llamaPromptSafetyMarginTokens = 64
/// Prompt-token budget for the llama path: the default runtime's context window with the
/// output ceiling and the safety margin subtracted, in that order.
///
/// Reading the window from `LlamaRuntimeConfiguration.default` on every access (rather than
/// duplicating the number here) keeps the two constants from drifting apart silently.
static var derivedLlamaPromptTokenBudget: Int {
    let contextWindow = Int(LlamaRuntimeConfiguration.default.contextWindowTokens)
    let afterOutputReserve = contextWindow - llamaPromptOutputCeilingTokens
    return afterOutputReserve - llamaPromptSafetyMarginTokens
}

/// The configuration shipped by the app today.
/// These are product defaults, not temporary debug overrides.
static let standard = SuggestionConfiguration(
Expand All @@ -160,17 +179,24 @@ struct SuggestionConfiguration: Equatable, Sendable {
minP: 0.08,
repetitionPenalty: 1.05,
randomSeed: nil,
maxPrefixWords: 50,
// Prompt windows should stay small for the local llama path. Sending an entire editor
// buffer hurts latency with little quality gain because Cotabby is only completing the
// immediate local continuation.
maxPrefixCharacters: 1000,
maxPrefixWords: 150,
// The llama prefix window matches the Foundation Models one: the extra preceding sentences
// carry the topic and voice that multi-paragraph email/docs continuations need, and the
// token budget below keeps the total prompt bounded by what the model can hold. Latency
// honesty: where KV prefix reuse works (dense models), the larger window is prefilled once
// per field; the hybrid/SWA catalog models reject partial trims and re-prefill per request,
// so there the wider window costs prefill only when the field actually holds more than the
// old 1000-char cap, i.e. long-document sessions, which is exactly where it buys quality.
maxPrefixCharacters: 2500,
// Apple's on-device model has a 4096-token shared context. Even with instructions plus
// visual/clipboard context, there is room to send this 150-word / 2500-character window
// (the same size the llama path now uses) before crowding the prompt, and the extra
// surrounding sentences materially help mid-thought completions.
maxPrefixWordsFoundationModel: 150,
maxPrefixCharactersFoundationModel: 2500,
maxSuffixCharacters: 192,
// Derived from the runtime constant so a context-window change can never silently
// desynchronize the prompt budget from the KV capacity the model actually has.
llamaPromptTokenBudget: SuggestionConfiguration.derivedLlamaPromptTokenBudget,
// Seed the profile settings with lightweight defaults on first launch.
defaultUserName: "Jacob",
defaultWordCountPreset: .twelveToTwenty,
Expand Down Expand Up @@ -210,6 +236,12 @@ struct FocusedInputContext: Equatable, Sendable {
let isWebContentField: Bool
/// The host field's own text font/color, carried through so the overlay can match it.
let resolvedFieldStyle: ResolvedFieldStyle?
/// Surface metadata captured once per field session, carried through so the request factory
/// can condition the prompt on what the user is writing in (see `SurfaceContextComposer`).
let windowTitle: String?
let fieldPlaceholder: String?
let focusedURLString: String?
let isIntegratedTerminal: Bool
/// Carries the immutable focus-observation identity across debounce/generation boundaries.
/// Without this, later visual-context lookups could fall back to `elementIdentifier` alone and
/// reintroduce the CFHash collision class this sequence is meant to avoid.
Expand All @@ -235,6 +267,10 @@ struct FocusedInputContext: Equatable, Sendable {
isSecure = snapshot.isSecure
isWebContentField = snapshot.isWebContentField
resolvedFieldStyle = snapshot.resolvedFieldStyle
windowTitle = snapshot.windowTitle
fieldPlaceholder = snapshot.fieldPlaceholder
focusedURLString = snapshot.focusedURLString
isIntegratedTerminal = snapshot.isIntegratedTerminal
focusChangeSequence = snapshot.focusChangeSequence
self.generation = generation
}
Expand Down Expand Up @@ -345,6 +381,11 @@ struct SuggestionRequest: Equatable, Sendable {
let clipboardContext: String?
/// Ephemeral screen context summary injected only when available for the active text field.
let visualContextSummary: String?
/// The composed writing-surface description (app class, window title, domain, placeholder),
/// nil when the user disabled surface context or the surface class suppresses it. The llama
/// prompt has already folded it in; this field exists so the Foundation Models renderer can
/// state the same sanitized facts in its own prompt shape.
let surfaceContext: SurfaceContext?
/// When enabled, the normalizer keeps multiple lines instead of truncating to the first line.
let isMultiLineEnabled: Bool
/// Correlation ID stamped onto every log line touching this request — coordinator state
Expand Down Expand Up @@ -373,6 +414,7 @@ struct SuggestionRequest: Equatable, Sendable {
languageInstruction: String?,
clipboardContext: String?,
visualContextSummary: String?,
surfaceContext: SurfaceContext? = nil,
isMultiLineEnabled: Bool,
requestID: String = "req_unknown"
) {
Expand All @@ -395,6 +437,7 @@ struct SuggestionRequest: Equatable, Sendable {
self.languageInstruction = languageInstruction
self.clipboardContext = clipboardContext
self.visualContextSummary = visualContextSummary
self.surfaceContext = surfaceContext
self.isMultiLineEnabled = isMultiLineEnabled
self.requestID = requestID
}
Expand Down
3 changes: 3 additions & 0 deletions Cotabby/Models/SuggestionSettingsData.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ struct SuggestionSettingsData: Equatable {
var customWordCountLowWords: Int
var customWordCountHighWords: Int
var isClipboardContextEnabled: Bool
/// When on (the default), prompts may state which app, window title, web domain, and field the
/// user is typing in, so suggestions stay on-topic for the surface. Everything stays on device.
var isSurfaceContextEnabled: Bool
var isFastModeEnabled: Bool
/// When on, Cotabby checks the user's current word with `NSSpellChecker` and hides the normal
/// continuation when the word looks misspelled, so completions don't pile onto a broken word.
Expand Down
24 changes: 20 additions & 4 deletions Cotabby/Models/SuggestionSettingsModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ final class SuggestionSettingsModel: ObservableObject {
@Published private(set) var customWordCountLowWords: Int
@Published private(set) var customWordCountHighWords: Int
@Published private(set) var isClipboardContextEnabled: Bool
/// When on (the default), prompts may state which app, window, domain, and field the user is
/// typing in. See `SurfaceContextComposer` for what is actually rendered.
@Published private(set) var isSurfaceContextEnabled: Bool
@Published private(set) var isFastModeEnabled: Bool
/// When on, a misspelled current word hides the normal continuation (see the typo gate).
@Published private(set) var suppressCompletionsOnTypo: Bool
Expand Down Expand Up @@ -162,6 +165,7 @@ final class SuggestionSettingsModel: ObservableObject {
customWordCountLowWords = data.customWordCountLowWords
customWordCountHighWords = data.customWordCountHighWords
isClipboardContextEnabled = data.isClipboardContextEnabled
isSurfaceContextEnabled = data.isSurfaceContextEnabled
isFastModeEnabled = data.isFastModeEnabled
suppressCompletionsOnTypo = data.suppressCompletionsOnTypo
offerTypoCorrections = data.offerTypoCorrections
Expand Down Expand Up @@ -218,6 +222,7 @@ final class SuggestionSettingsModel: ObservableObject {
high: customWordCountHighWords
),
isClipboardContextEnabled: isClipboardContextEnabled,
isSurfaceContextEnabled: isSurfaceContextEnabled,
userName: userName,
customRules: customRules,
extendedContext: extendedContext,
Expand Down Expand Up @@ -368,6 +373,15 @@ final class SuggestionSettingsModel: ObservableObject {
store.saveCustomWordCountRange(low: normalized.lowWords, high: normalized.highWords)
}

/// Turns surface-context prompting on or off and persists the choice.
///
/// A call that repeats the current value is a no-op: the published property is not
/// reassigned and nothing is written to the store.
/// - Parameter enabled: The new value for `isSurfaceContextEnabled`.
func setSurfaceContextEnabled(_ enabled: Bool) {
    // Nothing to do when the toggle already holds the requested value.
    if isSurfaceContextEnabled == enabled { return }

    isSurfaceContextEnabled = enabled
    store.saveSurfaceContextEnabled(enabled)
}

func setClipboardContextEnabled(_ enabled: Bool) {
guard isClipboardContextEnabled != enabled else {
return
Expand Down Expand Up @@ -949,12 +963,13 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding {
$customWordCountLowWords,
$customWordCountHighWords
)
// `extendedContext` shares its outer slot with `suggestInIntegratedTerminals` via a paired
// `CombineLatest` so the new toggle costs no extra top-level slot (the outer is at the cap).
// `extendedContext` shares its outer slot with `suggestInIntegratedTerminals` and
// `isSurfaceContextEnabled` via one grouped `CombineLatest3` so new toggles cost no extra
// top-level slot (the outer is at the cap).
return Publishers.CombineLatest4(
primary,
$acceptanceGranularity,
Publishers.CombineLatest($extendedContext, $suggestInIntegratedTerminals),
Publishers.CombineLatest3($extendedContext, $suggestInIntegratedTerminals, $isSurfaceContextEnabled),
customRange
)
.map { primaryTuple, granularity, extendedContextTuple, customRangeTuple in
Expand All @@ -966,7 +981,7 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding {
let (debounce, focusPoll, multiLine, acceptToggles) = timing
let (autoAcceptPunctuation, addSpaceAfterAccept) = acceptToggles
let (isCustomActive, customLow, customHigh) = customRangeTuple
let (extendedContext, suggestInIntegratedTerminals) = extendedContextTuple
let (extendedContext, suggestInIntegratedTerminals, surfaceContextEnabled) = extendedContextTuple
return SuggestionSettingsSnapshot(
isGloballyEnabled: globallyEnabled,
disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)),
Expand All @@ -976,6 +991,7 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding {
isUsingCustomWordCountRange: isCustomActive,
customWordCountRange: SuggestionWordRange.clamped(low: customLow, high: customHigh),
isClipboardContextEnabled: clipboardContextEnabled,
isSurfaceContextEnabled: surfaceContextEnabled,
userName: userName,
customRules: customRules,
extendedContext: extendedContext,
Expand Down
41 changes: 34 additions & 7 deletions Cotabby/Services/Focus/FocusSnapshotResolver.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ struct FocusSnapshotResolver {
/// `deepWalkThrottle`: it carries state across the value-typed resolver's non-mutating polls.
private let fieldStyleCache = FieldStyleCache()

/// Caches the per-field surface metadata (window title, placeholder, URL) the same way; see
/// `SurfaceContextCache` for why the value is frozen for the whole field session.
private let surfaceContextCache = SurfaceContextCache()

/// Creates a resolver, optionally with an injected geometry resolver.
/// - Parameter geometryResolver: The geometry resolver to use; when nil, a fresh
///   `AXTextGeometryResolver()` is constructed instead.
init(geometryResolver: AXTextGeometryResolver? = nil) {
    if let injectedResolver = geometryResolver {
        self.geometryResolver = injectedResolver
    } else {
        // Only build the default instance when the caller did not supply one.
        self.geometryResolver = AXTextGeometryResolver()
    }
}
Expand Down Expand Up @@ -209,12 +213,33 @@ struct FocusSnapshotResolver {
let nsValue = contextWindow.text as NSString
let safeSelectionLocation = min(contextWindow.selection.location, nsValue.length)
let trailingStart = min(contextWindow.selection.location + contextWindow.selection.length, nsValue.length)
// Per-site disable: read the page URL only when the feature is enabled, so the default
// focus-capture path performs no extra Accessibility round-trips. The read is fail-safe (nil on
// any miss), so the worst case is the per-site gate staying inert.
let focusedURLString = PerDomainDisableSettings.isEnabled()
? AXHelper.webURL(near: focusedElement)
: nil
// Surface metadata (window title, field placeholder, page URL), captured once per field
// session and frozen for its lifetime: re-reading per poll would add cross-process AX
// round-trips to the hot path, and a retitling window mid-typing would change the prompt
// bytes ahead of the prefix and break llama KV prefix reuse. The URL is read for browsers
// (surface conditioning) or whenever per-site disable wants it; navigation recreates the
// focused web element, so a stale URL cannot outlive its page. Secure fields are never
// probed.
let capturedSurface: CapturedSurfaceContext
if resolvedCandidate.isSecure {
capturedSurface = .empty
} else {
let surfaceKey =
"\(application.processIdentifier):\(resolvedCandidate.elementIdentifier):\(focusChangeSequence)"
capturedSurface = surfaceContextCache.capture(forKey: surfaceKey) {
let wantsURL = PerDomainDisableSettings.isEnabled()
|| BrowserAppDetector.isBrowser(bundleIdentifier: bundleIdentifier)
return CapturedSurfaceContext(
windowTitle: AXHelper.windowTitle(near: focusedElement),
fieldPlaceholder: AXHelper.stringValue(
for: kAXPlaceholderValueAttribute as CFString,
on: resolvedCandidate.element
),
urlString: wantsURL ? AXHelper.webURL(near: focusedElement) : nil
)
}
}
let focusedURLString = capturedSurface.urlString
// Resolve the host field's own font/color so ghost text can match it. Cached by element
// identity (this is a synchronous AX read and the resolver runs on the focus poll), and
// skipped for secure fields, which are never styled or assisted.
Expand Down Expand Up @@ -277,7 +302,9 @@ struct FocusSnapshotResolver {
isWebContentField: isWebContentField,
focusChangeSequence: focusChangeSequence,
focusedURLString: focusedURLString,
resolvedFieldStyle: resolvedFieldStyle
resolvedFieldStyle: resolvedFieldStyle,
windowTitle: capturedSurface.windowTitle,
fieldPlaceholder: capturedSurface.fieldPlaceholder
)

if resolvedCandidate.isSecure {
Expand Down
Loading