Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,12 +303,12 @@ This script automatically:
- **`Cmd+Shift+V`** - Voice edit clipboard text

> [!NOTE]
> After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility
> After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility. The transcription hotkey may also prompt for Microphone access for skhd the first time it records.

> [!TIP]
> To keep the recording-status notification visible for the whole flow, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
> Also enable "Allow notification when mirroring or sharing the display".
> The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss.
> The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss. Transcription hotkey diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log` and `~/.cache/agent-cli/transcribe-hotkey-agent.log`.

### Linux Hotkeys

Expand Down
5 changes: 5 additions & 0 deletions agent_cli/install/hotkeys.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def install_hotkeys() -> None:
2. Creates config at `~/.config/skhd/skhdrc`
3. Starts skhd as a background service
4. May require Accessibility permissions: System Settings → Privacy & Security → Accessibility → enable 'skhd'
5. May require Microphone permissions when the transcription hotkey first records

**Linux** (manual DE configuration):

Expand Down Expand Up @@ -76,3 +77,7 @@ def install_hotkeys() -> None:
"cyan",
)
print_with_style(" 2. Add and enable 'skhd'", "cyan")
print_with_style(
" 3. Allow Microphone access for 'skhd' when prompted by the transcription hotkey",
"cyan",
)
2 changes: 1 addition & 1 deletion docs/commands/install-hotkeys.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Sets up hotkeys for common workflows:
- Super+Shift+A: Autocorrect clipboard text
- Super+Shift+V: Voice edit clipboard text

On macOS, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility.
On macOS, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility. The transcription hotkey may also prompt for Microphone access for skhd the first time it records.

On a fresh install, `agent-cli install-hotkeys` may take longer than expected because it can download Python extras before setting up the platform-specific scripts.

Expand Down
7 changes: 7 additions & 0 deletions docs/system-integration.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ This automatically installs missing `audio` and `llm` extras, then sets up the p
>
> After installation, grant Accessibility permissions to skhd:
> System Settings → Privacy & Security → Accessibility → enable "skhd"
>
> The transcription hotkey may also prompt for Microphone permission for "skhd"
> the first time it starts recording. Allow it so hotkey-launched recordings
> contain real audio instead of silence.

### Manual skhd Configuration

Expand Down Expand Up @@ -67,6 +71,9 @@ skhd --reload

The `install-hotkeys` command automatically installs terminal-notifier for visual feedback and keeps the active recording-status notification pinned while transient status/result toasts auto-dismiss.

Transcription hotkey diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log`
and `~/.cache/agent-cli/transcribe-hotkey-agent.log`.

Configure notifications in System Settings:
1. Settings → Notifications → terminal-notifier
2. Enable "Allow Notifications"
Expand Down
5 changes: 5 additions & 0 deletions scripts/macos-hotkeys/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ System-wide hotkeys for agent-cli voice AI features on macOS.
- **`Cmd+Shift+V`** → Toggle voice edit mode for clipboard

Results appear in notifications and clipboard.
Diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log` and
`~/.cache/agent-cli/transcribe-hotkey-agent.log`.

> [!TIP]
> For a persistent recording-status notification, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
Expand All @@ -32,6 +34,9 @@ Results appear in notifications and clipboard.
**Hotkey not working?**
- Grant accessibility permissions in System Settings

**Recording is silent?**
- Allow Microphone access for `skhd` in System Settings → Privacy & Security → Microphone

**No notifications?**
```bash
terminal-notifier -title "Test" -message "Hello"
Expand Down
54 changes: 43 additions & 11 deletions scripts/macos-hotkeys/toggle-transcription.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,76 @@

NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier}
AGENT_CLI=${AGENT_CLI:-"$HOME/.local/bin/agent-cli"}
PID_FILE=${PID_FILE:-"$HOME/.cache/agent-cli/transcribe.pid"}
CACHE_DIR=${CACHE_DIR:-"$HOME/.cache/agent-cli"}
LAST_FILE=${LAST_FILE:-"$CACHE_DIR/transcribe-hotkey.last"}
LOG_FILE=${LOG_FILE:-"$CACHE_DIR/transcribe-hotkey.log"}
AGENT_LOG_FILE=${AGENT_LOG_FILE:-"$CACHE_DIR/transcribe-hotkey-agent.log"}
DEBOUNCE_SECONDS=${DEBOUNCE_SECONDS:-1}
RECORDING_GROUP="agent-cli-transcribe-recording"
TEMP_PREFIX="agent-cli-transcribe-temp"

mkdir -p "$CACHE_DIR"

# Append one diagnostic line to $LOG_FILE, prefixed with the local timestamp
# (date format %Y-%m-%dT%H:%M:%S%z). All arguments are joined into the message.
log_event() {
printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S%z')" "$*" >> "$LOG_FILE"
}

notify_temp() {
local title=$1
local message=$2
local duration=${3:-4} # 4 seconds default
local group="${TEMP_PREFIX}-${RANDOM}-$$"

"$NOTIFIER" -title "$title" -message "$message" -group "$group"
"$NOTIFIER" -title "$title" -message "$message" -group "$group" || true
(
sleep "$duration"
"$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true
) &
}

is_transcribe_running() {
[ -f "$PID_FILE" ] || return 1
local pid
pid=$(cat "$PID_FILE" 2>/dev/null || true)
[ -n "$pid" ] && kill -0 "$pid" >/dev/null 2>&1
local status_json
if ! status_json=$("$AGENT_CLI" transcribe --status --json 2>> "$LOG_FILE"); then
log_event "status check failed"
return 1
fi
log_event "status: $status_json"
printf '%s\n' "$status_json" | grep -Eq '"running"[[:space:]]*:[[:space:]]*true'
}

now=$(date +%s)
last=$(cat "$LAST_FILE" 2>/dev/null || echo 0)

# skhd can emit repeated key-down events while the hotkey chord is held.
# Ignore immediate repeats so one press cannot start and stop recording.
if [ "$((now - last))" -lt "$DEBOUNCE_SECONDS" ]; then
log_event "ignored repeat hotkey"
exit 0
fi

echo "$now" > "$LAST_FILE"
log_event "hotkey pressed"

if is_transcribe_running; then
"$AGENT_CLI" transcribe --toggle --quiet >/dev/null 2>&1 || true
"$AGENT_CLI" transcribe --stop --quiet --wait-for-start --log-file "$AGENT_LOG_FILE" >> "$LOG_FILE" 2>&1 || true

"$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true
notify_temp "🛑 Stopped" "Processing results..."
log_event "stop requested"
else
"$NOTIFIER" -title "🎙️ Starting" -message "Preparing transcription..." -group "$RECORDING_GROUP"
"$NOTIFIER" -title "🎙️ Recording" -message "Press Cmd+Shift+R again to stop." -group "$RECORDING_GROUP" || true
(
OUTPUT=$("$AGENT_CLI" transcribe --toggle --llm --quiet 2>/dev/null)
OUTPUT=$("$AGENT_CLI" transcribe --start --llm --quiet --log-file "$AGENT_LOG_FILE" 2>> "$LOG_FILE")
status=$?
"$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true
if [ -n "$OUTPUT" ]; then
if [ "$status" -ne 0 ]; then
notify_temp "❌ Error" "Transcription failed. See $AGENT_LOG_FILE"
elif [ -n "$OUTPUT" ]; then
notify_temp "📄 Result" "$OUTPUT"
else
notify_temp "❌ Error" "No output"
notify_temp "⚠️ No Transcript" "Recording captured no speech."
fi
log_event "recording finished with status $status"
) &
log_event "recording started"
fi
31 changes: 31 additions & 0 deletions scripts/setup-macos-hotkeys.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,33 @@ fi
brew install terminal-notifier
brew tap jackielii/tap && brew install jackielii/tap/skhd-zig

REREGISTER_SKHD_SERVICE=0

# Ensure the Homebrew-installed skhd.app declares NSMicrophoneUsageDescription
# in its Info.plist.
#
# No-op when the app bundle is not found under `brew --prefix skhd-zig`, or
# when the key is already present. Otherwise the key is inserted, the bundle
# is re-signed, the TCC entries for com.jackielii.skhd are reset, and
# REREGISTER_SKHD_SERVICE is set to 1 so the "Start service" step uninstalls
# the existing service before starting it again.
patch_skhd_microphone_usage() {
local app_path
app_path="$(brew --prefix skhd-zig)/skhd.app"

# Nothing to patch if this install layout has no skhd.app bundle.
if [[ ! -d "$app_path" ]]; then
return
fi

# Already patched (or shipped with the key): leave the bundle and its signature alone.
if /usr/libexec/PlistBuddy -c "Print :NSMicrophoneUsageDescription" "$app_path/Contents/Info.plist" >/dev/null 2>&1; then
return
fi

echo "🎙️ Adding microphone usage description to skhd.app..."
plutil -insert NSMicrophoneUsageDescription \
-string "skhd launches agent-cli voice transcription hotkeys that record from the microphone." \
"$app_path/Contents/Info.plist"
# Re-sign with the ad-hoc identity ("-").
# NOTE(review): presumably needed because editing Info.plist invalidates the existing signature — confirm.
codesign --force --deep --sign - "$app_path" >/dev/null
# Best-effort resets (failures ignored).
# NOTE(review): presumably so macOS prompts again for the re-signed bundle — confirm.
tccutil reset Microphone com.jackielii.skhd >/dev/null 2>&1 || true
tccutil reset Accessibility com.jackielii.skhd >/dev/null 2>&1 || true
tccutil reset ListenEvent com.jackielii.skhd >/dev/null 2>&1 || true
REREGISTER_SKHD_SERVICE=1
}

patch_skhd_microphone_usage

# Setup configuration
echo "⚙️ Setting up configuration..."
mkdir -p ~/.config/skhd
Expand All @@ -38,6 +65,9 @@ EOF

# Start service
echo "🚀 Starting skhd..."
if [[ "$REREGISTER_SKHD_SERVICE" = "1" ]]; then
skhd --uninstall-service >/dev/null 2>&1 || true
fi
skhd --start-service

# Test
Expand All @@ -53,6 +83,7 @@ echo ""
echo "If the hotkey doesn't work:"
echo "1. Open System Settings → Privacy & Security → Accessibility"
echo "2. Add and enable 'skhd'"
echo "3. If prompted, allow Microphone access for 'skhd'"
echo ""
echo "If the notification doesn't show:"
echo "1. Open System Settings → Notifications"
Expand Down
120 changes: 120 additions & 0 deletions tests/test_macos_hotkeys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Tests for macOS hotkey shell script behavior."""

from __future__ import annotations

import os
import subprocess
import time
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
TOGGLE_TRANSCRIPTION = ROOT / "scripts" / "macos-hotkeys" / "toggle-transcription.sh"


def _write_executable(path: Path, content: str) -> None:
    """Create (or overwrite) *path* with *content* as UTF-8 and mark it rwxr-xr-x."""
    executable_mode = 0o755
    path.write_text(content, encoding="utf-8")
    path.chmod(executable_mode)


def _run_toggle_script(
    tmp_path: Path,
    agent_cli_body: str,
    *,
    expected_notifications: int = 2,
    expected_commands: int = 2,
) -> tuple[list[str], list[str]]:
    """Run toggle-transcription.sh against stub binaries and report what it invoked.

    Two stub executables are written to ``tmp_path / "bin"``: an ``agent-cli``
    that logs its argument string and then runs *agent_cli_body*, and a
    ``terminal-notifier`` that only logs its argument string. The script is
    run once with ``HOME`` pointing at *tmp_path*; because it may continue
    working in a background subshell, the logs are polled for up to two
    seconds until the expected number of entries has appeared.

    Args:
        tmp_path: Scratch directory that holds the stubs, logs and fake HOME.
        agent_cli_body: Bash source appended to the ``agent-cli`` stub.
        expected_notifications: Notifier invocations to wait for.
        expected_commands: ``agent-cli`` invocations to wait for.

    Returns:
        ``(commands, notifications)``: one entry per logged invocation, in
        call order. If the deadline passes first, whatever was logged so far
        is returned and the caller's assertions decide the outcome.

    Raises:
        subprocess.CalledProcessError: If the script exits non-zero.
    """
    bin_dir = tmp_path / "bin"
    bin_dir.mkdir()
    command_log = tmp_path / "commands.log"
    notification_log = tmp_path / "notifications.log"

    def _logged_lines(log: Path) -> list[str]:
        # A stub creates its log on first invocation; a missing file means "no calls yet".
        try:
            return log.read_text(encoding="utf-8").splitlines()
        except FileNotFoundError:
            return []

    _write_executable(
        bin_dir / "agent-cli",
        f"""#!/usr/bin/env bash
set -euo pipefail
printf '%s\\n' "$*" >> "{command_log}"
{agent_cli_body}
""",
    )
    _write_executable(
        bin_dir / "terminal-notifier",
        f"""#!/usr/bin/env bash
printf '%s\\n' "$*" >> "{notification_log}"
""",
    )

    # The script reads these as optional overrides (`${VAR:-default}`). Drop any
    # inherited values so its cache, logs and debounce state fall back to the
    # defaults derived from HOME, i.e. stay inside tmp_path.
    script_overrides = {
        "CACHE_DIR",
        "LAST_FILE",
        "LOG_FILE",
        "AGENT_LOG_FILE",
        "DEBOUNCE_SECONDS",
    }
    env = {key: value for key, value in os.environ.items() if key not in script_overrides}
    env.update(
        AGENT_CLI=str(bin_dir / "agent-cli"),
        NOTIFIER=str(bin_dir / "terminal-notifier"),
        HOME=str(tmp_path),
    )
    subprocess.run([str(TOGGLE_TRANSCRIPTION)], check=True, env=env)

    # The start path finishes in a background subshell, so wait for the stubs
    # to be called instead of assuming the work is done when the script exits.
    deadline = time.monotonic() + 2
    while time.monotonic() < deadline:
        if (
            len(_logged_lines(command_log)) >= expected_commands
            and len(_logged_lines(notification_log)) >= expected_notifications
        ):
            break
        time.sleep(0.01)

    return _logged_lines(command_log), _logged_lines(notification_log)


def test_macos_transcription_hotkey_stops_using_process_status_json(tmp_path: Path) -> None:
    """A running status from `--status --json` must lead to `--stop`, never `--toggle`."""
    # Stub agent-cli: reports a running transcriber, accepts the stop request,
    # and fails loudly (exit 2) on any other invocation.
    running_stub = """
if [[ "$*" == "transcribe --status --json" ]]; then
printf '{"action":"status","process":"transcribe","running":true,"status":"running","pid":123,"stale_cleaned":false}\\n'
exit 0
fi
if [[ "$*" == "transcribe --stop --quiet --wait-for-start"* ]]; then
exit 0
fi
printf 'unexpected command: %s\\n' "$*" >&2
exit 2
"""
    invoked, toasts = _run_toggle_script(tmp_path, running_stub)

    assert invoked[0] == "transcribe --status --json"
    assert invoked[1].startswith("transcribe --stop --quiet --wait-for-start")
    assert all("--toggle" not in call for call in invoked)
    assert any("Stopped" in toast for toast in toasts)


def test_macos_transcription_hotkey_starts_from_process_status_json(tmp_path: Path) -> None:
    """A stopped status from `--status --json` must lead to `--start`, never `--toggle`."""
    # Stub agent-cli: reports no running transcriber, prints a transcript for
    # the start request, and fails loudly (exit 2) on any other invocation.
    stopped_stub = """
if [[ "$*" == "transcribe --status --json" ]]; then
printf '{"action":"status","process":"transcribe","running":false,"status":"stopped","pid":null,"stale_cleaned":false}\\n'
exit 0
fi
if [[ "$*" == "transcribe --start --llm --quiet"* ]]; then
printf 'hello from transcript\\n'
exit 0
fi
printf 'unexpected command: %s\\n' "$*" >&2
exit 2
"""
    # Wait for three notifier invocations so the result toast has been logged.
    invoked, toasts = _run_toggle_script(
        tmp_path,
        stopped_stub,
        expected_notifications=3,
    )

    assert invoked[0] == "transcribe --status --json"
    assert invoked[1].startswith("transcribe --start --llm --quiet")
    assert all("--toggle" not in call for call in invoked)
    assert any("Recording" in toast for toast in toasts)
    assert any("hello from transcript" in toast for toast in toasts)
Loading