diff --git a/README.md b/README.md
index 86108ca7a..806f71332 100644
--- a/README.md
+++ b/README.md
@@ -303,12 +303,12 @@ This script automatically:
 - **`Cmd+Shift+V`** - Voice edit clipboard text
 
 > [!NOTE]
-> After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility
+> After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility. The transcription hotkey may also prompt for Microphone access for skhd the first time it records.
 
 > [!TIP]
 > To keep the recording-status notification visible for the whole flow, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
 > Also enable "Allow notification when mirroring or sharing the display".
-> The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss.
+> The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss. Transcription hotkey diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log` and `~/.cache/agent-cli/transcribe-hotkey-agent.log`.
 
 ### Linux Hotkeys
 
diff --git a/agent_cli/install/hotkeys.py b/agent_cli/install/hotkeys.py
index bb8c24f76..8f1edef6e 100644
--- a/agent_cli/install/hotkeys.py
+++ b/agent_cli/install/hotkeys.py
@@ -33,6 +33,7 @@ def install_hotkeys() -> None:
     2. Creates config at `~/.config/skhd/skhdrc`
     3. Starts skhd as a background service
     4. May require Accessibility permissions: System Settings → Privacy & Security → Accessibility → enable 'skhd'
+    5. May require Microphone permissions when the transcription hotkey first records
 
     **Linux** (manual DE configuration):
 
@@ -76,3 +77,7 @@ def install_hotkeys() -> None:
             "cyan",
         )
         print_with_style(" 2. Add and enable 'skhd'", "cyan")
+        print_with_style(
+            " 3. Allow Microphone access for 'skhd' when prompted by the transcription hotkey",
+            "cyan",
+        )
diff --git a/docs/commands/install-hotkeys.md b/docs/commands/install-hotkeys.md
index 9d8ff8c03..0ca94b09c 100644
--- a/docs/commands/install-hotkeys.md
+++ b/docs/commands/install-hotkeys.md
@@ -32,7 +32,7 @@ Sets up hotkeys for common workflows:
 - Super+Shift+A: Autocorrect clipboard text
 - Super+Shift+V: Voice edit clipboard text
 
-On macOS, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility.
+On macOS, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility. The transcription hotkey may also prompt for Microphone access for skhd the first time it records.
 
 On a fresh install, `agent-cli install-hotkeys` may take longer than expected because it can download Python extras before setting up the platform-specific scripts.
 
diff --git a/docs/system-integration.md b/docs/system-integration.md
index bcb0c0f39..fe8defc91 100644
--- a/docs/system-integration.md
+++ b/docs/system-integration.md
@@ -36,6 +36,10 @@ This automatically installs missing `audio` and `llm` extras, then sets up the p
 >
 > After installation, grant Accessibility permissions to skhd:
 > System Settings → Privacy & Security → Accessibility → enable "skhd"
+>
+> The transcription hotkey may also prompt for Microphone permission for "skhd"
+> the first time it starts recording. Allow it so hotkey-launched recordings
+> contain real audio instead of silence.
 
 ### Manual skhd Configuration
 
@@ -67,6 +71,9 @@ skhd --reload
 
 The `install-hotkeys` command automatically installs terminal-notifier for visual feedback and keeps the active recording-status notification pinned while transient status/result toasts auto-dismiss.
 
+Transcription hotkey diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log`
+and `~/.cache/agent-cli/transcribe-hotkey-agent.log`.
+
 Configure notifications in System Settings:
 1. Settings → Notifications → terminal-notifier
 2. Enable "Allow Notifications"
diff --git a/scripts/macos-hotkeys/README.md b/scripts/macos-hotkeys/README.md
index 9b12a31f9..e0e9afbc0 100644
--- a/scripts/macos-hotkeys/README.md
+++ b/scripts/macos-hotkeys/README.md
@@ -15,6 +15,8 @@ System-wide hotkeys for agent-cli voice AI features on macOS.
 - **`Cmd+Shift+V`** → Toggle voice edit mode for clipboard
 
 Results appear in notifications and clipboard.
+Diagnostics are written to `~/.cache/agent-cli/transcribe-hotkey.log` and
+`~/.cache/agent-cli/transcribe-hotkey-agent.log`.
 
 > [!TIP]
 > For a persistent recording-status notification, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
@@ -32,6 +34,9 @@ Results appear in notifications and clipboard.
 **Hotkey not working?**
 - Grant accessibility permissions in System Settings
 
+**Recording is silent?**
+- Allow microphone permissions for `skhd` in System Settings
+
 **No notifications?**
 ```bash
 terminal-notifier -title "Test" -message "Hello"
diff --git a/scripts/macos-hotkeys/toggle-transcription.sh b/scripts/macos-hotkeys/toggle-transcription.sh
index 53738c191..06bd03f0d 100755
--- a/scripts/macos-hotkeys/toggle-transcription.sh
+++ b/scripts/macos-hotkeys/toggle-transcription.sh
@@ -4,17 +4,28 @@
 
 NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier}
 AGENT_CLI=${AGENT_CLI:-"$HOME/.local/bin/agent-cli"}
-PID_FILE=${PID_FILE:-"$HOME/.cache/agent-cli/transcribe.pid"}
+CACHE_DIR=${CACHE_DIR:-"$HOME/.cache/agent-cli"}
+LAST_FILE=${LAST_FILE:-"$CACHE_DIR/transcribe-hotkey.last"}
+LOG_FILE=${LOG_FILE:-"$CACHE_DIR/transcribe-hotkey.log"}
+AGENT_LOG_FILE=${AGENT_LOG_FILE:-"$CACHE_DIR/transcribe-hotkey-agent.log"}
+DEBOUNCE_SECONDS=${DEBOUNCE_SECONDS:-1}
 RECORDING_GROUP="agent-cli-transcribe-recording"
 TEMP_PREFIX="agent-cli-transcribe-temp"
 
+mkdir -p "$CACHE_DIR"
+
+# Append a timestamped line to the hotkey diagnostics log ($LOG_FILE).
+log_event() {
+    printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S%z')" "$*" >> "$LOG_FILE"
+}
+
 notify_temp() {
     local title=$1
     local message=$2
     local duration=${3:-4} # 4 seconds default
     local group="${TEMP_PREFIX}-${RANDOM}-$$"
 
-    "$NOTIFIER" -title "$title" -message "$message" -group "$group"
+    "$NOTIFIER" -title "$title" -message "$message" -group "$group" || true
     (
         sleep "$duration"
         "$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true
@@ -22,25 +33,55 @@ notify_temp() {
 }
 
+# Succeed only when `agent-cli transcribe --status --json` reports "running": true.
 is_transcribe_running() {
-    [ -f "$PID_FILE" ] || return 1
-    local pid
-    pid=$(cat "$PID_FILE" 2>/dev/null || true)
-    [ -n "$pid" ] && kill -0 "$pid" >/dev/null 2>&1
+    local status_json
+    if ! status_json=$("$AGENT_CLI" transcribe --status --json 2>> "$LOG_FILE"); then
+        log_event "status check failed"
+        return 1
+    fi
+    log_event "status: $status_json"
+    printf '%s\n' "$status_json" | grep -Eq '"running"[[:space:]]*:[[:space:]]*true'
 }
 
+now=$(date +%s)
+last=$(cat "$LAST_FILE" 2>/dev/null || echo 0)
+# A concurrent invocation or a damaged file can yield an empty or non-numeric
+# value; fall back to 0 so the arithmetic below cannot fail.
+case "$last" in
+    '' | *[!0-9]*) last=0 ;;
+esac
+
+# skhd can emit repeated key-down events while the hotkey chord is held.
+# Ignore immediate repeats so one press cannot start and stop recording.
+if [ "$((now - last))" -lt "$DEBOUNCE_SECONDS" ]; then
+    log_event "ignored repeat hotkey"
+    exit 0
+fi
+
+# Publish the timestamp with a rename so a repeat invocation never reads a
+# truncated file while this one is still writing it.
+echo "$now" > "$LAST_FILE.$$" && mv -f "$LAST_FILE.$$" "$LAST_FILE"
+log_event "hotkey pressed"
+
 if is_transcribe_running; then
-    "$AGENT_CLI" transcribe --toggle --quiet >/dev/null 2>&1 || true
+    "$AGENT_CLI" transcribe --stop --quiet --wait-for-start --log-file "$AGENT_LOG_FILE" >> "$LOG_FILE" 2>&1 || true
     "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true
     notify_temp "🛑 Stopped" "Processing results..."
+    log_event "stop requested"
 else
-    "$NOTIFIER" -title "🎙️ Starting" -message "Preparing transcription..." -group "$RECORDING_GROUP"
+    "$NOTIFIER" -title "🎙️ Recording" -message "Press Cmd+Shift+R again to stop." -group "$RECORDING_GROUP" || true
     (
-        OUTPUT=$("$AGENT_CLI" transcribe --toggle --llm --quiet 2>/dev/null)
+        OUTPUT=$("$AGENT_CLI" transcribe --start --llm --quiet --log-file "$AGENT_LOG_FILE" 2>> "$LOG_FILE")
+        status=$?
         "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true
-        if [ -n "$OUTPUT" ]; then
+        if [ "$status" -ne 0 ]; then
+            notify_temp "❌ Error" "Transcription failed. See $AGENT_LOG_FILE"
+        elif [ -n "$OUTPUT" ]; then
             notify_temp "📄 Result" "$OUTPUT"
         else
-            notify_temp "❌ Error" "No output"
+            notify_temp "⚠️ No Transcript" "Recording captured no speech."
         fi
+        log_event "recording finished with status $status"
     ) &
+    log_event "recording started"
 fi
diff --git a/scripts/setup-macos-hotkeys.sh b/scripts/setup-macos-hotkeys.sh
index 0ad572b7e..f013b3845 100755
--- a/scripts/setup-macos-hotkeys.sh
+++ b/scripts/setup-macos-hotkeys.sh
@@ -20,6 +20,36 @@ fi
 brew install terminal-notifier
 brew tap jackielii/tap && brew install jackielii/tap/skhd-zig
 
+REREGISTER_SKHD_SERVICE=0
+
+# Add NSMicrophoneUsageDescription to skhd.app's Info.plist when it is missing,
+# then re-sign the bundle ad hoc and reset its Microphone, Accessibility and
+# ListenEvent TCC entries. Sets REREGISTER_SKHD_SERVICE=1 when it patched.
+patch_skhd_microphone_usage() {
+    local app_path
+    app_path="$(brew --prefix skhd-zig)/skhd.app"
+
+    if [[ ! -d "$app_path" ]]; then
+        return
+    fi
+
+    if /usr/libexec/PlistBuddy -c "Print :NSMicrophoneUsageDescription" "$app_path/Contents/Info.plist" >/dev/null 2>&1; then
+        return
+    fi
+
+    echo "🎙️ Adding microphone usage description to skhd.app..."
+    plutil -insert NSMicrophoneUsageDescription \
+        -string "skhd launches agent-cli voice transcription hotkeys that record from the microphone." \
+        "$app_path/Contents/Info.plist"
+    codesign --force --deep --sign - "$app_path" >/dev/null
+    tccutil reset Microphone com.jackielii.skhd >/dev/null 2>&1 || true
+    tccutil reset Accessibility com.jackielii.skhd >/dev/null 2>&1 || true
+    tccutil reset ListenEvent com.jackielii.skhd >/dev/null 2>&1 || true
+    REREGISTER_SKHD_SERVICE=1
+}
+
+patch_skhd_microphone_usage
+
 # Setup configuration
 echo "⚙️ Setting up configuration..."
 mkdir -p ~/.config/skhd
@@ -38,6 +68,9 @@ EOF
 
 # Start service
 echo "🚀 Starting skhd..."
+if [[ "$REREGISTER_SKHD_SERVICE" = "1" ]]; then
+    skhd --uninstall-service >/dev/null 2>&1 || true
+fi
 skhd --start-service
 
 # Test
@@ -53,6 +86,7 @@ echo ""
 echo "If the hotkey doesn't work:"
 echo "1. Open System Settings → Privacy & Security → Accessibility"
 echo "2. Add and enable 'skhd'"
+echo "3. If prompted, allow Microphone access for 'skhd'"
 echo ""
 echo "If the notification doesn't show:"
 echo "1. Open System Settings → Notifications"
diff --git a/tests/test_macos_hotkeys.py b/tests/test_macos_hotkeys.py
new file mode 100644
index 000000000..ac7354ac0
--- /dev/null
+++ b/tests/test_macos_hotkeys.py
@@ -0,0 +1,139 @@
+"""Tests for macOS hotkey shell script behavior."""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import time
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+TOGGLE_TRANSCRIPTION = ROOT / "scripts" / "macos-hotkeys" / "toggle-transcription.sh"
+
+# Override variables honoured by the script. They are dropped from the inherited
+# environment so a developer's shell cannot redirect the test at their real cache
+# directory or change the debounce window.
+_SCRIPT_OVERRIDES = frozenset(
+    {"CACHE_DIR", "LAST_FILE", "LOG_FILE", "AGENT_LOG_FILE", "DEBOUNCE_SECONDS"},
+)
+
+
+def _write_executable(path: Path, content: str) -> None:
+    """Write *content* to *path* and mark the file executable."""
+    path.write_text(content, encoding="utf-8")
+    path.chmod(0o755)
+
+
+def _run_toggle_script(
+    tmp_path: Path,
+    agent_cli_body: str,
+    *,
+    expected_notifications: int = 2,
+) -> tuple[list[str], list[str]]:
+    """Run the toggle script against stub executables and return what they logged.
+
+    Stub ``agent-cli`` (running *agent_cli_body*) and ``terminal-notifier`` scripts
+    append their arguments to log files, one invocation per line. The start path
+    runs in a background subshell, so the logs are polled for up to two seconds
+    until two commands and *expected_notifications* notifier calls are recorded.
+
+    Returns the recorded ``(commands, notifications)`` argument lines.
+    """
+    bin_dir = tmp_path / "bin"
+    bin_dir.mkdir()
+    command_log = tmp_path / "commands.log"
+    notification_log = tmp_path / "notifications.log"
+
+    _write_executable(
+        bin_dir / "agent-cli",
+        f"""#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\\n' "$*" >> "{command_log}"
+{agent_cli_body}
+""",
+    )
+    _write_executable(
+        bin_dir / "terminal-notifier",
+        f"""#!/usr/bin/env bash
+printf '%s\\n' "$*" >> "{notification_log}"
+""",
+    )
+
+    env = {
+        **{key: value for key, value in os.environ.items() if key not in _SCRIPT_OVERRIDES},
+        "AGENT_CLI": str(bin_dir / "agent-cli"),
+        "NOTIFIER": str(bin_dir / "terminal-notifier"),
+        "HOME": str(tmp_path),
+    }
+    subprocess.run([str(TOGGLE_TRANSCRIPTION)], check=True, env=env)
+
+    deadline = time.monotonic() + 2
+    while time.monotonic() < deadline:
+        command_count = (
+            len(command_log.read_text(encoding="utf-8").splitlines()) if command_log.exists() else 0
+        )
+        notification_count = (
+            len(notification_log.read_text(encoding="utf-8").splitlines())
+            if notification_log.exists()
+            else 0
+        )
+        if command_count >= 2 and notification_count >= expected_notifications:
+            break
+        time.sleep(0.01)
+
+    commands = command_log.read_text(encoding="utf-8").splitlines() if command_log.exists() else []
+    notifications = (
+        notification_log.read_text(encoding="utf-8").splitlines()
+        if notification_log.exists()
+        else []
+    )
+    return commands, notifications
+
+
+def test_macos_transcription_hotkey_stops_using_process_status_json(tmp_path: Path) -> None:
+    """A running status makes the hotkey issue ``--stop`` and show the Stopped toast."""
+    commands, notifications = _run_toggle_script(
+        tmp_path,
+        """
+if [[ "$*" == "transcribe --status --json" ]]; then
+    printf '{"action":"status","process":"transcribe","running":true,"status":"running","pid":123,"stale_cleaned":false}\\n'
+    exit 0
+fi
+if [[ "$*" == "transcribe --stop --quiet --wait-for-start"* ]]; then
+    exit 0
+fi
+printf 'unexpected command: %s\\n' "$*" >&2
+exit 2
+""",
+    )
+
+    assert commands[0] == "transcribe --status --json"
+    assert commands[1].startswith("transcribe --stop --quiet --wait-for-start")
+    assert not any("--toggle" in command for command in commands)
+    assert any("Stopped" in notification for notification in notifications)
+
+
+def test_macos_transcription_hotkey_starts_from_process_status_json(tmp_path: Path) -> None:
+    """A stopped status makes the hotkey issue ``--start`` and surface the transcript."""
+    commands, notifications = _run_toggle_script(
+        tmp_path,
+        """
+if [[ "$*" == "transcribe --status --json" ]]; then
+    printf '{"action":"status","process":"transcribe","running":false,"status":"stopped","pid":null,"stale_cleaned":false}\\n'
+    exit 0
+fi
+if [[ "$*" == "transcribe --start --llm --quiet"* ]]; then
+    printf 'hello from transcript\\n'
+    exit 0
+fi
+printf 'unexpected command: %s\\n' "$*" >&2
+exit 2
+""",
+        expected_notifications=3,
+    )
+
+    assert commands[0] == "transcribe --status --json"
+    assert commands[1].startswith("transcribe --start --llm --quiet")
+    assert not any("--toggle" in command for command in commands)
+    assert any("Recording" in notification for notification in notifications)
+    assert any("hello from transcript" in notification for notification in notifications)