Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 154 additions & 4 deletions packages/app/src/components/prompt-input.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { useFilteredList } from "@opencode-ai/ui/hooks"
import { useSpring } from "@opencode-ai/ui/motion-spring"
import { createEffect, on, Component, Show, onCleanup, createMemo, createSignal } from "solid-js"
import { createEffect, on, Component, Show, onCleanup, createMemo, createSignal, For } from "solid-js"
import { createStore } from "solid-js/store"
import { useLocal } from "@/context/local"
import { selectionFromLines, type SelectedLineRange, useFile } from "@/context/file"
Expand All @@ -25,6 +25,7 @@ import { ProviderIcon } from "@opencode-ai/ui/provider-icon"
import { Tooltip, TooltipKeybind } from "@opencode-ai/ui/tooltip"
import { IconButton } from "@opencode-ai/ui/icon-button"
import { Select } from "@opencode-ai/ui/select"
import { DropdownMenu } from "@opencode-ai/ui/dropdown-menu"
import { useDialog } from "@opencode-ai/ui/context/dialog"
import { ModelSelectorPopover } from "@/components/dialog-select-model"
import { DialogSelectModelUnpaid } from "@/components/dialog-select-model-unpaid"
Expand Down Expand Up @@ -56,6 +57,7 @@ import { PromptImageAttachments } from "./prompt-input/image-attachments"
import { PromptDragOverlay } from "./prompt-input/drag-overlay"
import { promptPlaceholder } from "./prompt-input/placeholder"
import { ImagePreview } from "@opencode-ai/ui/image-preview"
import { useSpeechRecognition } from "@/hooks/use-speech-recognition"

interface PromptInputProps {
class?: string
Expand Down Expand Up @@ -283,6 +285,84 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
applyingHistory: false,
})

// Speech recognition state
// Languages offered in the speech-language dropdown: the browser's preferred
// languages (falling back to the single `navigator.language`), with "en-US"
// appended when no English variant is already present.
const browserLangs = createMemo(() => {
  const preferred = navigator.languages ?? [navigator.language]
  const candidates = Array.from(preferred).filter(Boolean)
  const hasEnglish = candidates.some((code) => code.startsWith("en"))
  if (!hasEnglish) candidates.push("en-US")
  return candidates
})

// Selected speech-recognition language, wrapped by `persisted` under the
// global "speech-lang" key; the initial value is the browser language, or
// "en-US" when the browser reports none.
// NOTE(review): presumably `persisted` restores a previously saved value on
// load — confirm against its implementation.
const [speechLang, setSpeechLang] = persisted(
Persist.global("speech-lang"),
createStore({ lang: navigator.language || "en-US" }),
)

// Always true for now, so the language dropdown is always rendered next to
// the microphone button.
const showLangSelector = createMemo(() => true)

// Effective recognition language: stored choice, then browser language, then "en-US".
const getSpeechLang = () => speechLang.lang || navigator.language || "en-US"

// Scratch state for the current dictation session (plain variables, not signals):
// - finalTranscript: finalized speech accumulated since recording started
// - speechInsertCursor: cursor offset captured when recording started
// - speechBaseParts: snapshot of the prompt parts captured when recording started
let finalTranscript = ""
let speechInsertCursor = 0
let speechBaseParts: ContentPart[] = []

// Speech-to-text session bound to the prompt editor.
//
// Every recognition result rebuilds the prompt from the snapshot taken when
// recording started (`speechBaseParts` / `speechInsertCursor`): the text
// spoken so far is spliced into the snapshot's text at the captured cursor
// offset, so interim results replace each other instead of piling up.
const speech = useSpeechRecognition({
  // Exposed as a getter so the currently selected language is read whenever
  // the hook consults `lang`, rather than being frozen at its value when this
  // component was created (which made the language selector a no-op).
  get lang() {
    return getSpeechLang()
  },
  onResult: (text, isFinal) => {
    const chunk = text.trim()
    if (!chunk) return

    // Final chunks are appended permanently; interim chunks are only shown
    // after the already-finalized text and are replaced by the next result.
    if (isFinal) {
      finalTranscript += (finalTranscript ? " " : "") + chunk
    }
    const spoken = isFinal ? finalTranscript : finalTranscript + (finalTranscript ? " " : "") + chunk

    const pos = speechInsertCursor
    const baseText = speechBaseParts
      .filter((p): p is ContentPart & { type: "text" } => p.type === "text")
      .map((p) => p.content)
      .join("")
    // NOTE(review): non-text parts are moved in front of a single merged text
    // part, and `pos` is applied to the joined text only. If non-text parts can
    // sit inline before the cursor, their position is lost — confirm.
    const nonText = speechBaseParts.filter((p) => p.type !== "text")

    const merged = baseText.slice(0, pos) + spoken + baseText.slice(pos)
    const newPart: ContentPart = { type: "text", content: merged, start: 0, end: merged.length }
    const newParts: ContentPart[] = [...nonText, newPart]
    const newCursor = pos + spoken.length

    prompt.set(newParts, newCursor)

    // Restore focus and caret on the next frame, after the prompt update above.
    requestAnimationFrame(() => {
      editorRef?.focus()
      setCursorPosition(editorRef, newCursor)
      queueScroll()
    })
  },
  onError: (error) => {
    console.error("Speech recognition error:", error)
  },
})

// Microphone button handler: starts or stops dictation. Does nothing unless
// the prompt is in "normal" mode. When a new recording is about to start, the
// accumulated transcript is cleared and the current cursor offset and prompt
// parts are captured as the base for incoming results.
const toggleRecording = () => {
  const inNormalMode = store.mode === "normal"
  if (!inNormalMode) return

  const alreadyRecording = speech.state() === "recording"
  if (!alreadyRecording) {
    finalTranscript = ""
    // Fall back to the end of the prompt when no cursor position is available.
    speechInsertCursor = getCursorPosition(editorRef) ?? promptLength(prompt.current())
    speechBaseParts = [...prompt.current()]
  }

  speech.toggle()
}

// Ends dictation without applying further results: aborts the recognition
// session, then clears the transcript accumulated for it.
const stopRecording = () => {
  speech.abort()
  finalTranscript = ""
}

const buttonsSpring = useSpring(() => (store.mode === "normal" ? 1 : 0), { visualDuration: 0.2, bounce: 0 })
const motion = (value: number) => ({
opacity: value,
Expand Down Expand Up @@ -1094,7 +1174,10 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
shouldQueue: props.shouldQueue,
onQueue: props.onQueue,
onAbort: props.onAbort,
onSubmit: props.onSubmit,
onSubmit: () => {
stopRecording()
props.onSubmit?.()
},
})

const handleKeyDown = (event: KeyboardEvent) => {
Expand Down Expand Up @@ -1303,11 +1386,12 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
if (!(target instanceof HTMLElement)) return
if (
target.closest(
'[data-action="prompt-attach"], [data-action="prompt-submit"], [data-action="prompt-permissions"]',
'[data-action="prompt-attach"], [data-action="prompt-submit"], [data-action="prompt-permissions"], [data-action="prompt-mic"]',
)
) {
return
}
stopRecording()
editorRef?.focus()
}}
>
Expand Down Expand Up @@ -1378,7 +1462,73 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
}}
/>

<div class="flex items-center gap-1 pointer-events-auto">
<div class="flex items-center gap-2 pointer-events-auto">
<Show when={speech.isSupported()}>
<div class="flex items-center rounded-md border border-border-weak-base bg-surface-panel overflow-hidden">
<Tooltip
placement="top"
value={
speech.state() === "recording"
? language.t("prompt.action.stopVoice")
: language.t("prompt.action.startVoice")
}
>
<IconButton
data-action="prompt-mic"
type="button"
icon="mic"
variant="ghost"
class="size-8 rounded-none"
data-recording={speech.state() === "recording"}
style={buttons()}
onClick={toggleRecording}
disabled={store.mode !== "normal"}
tabIndex={store.mode === "normal" ? undefined : -1}
aria-label={
speech.state() === "recording"
? language.t("prompt.action.stopVoice")
: language.t("prompt.action.startVoice")
}
/>
</Tooltip>

<Show when={showLangSelector()}>
<DropdownMenu placement="top-start">
<DropdownMenu.Trigger class="h-8 px-1.5 text-11-regular flex items-center rounded-none border-l border-border-weak-base hover:bg-white/5 transition-colors">
{speechLang.lang}
</DropdownMenu.Trigger>
<DropdownMenu.Portal>
<DropdownMenu.Content>
<DropdownMenu.RadioGroup
value={speechLang.lang}
onChange={(v) => setSpeechLang("lang", v as string)}
>
<For each={browserLangs()}>
{(lang) => (
<DropdownMenu.RadioItem value={lang}>
<DropdownMenu.ItemLabel>{lang}</DropdownMenu.ItemLabel>
<DropdownMenu.ItemIndicator>
<Icon name="check-small" size="small" />
</DropdownMenu.ItemIndicator>
</DropdownMenu.RadioItem>
)}
</For>
</DropdownMenu.RadioGroup>
</DropdownMenu.Content>
</DropdownMenu.Portal>
</DropdownMenu>
</Show>
</div>
</Show>

<Show when={!speech.isSupported()}>
<Tooltip placement="top" value={language.t("prompt.action.voiceNotSupported")}>
<div class="size-8 flex items-center justify-center opacity-40 cursor-not-allowed">
<Icon name="mic-off" class="size-4" />
</div>
</Tooltip>
</Show>

<Tooltip placement="top" inactive={!prompt.dirty() && !working()} value={tip()}>
<IconButton
data-action="prompt-submit"
Expand Down
144 changes: 144 additions & 0 deletions packages/app/src/hooks/use-speech-recognition.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import { createSignal, createEffect, onCleanup } from "solid-js"

export type SpeechRecognitionState = "idle" | "recording" | "error"

export interface UseSpeechRecognitionOptions {
lang?: string
onResult?: (text: string, isFinal: boolean) => void
onError?: (error: string) => void
onStart?: () => void
onEnd?: () => void
}

/**
 * Solid hook wrapping the browser Web Speech API (`SpeechRecognition` or the
 * `webkitSpeechRecognition` fallback) for continuous dictation with interim
 * results.
 *
 * @param options Optional language and lifecycle callbacks. `options.lang` is
 *   re-read on every `start()`, so a getter-backed property picks up language
 *   changes for the next recording. Falls back to `navigator.language`, then
 *   "en-US".
 * @returns Reactive accessors (`state`, `transcript`, `isSupported`, `error`)
 *   and controls:
 *   - `start` / `stop` / `toggle`: begin or end recording; `stop` uses the
 *     recognizer's `stop()`, so a pending final result may still arrive.
 *   - `abort`: end recording and ignore anything still pending.
 *   - `reset`: clear transcript and error and return to "idle".
 */
export function useSpeechRecognition(options: UseSpeechRecognitionOptions = {}) {
  const [state, setState] = createSignal<SpeechRecognitionState>("idle")
  const [transcript, setTranscript] = createSignal("")
  const [isSupported, setIsSupported] = createSignal(false)
  const [error, setError] = createSignal<string | null>(null)

  let recognition: SpeechRecognition | null = null
  // Set by abort(); makes the result/error handlers ignore late events.
  let isAborted = false

  const resolveLang = () => options.lang || navigator.language || "en-US"

  // Single failure path so the signals and the onError callback stay in sync.
  const fail = (message: string) => {
    setError(message)
    setState("error")
    options.onError?.(message)
  }

  createEffect(() => {
    const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition

    if (!Recognition) {
      setIsSupported(false)
      return
    }

    setIsSupported(true)

    const instance = new Recognition()
    instance.continuous = true
    instance.interimResults = true
    // The language is assigned in start(), so it always reflects the latest option.

    instance.onstart = () => {
      isAborted = false
      setState("recording")
      setError(null)
      options.onStart?.()
    }

    instance.onresult = (event: SpeechRecognitionEvent) => {
      if (isAborted) return

      let finalText = ""
      let interimText = ""

      // Only results from resultIndex onward changed in this event.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i]
        const piece = result?.[0]?.transcript ?? ""
        if (result?.isFinal) {
          finalText += piece
        } else {
          interimText += piece
        }
      }

      // Finalized text wins; interim text is reported only when nothing
      // in this event was finalized.
      const fullTranscript = finalText || interimText
      setTranscript(fullTranscript)
      options.onResult?.(fullTranscript, !!finalText)
    }

    instance.onerror = (event: SpeechRecognitionErrorEvent) => {
      if (isAborted) return

      fail(event.error === "not-allowed" ? "Microphone access denied" : `Speech recognition error: ${event.error}`)
    }

    instance.onend = () => {
      // Keep an "error" state visible; only a live recording drops back to idle.
      if (state() === "recording") {
        setState("idle")
      }
      options.onEnd?.()
    }

    recognition = instance

    onCleanup(() => {
      // Detach handlers first so nothing fires into a disposed owner, then
      // abort so no final result is produced after teardown.
      instance.onstart = null
      instance.onresult = null
      instance.onerror = null
      instance.onend = null
      try {
        instance.abort()
      } catch {
        // Ignore teardown failures.
      }
      if (recognition === instance) recognition = null
    })
  })

  const start = () => {
    if (!recognition || state() === "recording") return

    try {
      isAborted = false
      recognition.lang = resolveLang()
      recognition.start()
    } catch {
      // start() throws if the recognizer is already running; report it through
      // the same path as runtime recognition errors.
      fail("Failed to start recording")
    }
  }

  const stop = () => {
    if (!recognition || state() !== "recording") return

    try {
      recognition.stop()
      setState("idle")
    } catch {
      // Ignore errors when stopping
    }
  }

  const abort = () => {
    isAborted = true
    try {
      // Unlike stop(), abort() does not ask the engine for a final result.
      recognition?.abort()
    } catch {
      // Ignore errors when aborting
    }
    if (state() === "recording") setState("idle")
    setTranscript("")
    setError(null)
  }

  const toggle = () => {
    if (state() === "recording") {
      stop()
    } else {
      start()
    }
  }

  const reset = () => {
    setTranscript("")
    setError(null)
    setState("idle")
  }

  return {
    state,
    transcript,
    isSupported,
    error,
    start,
    stop,
    abort,
    toggle,
    reset,
  }
}
3 changes: 3 additions & 0 deletions packages/app/src/i18n/ar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ export const dict = {
"prompt.attachment.remove": "إزالة المرفق",
"prompt.action.send": "إرسال",
"prompt.action.stop": "توقف",
"prompt.action.startVoice": "بدء الإدخال الصوتي",
"prompt.action.stopVoice": "إيقاف التسجيل",
"prompt.action.voiceNotSupported": "التعرف على الكلام غير مدعوم",
"prompt.toast.pasteUnsupported.title": "مرفق غير مدعوم",
"prompt.toast.pasteUnsupported.description": "يمكن إرفاق الصور أو ملفات PDF أو الملفات النصية فقط هنا.",
"prompt.toast.modelAgentRequired.title": "حدد وكيلاً ونموذجاً",
Expand Down
3 changes: 3 additions & 0 deletions packages/app/src/i18n/br.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ export const dict = {
"prompt.attachment.remove": "Remover anexo",
"prompt.action.send": "Enviar",
"prompt.action.stop": "Parar",
"prompt.action.startVoice": "Iniciar entrada de voz",
"prompt.action.stopVoice": "Parar gravação",
"prompt.action.voiceNotSupported": "Reconhecimento de voz não suportado",
"prompt.toast.pasteUnsupported.title": "Anexo não suportado",
"prompt.toast.pasteUnsupported.description": "Apenas imagens, PDFs ou arquivos de texto podem ser anexados aqui.",
"prompt.toast.modelAgentRequired.title": "Selecione um agente e modelo",
Expand Down
3 changes: 3 additions & 0 deletions packages/app/src/i18n/bs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ export const dict = {
"prompt.attachment.remove": "Ukloni prilog",
"prompt.action.send": "Pošalji",
"prompt.action.stop": "Zaustavi",
"prompt.action.startVoice": "Pokreni glasovni unos",
"prompt.action.stopVoice": "Zaustavi snimanje",
"prompt.action.voiceNotSupported": "Prepoznavanje govora nije podržano",

"prompt.toast.pasteUnsupported.title": "Nepodržan prilog",
"prompt.toast.pasteUnsupported.description": "Ovdje se mogu priložiti samo slike, PDF-ovi ili tekstualne datoteke.",
Expand Down
3 changes: 3 additions & 0 deletions packages/app/src/i18n/da.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ export const dict = {
"prompt.attachment.remove": "Fjern vedhæftning",
"prompt.action.send": "Send",
"prompt.action.stop": "Stop",
"prompt.action.startVoice": "Start stemmeinput",
"prompt.action.stopVoice": "Stop optagelse",
"prompt.action.voiceNotSupported": "Talegenkendelse understøttes ikke",

"prompt.toast.pasteUnsupported.title": "Ikke understøttet vedhæftning",
"prompt.toast.pasteUnsupported.description": "Kun billeder, PDF'er eller tekstfiler kan vedhæftes her.",
Expand Down
Loading
Loading