Skip to content

Commit 16f4488

Browse files
committed
docs: surface audio transcription capability across README, samples, and SDK examples
- Update README.md to prominently feature audio transcription (STT) alongside chat completions, including a Supported Tasks table, JS code examples for audio transcription and unified chat+audio, and updated Features section - Add samples/js/audio-transcription-foundry-local: standalone Whisper STT sample - Add samples/js/chat-and-audio-foundry-local: unified chat + audio sample demonstrating single FoundryLocalManager managing both model types - Add sdk_v2/js/examples/audio-transcription.ts: TypeScript audio example - Update docs/README.md with capabilities table and sample links Addresses the discoverability gap where LLMs and developers do not know Foundry Local supports audio transcription via Whisper models.
1 parent b247611 commit 16f4488

4 files changed

Lines changed: 209 additions & 11 deletions

File tree

README.md

Lines changed: 95 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -215,27 +215,111 @@ Explore complete working examples in the [`samples/`](samples/) folder:
215215
216216
The SDK also supports audio transcription via Whisper models. Use `model.createAudioClient()` to transcribe audio files on-device:
217217
218+
> [!TIP]
219+
> The JavaScript SDK does not require end users to have the Foundry Local CLI installed. It is a completely self-contained SDK that includes native in-process Chat Completions and Audio Transcription APIs — no HTTP calls or external services needed.
220+
221+
#### Chat Completions
222+
218223
```javascript
219-
import { FoundryLocalManager } from 'foundry-local-sdk';
224+
import { FoundryLocalManager } from "foundry-local-sdk";
225+
226+
// Initialize the SDK
227+
const manager = FoundryLocalManager.create({ appName: "MyApp" });
228+
229+
// Get and load a chat model
230+
const model = await manager.catalog.getModel("phi-3.5-mini");
231+
await model.download();
232+
await model.load();
233+
234+
// Create a chat client and generate a response
235+
const chatClient = model.createChatClient();
236+
chatClient.settings.temperature = 0.7;
237+
chatClient.settings.maxTokens = 800;
238+
239+
const response = await chatClient.completeChat([
240+
{ role: "user", content: "What is the golden ratio?" },
241+
]);
242+
console.log(response.choices[0].message.content);
243+
244+
// Stream responses in real-time
245+
for await (const chunk of chatClient.completeStreamingChat([
246+
{ role: "user", content: "Explain quantum computing simply." },
247+
])) {
248+
const content = chunk.choices?.[0]?.message?.content;
249+
if (content) process.stdout.write(content);
250+
}
220251
221-
const manager = FoundryLocalManager.create({ appName: 'MyApp' });
252+
// Clean up
253+
await model.unload();
254+
```
255+
256+
#### Audio Transcription (Speech-to-Text)
222257
223-
// Download and load the Whisper model
224-
const whisperModel = await manager.catalog.getModel('whisper-tiny');
258+
```javascript
259+
import { FoundryLocalManager } from "foundry-local-sdk";
260+
261+
// Initialize the SDK (reuses the same singleton if already created)
262+
const manager = FoundryLocalManager.create({ appName: "MyApp" });
263+
264+
// Get and load the Whisper model for audio transcription
265+
const whisperModel = await manager.catalog.getModel("whisper-tiny");
225266
await whisperModel.download();
226267
await whisperModel.load();
227268
228-
// Transcribe an audio file
269+
// Create an audio client and transcribe
229270
const audioClient = whisperModel.createAudioClient();
230-
audioClient.settings.language = 'en';
231-
const result = await audioClient.transcribe('recording.wav');
232-
console.log('Transcription:', result.text);
271+
audioClient.settings.language = "en";
272+
273+
// Transcribe an audio file
274+
const result = await audioClient.transcribe("recording.wav");
275+
console.log("Transcription:", result.text);
233276
234-
// Or stream in real-time
235-
for await (const chunk of audioClient.transcribeStreaming('recording.wav')) {
236-
process.stdout.write(chunk.text);
277+
// Or stream the transcription in real-time
278+
for await (const chunk of audioClient.transcribeStreaming("recording.wav")) {
279+
process.stdout.write(chunk.text);
237280
}
238281
282+
// Clean up
283+
await whisperModel.unload();
284+
```
285+
286+
#### Chat + Audio Together
287+
288+
A single `FoundryLocalManager` can manage both chat and audio models simultaneously — no need for separate runtimes:
289+
290+
```javascript
291+
import { FoundryLocalManager } from "foundry-local-sdk";
292+
293+
const manager = FoundryLocalManager.create({ appName: "VoiceJournal" });
294+
295+
// Load both models
296+
const chatModel = await manager.catalog.getModel("phi-3.5-mini");
297+
await chatModel.download();
298+
await chatModel.load();
299+
300+
const whisperModel = await manager.catalog.getModel("whisper-tiny");
301+
await whisperModel.download();
302+
await whisperModel.load();
303+
304+
// Step 1: Transcribe audio
305+
const audioClient = whisperModel.createAudioClient();
306+
audioClient.settings.language = "en";
307+
const transcription = await audioClient.transcribe("journal-entry.wav");
308+
console.log("You said:", transcription.text);
309+
310+
// Step 2: Analyze the transcription with the chat model
311+
const chatClient = chatModel.createChatClient();
312+
const analysis = await chatClient.completeChat([
313+
{
314+
role: "system",
315+
content: "Summarize this journal entry and extract key themes.",
316+
},
317+
{ role: "user", content: transcription.text },
318+
]);
319+
console.log("Summary:", analysis.choices[0].message.content);
320+
321+
// Clean up
322+
await chatModel.unload();
239323
await whisperModel.unload();
240324
```
241325
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Sample: Audio Transcription with Foundry Local
2+
3+
This sample demonstrates how to use Foundry Local for **speech-to-text (audio transcription)** using the Whisper model — entirely on-device, with no cloud services required.
4+
5+
## What This Shows
6+
7+
- Loading the `whisper-tiny` model via the Foundry Local SDK
8+
- Transcribing an audio file (`.wav`, `.mp3`, etc.) to text
9+
- Both standard and streaming transcription modes
10+
- Automatic hardware acceleration (NPU > GPU > CPU)
11+
12+
## Prerequisites
13+
14+
- [Foundry Local SDK](https://github.com/microsoft/Foundry-Local) — installed via `npm install foundry-local-sdk` below; the SDK is self-contained, so no separate Foundry Local CLI installation is required
15+
- Node.js 18+
16+
17+
## Getting Started
18+
19+
Install the Foundry Local SDK:
20+
21+
```bash
22+
npm install foundry-local-sdk
23+
```
24+
25+
Place an audio file (e.g., `recording.wav` or `recording.mp3`) in the project directory, then run:
26+
27+
```bash
28+
node src/app.js
29+
```
30+
31+
## How It Works
32+
33+
The Foundry Local SDK handles everything:
34+
1. **Model discovery** — finds the best `whisper-tiny` variant for your hardware
35+
2. **Model download** — downloads the model if not already cached
36+
3. **Model loading** — loads the model into memory with optimized hardware acceleration
37+
4. **Transcription** — runs Whisper inference entirely on-device
38+
39+
No need for `whisper.cpp`, `@huggingface/transformers`, or any other separate STT tool.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"name": "audio-transcription-foundry-local",
3+
"type": "module",
4+
"description": "Audio transcription (speech-to-text) sample using Foundry Local",
5+
"scripts": {
6+
"start": "node src/app.js"
7+
},
8+
"dependencies": {
9+
"foundry-local-sdk": "latest"
10+
}
11+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import { FoundryLocalManager } from "foundry-local-sdk";
import path from "path";

// The Whisper model alias for audio transcription.
const alias = "whisper-tiny";

/**
 * Downloads (if needed), loads, and runs the Whisper model against a local
 * audio file, demonstrating both standard and streaming transcription.
 * The model is always unloaded, even when transcription fails.
 */
async function main() {
  console.log("Initializing Foundry Local SDK...");
  const manager = FoundryLocalManager.create({
    appName: "AudioTranscriptionSample",
    logLevel: "info",
  });

  // Resolve the alias to a concrete catalog model.
  const model = await manager.catalog.getModel(alias);
  if (!model) {
    throw new Error(
      `Model "${alias}" not found. Run "foundry model list" to see available models.`
    );
  }

  // Download only when the model is not already in the local cache.
  if (!model.isCached) {
    console.log(`Downloading model "${alias}"...`);
    await model.download((progress) => {
      process.stdout.write(`\rDownload progress: ${progress.toFixed(1)}%`);
    });
    console.log("\nDownload complete.");
  }

  // Load the model into memory.
  console.log(`Loading model "${model.id}"...`);
  await model.load();
  console.log("Model loaded.\n");

  try {
    // Create an audio client for transcription.
    const audioClient = model.createAudioClient();
    audioClient.settings.language = "en";

    // Update this path to point to your audio file.
    const audioFilePath = path.resolve("recording.mp3");

    // --- Standard transcription ---
    console.log("=== Standard Transcription ===");
    const result = await audioClient.transcribe(audioFilePath);
    console.log("Transcription:", result.text);

    // --- Streaming transcription ---
    console.log("\n=== Streaming Transcription ===");
    await audioClient.transcribeStreaming(audioFilePath, (chunk) => {
      process.stdout.write(chunk.text);
    });
    console.log("\n");
  } finally {
    // Guarantee the model is unloaded even if transcription throws,
    // so a failed run does not leave the model resident in memory.
    await model.unload();
  }

  console.log("Done.");
}

main().catch((err) => {
  // Log the failure AND exit non-zero so scripts/CI can detect it;
  // a bare `.catch(console.error)` would exit with status 0 on failure.
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)