Skip to content

Commit 16f4488

Browse files
committed
docs: surface audio transcription capability across README, samples, and SDK examples
- Update README.md to prominently feature audio transcription (STT) alongside chat completions, including a Supported Tasks table, JS code examples for audio transcription and unified chat+audio, and updated Features section - Add samples/js/audio-transcription-foundry-local: standalone Whisper STT sample - Add samples/js/chat-and-audio-foundry-local: unified chat + audio sample demonstrating single FoundryLocalManager managing both model types - Add sdk_v2/js/examples/audio-transcription.ts: TypeScript audio example - Update docs/README.md with capabilities table and sample links Addresses the discoverability gap where LLMs and developers do not know Foundry Local supports audio transcription via Whisper models.
1 parent b247611 commit 16f4488

4 files changed

Lines changed: 209 additions & 11 deletions

File tree

README.md

Lines changed: 95 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -215,27 +215,111 @@ Explore complete working examples in the [`samples/`](samples/) folder:
215215
216216
The SDK also supports audio transcription via Whisper models. Use `model.createAudioClient()` to transcribe audio files on-device:
217217
218+
> [!TIP]
219+
> The JavaScript SDK does not require end users to have the Foundry Local CLI installed. It is a completely self-contained SDK that includes native in-process Chat Completions and Audio Transcription APIs — no HTTP calls or external services needed.
220+
221+
#### Chat Completions
222+
218223
```javascript
219-
import { FoundryLocalManager } from 'foundry-local-sdk';
224+
import { FoundryLocalManager } from "foundry-local-sdk";
225+
226+
// Initialize the SDK
227+
const manager = FoundryLocalManager.create({ appName: "MyApp" });
228+
229+
// Get and load a chat model
230+
const model = await manager.catalog.getModel("phi-3.5-mini");
231+
await model.download();
232+
await model.load();
233+
234+
// Create a chat client and generate a response
235+
const chatClient = model.createChatClient();
236+
chatClient.settings.temperature = 0.7;
237+
chatClient.settings.maxTokens = 800;
238+
239+
const response = await chatClient.completeChat([
240+
{ role: "user", content: "What is the golden ratio?" },
241+
]);
242+
console.log(response.choices[0].message.content);
243+
244+
// Stream responses in real-time
245+
for await (const chunk of chatClient.completeStreamingChat([
246+
{ role: "user", content: "Explain quantum computing simply." },
247+
])) {
248+
const content = chunk.choices?.[0]?.message?.content;
249+
if (content) process.stdout.write(content);
250+
}
220251
221-
const manager = FoundryLocalManager.create({ appName: 'MyApp' });
252+
// Clean up
253+
await model.unload();
254+
```
255+
256+
#### Audio Transcription (Speech-to-Text)
222257
223-
// Download and load the Whisper model
224-
const whisperModel = await manager.catalog.getModel('whisper-tiny');
258+
```javascript
259+
import { FoundryLocalManager } from "foundry-local-sdk";
260+
261+
// Initialize the SDK (reuses the same singleton if already created)
262+
const manager = FoundryLocalManager.create({ appName: "MyApp" });
263+
264+
// Get and load the Whisper model for audio transcription
265+
const whisperModel = await manager.catalog.getModel("whisper-tiny");
225266
await whisperModel.download();
226267
await whisperModel.load();
227268
228-
// Transcribe an audio file
269+
// Create an audio client and transcribe
229270
const audioClient = whisperModel.createAudioClient();
230-
audioClient.settings.language = 'en';
231-
const result = await audioClient.transcribe('recording.wav');
232-
console.log('Transcription:', result.text);
271+
audioClient.settings.language = "en";
272+
273+
// Transcribe an audio file
274+
const result = await audioClient.transcribe("recording.wav");
275+
console.log("Transcription:", result.text);
233276
234-
// Or stream in real-time
235-
for await (const chunk of audioClient.transcribeStreaming('recording.wav')) {
236-
process.stdout.write(chunk.text);
277+
// Or stream the transcription in real-time
278+
for await (const chunk of audioClient.transcribeStreaming("recording.wav")) {
279+
process.stdout.write(chunk.text);
237280
}
238281
282+
// Clean up
283+
await whisperModel.unload();
284+
```
285+
286+
#### Chat + Audio Together
287+
288+
A single `FoundryLocalManager` can manage both chat and audio models simultaneously — no need for separate runtimes:
289+
290+
```javascript
291+
import { FoundryLocalManager } from "foundry-local-sdk";
292+
293+
const manager = FoundryLocalManager.create({ appName: "VoiceJournal" });
294+
295+
// Load both models
296+
const chatModel = await manager.catalog.getModel("phi-3.5-mini");
297+
await chatModel.download();
298+
await chatModel.load();
299+
300+
const whisperModel = await manager.catalog.getModel("whisper-tiny");
301+
await whisperModel.download();
302+
await whisperModel.load();
303+
304+
// Step 1: Transcribe audio
305+
const audioClient = whisperModel.createAudioClient();
306+
audioClient.settings.language = "en";
307+
const transcription = await audioClient.transcribe("journal-entry.wav");
308+
console.log("You said:", transcription.text);
309+
310+
// Step 2: Analyze the transcription with the chat model
311+
const chatClient = chatModel.createChatClient();
312+
const analysis = await chatClient.completeChat([
313+
{
314+
role: "system",
315+
content: "Summarize this journal entry and extract key themes.",
316+
},
317+
{ role: "user", content: transcription.text },
318+
]);
319+
console.log("Summary:", analysis.choices[0].message.content);
320+
321+
// Clean up
322+
await chatModel.unload();
239323
await whisperModel.unload();
240324
```
241325
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Sample: Audio Transcription with Foundry Local
2+
3+
This sample demonstrates how to use Foundry Local for **speech-to-text (audio transcription)** using the Whisper model — entirely on-device, with no cloud services required.
4+
5+
## What This Shows
6+
7+
- Loading the `whisper-tiny` model via the Foundry Local SDK
8+
- Transcribing an audio file (`.wav`, `.mp3`, etc.) to text
9+
- Both standard and streaming transcription modes
10+
- Automatic hardware acceleration (NPU > GPU > CPU)
11+
12+
## Prerequisites
13+
14+
- [Foundry Local SDK](https://github.com/microsoft/Foundry-Local) — installed via `npm install foundry-local-sdk` below; the SDK is self-contained, so no separate Foundry Local CLI installation is required
15+
- Node.js 18+
16+
17+
## Getting Started
18+
19+
Install the Foundry Local SDK:
20+
21+
```bash
22+
npm install foundry-local-sdk
23+
```
24+
25+
Place an audio file (e.g., `recording.wav` or `recording.mp3`) in the project directory, then run:
26+
27+
```bash
28+
node src/app.js
29+
```
30+
31+
## How It Works
32+
33+
The Foundry Local SDK handles everything:
34+
1. **Model discovery** — finds the best `whisper-tiny` variant for your hardware
35+
2. **Model download** — downloads the model if not already cached
36+
3. **Model loading** — loads the model into memory with optimized hardware acceleration
37+
4. **Transcription** — runs Whisper inference entirely on-device
38+
39+
No need for `whisper.cpp`, `@huggingface/transformers`, or any other separate STT tool.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"name": "audio-transcription-foundry-local",
3+
"type": "module",
4+
"description": "Audio transcription (speech-to-text) sample using Foundry Local",
5+
"scripts": {
6+
"start": "node src/app.js"
7+
},
8+
"dependencies": {
9+
"foundry-local-sdk": "latest"
10+
}
11+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import { FoundryLocalManager } from "foundry-local-sdk";
import path from "path";

// The Whisper model alias for audio transcription.
const alias = "whisper-tiny";

/**
 * Downloads (if needed), loads, and runs the Whisper model against a local
 * audio file, demonstrating both standard and streaming transcription.
 * The model is always unloaded, even when transcription fails.
 */
async function main() {
  console.log("Initializing Foundry Local SDK...");
  const manager = FoundryLocalManager.create({
    appName: "AudioTranscriptionSample",
    logLevel: "info",
  });

  // Resolve the alias to a concrete catalog model.
  const model = await manager.catalog.getModel(alias);
  if (!model) {
    throw new Error(
      `Model "${alias}" not found. Run "foundry model list" to see available models.`
    );
  }

  // Download only when the model is not already in the local cache.
  if (!model.isCached) {
    console.log(`Downloading model "${alias}"...`);
    await model.download((progress) => {
      process.stdout.write(`\rDownload progress: ${progress.toFixed(1)}%`);
    });
    console.log("\nDownload complete.");
  }

  // Load the model into memory.
  console.log(`Loading model "${model.id}"...`);
  await model.load();
  console.log("Model loaded.\n");

  try {
    // Create an audio client for transcription.
    const audioClient = model.createAudioClient();
    audioClient.settings.language = "en";

    // Update this path to point to your audio file.
    const audioFilePath = path.resolve("recording.mp3");

    // --- Standard transcription ---
    console.log("=== Standard Transcription ===");
    const result = await audioClient.transcribe(audioFilePath);
    console.log("Transcription:", result.text);

    // --- Streaming transcription ---
    console.log("\n=== Streaming Transcription ===");
    await audioClient.transcribeStreaming(audioFilePath, (chunk) => {
      process.stdout.write(chunk.text);
    });
    console.log("\n");
  } finally {
    // Guarantee the model is unloaded even if transcription throws,
    // so a failed run does not leave the model resident in memory.
    await model.unload();
  }

  console.log("Done.");
}

main().catch((err) => {
  // Log the failure AND exit non-zero so scripts/CI can detect it;
  // a bare `.catch(console.error)` would exit with status 0 on failure.
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)