High-level audio manipulations
audio('raw.wav').trim(-30).normalize('podcast').fade(0.3, 0.5).save('clean.mp3')

- Any format — fast WASM codecs, no ffmpeg.
- Streaming — playback during decode.
- Non-destructive — virtual edits, instant undo.
- Page cache — open 10 GB+ files.
- Analysis — loudness, spectrum, peaks, and more.
- Modular – pluggable ops, tree-shakable.
- CLI — playback, unix pipes, tab completion.
- Cross-platform — browsers, node, deno, bun.
- Audio-first – dB, Hz, LUFS, not bytes and indices.
npm i audio
import audio from 'audio'
let a = audio('voice.mp3')
a.trim().normalize('podcast').fade(0.3, 0.5)
await a.save('clean.mp3')

<script type="module">
import audio from './dist/audio.min.js'
let a = audio('./song.mp3')
a.trim().normalize().fade(0.5, 2)
a.clip({ at: 60, duration: 30 }).play() // play the chorus
</script>

Codecs load on demand via import() — map them with an import map or your bundler.
Import map example
<script type="importmap">
{
"imports": {
"@audio/decode-mp3": "https://esm.sh/@audio/decode-mp3",
"@audio/decode-wav": "https://esm.sh/@audio/decode-wav",
"@audio/decode-flac": "https://esm.sh/@audio/decode-flac",
"@audio/decode-opus": "https://esm.sh/@audio/decode-opus",
"@audio/decode-vorbis": "https://esm.sh/@audio/decode-vorbis",
"@audio/decode-aac": "https://esm.sh/@audio/decode-aac",
"@audio/decode-qoa": "https://esm.sh/@audio/decode-qoa",
"@audio/decode-aiff": "https://esm.sh/@audio/decode-aiff",
"@audio/decode-caf": "https://esm.sh/@audio/decode-caf",
"@audio/decode-webm": "https://esm.sh/@audio/decode-webm",
"@audio/decode-amr": "https://esm.sh/@audio/decode-amr",
"@audio/decode-wma": "https://esm.sh/@audio/decode-wma",
"@audio/encode-wav": "https://esm.sh/@audio/encode-wav",
"@audio/encode-mp3": "https://esm.sh/@audio/encode-mp3",
"@audio/encode-flac": "https://esm.sh/@audio/encode-flac",
"@audio/encode-opus": "https://esm.sh/@audio/encode-opus",
"@audio/encode-ogg": "https://esm.sh/@audio/encode-ogg",
"@audio/encode-aiff": "https://esm.sh/@audio/encode-aiff"
}
}
</script>

npm i -g audio
audio voice.wav trim normalize podcast fade 0.3s -0.5s -o clean.mp3

let a = audio('raw-take.wav')
a.trim(-30).normalize('podcast').fade(0.3, 0.5)
await a.save('clean.wav')

let intro = audio('intro.mp3')
let body = audio('interview.wav')
let outro = audio('outro.mp3')
body.trim().normalize('podcast')
let ep = audio([intro, body, outro])
ep.fade(0.5, 2)
await ep.save('episode.mp3')

let a = audio('track.mp3')
let [mins, peaks] = await a.stat(['min', 'max'], { bins: canvas.width })
for (let i = 0; i < peaks.length; i++)
  ctx.fillRect(i, h/2 - peaks[i] * h/2, 1, (peaks[i] - mins[i]) * h/2)

let a = audio('long.flac')
a.on('data', ({ delta }) => appendBars(delta.max[0], delta.min[0]))
await a

let music = audio('bg.mp3')
let voice = audio('narration.wav')
music.gain(-12).mix(voice, { at: 2 })
await music.save('mixed.wav')

let a = audio('audiobook.mp3')
let [ch1, ch2, ch3] = a.split(1800, 3600)
for (let [i, ch] of [ch1, ch2, ch3].entries())
  await ch.save(`chapter-${i + 1}.mp3`)

let a = audio()
a.record()
await new Promise(r => setTimeout(r, 5000))
a.stop()
a.trim().normalize()
await a.save('recording.wav')

let a = audio('speech.wav')
let mfcc = await a.stat('cepstrum', { bins: 13 })
let spec = await a.stat('spectrum', { bins: 128 })
let [loud, rms] = await a.stat(['loudness', 'rms'])

let a = audio.from(t => Math.sin(440 * Math.PI * 2 * t), { duration: 2 })
await a.save('440hz.wav')

audio.op('crush', { params: ['bits'], process: (input, output, ctx) => {
let steps = 2 ** (ctx.bits ?? 8)
for (let c = 0; c < input.length; c++)
for (let i = 0; i < input[c].length; i++)
output[c][i] = Math.round(input[c][i] * steps) / steps
}})
a.crush(4)
a.crush({bits: 4, at: 1, duration: 2})

let json = JSON.stringify(a) // { source, edits, ... }
let b = audio(JSON.parse(json)) // re-decode + replay edits

let a = audio('interview.wav')
a.remove({ at: 120, duration: 15 }) // cut 2:00–2:15
a.fade(0.1, { at: 120 }) // smooth the splice
await a.save('edited.wav')

let a = audio('song.mp3')
a.crop({ at: 45, duration: 30 }).fade(0.5, 2).normalize()
await a.save('ringtone.mp3')

let a = audio('master.wav')
let clips = await a.stat('clipping')
if (clips.length) console.warn(`${clips.length} clipped blocks`)

let a = audio('2hour-mix.flac')
a.highpass(40).normalize('broadcast')
for await (let chunk of a) socket.send(chunk[0].buffer)

let a = audio('beat.wav')
let v = a.clip({ at: 1, duration: 0.25 })
let glitch = audio([v, v, v, v])
glitch.reverse({ at: 0.25, duration: 0.25 })
await glitch.save('glitch.wav')

let a = audio('pad.wav')
a.gain(t => -12 * (0.5 + 0.5 * Math.cos(t * Math.PI * 4))) // 2Hz tremolo in dB
await a.save('tremolo.wav')

let prices = [100, 102, 98, 105, 110, 95, 88, 92, 101, 107]
let a = audio.from(t => {
let freq = 200 + (prices[Math.min(Math.floor(t / 0.2), prices.length - 1)] - 80) * 10
return Math.sin(freq * Math.PI * 2 * t) * 0.5
}, { duration: prices.length * 0.2 })
await a.save('sonification.wav')

audio(source, opts?) – decode from file, URL, or bytes. Returns instantly — decodes in background.
audio.from(source, opts?) – wrap existing PCM, AudioBuffer, silence, or function. Sync, no I/O.
let a = audio('voice.mp3') // file path
let b = audio('https://cdn.ex/track.mp3') // URL
let c = audio(inputEl.files[0]) // Blob, File, Response, ArrayBuffer
let d = audio() // empty, ready for .push() or .record()
let e = audio([intro, body, outro]) // concat (virtual, no copy)
// opts: { sampleRate, channels, storage: 'memory' | 'persistent' | 'auto' }
await a // await for decode — if you need .duration, full stats etc
let a = audio.from([left, right]) // Float32Array[] channels
let b = audio.from(3, { channels: 2 }) // 3s silence
let c = audio.from(t => Math.sin(440*TAU*t), { duration: 2 }) // generator
let d = audio.from(audioBuffer) // Web Audio AudioBuffer
let e = audio.from(int16arr, { format: 'int16' }) // typed array + format

// format
a.duration // total seconds (reflects edits)
a.channels // channel count
a.sampleRate // sample rate
a.length // total samples per channel
// playback
a.currentTime // position in seconds (smooth interpolation during playback)
a.playing // true during playback
a.paused // true when paused
a.volume = 0.5 // 0..1 linear (settable)
a.muted = true // mute gate (independent of volume)
a.loop = true // on/off (settable)
a.ended // true when playback ended naturally (not via stop)
a.seeking // true during a seek operation
a.played // promise, resolves when playback starts
a.recording // true during mic recording
// state
a.ready // promise, resolves when fully decoded
a.source // original source reference
a.pages // Float32Array page store
a.stats // per-block stats (peak, rms, etc.)
a.edits // edit list (non-destructive ops)
a.version // increments on each edit

Non-destructive time/channel rearrangement. All support {at, duration, channel}.
- .trim(threshold?) – strip leading/trailing silence (dB, default auto).
- .crop({at, duration}) – keep range, discard rest.
- .remove({at, duration}) – cut range, close gap.
- .insert(source, {at}) – insert audio or silence (number of seconds) at position.
- .clip({at, duration}) – zero-copy range reference.
- .split(...offsets) – zero-copy split at timestamps.
- .pad(before, after?) – silence at edges (seconds).
- .repeat(n) – repeat n times.
- .reverse({at?, duration?}) – reverse audio or range.
- .speed(rate) – playback speed (affects pitch and duration).
- .remix(channels) – channel count: number or array map ([1, 0] swaps L/R).
a.trim(-30) // strip silence below -30dB
a.remove({ at: '2m', duration: 15 }) // cut 2:00–2:15, close gap
a.insert(intro, { at: 0 }) // prepend; .insert(3) appends 3s silence
let [pt1, pt2] = a.split('30m') // zero-copy views
let hook = a.clip({ at: 60, duration: 30 }) // zero-copy excerpt
a.remix([0, 0]) // L→both; .remix(1) for mono

Amplitude, mixing, normalization. All support {at, duration, channel} ranges.
- .gain(dB, opts?) – volume. Number, range, or t => dB function. { unit: 'linear' } for multiplier.
- .fade(in, out?, curve?) – fade in/out. Curves: 'linear', 'exp', 'log', 'cos'.
- .normalize(target?) – remove DC offset, clamp, and normalize loudness.
  - 'podcast' – -16 LUFS, -1 dBTP. 'streaming' – -14 LUFS. 'broadcast' – -23 LUFS.
  - -3 – custom dB target (peak mode). No arg – peak 0 dBFS.
  - { mode: 'rms' } – RMS normalization. Also 'peak', 'lufs'. { ceiling: -1 } – true peak limiter in dB. { dc: false } – skip DC removal.
- .mix(source, opts?) – overlay another audio (additive).
- .pan(value, opts?) – stereo balance (−1 left, 0 center, 1 right). Accepts function.
- .write(data, {at?}) – overwrite samples with raw PCM.
- .transform(fn) – inline processor: (input, output, ctx) => void. Not serialized.
a.gain(-3) // reduce 3dB
a.gain(6, { at: 10, duration: 5 }) // boost range
a.gain(t => -12 * Math.cos(t * TAU)) // automate over time
a.fade(0.5, -2, 'exp') // 0.5s in, 2s exp fade-out
a.normalize('podcast') // -16 LUFS; also 'streaming', 'broadcast'
a.mix(voice, { at: 2 }) // overlay at 2s
a.pan(-0.3, { at: 10, duration: 5 }) // pan left for range

Biquad filters, chainable. All support {at, duration} ranges.
- .highpass(freq), .lowpass(freq) – pass filter.
- .bandpass(freq, Q?), .notch(freq, Q?) – band-pass / notch.
- .lowshelf(freq, dB), .highshelf(freq, dB) – shelf EQ.
- .eq(freq, gain, Q?) – parametric EQ.
- .filter(type, ...params) – generic dispatch.
a.highpass(80).lowshelf(200, -3) // rumble + mud
a.eq(3000, 2, 1.5).highshelf(8000, 3) // presence + air
a.notch(50) // remove hum
a.filter(customFn, { cutoff: 2000 }) // custom filter function

Read PCM, encode, stream, push. Format inferred from extension.
- await .read(opts?) – rendered PCM. { format, channel } to convert.
- await .save(path, opts?) – encode + write. { at, duration } for sub-range.
- await .encode(format?, opts?) – encode to Uint8Array.
- for await (let block of a) – async-iterable over blocks.
- .clone() – deep copy, independent edits, shared pages.
- .push(data, format?) – feed PCM into pushable instance. .stop() to finalize.
let pcm = await a.read() // Float32Array[]
let raw = await a.read({ format: 'int16', channel: 0 })
await a.save('out.mp3') // format from extension
let bytes = await a.encode('flac') // Uint8Array
for await (let block of a) send(block) // stream blocks
let b = a.clone() // independent copy, shared pages
let src = audio() // pushable source
src.push(buf, 'int16') // feed PCM
src.stop() // finalize

Live playback with dB volume, seeking, looping.
- .play(opts?) – start playback. { at, duration, volume, loop }. .played promise resolves when output starts.
- .pause(), .resume(), .seek(t), .stop() – playback control.
- .record(opts?) – mic recording. { deviceId, sampleRate, channels }.
a.play({ at: 30, duration: 10 }) // play 30s–40s
await a.played // wait for output to start
a.volume = 0.5; a.loop = true // live adjustments
a.muted = true // mute without changing volume
a.pause(); a.seek(60); a.resume() // jump to 1:00
a.stop() // end playback or recording
let mic = audio()
mic.record({ sampleRate: 16000, channels: 1 })
mic.stop()

await .stat(name, opts?) — without bins returns scalar, with bins returns Float32Array. Array of names returns array of results. Sub-ranges via {at, duration}, per-channel via {channel}.
- 'db' – peak amplitude in dBFS.
- 'rms' – RMS amplitude (linear).
- 'loudness' – integrated LUFS (ITU-R BS.1770).
- 'dc' – DC offset.
- 'clipping' – clipped samples (scalar: timestamps, binned: counts).
- 'silence' – silent ranges as {at, duration}.
- 'max', 'min' – peak envelope (use together for waveform rendering).
- 'spectrum' – mel-frequency spectrum in dB (A-weighted).
- 'cepstrum' – MFCCs.
let loud = await a.stat('loudness') // LUFS
let [db, clips] = await a.stat(['db', 'clipping']) // multiple at once
let spec = await a.stat('spectrum', { bins: 128 }) // frequency bins
let peaks = await a.stat('max', { bins: 800 }) // waveform data
await a.stat('rms', { channel: 0 }) // left only → number
await a.stat('rms', { channel: [0, 1] }) // per-channel → [n, n]
let gaps = await a.stat('silence', { threshold: -40 }) // [{at, duration}, ...]

Events, lifecycle, undo/redo, serialization.
- .on(event, fn) / .off(event?, fn?) – subscribe / unsubscribe.
  - 'data' – pages decoded/pushed. Payload: { delta, offset, sampleRate, channels }.
  - 'change' – any edit or undo.
  - 'metadata' – stream header decoded. Payload: { sampleRate, channels }.
  - 'timeupdate' – playback position. Payload: currentTime.
  - 'play' – playback started or resumed. 'pause' – playback paused.
  - 'volumechange' – volume or muted changed.
  - 'ended' – playback finished (not on loop).
  - 'progress' – during save/encode. Payload: { offset, total } in seconds.
- .dispose() – release resources. Supports using for auto-dispose.
- .undo(n?) – undo last edit(s). Returns edit for redo via .run().
- .run(...edits) – apply edits as arrays ['type', opts?]. Batch or replay.
Edits use [type, opts] shape, where opts is params (value, freq, etc.) plus range keys (at, duration, channel).
a.run(
['gain', { value: -3, at: 10, duration: 5 }],
['crop', { at: 1, duration: 2 }],
['fade', { in: 1, curve: 'exp' }],
['insert', { source: ref, at: 2 }],
['gain', { value: -3 }],
)
let saved = JSON.stringify([
['gain', { value: -3 }],
['crop', { at: 1, duration: 2 }],
])
a.run(...JSON.parse(saved))

a.on('data', ({ delta }) => draw(delta)) // decode progress
a.on('timeupdate', t => ui.update(t)) // playback position
a.undo() // undo last edit
b.run(...a.edits) // replay onto another file
JSON.stringify(a); audio(json) // serialize / restore

Extend with custom ops and stats. See Plugin Tutorial.
- audio.op(name, fn) – register op. Shorthand for { process: fn }. Full descriptor: { params, process, plan, resolve }.
- audio.op(name) – query descriptor. audio.op() – all ops.
- audio.stat(name, descriptor) – register stat. Shorthand (chs, ctx) => [...] or { block, reduce, query }.
// op: params declares named args → ctx.bits; process receives (input, output, ctx) per 1024-sample block
audio.op('crush', { params: ['bits'], process: (input, output, ctx) => {
let steps = 2 ** (ctx.bits ?? 8)
for (let c = 0; c < input.length; c++)
for (let i = 0; i < input[c].length; i++)
output[c][i] = Math.round(input[c][i] * steps) / steps
}})
// stat: block function collects per-block, reduce enables scalar queries across blocks
audio.stat('peak', {
block: (chs) => chs.map(ch => { let m = 0; for (let s of ch) m = Math.max(m, Math.abs(s)); return m }),
reduce: (blockValues, from, to) => { let m = 0; for (let i = from; i < to; i++) m = Math.max(m, blockValues[i]); return m },
})
a.crush(4) // chainable like built-in ops
a.stat('peak') // → scalar from reduce
a.stat('peak', { bins: 100 }) // → binned array

npm i -g audio
audio [file] [ops...] [-o output] [options]
# ops
eq mix pad pan crop
fade gain stat trim notch
remix speed split insert remove
repeat bandpass highpass lowpass reverse
lowshelf highshelf normalize
# options
-p play -l loop -o output -f force --format

Playback keys: ␣ pause · ←/→ seek ±10s · ⇧←/⇧→ seek ±60s · ↑/↓ volume ±3dB · l loop · q quit
# Play fragment of the song
audio song.mp3 10s..15s -p
# Play clip (not full song)
audio song.mp3 clip 10s..20s -p -l
# Normalize before

# clean up
audio raw-take.wav trim -30db normalize podcast fade 0.3s -0.5s -o clean.wav
# ranges
audio in.wav gain -3db 1s..10s -o out.wav
# filter chain
audio in.mp3 highpass 80hz lowshelf 200hz -3db -o out.wav
# join
audio intro.mp3 + content.wav + outro.mp3 trim normalize fade 0.5s -2s -o ep.mp3
# voiceover
audio bg.mp3 gain -12db mix narration.wav 2s -o mixed.wav
# split
audio audiobook.mp3 split 30m 60m -o 'chapter-{i}.mp3'

# all default stats (db, rms, loudness, clipping, dc)
audio speech.wav stat
# specific stats
audio speech.wav stat loudness rms
# spectrum / cepstrum with bin count
audio speech.wav stat spectrum 128
audio speech.wav stat cepstrum 13
# stat after transforms
audio speech.wav gain -3db stat db

audio '*.wav' trim normalize podcast -o '{name}.clean.{ext}'
audio '*.wav' gain -3db -o '{name}.out.{ext}'

cat in.wav | audio gain -3db > out.wav
curl -s https://example.com/speech.mp3 | audio normalize -o clean.wav
ffmpeg -i video.mp4 -f wav - | audio trim normalize podcast > voice.wav

eval "$(audio --completions zsh)" # add to ~/.zshrc
eval "$(audio --completions bash)" # add to ~/.bashrc
audio --completions fish | source # fish

- What formats are supported?
- Decode: WAV, MP3, FLAC, OGG Vorbis, Opus, AAC, AIFF, CAF, WebM, AMR, WMA, QOA via audio-decode. Encode: WAV, MP3, FLAC, Opus, OGG, AIFF via audio-encode. Codecs are WASM-based, lazy-loaded on first use.
- Does it need ffmpeg or native addons?
- No, pure JS + WASM. For CLI, you can install globally:
npm i -g audio.
- How big is the bundle?
- ~20K gzipped core. Codecs load on demand via
import(), so unused formats aren't fetched.
- How does it handle large files?
- Audio is stored in fixed-size pages. In the browser, cold pages can evict to OPFS when memory exceeds budget. Stats stay resident (~7 MB for 2h stereo).
- Are edits destructive?
- No.
a.gain(-3).trim() pushes entries to an edit list — source pages aren't touched. Edits replay on read()/save()/for await.
- Can I use it in the browser?
- Yes, same API. See Browser for bundle options and import maps.
- Does it need the full file before I can work with it?
- No. Playback, edits, and structural ops (crop, repeat, pad, insert, etc.) all stream incrementally during decode — output begins before the file finishes loading. The edit plan recompiles as data arrives, tracking a safe output boundary per op. Only ops that depend on total length (open-end reverse, negative
at) wait for full decode.
- TypeScript?
- Yes, ships with
audio.d.ts.
- How is this different from SoX?
- SoX is a C command-line tool — powerful but native-only, no browser, no programmatic API, no streaming edits, no undo.
audio runs in Node and the browser with the same API, edits are non-destructive and lazy (nothing is rendered until you read/save), and it streams during decode. SoX has more effects (reverb, compressor, noise reduction, chorus, flanger, phaser) — these are on the roadmap.
- How is this different from Audacity?
- Audacity is a GUI desktop app.
audio is a library and CLI — designed for scripting, automation, pipelines, and embedding in apps. Audacity is destructive (edits mutate samples); audio is non-destructive (edits are a plan replayed on read). Audacity can't run in the browser or be npm installed into your project.
- How is this different from ffmpeg?
- ffmpeg is a video-first tool that also handles audio. It's a C binary — no JS API, no browser, no streaming edits.
audio is audio-first: dB, Hz, LUFS are native units. Edits are non-destructive, playback streams during decode, and the whole thing is ~20K gzipped with codecs loading on demand.
- How is this different from Web Audio API?
- Web Audio API is a real-time audio graph for playback and synthesis — not for editing files. No undo, no save-to-file, no CLI, no Node (without polyfills).
audio is for working on audio files: load, edit, analyze, save. For Web Audio API in Node, see web-audio-api.
- How is this different from Tone.js / Howler.js?
- Tone.js is a Web Audio synthesis framework — great for making music in real-time, not for editing files. Howler.js is a playback library — load and play, no editing or analysis.
audio is a complete audio workstation: decode, edit, analyze, encode, play, record, CLI.
Effects from SoX and elsewhere not yet available. Contributions welcome.
- compressor — dynamic range compression / expansion / limiting (SoX compand)
- reverb — freeverb reverberation
- noise — noise reduction via spectral profiling (SoX noisered)
- echo — echo / delay effect
- resample — explicit sample rate conversion
- dither — dithering for bit-depth reduction
- chorus — chorus modulation
- flanger — flanging
- phaser — phaser effect
- vocals — vocal isolation / removal (SoX oops, out-of-phase stereo)
- allpass — all-pass filter
- earwax — headphone crossfeed
- audio-decode – codec decoding (13+ formats)
- encode-audio – codec encoding
- audio-filter – filters (weighting, EQ, auditory)
- audio-speaker – audio output
- audio-mic – audio input
- audio-type – format detection
- pcm-convert – PCM format conversion
