thesofproject
diff --git a/‎src/audio/mfcc/tune/README.md‎
Lines changed: 37 additions & 0 deletions b/‎src/audio/mfcc/tune/README.md‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎src/audio/mfcc/tune/decode_all.m‎
Lines changed: 6 additions & 7 deletions b/‎src/audio/mfcc/tune/decode_all.m‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎src/audio/mfcc/tune/decode_ceps.m‎
Lines changed: 63 additions & 34 deletions b/‎src/audio/mfcc/tune/decode_ceps.m‎
Lines changed: 63 additions & 34 deletions
@@ -150,3 +150,40 @@ Whisper model: whisper-medium-int4-ov (encoder: CPU, decoder: CPU)
 
   >> "Hello computer"
 ```
+
+## Live Spectrogram Viewers
+
+### Mel Spectrogram
+
+The `sof_mel_spectrogram_compress.py` script captures Mel spectrogram
+frames from a SOF compress PCM device and displays them as a live
+scrolling spectrogram with VAD status. This is a lightweight viewer
+that does not run Whisper inference.
+
+```bash
+python3 sof_mel_spectrogram_compress.py --card 0 --device 48 --width 300
+```
+
+### Cepstral Spectrogram
+
+The `sof_ceps_spectrogram_compress.py` script is similar but displays
+cepstral coefficients (MFCC) instead of Mel bands.
+
+```bash
+python3 sof_ceps_spectrogram_compress.py --card 0 --device 48 --num-ceps 13 --width 300
+```
+
+## Live Whisper Transcription with Compress PCM
+
+The `sof_mel_to_text_live_compress.py` script captures Mel spectrogram
+frames from a SOF compress PCM device and performs live Whisper
+transcription using OpenVINO. Unlike `sof_mel_to_text_live_dsp_vad.py`,
+which uses `arecord`, this script reads directly from the compress PCM
+device with DTX-aware frame handling.
+
+```bash
+python3 sof_mel_to_text_live_compress.py --card 0 --device 48 --model whisper-medium-int4-ov
+```
+
+The same OpenVINO prerequisites and pip packages apply as described above
+for `sof_mel_to_text_live_dsp_vad.py`.
@@ -6,34 +6,33 @@
 num_ceps = 13;
 num_mel = 80;
 
-% MFCC cepstral output files
+% MFCC cepstral output files (all int32 output, Q9.23)
 ceps_files = {'mfcc_s16.raw', 'mfcc_s24.raw', 'mfcc_s32.raw'};
 
-% Mel output files with corresponding format
+% Mel output files (all int32 output, Q9.23)
 mel_files = {'mel_s16.raw', 'mel_s24.raw', 'mel_s32.raw'};
-mel_fmts  = {'s16',         's24',          's32'};
 
 % Xtensa prefixed variants
 xt_ceps_files = {'xt_mfcc_s16.raw', 'xt_mfcc_s24.raw', 'xt_mfcc_s32.raw'};
 xt_mel_files  = {'xt_mel_s16.raw',  'xt_mel_s24.raw',  'xt_mel_s32.raw'};
 
 all_ceps_files = [ceps_files, xt_ceps_files];
 all_mel_files  = [mel_files, xt_mel_files];
-all_mel_fmts   = [mel_fmts, mel_fmts];
 
 for i = 1:length(all_ceps_files)
 	fn = all_ceps_files{i};
 	if exist(fn, 'file')
 		fprintf('Decoding MFCC ceps: %s\n', fn);
-		[ceps, t, n, vad, energy, noise_energy, frame_num] = decode_ceps(fn, num_ceps);
+		[ceps, t, n, vad, energy, noise_energy, frame_num] = ...
+			decode_ceps(fn, num_ceps);
 	end
 end
 
 for i = 1:length(all_mel_files)
 	fn = all_mel_files{i};
-	fmt = all_mel_fmts{i};
 	if exist(fn, 'file')
 		fprintf('Decoding Mel: %s\n', fn);
-		[mel, t, n, vad, energy, noise_energy, frame_num] = decode_mel(fn, num_mel, fmt);
+		[mel, t, n, vad, energy, noise_energy, frame_num] = ...
+			decode_mel(fn, num_mel);
 	end
 end
@@ -1,9 +1,10 @@
-% [ceps, t, n, vad, energy, noise_energy, frame_number] = decode_ceps(fn, num_ceps, num_channels)
+% [ceps, t, n, vad, energy, noise_energy, frame_number] = decode_ceps(fn, num_ceps, hop, num_channels)
 %
 % Input
 %   fn - File with MFCC data in .raw or .wav format
 %   num_ceps - number of cepstral coefficients per frame
-%   num_channels - needed for .raw format, omit for .wav
+%   hop - STFT hop in seconds, optional, defaults to 10e-3 (10 ms)
+%   num_channels - needed for .raw format, omit for .wav, defaults to 1
 %
 % Outputs
 %   ceps - cepstral coefficients
@@ -18,65 +19,93 @@
 % Copyright(c) 2022-2026 Intel Corporation. All rights reserved.
 
 function [ceps, t, n, vad, energy, noise_energy, frame_number] = ...
-	decode_ceps(fn, num_ceps, num_channels)
+	decode_ceps(fn, num_ceps, hop, num_channels)
 
 if nargin < 3
+	hop = 10e-3;
+end
+if nargin < 4
 	num_channels = 1;
 end
 
 % MFCC stream
-fs = 16e3;
-qformat = 7;
-magic = [25443 28006]; % ASCII 'mfcc' as int16
-num_magic = 2; % magic word is 2 x int16
+qformat = 23; % Q9.23 in int32
+magic = int32(1835426659); % 0x6D666363 as int32
+num_magic = 1; % magic word is 1 x int32
 
-% Load output data
+% Load output data (always int32)
 [data, num_channels] = get_file(fn, num_channels);
 
-idx1 = find(data == magic(1));
-idx = [];
-for i = 1:length(idx1)
-	if data(idx1(i) + 1) == magic(2)
-		idx = [idx idx1(i)];
-	end
-end
+idx = find(data == magic);
 
 if isempty(idx)
 	error('No magic value markers found from stream');
 end
 
-period_ceps = idx(2)-idx(1);
 num_frames = length(idx);
 
 % Header after magic is [frame_number, reserved, energy, noise_energy, vad_flag]
-% as int32 (10 int16 slots), followed by num_ceps coefficients.
-payload_len = 10 + num_ceps; % 5 int32 = 10 int16, then ceps data
+% as int32, followed by num_ceps coefficients (int32).
+payload_len = 5 + num_ceps;
 
 % Last frame can be incomplete due to span over multiple periods
 last = idx(end) + num_magic + payload_len - 1;
 if (last > length(data))
     num_frames = num_frames - 1;
 end
 
-t_ceps = period_ceps / num_channels / fs;
-t = (0:num_frames -1) * t_ceps;
-n = 1:num_ceps;
-
 payload = zeros(payload_len, num_frames);
 for i = 1:num_frames
 	i1 = idx(i) + num_magic;
 	i2 = i1 + payload_len - 1;
 	payload(:,i) = double(data(i1:i2));
 end
 
-% Reassemble int32 from pairs of int16 (little-endian).
-% Low half must be treated as unsigned with mod() to handle negative int16.
-frame_number = mod(payload(1,:), 65536) + payload(2,:) * 65536;
-% payload(3:4,:) is reserved, skip
-energy = (mod(payload(5,:), 65536) + payload(6,:) * 65536) / 2^23;
-noise_energy = (mod(payload(7,:), 65536) + payload(8,:) * 65536) / 2^23;
-vad = mod(payload(9,:), 65536) + payload(10,:) * 65536;
-ceps = payload(11:payload_len, :) / 2^qformat;
+frame_number = payload(1, :);
+% payload(2,:) is reserved, skip
+energy = payload(3, :) / 2^23;
+noise_energy = payload(4, :) / 2^23;
+vad = payload(5, :);
+ceps = payload(6:payload_len, :) / 2^qformat;
+
+% Fill gaps from DTX-suppressed VAD=0 frames to create a continuous timeline.
+% Gaps get vad = 0 and are forward-filled; min(ceps) is only a placeholder.
+first_frame = frame_number(1);
+last_frame = frame_number(end);
+total_frames = last_frame - first_frame + 1;
+if total_frames > num_frames
+	ceps_fill = min(ceps(:));
+	ceps_full = ones(num_ceps, total_frames) * ceps_fill;
+	vad_full = zeros(1, total_frames);
+	energy_full = zeros(1, total_frames);
+	noise_energy_full = zeros(1, total_frames);
+	frame_number_full = first_frame:last_frame;
+	has_data = false(1, total_frames);
+	for i = 1:num_frames
+		fi = frame_number(i) - first_frame + 1;
+		ceps_full(:, fi) = ceps(:, i);
+		vad_full(fi) = vad(i);
+		energy_full(fi) = energy(i);
+		noise_energy_full(fi) = noise_energy(i);
+		has_data(fi) = true;
+	end
+	% Forward-fill gaps with last received values
+	for fi = 2:total_frames
+		if ~has_data(fi)
+			ceps_full(:, fi) = ceps_full(:, fi - 1);
+			energy_full(fi) = energy_full(fi - 1);
+			noise_energy_full(fi) = noise_energy_full(fi - 1);
+		end
+	end
+	ceps = ceps_full;
+	vad = vad_full;
+	energy = energy_full;
+	noise_energy = noise_energy_full;
+	frame_number = frame_number_full;
+end
+
+t = (frame_number - first_frame) * hop;
+n = 1:num_ceps;
 
 figure;
 surf(t, n, ceps, 'EdgeColor', 'none');
@@ -96,18 +125,18 @@
 switch lower(ext)
 	case '.raw'
 		fh = fopen(fn, 'r');
-		data = fread(fh, 'int16');
+		data = fread(fh, 'int32');
 		fclose(fh);
 	case '.wav'
 		tmp = audioread(fn, 'native');
 		t = whos('tmp');
-		if ~strcmp(t.class, 'int16')
-			error('Only 16-bit wav file format is supported');
+		if ~strcmp(t.class, 'int32')
+			error('Expected 32-bit wav for int32 MFCC output format');
 		end
 		s = size(tmp);
 		num_channels = s(2);
 		if num_channels > 1
-			data = int16(zeros(prod(s), 1));
+			data = int32(zeros(prod(s), 1));
 			for i = 1:num_channels
 				data(i:num_channels:end) = tmp(:, i);
 			end