voidful
diff --git a/.gitignore b/.gitignore
Lines changed: 5 additions & 1 deletion
diff --git a/SoundCodec/base_codec/descript_audio_codec.py b/SoundCodec/base_codec/descript_audio_codec.py
Lines changed: 3 additions & 1 deletion
diff --git a/SoundCodec/base_codec/encodec_hf.py b/SoundCodec/base_codec/encodec_hf.py
Lines changed: 3 additions & 1 deletion
diff --git a/SoundCodec/base_codec/funcodec.py b/SoundCodec/base_codec/funcodec.py
Lines changed: 3 additions & 1 deletion
diff --git a/SoundCodec/base_codec/general.py b/SoundCodec/base_codec/general.py
Lines changed: 14 additions & 3 deletions
diff --git a/SoundCodec/base_codec/wavtokenizer.py b/SoundCodec/base_codec/wavtokenizer.py
Lines changed: 2 additions & 1 deletion
diff --git a/SoundCodec/dataset/__init__.py b/SoundCodec/dataset/__init__.py
Lines changed: 18 additions & 2 deletions
diff --git a/dataset_creator.py b/dataset_creator.py
Lines changed: 9 additions & 5 deletions
diff --git a/update_leaderboard.py b/update_leaderboard.py
Lines changed: 68 additions & 0 deletions
@@ -27,4 +27,8 @@ speechtokenizer_hubert_avg/
 check_all_codecs.py
 test_imports.py
 test_imports_detailed.py
-benchmarking.py
+benchmarking.py
+
+# Data
+cached_datasets/
+datasets/
@@ -28,7 +28,9 @@ def synth(self, data, local_save=True):
         data['unit'] = extracted_unit.unit
         decompressed_audio = self.model.decompress(compressed_audio).audio_data.squeeze(0)
         if local_save:
-            audio_path = f"dummy-descript-audio-codec-{self.model_type}/{data['id']}.wav"
+            from SoundCodec.base_codec.general import uuid
+            audio_id = data.get('id', str(uuid.uuid4()))
+            audio_path = f"dummy-descript-audio-codec-{self.model_type}/{audio_id}.wav"
             save_audio(decompressed_audio, audio_path, self.sampling_rate)
             data['audio'] = audio_path
         else:
 
@@ -20,7 +20,9 @@ def synth(self, data, local_save=True):
         data['unit'] = extracted_unit.unit
         audio_values = self.decode_unit(extracted_unit.stuff_for_synth)
         if local_save:
-            audio_path = f"dummy_{self.pretrained_model_name}/{data['id']}.wav"
+            from SoundCodec.base_codec.general import uuid
+            audio_id = data.get('id', str(uuid.uuid4()))
+            audio_path = f"dummy_{self.pretrained_model_name.replace('/', '_')}/{audio_id}.wav"
             save_audio(audio_values, audio_path, self.sampling_rate)
             data['audio'] = audio_path
         else:
 
@@ -47,7 +47,9 @@ def synth(self, data, local_save=True):
         data['unit'] = extracted_unit.unit
         audio_array = self.decode_unit(extracted_unit.stuff_for_synth)
         if local_save:
-            audio_path = f"dummy-funcodec-{self.model_name}/{data['id']}.wav"
+            from SoundCodec.base_codec.general import uuid
+            audio_id = data.get('id', str(uuid.uuid4()))
+            audio_path = f"dummy-funcodec-{self.model_name}/{audio_id}.wav"
             save_audio(audio_array, audio_path, self.sampling_rate)
             data['audio'] = audio_path
         else:
 
@@ -2,6 +2,7 @@
 from dataclasses import dataclass
 from typing import List, Union, Any
 from abc import ABC, abstractmethod
+import uuid
 
 import numpy as np
 import torchaudio
@@ -46,10 +47,18 @@ def to_dict(self):
         }
 
 
-def save_audio(wav: torch.Tensor, path, sample_rate: int, rescale: bool = False):
+def save_audio(wav: Union[torch.Tensor, np.ndarray], path, sample_rate: int, rescale: bool = False):
+    if sample_rate is None:
+        raise ValueError(f"sample_rate cannot be None when saving audio to {path}")
+    if isinstance(wav, np.ndarray):
+        wav = torch.from_numpy(wav)
+    if wav.ndim == 1:
+        wav = wav.unsqueeze(0)
+    
     folder_path = os.path.dirname(path)
     if not os.path.exists(folder_path):
         os.makedirs(folder_path)
+    print(f"Saving audio to {path} with sample_rate {sample_rate}")
     limit = 0.99
     max_val = wav.abs().max()
     wav = wav * min(limit / max_val, 1) if rescale else wav.clamp(-limit, limit)
@@ -86,7 +95,8 @@ def synth(self, data, local_save=True):
         data['unit'] = extracted_unit.unit
         audio_values = self.decode_unit(extracted_unit.stuff_for_synth)
         if local_save:
-            audio_path = f"dummy_{self.setting}/{data['id']}.wav"
+            audio_id = data.get('id', str(uuid.uuid4()))
+            audio_path = f"dummy_{self.setting}/{audio_id}.wav"
             save_audio(audio_values, audio_path, self.sampling_rate)
             data['audio'] = audio_path
         else:
@@ -137,7 +147,8 @@ def batch_synth(self, data_list: List[dict], local_save=True) -> List[dict]:
         for i, (data, audio_values, unit) in enumerate(zip(data_list, batch_audio_values, batch_extracted_unit.units)):
             data['unit'] = unit
             if local_save:
-                audio_path = f"dummy_{self.setting}/{data['id']}.wav"
+                audio_id = data.get('id', str(uuid.uuid4()))
+                audio_path = f"dummy_{self.setting}/{audio_id}.wav"
                 save_audio(torch.tensor(audio_values), audio_path, self.sampling_rate)
                 data['audio'] = audio_path
             else:
 
@@ -24,7 +24,8 @@ def _setup_config_and_model(self):
         self.model = WavTokenizer.from_pretrained0802(self.config_path, self.ckpt_path)
         self.model.eval()
         self.model = self.model.to(self.device)
-        self.sampling_rate = getattr(self, "sampling_rate", 24000)
+        if self.sampling_rate is None:
+            self.sampling_rate = 24000
 
     def _download_resources(self):
         import os
 
@@ -1,3 +1,41 @@
+import importlib
+
+from datasets import load_dataset as hf_load_dataset
+
+# Splits tried, in order, when a Hub dataset exposes several of them.
+_PREFERRED_SPLITS = ("test", "validation", "train")
+
+
 def load_dataset(dataset_name):
-    module = __import__(f"dataset.{dataset_name}", fromlist=[dataset_name])
-    return module.load_data()
+    """Load a dataset from a local loader module or the Hugging Face Hub.
+
+    ``SoundCodec.dataset.<dataset_name>`` is imported first and the result of
+    its ``load_data()`` is returned.  If no such module exists, the name is
+    passed to ``datasets.load_dataset`` and one split is returned: "test",
+    "validation" or "train" (in that order), otherwise the first split.
+
+    Raises:
+        ImportError: the local module exists but one of its own imports fails.
+        ValueError: the Hub dataset is a mapping with no splits.
+    """
+    module_name = f"SoundCodec.dataset.{dataset_name}"
+    try:
+        module = importlib.import_module(module_name)
+    except ModuleNotFoundError as err:
+        # Fall back only when the dataset module itself (or a parent segment
+        # of its dotted name) is what is missing.  A dependency missing inside
+        # an existing loader is a real error and must not be hidden.
+        if not f"{module_name}.".startswith(f"{err.name}."):
+            raise
+    else:
+        # Outside the try so that errors from load_data() are never swallowed.
+        return module.load_data()
+
+    ds = hf_load_dataset(dataset_name)
+    if not isinstance(ds, dict):
+        return ds
+    # Preferred names first, then whatever order the mapping itself has.
+    for split in (*_PREFERRED_SPLITS, *ds):
+        if split in ds:
+            return ds[split]
+    raise ValueError(f"Dataset {dataset_name!r} has no splits")
@@ -1,19 +1,22 @@
 import argparse
 from datasets import DatasetDict, Audio, load_from_disk
 from SoundCodec.codec import load_codec, list_codec
-from SoundCodec.dataset import load_dataset, apply_audio_cast
-from SoundCodec.dataset.general import extract_unit
+from SoundCodec.dataset import load_dataset
+from SoundCodec.dataset.general import apply_audio_cast, extract_unit
 
 
 def run_experiment(dataset_name):
     cleaned_dataset = load_dataset(dataset_name)
-    d_item = next(iter(cleaned_dataset))
-    sampling_rate = d_item['audio']['sampling_rate']
-    cleaned_dataset = load_dataset(dataset_name)
+    if args.limit:
+        cleaned_dataset = cleaned_dataset.select(range(min(args.limit, len(cleaned_dataset))))
+    
     print("before filter duration", cleaned_dataset)
     cleaned_dataset = cleaned_dataset.filter(
         lambda x: len(x['audio']['array']) / x['audio']['sampling_rate'] <= args.max_duration)
     print("after filter duration", cleaned_dataset)
+    
+    d_item = next(iter(cleaned_dataset))
+    sampling_rate = d_item['audio']['sampling_rate']
     cleaned_dataset = apply_audio_cast(cleaned_dataset, sampling_rate)
     if not args.extract_unit_only:
         datasets_dict = DatasetDict({'original': cleaned_dataset})
@@ -66,5 +69,6 @@ def run_experiment(dataset_name):
     parser.add_argument('--max_duration', required=False, type=int, default=120)
     parser.add_argument('--push_to_hub', required=False, action='store_true')
     parser.add_argument('--upload_name', required=False, default='Codec-SUPERB')
+    parser.add_argument('--limit', required=False, type=int, default=None)
     args = parser.parse_args()
     run_experiment(args.dataset)
@@ -0,0 +1,98 @@
+"""Regenerate the leaderboard data module from a benchmark results JSON file."""
+import argparse
+import json
+import math
+
+# Defaults reproduce the paths this script was originally hardcoded to use.
+DEFAULT_RESULTS_PATH = '._datasets_voidful_codec-superb-tiny_synth_evaluation_results_20251218_204458.json'
+DEFAULT_OUTPUT_PATH = 'web/src/results/data.js'
+
+# Hardcoded BPS mapping (bitrate in kbps or as used in data.js)
+bps_mapping = {
+    'academicodec_hifi_16k_320d': 2,
+    'academicodec_hifi_16k_320d_large_uni': 2,
+    'academicodec_hifi_24k_320d': 3,
+    'audiodec_24k_320d': 6.4,
+    'auv': 1, # Estimated or placeholder
+    'bigcodec_1k': 1, # Estimated or placeholder
+    'dac_16k': 6,
+    'dac_24k': 24,
+    'dac_44k': 8,
+    'encodec_24k_12bps': 12,
+    'encodec_24k_1_5bps': 1.5,
+    'encodec_24k_24bps': 24,
+    'encodec_24k_3bps': 3,
+    'encodec_24k_6bps': 6,
+    'funcodec_en_libritts_16k_gr1nq32ds320': 16,
+    'funcodec_en_libritts_16k_gr8nq32ds320': 16,
+    'funcodec_en_libritts_16k_nq32ds320': 16,
+    'funcodec_en_libritts_16k_nq32ds640': 8,
+    'funcodec_zh_en_16k_nq32ds320': 16,
+    'funcodec_zh_en_16k_nq32ds640': 8,
+    's3tokenizer_v1': 0.1, # Semantic tokenizer
+    'speech_tokenizer_16k': 4,
+    'sqcodec_16k_0k75bps': 0.75,
+    'sqcodec_16k_12kbps': 12,
+    'sqcodec_16k_1k5bps': 1.5,
+    'sqcodec_16k_3kbps': 3,
+    'sqcodec_16k_6kbps': 6,
+    'sqcodec_24k_12kbps': 12,
+    'sqcodec_24k_24kbps': 24,
+    'unicodec_24k': 12, # Estimated
+    'wavtokenizer_24k_small_600_4096': 0.1,
+    'wavtokenizer_24k_medium_600_4096': 0.1,
+    'wavtokenizer_24k_large_600_4096': 0.1,
+    'wavtokenizer_24k_large_speech_75token': 0.1
+}
+
+# Metrics to include
+metrics_to_include = ['mel', 'pesq', 'stoi', 'f0corr']
+
+
+def clean_metric(value):
+    """Return *value* as a float rounded to 3 decimals, or 0.0 if unusable.
+
+    Missing (``None``), non-numeric and NaN values all map to 0.0 so that one
+    bad measurement cannot abort the whole export.
+    """
+    try:
+        number = float(value)
+    except (TypeError, ValueError):
+        return 0.0
+    return 0.0 if math.isnan(number) else round(number, 3)
+
+
+def build_entry(model_name, metrics):
+    """Build one leaderboard row: the codec's bps plus the selected metrics."""
+    entry = {'bps': bps_mapping.get(model_name, 0)}
+    for m in metrics_to_include:
+        entry[m] = clean_metric(metrics.get(m))
+    return entry
+
+
+def main(results_path=DEFAULT_RESULTS_PATH, output_path=DEFAULT_OUTPUT_PATH):
+    """Convert the results JSON at *results_path* into a JS module at *output_path*."""
+    # Load benchmark results
+    with open(results_path, 'r', encoding='utf-8') as f:
+        benchmark_results = json.load(f)
+
+    new_results = {
+        model_name: build_entry(model_name, metrics)
+        for model_name, metrics in benchmark_results.items()
+    }
+
+    # Format as JavaScript
+    js_content = "const results = " + json.dumps(new_results, indent=1) + ";\nexport default results;"
+
+    with open(output_path, 'w', encoding='utf-8') as f:
+        f.write(js_content)
+
+    print(f"Updated {output_path} with {len(new_results)} codecs.")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--results', default=DEFAULT_RESULTS_PATH)
+    parser.add_argument('--output', default=DEFAULT_OUTPUT_PATH)
+    cli_args = parser.parse_args()
+    main(cli_args.results, cli_args.output)