Skip to content

Commit e49b022

Browse files
authored
Add BOOSTED_LM_WORDS env and merge into args (#109)
Expose BOOSTED_LM_WORDS in docker-compose and update the ASR argument parsing to incorporate it. docker/docker-compose.yml now includes a BOOSTED_LM_WORDS env var (default: Tensor). src/om1_speech/riva/args.py imports os, defines an expanded default boosted word list (adds Bytes and Pixel; OM1 is no longer included), reads BOOSTED_LM_WORDS (comma-separated) from the environment, strips whitespace from each entry, discards empty entries and words already present in the default list, and extends the parser's --boosted-lm-words default with the remaining words. Also updated the help text to note the env var option.
1 parent 56fbb4f commit e49b022

2 files changed

Lines changed: 27 additions & 11 deletions

File tree

docker/docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ services:
6161
- RIVA_API_KEY=${RIVA_API_KEY}
6262
- RIVA_API_NGC_ORG=${RIVA_API_NGC_ORG:-openmind-1}
6363
- RIVA_EULA=${RIVA_EULA:-accept}
64+
- BOOSTED_LM_WORDS=${BOOSTED_LM_WORDS:-Tensor}
6465
- GLOG_minloglevel=2
6566

6667
volumes:

src/om1_speech/riva/args.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-License-Identifier: MIT
33

44
import argparse
5+
import os
56
from pathlib import Path
67

78

@@ -75,20 +76,34 @@ def add_asr_config_argparse_parameters(
7576
help="Language code of the model to be used.",
7677
)
7778
parser.add_argument("--model-name", default="", help="Model name to be used.")
79+
80+
default_boosted_words = [
81+
"OpenMind",
82+
"Bits",
83+
"Bytes",
84+
"Pixel",
85+
"hello",
86+
"GTC",
87+
"Unitree",
88+
"robot",
89+
"NVIDIA",
90+
]
91+
92+
env_boosted_words = os.getenv("BOOSTED_LM_WORDS", "")
93+
if env_boosted_words:
94+
additional_words = [
95+
word.strip() for word in env_boosted_words.split(",") if word.strip()
96+
]
97+
unique_words = [
98+
word for word in additional_words if word not in default_boosted_words
99+
]
100+
default_boosted_words.extend(unique_words)
101+
78102
parser.add_argument(
79103
"--boosted-lm-words",
80104
action="append",
81-
default=[
82-
"OpenMind",
83-
"Bits",
84-
"hello",
85-
"GTC",
86-
"Unitree",
87-
"robot",
88-
"OM1",
89-
"NVIDIA",
90-
],
91-
help="Words to boost when decoding. Can be used multiple times to boost multiple words.",
105+
default=default_boosted_words,
106+
help="Words to boost when decoding. Can be used multiple times to boost multiple words. Can also be set via BOOSTED_LM_WORDS environment variable (comma-separated).",
92107
)
93108
parser.add_argument(
94109
"--boosted-lm-score",

0 commit comments

Comments
 (0)