Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# fmt: off

SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES", "it-IT", "fr-FR", "vi-VN", "ja-JP", "hi-IN"]
# Locales with IPA character-set / punctuation support; "pt-BR" (Brazilian
# Portuguese) is the newly added entry. Space after the comma restored for
# PEP 8 consistency with the rest of the list.
SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES", "it-IT", "fr-FR", "vi-VN", "ja-JP", "hi-IN", "pt-BR"]

DEFAULT_PUNCTUATION = (
',', '.', '!', '?', '-',
Expand Down Expand Up @@ -106,7 +106,7 @@
'ॅ', 'ॉ', 'ँ', 'ं', 'ः', '्', '़', 'ॊ', 'ॢ', 'ॣ', 'ॆ',
# Danda (period)
'।',
),
)
}

IPA_CHARACTER_SETS = {
Expand Down Expand Up @@ -347,5 +347,12 @@ def get_ipa_punctuation_list(locale):
'・',
]
)
elif locale == "hi-IN":
punct_set.update(
[
'।',
'॥',
]
)
punct_list = sorted(list(punct_set))
return punct_list
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"any_locale_word_tokenize",
"english_word_tokenize",
"LATIN_CHARS_ALL",
"INDIC_CHARS_ALL",
"normalize_unicode_text",
"japanese_text_preprocessing",
]
Expand All @@ -52,11 +53,24 @@
LATIN_ALPHABET_BASIC = "A-Za-z"
ACCENTED_CHARS = "À-ÖØ-öø-ÿ"
LATIN_CHARS_ALL = f"{LATIN_ALPHABET_BASIC}{ACCENTED_CHARS}"

# Indic characters based on https://www.unicode.org/charts/
# Hindi, Marathi, Nepali, Sanskrit https://en.wikipedia.org/wiki/Devanagari_(Unicode_block)
DEVANAGARI_CHARS = (
r'\u0900-\u0963\u0966-\u097F' # excluding danda (U+0964), double danda (U+0965) so they are treated as punctuation
)
BENGALI_CHARS = r'\u0980-\u09FF' # Bengali, Assamese
TAMIL_CHARS = r'\u0B80-\u0BFF' # Tamil
TELUGU_CHARS = r'\u0C00-\u0C7F' # Telugu
KANNADA_CHARS = r'\u0C80-\u0CFF' # Kannada
GUJARATI_CHARS = r'\u0A80-\u0AFF' # Gujarati
INDIC_CHARS_ALL = f"{DEVANAGARI_CHARS}{BENGALI_CHARS}{TAMIL_CHARS}{TELUGU_CHARS}{KANNADA_CHARS}{GUJARATI_CHARS}"

_WORDS_RE_EN = re.compile(
fr"([{LATIN_ALPHABET_BASIC}]+(?:[{LATIN_ALPHABET_BASIC}\-']*[{LATIN_ALPHABET_BASIC}]+)*)|(\|[^|]*\|)|([^{LATIN_ALPHABET_BASIC}|]+)"
)
_WORDS_RE_ANY_LOCALE = re.compile(
fr"([{LATIN_CHARS_ALL}]+(?:[{LATIN_CHARS_ALL}\-']*[{LATIN_CHARS_ALL}]+)*)|(\|[^|]*\|)|([^{LATIN_CHARS_ALL}|]+)"
fr"([{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}]+(?:[{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}\-']*[{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}]+)*)|(\|[^|]*\|)|([^{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}|]+)"
)


Expand Down
28 changes: 22 additions & 6 deletions nemo/collections/tts/g2p/models/i18n_ipa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from nemo.collections.common.tokenizers.text_to_speech.ipa_lexicon import validate_locale
from nemo.collections.common.tokenizers.text_to_speech.tokenizer_utils import (
INDIC_CHARS_ALL,
LATIN_CHARS_ALL,
any_locale_word_tokenize,
english_word_tokenize,
Expand All @@ -29,18 +30,22 @@
from nemo.collections.tts.g2p.utils import GRAPHEME_CASE_MIXED, GRAPHEME_CASE_UPPER, set_grapheme_case
from nemo.utils import logging

# Compiled regex pattern for Indic scripts (used in dictionary parsing).
# Matches when the string starts with a character from any supported Indic
# Unicode block (INDIC_CHARS_ALL); callers test it via .match() on a single
# character, so the leading '^' is redundant but harmless.
_INDIC_PATTERN = re.compile(f'^[{INDIC_CHARS_ALL}]')


class IpaG2p(BaseG2p):
# fmt: off
STRESS_SYMBOLS = ["ˈ", "ˌ"]
# Regex for roman characters, accented characters, and locale-agnostic numbers/digits
CHAR_REGEX = re.compile(fr"[{LATIN_CHARS_ALL}\d]")
PUNCT_REGEX = re.compile(fr"[^{LATIN_CHARS_ALL}\d]")
CHAR_REGEX = re.compile(fr"[{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}\d]")
PUNCT_REGEX = re.compile(fr"[^{LATIN_CHARS_ALL}{INDIC_CHARS_ALL}\d]")
# fmt: on

def __init__(
self,
phoneme_dict: Union[str, pathlib.Path, Dict[str, List[List[str]]]],
# phoneme_dict: Union[str, pathlib.Path, Dict[str, List[List[str]]]],
phoneme_dict: Union[str, pathlib.Path, List[Union[str, pathlib.Path]], Dict[str, List[List[str]]]],
locale: str = "en-US",
apply_to_oov_word: Optional[Callable[[str], str]] = None,
ignore_ambiguous_words: bool = True,
Expand Down Expand Up @@ -154,10 +159,10 @@ def __init__(

@staticmethod
def _parse_phoneme_dict(
phoneme_dict: Union[str, pathlib.Path, Dict[str, List[List[str]]]]
phoneme_dict: Union[str, pathlib.Path, List[Union[str, pathlib.Path]], Dict[str, List[List[str]]]]
) -> Dict[str, List[List[str]]]:
"""
parse an input IPA dictionary and save it as a dict object.
parse an input IPA dictionary (or multiple) and save it as a dict object.

Args:
phoneme_dict (Union[str, pathlib.Path, dict]): Path to file in CMUdict format or an IPA dict object with
Expand All @@ -167,6 +172,14 @@ def _parse_phoneme_dict(

Returns: a dict object (Dict[str, List[List[str]]]).
"""
if isinstance(phoneme_dict, list):
merged = defaultdict(list)
for path in phoneme_dict:
parsed = IpaG2p._parse_phoneme_dict(path)
for word, prons in parsed.items():
merged[word].extend(prons)
return merged

if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path):
# load the dictionary file where there may exist a digit suffix after a word, e.g. "Word(2)", which
# represents the pronunciation variant of that word.
Expand All @@ -190,6 +203,7 @@ def _parse_phoneme_dict(
or 'À' <= line[0] <= 'Ö'
or 'Ø' <= line[0] <= 'ö'
or 'ø' <= line[0] <= 'ÿ'
or _INDIC_PATTERN.match(line[0])
or line[0] == "'"
):
parts = line.strip().split(maxsplit=1)
Expand Down Expand Up @@ -217,7 +231,9 @@ def _parse_phoneme_dict(

return phoneme_dict_obj

def replace_dict(self, phoneme_dict: Union[str, pathlib.Path, Dict[str, List[List[str]]]]):
def replace_dict(
self, phoneme_dict: Union[str, pathlib.Path, List[Union[str, pathlib.Path]], Dict[str, List[List[str]]]]
):
"""
Replace model's phoneme dictionary with a custom one
"""
Expand Down
Loading
Loading