MaartenGr · roli-lpci · Feb 25, 2026 · Feb 25, 2026
diff --git a/bertopic/_bertopic.py b/bertopic/_bertopic.py
diff --git a/bertopic/_save_utils.py b/bertopic/_save_utils.py
@@ -21,9 +21,6 @@
 except ImportError:
     _has_hf_hub = False
 
-# Typing
-from typing import Union
-
 # Pytorch check
 try:
     import torch
@@ -113,7 +110,7 @@ def push_to_hf_hub(
     create_pr: bool = False,
     model_card: bool = True,
     serialization: str = "safetensors",
-    save_embedding_model: Union[str, bool] = True,
+    save_embedding_model: str | bool = True,
     save_ctfidf: bool = False,
 ):
     """Push your BERTopic model to a HuggingFace Hub.
@@ -450,9 +447,9 @@ def save_topics(model, path: str):
         json.dump(topics, f, indent=2, cls=NumpyEncoder)
 
 
-def load_cfg_from_json(json_file: Union[str, os.PathLike]):
+def load_cfg_from_json(json_file: str | os.PathLike):
     """Load configuration from json."""
-    with open(json_file, "r", encoding="utf-8") as reader:
+    with open(json_file, encoding="utf-8") as reader:
         text = reader.read()
     return json.loads(text)
 
@@ -463,7 +460,7 @@ def default(self, obj):
             return int(obj)
         if isinstance(obj, np.floating):
             return float(obj)
-        return super(NumpyEncoder, self).default(obj)
+        return super().default(obj)
 
 
 def get_package_versions():

diff --git a/bertopic/_utils.py b/bertopic/_utils.py
@@ -4,7 +4,7 @@
 from collections.abc import Iterable
 from scipy.sparse import csr_matrix
 from scipy.spatial.distance import squareform
-from typing import Optional, Union, Tuple, Any
+from typing import Any
 
 
 class MyLogger:
@@ -142,7 +142,7 @@ def validate_distance_matrix(X, n_samples):
             "distance matrix of shape (n*(n-1)/2,) or a "
             "2-D square distance matrix of shape (n, n)."
             "where n is the number of documents."
-            "Got a distance matrix of shape %s" % str(s)
+            f"Got a distance matrix of shape {s}"
         )
 
     # Make sure its entries are non-negative
@@ -177,11 +177,11 @@ def get_unique_distances(dists: np.array, noise_max=1e-7) -> np.array:
 
 
 def select_topic_representation(
-    ctfidf_embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
-    embeddings: Optional[Union[np.ndarray, csr_matrix]] = None,
+    ctfidf_embeddings: np.ndarray | csr_matrix | None = None,
+    embeddings: np.ndarray | csr_matrix | None = None,
     use_ctfidf: bool = True,
     output_ndarray: bool = False,
-) -> Tuple[np.ndarray, bool]:
+) -> tuple[np.ndarray, bool]:
     """Select the topic representation.
 
     Arguments:
@@ -199,7 +199,7 @@ def select_topic_representation(
         The selected topic representation and a boolean indicating whether it is c-TF-IDF.
     """
 
-    def to_ndarray(array: Union[np.ndarray, csr_matrix]) -> np.ndarray:
+    def to_ndarray(array: np.ndarray | csr_matrix) -> np.ndarray:
         if isinstance(array, csr_matrix):
             return array.toarray()
         return array

diff --git a/bertopic/backend/_base.py b/bertopic/backend/_base.py
@@ -1,5 +1,4 @@
 import numpy as np
-from typing import List
 
 
 class BaseEmbedder:
@@ -18,7 +17,7 @@ def __init__(self, embedding_model=None, word_embedding_model=None):
         self.embedding_model = embedding_model
         self.word_embedding_model = word_embedding_model
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 
@@ -32,7 +31,7 @@ def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
         """
         pass
 
-    def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_words(self, words: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n words into an n-dimensional
         matrix of embeddings.
 
@@ -47,7 +46,7 @@ def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
         """
         return self.embed(words, verbose)
 
-    def embed_documents(self, document: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_documents(self, document: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_cohere.py b/bertopic/backend/_cohere.py
@@ -1,7 +1,8 @@
 import time
 import numpy as np
 from tqdm import tqdm
-from typing import Any, List, Mapping
+from typing import Any
+from collections.abc import Mapping
 from bertopic.backend import BaseEmbedder
 
 
@@ -60,7 +61,7 @@ def __init__(
         else:
             self.embed_kwargs["model"] = self.embedding_model
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_fastembed.py b/bertopic/backend/_fastembed.py
@@ -1,5 +1,4 @@
 import numpy as np
-from typing import List
 from fastembed import TextEmbedding
 
 from bertopic.backend import BaseEmbedder
@@ -38,7 +37,7 @@ def __init__(self, embedding_model: str = "BAAI/bge-small-en-v1.5"):
                 "The supported TextEmbedding model list is here: https://qdrant.github.io/fastembed/examples/Supported_Models/"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_flair.py b/bertopic/backend/_flair.py
@@ -1,6 +1,5 @@
 import numpy as np
 from tqdm import tqdm
-from typing import Union, List
 from flair.data import Sentence
 from flair.embeddings import DocumentEmbeddings, TokenEmbeddings, DocumentPoolEmbeddings
 
@@ -30,7 +29,7 @@ class FlairBackend(BaseEmbedder):
     ```
     """
 
-    def __init__(self, embedding_model: Union[TokenEmbeddings, DocumentEmbeddings]):
+    def __init__(self, embedding_model: TokenEmbeddings | DocumentEmbeddings):
         super().__init__()
 
         # Flair word embeddings
@@ -52,7 +51,7 @@ def __init__(self, embedding_model: Union[TokenEmbeddings, DocumentEmbeddings]):
                 "`roberta = TransformerDocumentEmbeddings('roberta-base')`"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_gensim.py b/bertopic/backend/_gensim.py
@@ -1,6 +1,5 @@
 import numpy as np
 from tqdm import tqdm
-from typing import List
 from bertopic.backend import BaseEmbedder
 from gensim.models.keyedvectors import Word2VecKeyedVectors
 
@@ -36,7 +35,7 @@ def __init__(self, embedding_model: Word2VecKeyedVectors):
                 "`ft = api.load('fasttext-wiki-news-subwords-300')`"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_hftransformers.py b/bertopic/backend/_hftransformers.py
@@ -1,7 +1,6 @@
 import numpy as np
 
 from tqdm import tqdm
-from typing import List
 from torch.utils.data import Dataset
 from sklearn.preprocessing import normalize
 from transformers.pipelines import Pipeline
@@ -42,7 +41,7 @@ def __init__(self, embedding_model: Pipeline):
                 "pipeline('feature-extraction', model='distilbert-base-cased', device=0)"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_langchain.py b/bertopic/backend/_langchain.py
@@ -1,5 +1,3 @@
-from typing import List
-
 import numpy as np
 from bertopic.backend import BaseEmbedder
 from langchain_core.embeddings import Embeddings
@@ -25,7 +23,7 @@ class LangChainBackend(BaseEmbedder):
     def __init__(self, embedding_model: Embeddings):
         self.embedding_model = embedding_model
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_model2vec.py b/bertopic/backend/_model2vec.py
@@ -1,5 +1,4 @@
 import numpy as np
-from typing import List, Union
 from model2vec import StaticModel
 from sklearn.feature_extraction.text import CountVectorizer
 
@@ -53,7 +52,7 @@ class Model2VecBackend(BaseEmbedder):
 
     def __init__(
         self,
-        embedding_model: Union[str, StaticModel],
+        embedding_model: str | StaticModel,
         distill: bool = False,
         distill_kwargs: dict = {},
         distill_vectorizer: str | None = None,
@@ -87,7 +86,7 @@ def __init__(
                 "`model = StaticModel.from_pretrained('minishlab/potion-base-8M')`"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_multimodal.py b/bertopic/backend/_multimodal.py
@@ -1,7 +1,6 @@
 import numpy as np
 from PIL import Image
 from tqdm import tqdm
-from typing import List, Union
 from sentence_transformers import SentenceTransformer
 
 from bertopic.backend import BaseEmbedder
@@ -44,8 +43,8 @@ class MultiModalBackend(BaseEmbedder):
 
     def __init__(
         self,
-        embedding_model: Union[str, SentenceTransformer],
-        image_model: Union[str, SentenceTransformer] = None,
+        embedding_model: str | SentenceTransformer,
+        image_model: str | SentenceTransformer | None = None,
         batch_size: int = 32,
     ):
         super().__init__()
@@ -84,7 +83,7 @@ def __init__(
         except:  # noqa: E722
             self.tokenizer = None
 
-    def embed(self, documents: List[str], images: List[str] | None = None, verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], images: list[str] | None = None, verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words or images into an n-dimensional
         matrix of embeddings.
 
@@ -122,7 +121,7 @@ def embed(self, documents: List[str], images: List[str] | None = None, verbose:
         elif image_embeddings is not None:
             return image_embeddings
 
-    def embed_documents(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_documents(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 
@@ -138,7 +137,7 @@ def embed_documents(self, documents: List[str], verbose: bool = False) -> np.nda
         embeddings = self.embedding_model.encode(truncated_docs, show_progress_bar=verbose)
         return embeddings
 
-    def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_words(self, words: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_openai.py b/bertopic/backend/_openai.py
@@ -2,7 +2,8 @@
 import openai
 import numpy as np
 from tqdm import tqdm
-from typing import List, Mapping, Any
+from typing import Any
+from collections.abc import Mapping
 from bertopic.backend import BaseEmbedder
 
 
@@ -51,7 +52,7 @@ def __init__(
         elif not self.generator_kwargs.get("engine"):
             self.generator_kwargs["model"] = self.embedding_model
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_sentencetransformers.py b/bertopic/backend/_sentencetransformers.py
@@ -1,5 +1,4 @@
 import numpy as np
-from typing import List, Union
 from sentence_transformers import SentenceTransformer
 from sentence_transformers.models import StaticEmbedding
 
@@ -50,7 +49,7 @@ class SentenceTransformerBackend(BaseEmbedder):
     ```
     """
 
-    def __init__(self, embedding_model: Union[str, SentenceTransformer], model2vec: bool = False):
+    def __init__(self, embedding_model: str | SentenceTransformer, model2vec: bool = False):
         super().__init__()
 
         self._hf_model = None
@@ -69,7 +68,7 @@ def __init__(self, embedding_model: Union[str, SentenceTransformer], model2vec:
                 "`model = SentenceTransformer('all-MiniLM-L6-v2')`"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_spacy.py b/bertopic/backend/_spacy.py
@@ -1,6 +1,5 @@
 import numpy as np
 from tqdm import tqdm
-from typing import List
 from bertopic.backend import BaseEmbedder
 
 
@@ -61,7 +60,7 @@ def __init__(self, embedding_model):
                 "or create a nlp model using: `nlp = spacy.load('en_core_web_md')"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_use.py b/bertopic/backend/_use.py
@@ -1,6 +1,5 @@
 import numpy as np
 from tqdm import tqdm
-from typing import List
 
 from bertopic.backend import BaseEmbedder
 
@@ -37,7 +36,7 @@ def __init__(self, embedding_model):
                 "`embedding_model = tensorflow_hub.load(path_to_model)`"
             )
 
-    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+    def embed(self, documents: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n documents/words into an n-dimensional
         matrix of embeddings.
 

diff --git a/bertopic/backend/_word_doc.py b/bertopic/backend/_word_doc.py
@@ -1,5 +1,4 @@
 import numpy as np
-from typing import List
 from bertopic.backend._base import BaseEmbedder
 from bertopic.backend._utils import select_backend
 
@@ -13,7 +12,7 @@ def __init__(self, embedding_model, word_embedding_model):
         self.embedding_model = select_backend(embedding_model)
         self.word_embedding_model = select_backend(word_embedding_model)
 
-    def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_words(self, words: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n words into an n-dimensional
         matrix of embeddings.
 
@@ -28,7 +27,7 @@ def embed_words(self, words: List[str], verbose: bool = False) -> np.ndarray:
         """
         return self.word_embedding_model.embed(words, verbose)
 
-    def embed_documents(self, document: List[str], verbose: bool = False) -> np.ndarray:
+    def embed_documents(self, document: list[str], verbose: bool = False) -> np.ndarray:
         """Embed a list of n words into an n-dimensional
         matrix of embeddings.