ner_utils.train_ner_model now allows a custom trainer class

Aethor · Aethor · commit 0751f5e1095b · 2026-04-05T20:03:25.000+08:00
diff --git a/renard/ner_utils.py b/renard/ner_utils.py
@@ -337,6 +337,7 @@ def train_ner_model(
     targs: TrainingArguments,
     train_split: str = "train",
     valid_split: str = "valid",
+    trainer_class: type[Trainer] = Trainer,
 ) -> PreTrainedModel:
     """Train a NER model on the given dataset.
 
@@ -347,6 +348,8 @@ def train_ner_model(
         trainer.
     :param train_split: split of the dataset used for train.
     :param valid_split: split of the dataset used for validation.
+    :param trainer_class: trainer class to use. Can be used to
+        override the default Hugging Face trainer.
     """
     from transformers import DataCollatorForTokenClassification
 
@@ -366,12 +369,11 @@ def train_ner_model(
         label2id={label: i for i, label in enumerate(label_lst)},
     )
 
-    trainer = Trainer(
+    trainer = trainer_class(
         model,
         targs,
         train_dataset=dataset[train_split],
         eval_dataset=dataset[valid_split],
-        # data_collator=DataCollatorForTokenClassificationWithBatchEncoding(tokenizer),
         data_collator=DataCollatorForTokenClassification(tokenizer),
         tokenizer=tokenizer,
     )