Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cnlpt/cnlp_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def __init__(self, data_dir: str, tasks: set[str] = None, max_train_items=-1):
else:
sep = "\t"

self.dataset = load_dataset("csv", sep=sep, data_files=data_files)
self.dataset = load_dataset("csv", sep=sep, data_files=data_files, keep_default_na=False)

## find out what tasks are available to this dataset, and see the overlap with what the
## user specified at the cli, remove those tasks so we don't also get them from other datasets
Expand Down
22 changes: 6 additions & 16 deletions src/cnlpt/train_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,10 @@ def main(
dataset.tasks_to_labels[task] = dataset.tasks_to_labels[task][1:] + [
dataset.tasks_to_labels[task][0]
]
labels = dataset.processed_dataset["train"][task]
if tagger[task]:
labels = [token_label for sent in dataset.processed_dataset["train"][task] for token_label in sent.split()]
else:
labels = dataset.processed_dataset["train"][task]
weights = []
label_counts = Counter(labels)
for label in dataset.tasks_to_labels[task]:
Expand Down Expand Up @@ -478,25 +481,13 @@ def main(
# in this case we're looking at a fine-tuned model (?)
character_level=data_args.character_level,
)

if training_args.do_train:
# Setting 1) only load weights from the encoder
raise NotImplementedError(
"This functionality has not been restored yet"
)
model = CnlpModelForClassification(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this definition will load a fine-tuned cnlpt model as an encoder, but re-initialize the classifier head. This would be the expected behavior for some use cases, but missing some use cases. I think we want to edit this to explicitly handle the two different cases (even if one still throws an exception), rather than having the user guess what might be happening. We should force them to specify whether to keep or ignore existing classifiers (as in the hier model).

model_path=model_args.encoder_name,
config=config,
cache_dir=model_args.cache_dir,
tagger=tagger,
relations=relations,
class_weights=dataset.class_weights,
final_task_weight=training_args.final_task_weight,
use_prior_tasks=model_args.use_prior_tasks,
argument_regularization=model_args.arg_reg,
)
delattr(model, "classifiers")
delattr(model, "feature_extractors")
if training_args.do_train:
tempmodel = tempfile.NamedTemporaryFile(dir=model_args.cache_dir)
torch.save(model.state_dict(), tempmodel)
Expand All @@ -511,7 +502,6 @@ def main(
freeze=training_args.freeze,
bias_fit=training_args.bias_fit,
)

else:
# This only works when model_args.encoder_name is one of the
# model card from https://huggingface.co/models
Expand Down Expand Up @@ -675,7 +665,7 @@ def compute_metrics_fn(p: EvalPrediction):
model.best_eval_results = metrics
if trainer.is_world_process_zero():
if training_args.do_train:
trainer.save_model()
trainer.save_model() # NOTE: a RobertaConfig is loaded here. why?
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you want to keep this in here?

tokenizer.save_pretrained(training_args.output_dir)
if model_name == "cnn" or model_name == "lstm":
with open(
Expand Down Expand Up @@ -884,7 +874,7 @@ def compute_metrics_fn(p: EvalPrediction):

out_table = process_prediction(
task_names=dataset.tasks,
error_analysis=False,
error_analysis=training_args.error_analysis,
output_prob=training_args.output_prob,
character_level=data_args.character_level,
task_to_label_packet=task_to_label_packet,
Expand Down