-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparameters.py
More file actions
89 lines (66 loc) · 2.93 KB
/
parameters.py
File metadata and controls
89 lines (66 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import argparse
import logging
import os
# NOTE(review): wildcard import of project-wide constants; which names this
# module actually relies on is not visible from this file alone — verify
# against `constants.py`.
from constants import *
# Force synchronous CUDA kernel launches so CUDA errors are raised at the
# offending call site instead of asynchronously later (debugging aid; this
# slows down GPU execution and is typically disabled in production runs).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# NOTE(review): the string below is not a real module docstring — it is not
# the first statement in the file — so tools like help() will not pick it up.
# It is kept verbatim as in-file documentation of the CLI parameters.
"""
This file defines the parameters for three stages of the pipeline:
1. Pretraining
2. Fine-tuning
3. Link Prediction
The parameters are defined in the parse_args() method.
batch_size: Batch size for training
num_epochs: Number of epochs for training
lr: Learning rate for training
warmup_steps: Number of warmup steps for training
data_dir: Directory where the graph data is stored
log_dir: Directory where the logs are stored
graphs_file: Name of the file where the graph data is stored
classification_model: Name of the classification model to use.
Choices: ['uml-gpt', 'bert-base-cased']
uml-gpt: UML-GPT model
bert-base-cased: BERT model
tokenizer: Name of the tokenizer to use.
This tokenizer can be a pretrained tokenizer or a custom tokenizer.
A custom tokenizer is built using the graph data.
seed: Seed for reproducibility
test_size: Test size for train-test split
from_pretrained: Path to the pretrained model to use for fine-tuning or link prediction
models_dir: Directory where the models are stored
"""
def parse_args(argv=None):
    """Parse command-line arguments for all pipeline stages.

    Covers pretraining, fine-tuning, and link prediction. Most options have
    no default and come back as ``None`` when not supplied; downstream code
    is expected to handle that.

    Args:
        argv: Optional list of argument strings to parse. Defaults to
            ``None``, in which case argparse reads ``sys.argv[1:]`` —
            identical to the previous zero-argument behavior. Passing an
            explicit list makes the function testable without touching
            ``sys.argv``.

    Returns:
        argparse.Namespace: The parsed arguments (also emitted via
        ``logging.info`` for run traceability).
    """
    parser = argparse.ArgumentParser()
    # Pipeline control
    parser.add_argument("--stage", type=str)
    # Optimization hyperparameters
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--num_epochs", type=int)
    parser.add_argument("--lr", type=float)
    parser.add_argument("--warmup_steps", type=int)
    parser.add_argument("--weight_decay", type=float)
    # Input/output locations
    parser.add_argument("--data_dir", type=str, default="uploaded_data")
    parser.add_argument("--log_dir", type=str, default="logs")
    parser.add_argument("--inference_models_dir", type=str, default="trained_models")
    parser.add_argument("--gnn_location", type=str)
    parser.add_argument("--graphs_file", type=str)
    # Model / tokenizer selection
    parser.add_argument("--classification_model", type=str)
    parser.add_argument("--embedding_model", type=str)
    parser.add_argument("--tokenizer", type=str)
    parser.add_argument("--from_pretrained", type=str)
    parser.add_argument("--tokenizer_file", type=str)
    # Data handling / experiment setup
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--test_size", type=float)
    parser.add_argument("--ontouml_mask_prob", type=float)
    parser.add_argument("--exclude_limit", type=int)
    parser.add_argument("--distance", type=int)
    parser.add_argument("--models_dir", type=str, default="models")
    parser.add_argument("--class_type", type=str)
    parser.add_argument("--phase", type=str)
    # GPT architecture options
    parser.add_argument("--gpt_model", type=str)
    parser.add_argument("--embed_dim", type=int)
    parser.add_argument("--num_layers", type=int)
    parser.add_argument("--num_heads", type=int)
    parser.add_argument("--block_size", type=int)
    parser.add_argument("--pooling", type=str)
    args = parser.parse_args(argv)
    logging.info(args)
    return args