llama3_r128.yaml
DATASET:
  TEST_SET: "eval/eval.json"
  METRIC: 'area'
  TRAIN_VAL_TEST_SPLIT: [0.99, 0.01, 0.0]
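  # note: the test fraction is 0.0, presumably because held-out evaluation uses the separate TEST_SET file above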

tokenizer:
  padding_side: 'right'
  use_fast: True

LLM:
  NAME: 'llama3:8b'
  TEMP: 0.4

lora:
  lora_r: 128
  lora_alpha: 64
  lora_dropout: 0.1
  use_dora: False
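  # note: PEFT scales LoRA updates by lora_alpha/lora_r, so with alpha 64 and r 128 the adapter output is down-weighted by 0.5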

bnb:
  load_in_4bit: True
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_use_double_quant: True
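  # note: nf4 with double quantization follows the QLoRA recipe; the quantization constants are themselves quantized to save memory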

train:
  epochs: 10
  max_steps: -1  # set to -1 to train by epochs instead of a fixed step count
  max_seq_length: 1048
  batch_size: 32
  gradient_accumulation_steps: 2  # effective batch size = batch_size * gradient_accumulation_steps
  learning_rate: 0.00002
  report_to: "wandb"
  gradient_checkpointing: True  # reduces memory usage but slows training by about 20%
  dataloader_num_workers: 0
  evaluation_strategy: 'steps'
  logging_strategy: 'steps'
  save_strategy: 'steps'
  logging_steps: 1
  eval_steps: 100
  save_steps: 100
  load_best_model_at_end: True
  lr_scheduler_type: 'linear'
  run_name: llama3_r128
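  # note: with load_best_model_at_end, the HF Trainer requires save_steps to be a multiple of eval_steps; both are 100 here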

test:
  batch_size: 8
  num_samples: 10  # n for computing pass@
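
For reference, below is a minimal sketch of how the lora, bnb, and train blocks of a config like this could be consumed by a Hugging Face + PEFT fine-tuning script. This is an assumption about usage, not code from this repository; in particular, bnb_4bit_compute_dtype and the output directory are illustrative choices the config itself does not specify.

import yaml
import torch
from transformers import BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

# Load the YAML config into a plain dict.
with open("llama3_r128.yaml") as f:
    cfg = yaml.safe_load(f)

# 4-bit quantization settings (QLoRA-style) from the bnb block.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["bnb"]["load_in_4bit"],
    bnb_4bit_quant_type=cfg["bnb"]["bnb_4bit_quant_type"],
    bnb_4bit_use_double_quant=cfg["bnb"]["bnb_4bit_use_double_quant"],
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: not specified in the config
)

# LoRA adapter settings from the lora block.
lora_config = LoraConfig(
    r=cfg["lora"]["lora_r"],
    lora_alpha=cfg["lora"]["lora_alpha"],
    lora_dropout=cfg["lora"]["lora_dropout"],
    use_dora=cfg["lora"]["use_dora"],
    task_type="CAUSAL_LM",
)

# Trainer settings from the train block.
args = TrainingArguments(
    output_dir=cfg["train"]["run_name"],  # assumption: the repo may use a different output path
    num_train_epochs=cfg["train"]["epochs"],
    max_steps=cfg["train"]["max_steps"],  # -1 defers to num_train_epochs
    per_device_train_batch_size=cfg["train"]["batch_size"],
    gradient_accumulation_steps=cfg["train"]["gradient_accumulation_steps"],
    learning_rate=cfg["train"]["learning_rate"],
    report_to=cfg["train"]["report_to"],
    gradient_checkpointing=cfg["train"]["gradient_checkpointing"],
    dataloader_num_workers=cfg["train"]["dataloader_num_workers"],
    evaluation_strategy=cfg["train"]["evaluation_strategy"],
    logging_strategy=cfg["train"]["logging_strategy"],
    save_strategy=cfg["train"]["save_strategy"],
    logging_steps=cfg["train"]["logging_steps"],
    eval_steps=cfg["train"]["eval_steps"],
    save_steps=cfg["train"]["save_steps"],
    load_best_model_at_end=cfg["train"]["load_best_model_at_end"],
    lr_scheduler_type=cfg["train"]["lr_scheduler_type"],
    run_name=cfg["train"]["run_name"],
)

In a full pipeline, these three objects would typically be handed to a trainer (for example TRL's SFTTrainer) together with the model, the tokenizer configured per the tokenizer block, and the dataset splits from the DATASET block.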