-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfinetune_fsdp.sh
More file actions
63 lines (58 loc) · 1.93 KB
/
finetune_fsdp.sh
File metadata and controls
63 lines (58 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# ---------------------------------------------------------------------------
# FSDP fine-tuning launcher configuration.
# Every setting is overridable from the environment, e.g.:
#   MODEL=facebook/opt-2.7b MODE=lora TASK=SST2 NUM_GPU=4 bash finetune_fsdp.sh
# ---------------------------------------------------------------------------
MODEL=${MODEL:-facebook/opt-1.3b}   # HuggingFace model identifier
# Last path component of the model id (e.g. "opt-1.3b"), used in output paths.
# Parameter expansion replaces the original unquoted-array split, which was
# fragile under word-splitting/globbing (ShellCheck SC2206).
MODEL_NAME="${MODEL##*/}"
EPOCH=${EPOCH:-5}        # number of training epochs
BS=${BS:-8}              # gradient accumulation steps (per-device batch size is 1)
LR=${LR:-1e-5}           # learning rate
SEED=${SEED:-0}          # train-set seed
TRAIN=${TRAIN:-1000}     # number of training examples
DEV=${DEV:-500}          # number of dev examples (may be lowered per-task below)
EVAL=${EVAL:-1000}       # number of eval examples
MODE=${MODE:-ft}         # one of: ft | prefix | lora

# Mode-specific flags. Intentionally a space-separated string: it is
# word-split into individual CLI flags when passed to run.py.
EXTRA_ARGS=""
if [[ "$MODE" == "prefix" ]]; then
    EXTRA_ARGS="--prefix_tuning --num_prefix 5 --no_reparam --prefix_init_by_real_act"
elif [[ "$MODE" == "lora" ]]; then
    EXTRA_ARGS="--lora"
fi
TAG=fsdp-$MODE-$EPOCH-$BS-$LR-$SEED
# Per-task overrides.
# Copa, ReCoRD, SQuAD, and DROP are trained with --train_as_classification
# False; every other task keeps the default flag passed to run.py below.
TASK_ARGS=""
case "$TASK" in
    CB)
        # Fewer than 1000 training examples; only use 100 for dev.
        DEV=100
        ;;
    Copa)
        # Fewer than 1000 training examples; only use 100 for dev.
        DEV=100
        TASK_ARGS="--train_as_classification False"
        ;;
    ReCoRD|DROP|SQuAD)
        TASK_ARGS="--train_as_classification False"
        ;;
esac
# Print the run configuration so it lands in the job log.
echo "$TAG"
echo "EPOCH: $EPOCH"
echo "BS (gradient accumulation): $BS"
echo "LR: $LR"
echo "SEED: $SEED"
echo "MODE: $MODE"
echo "Extra args: $EXTRA_ARGS $TASK_ARGS"

# Launch distributed fine-tuning.
# Required env vars (no defaults): NUM_GPU — processes per node; TASK —
# dataset/task name. A randomized master port avoids collisions between
# concurrent launches on the same host.
# NOTE: $EXTRA_ARGS and $TASK_ARGS are deliberately left unquoted so they
# word-split into separate CLI flags; "$@" forwards any extra user arguments.
# All scalar expansions are quoted (ShellCheck SC2086).
OMP_NUM_THREADS=10 torchrun --nproc_per_node="$NUM_GPU" --master_port=$(( RANDOM + 1000 )) run.py \
    --model_name "$MODEL" \
    --task_name "$TASK" \
    --output_dir "result/$TASK-${MODEL_NAME}-$TAG" --tag "$TAG" --train_set_seed "$SEED" --num_train "$TRAIN" --num_dev "$DEV" --num_eval "$EVAL" --logging_steps 10 \
    --trainer regular --fp16 --no_auto_device \
    --learning_rate "$LR" --num_train_epochs "$EPOCH" --per_device_train_batch_size 1 --gradient_accumulation_steps "$BS" \
    --load_best_model_at_end --evaluation_strategy epoch --save_strategy epoch --save_total_limit 1 \
    --train_as_classification \
    --fsdp "full_shard auto_wrap" \
    --fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
    $EXTRA_ARGS \
    $TASK_ARGS \
    "$@"