-
Notifications
You must be signed in to change notification settings - Fork 302
Expand file tree
/
Copy pathgail_seals_hopper_best_hp_eval.json
More file actions
80 lines (80 loc) · 1.75 KB
/
gail_seals_hopper_best_hp_eval.json
File metadata and controls
80 lines (80 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
"algorithm_kwargs": {
"demo_batch_size": 128,
"gen_replay_buffer_capacity": 4096,
"n_disc_updates_per_round": 8
},
"checkpoint_interval": 0,
"demonstrations": {
"source": "huggingface",
"algo_name": "ppo",
"n_expert_demos": null
},
"reward": {
"add_std_alpha": null,
"ensemble_size": null,
"net_cls": {
"py/type": "imitation.rewards.reward_nets.BasicRewardNet"
},
"net_kwargs": {
"normalize_input_layer": {
"py/type": "imitation.util.networks.RunningNorm"
}
},
"normalize_output_layer": {
"py/type": "imitation.util.networks.RunningNorm"
}
},
"rl": {
"batch_size": 4096,
"rl_cls": {
"py/type": "stable_baselines3.ppo.ppo.PPO"
},
"rl_kwargs": {
"batch_size": 512,
"clip_range": 0.1,
"ent_coef": 0.001255299425412744,
"gae_lambda": 0.98,
"gamma": 0.995,
"learning_rate": 4.3984856156897565e-5,
"max_grad_norm": 0.9,
"n_epochs": 20,
"vf_coef": 0.20315938606555833
}
},
"total_timesteps": 10000000,
"policy": {
"policy_cls": "MlpPolicy",
"policy_kwargs": {
"activation_fn": {
"py/type": "torch.nn.modules.activation.ReLU"
},
"features_extractor_class": {
"py/type": "imitation.policies.base.NormalizeFeaturesExtractor"
},
"features_extractor_kwargs": {
"normalize_class": {
"py/type": "imitation.util.networks.RunningNorm"
}
},
"net_arch": [
{
"pi": [
64,
64
],
"vf": [
64,
64
]
}
]
}
},
"policy_evaluation": {
"n_episodes_eval": 50
},
"environment": {
"gym_id": "seals/Hopper-v1"
}
}