imitation/benchmarking/gail_seals_hopper_best_hp_eval.json at ce8c87ddace0017801e6a5e8fcfcd2ca0dc24cf7 · HumanCompatibleAI/imitation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
  "algorithm_kwargs": {
    "demo_batch_size": 128,
    "gen_replay_buffer_capacity": 4096,
    "n_disc_updates_per_round": 8
  },
  "checkpoint_interval": 0,
  "demonstrations": {
    "source": "huggingface",
    "algo_name": "ppo",
    "n_expert_demos": null
  },
  "reward": {
    "add_std_alpha": null,
    "ensemble_size": null,
    "net_cls": {
      "py/type": "imitation.rewards.reward_nets.BasicRewardNet"
    },
    "net_kwargs": {
      "normalize_input_layer": {
        "py/type": "imitation.util.networks.RunningNorm"
      }
    },
    "normalize_output_layer": {
      "py/type": "imitation.util.networks.RunningNorm"
    }
  },
  "rl": {
    "batch_size": 4096,
    "rl_cls": {
      "py/type": "stable_baselines3.ppo.ppo.PPO"
    },
    "rl_kwargs": {
      "batch_size": 512,
      "clip_range": 0.1,
      "ent_coef": 0.001255299425412744,
      "gae_lambda": 0.98,
      "gamma": 0.995,
      "learning_rate": 4.3984856156897565e-5,
      "max_grad_norm": 0.9,
      "n_epochs": 20,
      "vf_coef": 0.20315938606555833
    }
  },
  "total_timesteps": 10000000,
  "policy": {
    "policy_cls": "MlpPolicy",
    "policy_kwargs": {
      "activation_fn": {
        "py/type": "torch.nn.modules.activation.ReLU"
      },
      "features_extractor_class": {
        "py/type": "imitation.policies.base.NormalizeFeaturesExtractor"
      },
      "features_extractor_kwargs": {
        "normalize_class": {
          "py/type": "imitation.util.networks.RunningNorm"
        }
      },
      "net_arch": [
        {
          "pi": [
            64,
            64
          ],
          "vf": [
            64,
            64
          ]
        }
      ]
    }
  },
  "policy_evaluation": {
    "n_episodes_eval": 50
  },
  "environment": {
    "gym_id": "seals/Hopper-v1"
  }
}