main.py
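"""MARLIO runner entry point.

Loads a YAML experiment config, prepares a per-experiment output directory,
and dispatches to the single-agent training/evaluation loop (the multi-agent
path is not implemented yet).
"""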
from pathlib import Path
from os import path
from dotmap import DotMap
from helpers.board import TensorboardLogger
import typer
import yaml
import coloredlogs
import logging
import os
import time
import shutil

logger = logging.getLogger(__name__)
coloredlogs.install(level="DEBUG")

default_output_dir = path.expanduser("~/.MARLIO-runner/")
if not os.path.exists(default_output_dir):
    os.makedirs(default_output_dir)

sample_conf = "./config.yml"
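
# Expected config.yml shape, inferred from the fields this script reads
# (all values below are illustrative):
#
#   game_config:
#     agents: 1
#     port: 31000
#     batch_mode: true
#   agents_config:
#     agents: dqn          # loads agents/agent_dqn.py
#     start_episode: 0
#     episodes: 1000
#     save_every: 50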

def main(
    config: Path = typer.Option(
        sample_conf,
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
    ),
    verbose: bool = typer.Option(False, "--verbose/--no-verbose"),
    gui: bool = typer.Option(False, "--gui/--no-gui"),
    tensorboard: bool = typer.Option(False, "--tensorboard"),
    output_dir: Path = typer.Option(
        default_output_dir,
        exists=True,
        file_okay=False,
        dir_okay=True,
        writable=True,
        resolve_path=True,
    ),
    expname: str = typer.Option(None, prompt=True),
    train: bool = typer.Option(None, "--train/--test", prompt=True),
):
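    # Parse the YAML experiment config; DotMap wraps it so nested keys can be
    # read as attributes (e.g. conf.game_config.agents)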
    with open(config) as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf = DotMap(conf)
    logger.info("Config loaded")
    if conf.game_config.agents == 1:
        # Fresh experiment directory: any previous run with the same name is wiped
        output_dir = os.path.join(output_dir, expname)
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)
        single_agent(conf, verbose, gui, tensorboard, output_dir, train)
    elif conf.game_config.agents == 2:
        multi_agent(conf, verbose, gui, tensorboard, output_dir, train)
    else:
        typer.secho(
            "Number of agents must be either 1 or 2",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()

def single_agent(config, verbose, gui, tensorboard, output_dir, train):
    if not isinstance(config.agents_config.agents, str):
        typer.secho(
            "Agent definition error: agents_config.agents must be a string naming one agent",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()
    if not path.isfile(f"./agents/agent_{config.agents_config.agents}.py"):
        typer.secho(
            f"Agent strategy file './agents/agent_{config.agents_config.agents}.py' not found",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()
    # Import Agent Strategy
    import importlib
    strategy_module = importlib.import_module(f"agents.agent_{config.agents_config.agents}")
    strategy = strategy_module.Strategy
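    # The Strategy class is expected (judging from its use below) to provide
    # __init__(env, agents_config, logger, output_dir), act(state),
    # custom_logic(...), and save_model(path).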
    # Initialize Gym
    from helpers.utils import game_config_json
    config_json = game_config_json(config)  # presumably materializes config.json for the env below
    import gym
    module_dir = Path(__file__).parent.absolute()
    module_dir = str(module_dir).replace(" ", "\\ ")  # escape spaces in the path
    env = gym.make("codeside:codeside-v0", config=f"{module_dir}/config.json")
    time.sleep(2)  # give the game server a moment to come up
    # Initialize Agent Strategy
    agents_config = config.agents_config
    agent = strategy(env, agents_config, logger, output_dir)
    # Initialize TensorBoard logger; named tb_logger so it does not shadow the
    # `tensorboard` flag passed in from the CLI
    tb_logger = TensorboardLogger(config, output_dir)
    # Per-episode artifacts live in subdirectories of the run folder
    replays = os.path.join(output_dir, "replays")
    results = os.path.join(output_dir, "results")
    models = os.path.join(output_dir, "models")
    os.makedirs(replays)
    os.makedirs(results)
    os.makedirs(models)
    # Episode loop: reset the env, run one episode, and checkpoint periodically
    for episode in range(agents_config.start_episode, agents_config.episodes):
        # Spawn Our Player
        replay = os.path.join(replays, f"ep_{episode}")
        result = os.path.join(results, f"ep_{episode}")
        _ = env.reset(replay, result, config.game_config.batch_mode)
        player = env.create_player(port=config.game_config.port)
        cur_state = env.get_state(player)
        step = 0
        tot_reward = 0
        while True:
            action, *args = agent.act(cur_state)
            logger.debug(action)
            new_state, reward, done, _ = env.step(player, action)
            tb_logger.log_step(episode, step, action, reward)
            # Let the agent process the transition before breaking, so the
            # terminal step is not dropped and the final reward is counted
            agent.custom_logic(cur_state, action, reward,
                               new_state, done, step, *args)
            tot_reward += reward
            if done:
                break
            cur_state = new_state
            step += 1
        tb_logger.log_episode(episode, step, tot_reward, done)  # add win state
        if episode % agents_config.save_every == 0:
            model = os.path.join(models, f"ep_{episode}.model")
            agent.save_model(model)

def multi_agent(config, verbose, gui, tensorboard, output_dir, train):
    raise NotImplementedError
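
# Example invocation (expname and --train/--test are prompted for when omitted;
# the config path and experiment name are illustrative):
#   python main.py --config ./config.yml --tensorboard --expname dqn_baseline --train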

if __name__ == "__main__":
    typer.run(main)