main.py
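"""MARLIO runner entry point.

Loads a YAML experiment config, prepares a per-experiment output directory,
and dispatches to the single-agent training/evaluation loop (the multi-agent
path is not implemented yet).
"""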
from pathlib import Path
from os import path
from dotmap import DotMap
from helpers.board import TensorboardLogger
import typer
import yaml
import coloredlogs
import logging
import os
import time
import shutil

logger = logging.getLogger(__name__)
coloredlogs.install(level="DEBUG")

default_output_dir = path.expanduser("~/.MARLIO-runner/")
if not os.path.exists(default_output_dir):
    os.makedirs(default_output_dir)

sample_conf = "./config.yml"
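
# Expected config.yml shape, inferred from the fields this script reads
# (all values below are illustrative):
#
#   game_config:
#     agents: 1
#     port: 31000
#     batch_mode: true
#   agents_config:
#     agents: dqn          # loads agents/agent_dqn.py
#     start_episode: 0
#     episodes: 1000
#     save_every: 50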

def main(
    config: Path = typer.Option(
        sample_conf,
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
    ),
    verbose: bool = typer.Option(False, "--verbose/--no-verbose"),
    gui: bool = typer.Option(False, "--gui/--no-gui"),
    tensorboard: bool = typer.Option(False, "--tensorboard"),
    output_dir: Path = typer.Option(
        default_output_dir,
        exists=True,
        file_okay=False,
        dir_okay=True,
        writable=True,
        resolve_path=True,
    ),
    expname: str = typer.Option(None, prompt=True),
    train: bool = typer.Option(None, "--train/--test", prompt=True),
):
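    # Parse the YAML experiment config; DotMap wraps it so nested keys can be
    # read as attributes (e.g. conf.game_config.agents)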
    with open(config) as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    conf = DotMap(conf)
    logger.info("Config loaded")
    if conf.game_config.agents == 1:
        # Fresh experiment directory: any previous run with the same name is wiped
        output_dir = os.path.join(output_dir, expname)
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)
        single_agent(conf, verbose, gui, tensorboard, output_dir, train)
    elif conf.game_config.agents == 2:
        multi_agent(conf, verbose, gui, tensorboard, output_dir, train)
    else:
        typer.secho(
            "Number of agents must be either 1 or 2",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()

def single_agent(config, verbose, gui, tensorboard, output_dir, train):
    if not isinstance(config.agents_config.agents, str):
        typer.secho(
            "Agent definition error: agents_config.agents must be a string naming one agent",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()
    if not path.isfile(f"./agents/agent_{config.agents_config.agents}.py"):
        typer.secho(
            f"Agent strategy file './agents/agent_{config.agents_config.agents}.py' not found",
            fg=typer.colors.WHITE,
            bg=typer.colors.RED,
            err=True,
        )
        raise typer.Abort()
    # Import Agent Strategy
    import importlib
    strategy_module = importlib.import_module(f"agents.agent_{config.agents_config.agents}")
    strategy = strategy_module.Strategy
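    # The Strategy class is expected (judging from its use below) to provide
    # __init__(env, agents_config, logger, output_dir), act(state),
    # custom_logic(...), and save_model(path).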
    # Initialize Gym
    from helpers.utils import game_config_json
    config_json = game_config_json(config)  # presumably materializes config.json for the env below
    import gym
    module_dir = Path(__file__).parent.absolute()
    module_dir = str(module_dir).replace(" ", "\\ ")  # escape spaces in the path
    env = gym.make("codeside:codeside-v0", config=f"{module_dir}/config.json")
    time.sleep(2)  # give the game server a moment to come up
    # Initialize Agent Strategy
    agents_config = config.agents_config
    agent = strategy(env, agents_config, logger, output_dir)
    # Initialize TensorBoard logger; named tb_logger so it does not shadow the
    # `tensorboard` flag passed in from the CLI
    tb_logger = TensorboardLogger(config, output_dir)
    # Per-episode artifacts live in subdirectories of the run folder
    replays = os.path.join(output_dir, "replays")
    results = os.path.join(output_dir, "results")
    models = os.path.join(output_dir, "models")
    os.makedirs(replays)
    os.makedirs(results)
    os.makedirs(models)
    # Episode loop: reset the env, run one episode, and checkpoint periodically
    for episode in range(agents_config.start_episode, agents_config.episodes):
        # Spawn Our Player
        replay = os.path.join(replays, f"ep_{episode}")
        result = os.path.join(results, f"ep_{episode}")
        _ = env.reset(replay, result, config.game_config.batch_mode)
        player = env.create_player(port=config.game_config.port)
        cur_state = env.get_state(player)
        step = 0
        tot_reward = 0
        while True:
            action, *args = agent.act(cur_state)
            logger.debug(action)
            new_state, reward, done, _ = env.step(player, action)
            tb_logger.log_step(episode, step, action, reward)
            # Let the agent process the transition before breaking, so the
            # terminal step is not dropped and the final reward is counted
            agent.custom_logic(cur_state, action, reward,
                               new_state, done, step, *args)
            tot_reward += reward
            if done:
                break
            cur_state = new_state
            step += 1
        tb_logger.log_episode(episode, step, tot_reward, done)  # add win state
        if episode % agents_config.save_every == 0:
            model = os.path.join(models, f"ep_{episode}.model")
            agent.save_model(model)

def multi_agent(config, verbose, gui, tensorboard, output_dir, train):
    raise NotImplementedError
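
# Example invocation (expname and --train/--test are prompted for when omitted;
# the config path and experiment name are illustrative):
#   python main.py --config ./config.yml --tensorboard --expname dqn_baseline --train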

if __name__ == "__main__":
    typer.run(main)