forked from Learning-and-Intelligent-Systems/predicators
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbase_explorer.py
More file actions
60 lines (49 loc) · 1.87 KB
/
base_explorer.py
File metadata and controls
60 lines (49 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Base class for an explorer."""
import abc
from typing import List, Set
import numpy as np
from gym.spaces import Box
from predicators.settings import CFG
from predicators.structs import ExplorationStrategy, ParameterizedOption, \
Predicate, Task, Type
class BaseExplorer(abc.ABC):
    """Abstract interface for objects that build exploration strategies.

    A concrete explorer produces a policy together with a termination
    function for exploring in a task. A fresh explorer is constructed at
    the start of each interaction cycle, so it is handed the most recent
    predicates and options.
    """

    def __init__(
        self,
        predicates: Set[Predicate],
        options: Set[ParameterizedOption],
        types: Set[Type],
        action_space: Box,
        train_tasks: List[Task],
    ) -> None:
        """Store the constructor arguments and seed the rng from CFG.seed."""
        # Symbolic vocabulary available to the explorer.
        self._types = types
        self._predicates = predicates
        self._options = options
        # Environment-facing pieces.
        self._train_tasks = train_tasks
        self._action_space = action_space
        # Seeding stays last so that every attribute above already exists
        # by the time _set_seed runs.
        self._set_seed(CFG.seed)

    @classmethod
    @abc.abstractmethod
    def get_name(cls) -> str:
        """Return the unique name identifying this explorer."""
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def get_exploration_strategy(
            self, train_task_idx: int,
            timeout: int) -> ExplorationStrategy:
        """Build an ExplorationStrategy for the given train task index.

        An ExplorationStrategy is a tuple of a policy and a termination
        function.
        """
        # NOTE(review): the meaning/units of `timeout` are not visible in
        # this file -- confirm against the concrete explorers.
        raise NotImplementedError("Override me!")

    @abc.abstractmethod
    def get_exploration_plan_strategy(
            self, train_task_idx: int,
            timeout: int) -> ExplorationStrategy:
        """Build an ExplorationStrategy for the given train task index.

        Like get_exploration_strategy, the result is a tuple of a policy
        and a termination function.
        """
        # NOTE(review): how this differs from get_exploration_strategy is
        # not established in this file -- confirm against the subclasses.
        raise NotImplementedError("Override me!")

    def _set_seed(self, seed: int) -> None:
        """Record the seed and rebuild the rng from it."""
        self._seed = seed
        self._rng = np.random.default_rng(seed)