-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_gen.py
More file actions
65 lines (54 loc) · 2.69 KB
/
data_gen.py
File metadata and controls
65 lines (54 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import random
from tqdm import tqdm
from env_setup import *
from human import HumanRobotEnv
from models import ThetaEstimatorTransformer
def gen_physical_state(sim_policy: dict, x1_lo: float, x2_lo: float, x1_hi: float, x2_hi: float):
if sim_policy['human_state'] == 'fixed':
assert 'human_state_init' in sim_policy
return np.array(sim_policy['human_state_init'])
elif sim_policy['human_state'] == 'varying':
x1 = random.uniform(x1_lo, x1_hi)
x2 = random.uniform(x2_lo, x2_hi)
return np.array([[x1], [x2]])
else:
raise ValueError("unknown sim_policy['human_state']")
def gen_mental_state(sim_policy: dict):
if sim_policy['mental_state'] == 'fixed':
assert 'mental_state_init' in sim_policy
return np.array(sim_policy['mental_state_init'])
elif sim_policy['mental_state'] == 'varying':
x = random.uniform(0.0, 1.0)
return np.array([[x]])
else:
raise ValueError("unknown sim_policy['mental_state']")
def generate_simulated_data(sim_policy: dict, sim_time: int, n_demo: int,
is_updating_internal_model: bool, stochastic_human: bool,
human_lr: float, influence_type: str):
# Simulate how human updates the internal model
robot_states = []
human_actions = []
human_obs = []
human_mental_states = []
for i in tqdm(range(n_demo)):
human_env = HumanRobotEnv('passive_teaching', 1.0, 'use_model_human', is_updating_internal_model, human_lr, influence_type)
human_env.set_environment(A, B, Q, R, None, None, sim_time)
human_env.set_action_set(None, u_t0_R_aug_set)
human_env.set_human_internal_model(None)
x1_lo = human_env.observation_space.low[0]
x2_lo = human_env.observation_space.low[1]
x1_hi = human_env.observation_space.high[0]
x2_hi = human_env.observation_space.high[1]
# Initialize the robot state [0.4; 0], human mental state [1.0] (human thinks B[1,0]=1.0)
human_env.set_human_state(gen_physical_state(sim_policy, x1_lo, x2_lo, x1_hi, x2_hi),
gen_mental_state(sim_policy))
human_env.noisy_human = False
human_env.stochastic_human = stochastic_human
for i in range(sim_time):
human_env.step(None)
robot_states.append(human_env.current_demo_state_traj)
human_actions.append(human_env.current_demo_human_action_traj)
human_obs.append(human_env.current_demo_human_obs_traj)
human_mental_states.append(human_env.current_demo_human_mental_state_traj)
return robot_states, human_actions, human_obs, human_mental_states