env_setup.py
import numpy as np
import torch
from dare import get_P
from device import device
# @title Environment dynamics, robot reward function
# Time step (discretization interval)
dt = 0.2
# Robot dynamics matrix
A = [[1., dt], [0., 1.]]
A = np.array(A)
A_tensor = torch.from_numpy(A).to(device).double()
nX = A.shape[0]
# Robot control matrix
B = [[0.], [0.5]]
B = np.array(B)
B_tensor = torch.from_numpy(B).to(device).double()
nU = B.shape[1]
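# With this A and B, the state can be read as (position, velocity) and the robot
# evolves as x_{t+1} = A x_t + B u_t, i.e., a discretized double-integrator-style model.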
# LQR cost matrices (Q penalizes state error, R penalizes control effort)
Q = [[2.0, 0.], [0., 1.]]
Q = np.array(Q)
Q_tensor = torch.from_numpy(Q).to(device).double()
R = 20 * np.eye(1)
R_tensor = torch.from_numpy(R).to(device).double()
R_inv = np.linalg.inv(R)
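# Background note (not in the original file): the DARE solutions computed below
# summarize the infinite-horizon LQR cost  sum_t (x_t^T Q x_t + u_t^T R u_t)
# as a quadratic cost-to-go x^T P x; a larger R makes control effort more expensive.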
# Human's internal robot dynamics
A_int = A.copy()
# Note that the human has an incorrect estimate of the B matrix
B_int = [[0.], [0.1]]
B_int = np.array(B_int)
# Compute human's LQR DARE solution (i.e., human's believed solution matrix)
P_int = get_P(A_int, B_int, Q, R)
# print("Human internal control matrix:", P_int)
# print()
# Compute robot's LQR DARE solution (i.e., the optimal solution matrix)
P_true = get_P(A, B, Q, R)
# print("Optimal control matrix:", P_true)
# Masks for the dynamics gradient update: 1.0 where the human's believed
# dynamics differ from the true dynamics, 0.0 elsewhere
A_grad_mask = (A_int != A).astype(float)
B_grad_mask = (B_int != B).astype(float)
# print(A_grad_mask)
# print(B_grad_mask)
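# Illustrative sketch, assuming the masks gate a gradient-based correction of the
# human's believed dynamics so that only mismatched entries are updated.
# `example_lr` and `example_grad_B` are hypothetical placeholders, not part of the setup.
example_lr = 0.01
example_grad_B = np.ones_like(B_int)                                # stand-in for a real gradient
B_int_example = B_int - example_lr * B_grad_mask * example_grad_B   # only the mismatched (1, 0) entry moves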
# Set of initial robot control inputs u_t0 to evaluate
u_t0_R_aug_set = [1., 0.2, 0.6, 1.2, 1.8]