-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathMC_SimMoving.py
More file actions
52 lines (48 loc) · 1.34 KB
/
MC_SimMoving.py
File metadata and controls
52 lines (48 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import numpy as np
import time, os
def Simulating(state, gamma, reward, trans_mat,
               terminal_states=(0, 15), max_steps=50, delay=1, n_actions=4):
    """Run one random-policy episode, rendering every visited state.

    Each step draws an action uniformly at random, looks up the successor
    state in ``trans_mat``, adds the discounted reward of that successor to
    the running return, and redraws the grid via ``ShowPath``.

    The starting state is not tested against ``terminal_states``; at least
    one step is always taken.

    Args:
        state: Index of the starting state.
        gamma: Discount factor; the reward of step ``k`` (0-based) is
            weighted by ``gamma ** k``.
        reward: Sequence indexed by state; the reward collected on entering
            that state.
        trans_mat: Array indexed as ``trans_mat[next_state, state, action]``.
            The successor is the argmax along the first axis, so transitions
            are not sampled (presumably the table is deterministic/one-hot;
            confirm against the data file).
        terminal_states: Collection of state indices that end the episode.
        max_steps: The episode is abandoned once more than this many steps
            have been taken.
        delay: Seconds to pause after each rendered frame.
        n_actions: Number of actions; actions are drawn from
            ``0 .. n_actions - 1``.

    Returns:
        Tuple ``(counter, terminal, value)``: number of steps taken, whether
        a terminal state was reached, and the accumulated discounted return.
    """
    value = 0
    counter = 0
    terminal = False

    # Show the starting position before the first move.
    ShowPath(state)
    time.sleep(delay)

    while not terminal:
        action = np.random.randint(0, n_actions)
        next_state = np.argmax(trans_mat[:, state, action])
        # Discount uses the pre-increment counter, so the first reward is
        # weighted by gamma ** 0.
        value += reward[next_state] * pow(gamma, counter)
        counter += 1
        state = next_state

        ShowPath(state)
        print('step: ' + str(counter) + ',action: ' + str(action) +', state: ' +str(state) + ', value: ' + str(value))

        terminal = state in terminal_states
        # Leave immediately on termination or step cutoff; pausing here
        # would only delay the return without showing a new frame.
        if terminal or counter > max_steps:
            break
        time.sleep(delay)

    return counter, terminal, value
def ShowPath(state):
    """Clear the terminal and print a 4x4 grid with ``state`` marked.

    Args:
        state: Flat index into the 16-cell grid; the cell is drawn as ``'*'``
            and every other cell as ``'_'``.
    """
    # Wipe the previous frame so successive calls read as an animation.
    os.system('cls' if os.name == 'nt' else 'clear')

    position = np.full(16, '_')
    position[state] = '*'

    print('=' * 20)
    print('[Now State]')
    print(position.reshape(4, 4))
    print('=' * 20)
def main():
    """Set up the 16-state gridworld and play one random episode.

    Loads the transition table from ``./gridworld/T.npy`` (resolved against
    the current working directory), builds a reward vector of -1 everywhere
    except 0 at states 0 and 15, and starts the simulation from state 1 with
    a discount factor of 0.99.
    """
    # Environment: transition table and per-state rewards.
    transitions = np.load('./gridworld/T.npy')
    rewards = np.full(16, -1)
    rewards[[0, 15]] = 0

    # Episode parameters.
    start_state = 1
    discount = 0.99

    Simulating(start_state, discount, rewards, transitions)
## execute
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()