|
|
@ -1,10 +1,7 @@ |
|
|
|
import sys |
|
|
|
import pickle |
|
|
|
import gym |
|
|
|
import numpy as np |
|
|
|
from matplotlib import pyplot as plt |
|
|
|
from matplotlib.animation import FuncAnimation |
|
|
|
from time import sleep |
|
|
|
|
|
|
|
# numpy precision for printing |
|
|
|
np.set_printoptions(precision=3, suppress=True) |
|
|
@ -25,7 +22,7 @@ data = [] |
|
|
|
for lam in samples: |
|
|
|
breakpoints = [] |
|
|
|
score = 0 |
|
|
|
O = [] |
|
|
|
obs = [] |
|
|
|
for n in range(max_steps): |
|
|
|
ax.cla() |
|
|
|
# action = env.action_space.sample() |
|
|
@ -33,8 +30,8 @@ for lam in samples: |
|
|
|
# action = 1 if observation[0] - observation[3] < 0 else 0 |
|
|
|
observation, reward, done, info = env.step(action) |
|
|
|
score += reward |
|
|
|
O.append(observation.tolist()) |
|
|
|
o = np.array(O) |
|
|
|
obs.append(observation.tolist()) |
|
|
|
o = np.array(obs) |
|
|
|
var = np.var(o[-int(score) :, :], axis=0) |
|
|
|
for q in range(4): |
|
|
|
lines = np.hstack([o[:, q], np.zeros(max_steps - n)]) |
|
|
@ -59,9 +56,8 @@ for lam in samples: |
|
|
|
fig.show() |
|
|
|
fig.canvas.flush_events() |
|
|
|
env.render() |
|
|
|
# sleep(0.01) |
|
|
|
|
|
|
|
data.append({"lam": lam, "obs": O, "break": breakpoints}) |
|
|
|
data.append({"lam": lam, "obs": obs, "break": breakpoints}) |
|
|
|
pickle.dump(data, open("data.pkl", "wb")) # dump data frequently |
|
|
|
|
|
|
|
stop = input("Press any key to close.") |
|
|
|