|
|
|
import pickle
|
|
|
|
import gym
|
|
|
|
import numpy as np
|
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
|
|
# numpy precision for printing
|
|
|
|
np.set_printoptions(precision=3, suppress=True)
|
|
|
|
|
|
|
|
plt.ion() # interactive plotting
|
|
|
|
fig, ax = plt.subplots()
|
|
|
|
colors = ["xkcd:orange", "xkcd:forest green", "xkcd:gray", "xkcd:light blue"]
|
|
|
|
plots = [None] * 4
|
|
|
|
|
|
|
|
env = gym.make("CartPole-v1")
|
|
|
|
observation, info = env.reset(seed=42, return_info=True)
|
|
|
|
|
|
|
|
max_steps = 100
|
|
|
|
num_samples = 500
|
|
|
|
samples = np.random.randn(num_samples, 4)
|
|
|
|
|
|
|
|
data = []
|
|
|
|
for lam in samples:
|
|
|
|
breakpoints = []
|
|
|
|
score = 0
|
|
|
|
obs = []
|
|
|
|
for n in range(max_steps):
|
|
|
|
ax.cla()
|
|
|
|
# action = env.action_space.sample()
|
|
|
|
action = 1 if lam.T @ observation < 0 else 0
|
|
|
|
# action = 1 if observation[0] - observation[3] < 0 else 0
|
|
|
|
observation, reward, done, info = env.step(action)
|
|
|
|
score += reward
|
|
|
|
obs.append(observation.tolist())
|
|
|
|
o = np.array(obs)
|
|
|
|
var = np.var(o[-int(score) :, :], axis=0)
|
|
|
|
for q in range(4):
|
|
|
|
lines = np.hstack([o[:, q], np.zeros(max_steps - n)])
|
|
|
|
ax.plot(range(max_steps + 1), lines, c=colors[q])
|
|
|
|
|
|
|
|
ax.set_title(f"Reward: {int(score)}, Variance: {var}")
|
|
|
|
ax.set_ylim([-3, 3])
|
|
|
|
|
|
|
|
if done or n == max_steps:
|
|
|
|
breakpoints.append(n)
|
|
|
|
observation, info = env.reset(return_info=True)
|
|
|
|
# print(score, observation)
|
|
|
|
score = 0 # reset score
|
|
|
|
|
|
|
|
# draw break-point lines when game is lost
|
|
|
|
for b in breakpoints:
|
|
|
|
ax.vlines(
|
|
|
|
b, np.min(o, axis=0).min(), np.max(o, axis=0).max(), color="black", lw=2
|
|
|
|
)
|
|
|
|
|
|
|
|
fig.canvas.draw()
|
|
|
|
fig.show()
|
|
|
|
fig.canvas.flush_events()
|
|
|
|
env.render()
|
|
|
|
|
|
|
|
data.append({"lam": lam, "obs": obs, "break": breakpoints})
|
|
|
|
pickle.dump(data, open("data.pkl", "wb")) # dump data frequently
|
|
|
|
|
|
|
|
stop = input("Press any key to close.")
|
|
|
|
plt.close()
|
|
|
|
env.close()
|