# research-to-production/games/sample.py

import pickle
import gym
import numpy as np
from matplotlib import pyplot as plt

# Print numpy arrays with three decimals in fixed-point notation.
np.set_printoptions(suppress=True, precision=3)

# Interactive figure that the rollout loop redraws after every step.
plt.ion()
fig, ax = plt.subplots()
# One line colour per plotted observation component.
colors = [
    "xkcd:orange",
    "xkcd:forest green",
    "xkcd:gray",
    "xkcd:light blue",
]
plots = [None for _ in range(4)]

# Environment under test, reset once with a fixed seed.
env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42, return_info=True)

# Every sampled 4-component weight vector is run for max_steps steps.
max_steps, num_samples = 100, 500
samples = np.random.randn(num_samples, 4)

# Roll out one linear policy per sampled weight vector, plot the observations
# live, and record them (plus the steps at which an episode ended).
data = []
for lam in samples:
    breakpoints = []  # step indices at which the environment was reset
    score = 0  # reward accumulated since the last reset
    obs = []  # every observation produced for this weight vector
    for n in range(max_steps):
        ax.cla()
        # Linear policy: action 1 when the weighted observation is negative.
        # Alternatives tried earlier:
        #   action = env.action_space.sample()
        #   action = 1 if observation[0] - observation[3] < 0 else 0
        action = 1 if lam @ observation < 0 else 0
        observation, reward, done, info = env.step(action)
        score += reward
        obs.append(observation.tolist())
        o = np.array(obs)
        # Per-component variance over the last int(score) observations,
        # i.e. the steps taken since the last reset.
        var = np.var(o[-int(score):, :], axis=0)
        for q, color in enumerate(colors):
            # Zero-pad each trace to max_steps + 1 points so it spans the x-axis.
            lines = np.hstack([o[:, q], np.zeros(max_steps - n)])
            ax.plot(range(max_steps + 1), lines, c=color)

        ax.set_title(f"Reward: {int(score)}, Variance: {var}")
        ax.set_ylim([-3, 3])

        # Reset when the episode ends or on the final step of this sample, so
        # the next weight vector starts from a fresh episode. The last value
        # of n is max_steps - 1; the previous check against max_steps could
        # never be true.
        if done or n == max_steps - 1:
            breakpoints.append(n)
            observation, info = env.reset(return_info=True)
            score = 0  # reset score

        # Draw break-point lines where the environment was reset.
        for b in breakpoints:
            ax.vlines(b, o.min(), o.max(), color="black", lw=2)

        fig.canvas.draw()
        fig.show()
        fig.canvas.flush_events()
        env.render()

    data.append({"lam": lam, "obs": obs, "break": breakpoints})
    # Dump data frequently; the context manager closes the file each time.
    with open("data.pkl", "wb") as fh:
        pickle.dump(data, fh)

# Keep the final plot on screen until the user confirms; input() only returns
# once Enter is pressed, so the prompt says so.
input("Press Enter to close.")
plt.close(fig)
env.close()
adding RL demo 3 years ago			`import pickle`
			`import gym`
			`import numpy as np`
			`from matplotlib import pyplot as plt`

			`# numpy precision for printing`
			`np.set_printoptions(precision=3, suppress=True)`

			`plt.ion() # interactive plotting`
			`fig, ax = plt.subplots()`
			`colors = ["xkcd:orange", "xkcd:forest green", "xkcd:gray", "xkcd:light blue"]`
			`plots = [None] * 4`

			`env = gym.make("CartPole-v1")`
			`observation, info = env.reset(seed=42, return_info=True)`

			`max_steps = 100`
			`num_samples = 500`
			`samples = np.random.randn(num_samples, 4)`

			`data = []`
			`for lam in samples:`
			`breakpoints = []`
			`score = 0`
			`obs = []`
			`for n in range(max_steps):`
			`ax.cla()`
			`# action = env.action_space.sample()`
			`action = 1 if lam.T @ observation < 0 else 0`
			`# action = 1 if observation[0] - observation[3] < 0 else 0`
			`observation, reward, done, info = env.step(action)`
			`score += reward`
			`obs.append(observation.tolist())`
			`o = np.array(obs)`
			`var = np.var(o[-int(score) :, :], axis=0)`
			`for q in range(4):`
			`lines = np.hstack([o[:, q], np.zeros(max_steps - n)])`
			`ax.plot(range(max_steps + 1), lines, c=colors[q])`

			`ax.set_title(f"Reward: {int(score)}, Variance: {var}")`
			`ax.set_ylim([-3, 3])`

			`if done or n == max_steps:`
			`breakpoints.append(n)`
			`observation, info = env.reset(return_info=True)`
			`# print(score, observation)`
			`score = 0 # reset score`

			`# draw break-point lines when game is lost`
			`for b in breakpoints:`
			`ax.vlines(`
			`b, np.min(o, axis=0).min(), np.max(o, axis=0).max(), color="black", lw=2`
			`)`

			`fig.canvas.draw()`
			`fig.show()`
			`fig.canvas.flush_events()`
			`env.render()`

			`data.append({"lam": lam, "obs": obs, "break": breakpoints})`
			`pickle.dump(data, open("data.pkl", "wb")) # dump data frequently`

			`stop = input("Press any key to close.")`
			`plt.close()`
			`env.close()`