|
@ -5,6 +5,7 @@ import gym |
|
|
import numpy as np |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import pandas as pd |
|
|
from scipy.stats import gaussian_kde as gkde |
|
|
from scipy.stats import gaussian_kde as gkde |
|
|
|
|
|
from scipy.stats import norm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def train(data): |
|
|
def train(data): |
|
@ -24,7 +25,7 @@ def train(data): |
|
|
def test(decision=np.array([-0.09, -0.71, -0.43 , -0.74]), seed=1992): |
|
|
def test(decision=np.array([-0.09, -0.71, -0.43 , -0.74]), seed=1992): |
|
|
env = gym.make("CartPole-v1") |
|
|
env = gym.make("CartPole-v1") |
|
|
observation, info = env.reset(seed=seed, return_info=True) |
|
|
observation, info = env.reset(seed=seed, return_info=True) |
|
|
score = 1 |
|
|
score = 0 |
|
|
for i in range(10000): |
|
|
for i in range(10000): |
|
|
action = 1 if decision.T @ observation < 0 else 0 |
|
|
action = 1 if decision.T @ observation < 0 else 0 |
|
|
observation, reward, done, info = env.step(action) |
|
|
observation, reward, done, info = env.step(action) |
|
@ -34,8 +35,8 @@ def test(decision=np.array([-0.09, -0.71, -0.43 , -0.74]), seed=1992): |
|
|
if score == 500: |
|
|
if score == 500: |
|
|
print("WIN") |
|
|
print("WIN") |
|
|
else: |
|
|
else: |
|
|
print("LOSE: {int(score)}") |
|
|
print(f"LOSE: {int(score)}") |
|
|
score = 1 # reset score |
|
|
score = 0 # reset score |
|
|
observation, info = env.reset(return_info=True) |
|
|
observation, info = env.reset(return_info=True) |
|
|
env.close() |
|
|
env.close() |
|
|
|
|
|
|
|
@ -43,5 +44,5 @@ def test(decision=np.array([-0.09, -0.71, -0.43 , -0.74]), seed=1992): |
|
|
if __name__ == "__main__": |
|
|
if __name__ == "__main__": |
|
|
data = pickle.load(open('data.pkl','rb')) |
|
|
data = pickle.load(open('data.pkl','rb')) |
|
|
mud_point = train(data) |
|
|
mud_point = train(data) |
|
|
print("MUD Point: {mud_point}") |
|
|
print(f"MUD Point: {mud_point}") |
|
|
test(mud_point) |
|
|
test(mud_point) |
|
|