Compare commits
4 Commits
dc8848b895
...
main
Author | SHA1 | Date | |
---|---|---|---|
|
7bbca3dd92 | ||
|
dae0b55b06 | ||
|
146519b5f0 | ||
|
ae89e4d318 |
@ -10,6 +10,11 @@ The examples here do not aim to achieve "perfection" with each example, but rath
|
|||||||
|
|
||||||
In each folder there is a complete example complemented by an internal `README.md` file to explain the intended learning outcome and context for the example.
|
In each folder there is a complete example complemented by an internal `README.md` file to explain the intended learning outcome and context for the example.
|
||||||
|
|
||||||
|
The suggested order of examples is
|
||||||
|
1. `occupancy`
|
||||||
|
1. `images` (TODO)
|
||||||
|
1. `salary`
|
||||||
|
1. `games`
|
||||||
|
|
||||||
## General Guidelines
|
## General Guidelines
|
||||||
|
|
||||||
|
86
games/DemoGym.ipynb
Normal file
86
games/DemoGym.ipynb
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "71383e8d-63f1-462c-bd77-688d8d34a60a",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Demonstration of `gym`: Visualize Interactive Results in Jupyter Notebook"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "eae51654-4ccf-44ed-aaac-f1d993d7e4a1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from pyvirtualdisplay import Display\n",
|
||||||
|
"display = Display(visible=0, size=(1400, 900))\n",
|
||||||
|
"display.start()\n",
|
||||||
|
"\n",
|
||||||
|
"is_ipython = 'inline' in plt.get_backend()\n",
|
||||||
|
"if is_ipython:\n",
|
||||||
|
" from IPython import display\n",
|
||||||
|
"\n",
|
||||||
|
"plt.ion()\n",
|
||||||
|
"\n",
|
||||||
|
"# Load the gym environment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "be872e01-e4fd-4940-874e-d46e97fb3519",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import gym\n",
|
||||||
|
"import random\n",
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"\n",
|
||||||
|
"env = gym.make('LunarLander-v2')\n",
|
||||||
|
"env.seed(23)\n",
|
||||||
|
"\n",
|
||||||
|
"# Let's watch how an untrained agent moves around\n",
|
||||||
|
"\n",
|
||||||
|
"state = env.reset()\n",
|
||||||
|
"img = plt.imshow(env.render(mode='rgb_array'))\n",
|
||||||
|
"for j in range(200):\n",
|
||||||
|
"# action = agent.act(state)\n",
|
||||||
|
" action = random.choice(range(4))\n",
|
||||||
|
" img.set_data(env.render(mode='rgb_array')) \n",
|
||||||
|
" plt.axis('off')\n",
|
||||||
|
" display.display(plt.gcf())\n",
|
||||||
|
" display.clear_output(wait=True)\n",
|
||||||
|
" state, reward, done, _ = env.step(action)\n",
|
||||||
|
" if done:\n",
|
||||||
|
" break\n",
|
||||||
|
"\n",
|
||||||
|
"env.close()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
74
games/DemoMUD.ipynb
Normal file
74
games/DemoMUD.ipynb
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2e848ca9-c915-4aa2-a7cc-a5654ed06863",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Demonstration of Training and Testing"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a9506e99-a947-4f69-8355-a3ce696793fa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from main import train, test\n",
|
||||||
|
"import pickle"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "de500d9e-40d1-4b6b-900f-96c2ec69e464",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = pickle.load(open(\"data.pkl\", \"rb\"))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "39ca7791-c844-4231-9f3b-e8ae80fe8103",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"mud_point = train(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8d8c70ab-d055-418c-b67e-ba5109d989f3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"test(mud_point)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
72
games/README.md
Normal file
72
games/README.md
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# PREFACE
|
||||||
|
|
||||||
|
This is a direct migration (stripping `git` history) of [mud-games](https://git.mlden.com/mm/mud-games) (as of commit `1a2259827f`) which shows an actual research-oriented experiment which involves a novel method of "training" (this `mud` stuff) and "testing" (visually).
|
||||||
|
The intent was to explore a utility library named [`gym`](https://github.com/openai/gym) which provides a consistent interface with which to train reinforcement-learning algorithms, and try to "learn to win" one of its most basic games (`CartPole-v1`).
|
||||||
|
|
||||||
|
|
||||||
|
Takeaways from this example:
|
||||||
|
|
||||||
|
- much more friendly for reproducibility
|
||||||
|
- runs on desktop AND in notebook (handling visual output is tricky, leverage the patterns here if you need to move interactive outputs into the cloud)
|
||||||
|
- functions defined in `main.py` are "clean" but still not "clear"
|
||||||
|
- notice the lack of documentation: where would it be helpful to have it?
|
||||||
|
- data is not only supplied (perhaps not good to commit it) but a method to generate it is also provided (takes some time)
|
||||||
|
- notice the comprehensive `README` below
|
||||||
|
|
||||||
|
# mud-games
|
||||||
|
|
||||||
|
control systems with MUD points
|
||||||
|
|
||||||
|
|
||||||
|
# installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# usage
|
||||||
|
|
||||||
|
A `data.pkl` file is provided for your convenience with input / output samples.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also instead use the included [jupyter notebook](./DemoMUD.ipynb).
|
||||||
|
|
||||||
|
|
||||||
|
# info
|
||||||
|
|
||||||
|
The inputs are the parameters to a `1x4` matrix which is multiplied against the observations of the state in order to make a decision for the next action (push left or right). The output of the vector inner-product is binarized by comparing it to zero as a threshold value.
|
||||||
|
|
||||||
|
The parameter space is standard normal.
|
||||||
|
There is no assumed error in observations; the "data variance" is designed to reflect the acceptable [ranges for the observations](https://www.gymlibrary.ml/pages/environments/classic_control/cart_pole):
|
||||||
|
- The cart x-position (index 0) can take values between (-4.8, 4.8), but the episode terminates if the cart leaves the (-2.4, 2.4) range.
|
||||||
|
- The pole angle can be observed between (-.418, .418) radians (or ±24°), but the episode terminates if the pole angle is not in the range (-.2095, .2095) (or ±12°)
|
||||||
|
|
||||||
|
|
||||||
|
Therefore, since our objective is to stabilize the cart, the target "time series signal" is zero for all four dimensions of the observation space. The presumed "data variance" should actually correspond to the acceptable bands of signal (WIP).
|
||||||
|
|
||||||
|
|
||||||
|
# generate data
|
||||||
|
|
||||||
|
You can generate your own data with:
|
||||||
|
```bash
|
||||||
|
python sample.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: if you change the presumed sample space in `sample.py`, you should make the corresponding changes to the initial distribution in `main.py`.
|
||||||
|
|
||||||
|
|
||||||
|
# improvements
|
||||||
|
|
||||||
|
Using the following presumptions, we can establish better values for the "data variance":
|
||||||
|
|
||||||
|
> The angular momentum of the pole is the most important thing to stabilize.
|
||||||
|
|
||||||
|
|
||||||
|
# headless mode / notebook demos
|
||||||
|
|
||||||
|
Run `./headless.sh` (requires `sudo`) to install virtual displays so you can use the included Jupyter notebooks.
|
||||||
|
|
BIN
games/data.pkl
Normal file
BIN
games/data.pkl
Normal file
Binary file not shown.
3
games/headless.sh
Executable file
3
games/headless.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/sh
# Install what the headless / notebook demos need (uses sudo for the apt step).
# xvfb provides the virtual display; presumably build-essential and swig are
# required to compile box2d-py -- TODO confirm.
sudo apt update && sudo apt install build-essential xvfb swig
# Python packages: Box2D bindings plus the pyvirtualdisplay wrapper that the
# notebooks and main.py import.
pip install box2d-py pyvirtualdisplay
|
71
games/main.py
Normal file
71
games/main.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from scipy.stats import gaussian_kde as gkde
|
||||||
|
from scipy.stats import norm
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Start a virtual display when pyvirtualdisplay is installed; without it the
# script carries on and relies on whatever display is available.
try:
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(1400, 900))
    display.start()
except ImportError:
    pass

# True when matplotlib's active backend name contains 'inline'.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    # NOTE(review): this rebinds the module-level name `display` from the
    # pyvirtualdisplay instance created above to the IPython.display module;
    # test() relies on the IPython meaning when is_ipython is true.
    from IPython import display

plt.ion()
|
||||||
|
|
||||||
|
def train(data):
    """Pick the MUD point out of a collection of sampled runs.

    Parameters
    ----------
    data : sequence of dict
        One record per sample. Each record must provide ``"lam"`` (the
        4 parameters that were tried) and ``"obs"`` (the list of 4-component
        observations recorded while using them).

    Returns
    -------
    The ``"lam"`` entry of the record that maximizes ``u = i * o / p``
    (defined in the body). The winning index and value are also printed.
    """
    frame = pd.DataFrame(data)
    n_dims = 4
    # Per-component scale applied to the summed observations.
    scale = np.array([1.0, 0.5, 0.2, 0.5])

    def _quantity_of_interest(run):
        # Column-wise sum of the run, scaled and normalized by sqrt(length).
        return np.sum(run, axis=0) / scale / np.sqrt(len(run))

    def _standard_normal_density(vec):
        # Product of standard-normal pdf values over all components.
        return norm.pdf(vec).prod()

    frame["qoi"] = frame["obs"].apply(_quantity_of_interest)
    frame["i"] = frame["lam"].apply(_standard_normal_density)
    frame["o"] = frame["qoi"].apply(_standard_normal_density)

    # One 1-D kernel density estimate per component of the quantity of interest.
    stacked = np.array(frame["qoi"].to_list()).reshape(-1, n_dims)
    estimators = [gkde(stacked[:, dim]) for dim in range(n_dims)]

    def _predicted_density(q):
        per_dim = [estimators[dim].pdf(q[dim]) for dim in range(n_dims)]
        return np.prod(per_dim)

    frame["p"] = frame["qoi"].apply(_predicted_density)
    frame["u"] = frame["i"] * frame["o"] / frame["p"]

    mud_point_idx = frame["u"].argmax()
    mud_point = frame["lam"].iloc[mud_point_idx]
    print(f"MUD Point {mud_point_idx}: {mud_point}")
    return mud_point
|
||||||
|
|
||||||
|
|
||||||
|
def test(decision=np.array([-0.09, -0.71, -0.43, -0.74]), seed=1992):
    """Play CartPole-v1 for 10000 steps with a fixed linear decision rule.

    Parameters
    ----------
    decision : array of 4 floats
        Weights applied to the observation; the action is 1 when the inner
        product with the observation is negative, otherwise 0.
    seed : int
        Seed passed to the first ``env.reset`` call.

    Each time an episode ends the outcome is printed ("WIN" when the
    accumulated reward equals 500, otherwise "LOSE: <score>") and the
    environment is reset. Frames are drawn inline when ``is_ipython`` is
    true and through ``env.render()`` otherwise.
    """
    env = gym.make("CartPole-v1")
    observation, info = env.reset(seed=seed, return_info=True)
    score = 0
    if is_ipython:
        img = plt.imshow(env.render(mode='rgb_array'))

    for _step in range(10000):
        if decision.T @ observation < 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = env.step(action)
        score += reward

        if is_ipython:
            # Redraw the current frame in place of the previous one.
            img.set_data(env.render(mode='rgb_array'))
            plt.axis('off')
            display.display(plt.gcf())
            display.clear_output(wait=True)
        else:
            env.render()

        if not done:
            continue

        # Episode finished: report the result and start a fresh one.
        print("WIN" if score == 500 else f"LOSE: {int(score)}")
        score = 0  # reset score
        observation, info = env.reset(return_info=True)

    env.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Load the bundled samples, pick the MUD point, then watch it play.
    # The context manager closes the pickle file as soon as it has been read.
    # NOTE: pickle must only be used on trusted files such as the bundled data.pkl.
    with open("data.pkl", "rb") as data_file:
        data = pickle.load(data_file)
    mud_point = train(data)
    test(mud_point)
|
5
games/requirements.txt
Normal file
5
games/requirements.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
scipy
|
||||||
|
numpy
|
||||||
|
gym[classic_control]
|
||||||
|
matplotlib
|
||||||
|
pandas
|
65
games/sample.py
Normal file
65
games/sample.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import pickle
|
||||||
|
import gym
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
# Generate data.pkl: for each randomly drawn parameter vector `lam`, drive
# CartPole-v1 for `max_steps` steps with the rule "action 1 when lam . obs < 0"
# and record the observations, plotting them live as they arrive.

# numpy precision for printing
np.set_printoptions(precision=3, suppress=True)

plt.ion()  # interactive plotting
fig, ax = plt.subplots()
colors = ["xkcd:orange", "xkcd:forest green", "xkcd:gray", "xkcd:light blue"]
plots = [None] * 4

env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42, return_info=True)

max_steps = 100
num_samples = 500
samples = np.random.randn(num_samples, 4)

data = []
for lam in samples:
    breakpoints = []
    score = 0
    obs = []
    for n in range(max_steps):
        ax.cla()
        # action = env.action_space.sample()
        action = 1 if lam.T @ observation < 0 else 0
        # action = 1 if observation[0] - observation[3] < 0 else 0
        observation, reward, done, info = env.step(action)
        score += reward
        obs.append(observation.tolist())
        o = np.array(obs)
        # Variance over the observations of the current episode only.
        var = np.var(o[-int(score):, :], axis=0)
        for q in range(4):
            # Pad with zeros so every curve spans the full x-range.
            lines = np.hstack([o[:, q], np.zeros(max_steps - n)])
            ax.plot(range(max_steps + 1), lines, c=colors[q])

        ax.set_title(f"Reward: {int(score)}, Variance: {var}")
        ax.set_ylim([-3, 3])

        # Reset when the episode ends, and also on the last step of this
        # sample so the next `lam` starts from a fresh environment state.
        # `n` stops at max_steps - 1, so that is the value to compare against.
        if done or n == max_steps - 1:
            breakpoints.append(n)
            observation, info = env.reset(return_info=True)
            # print(score, observation)
            score = 0  # reset score

        # draw break-point lines wherever the environment was reset
        for b in breakpoints:
            ax.vlines(
                b, np.min(o, axis=0).min(), np.max(o, axis=0).max(), color="black", lw=2
            )

        fig.canvas.draw()
        fig.show()
        fig.canvas.flush_events()
        env.render()

    data.append({"lam": lam, "obs": obs, "break": breakpoints})
    # dump data frequently; the context manager flushes and closes the file
    # on every pass instead of leaving one open handle per sample.
    with open("data.pkl", "wb") as out_file:
        pickle.dump(data, out_file)

stop = input("Press any key to close.")
plt.close()
env.close()
|
2
salary/.dockerignore
Normal file
2
salary/.dockerignore
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
data.json
|
||||||
|
README.md
|
13
salary/Dockerfile
Normal file
13
salary/Dockerfile
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
FROM python:3.9.3

WORKDIR /app

# Install dependencies before copying the app so this layer stays cached
# when only app.py changes.
COPY requirements.txt ./requirements.txt
RUN pip3 install -r requirements.txt

COPY app.py ./app.py

# data.json is intentionally NOT copied: .dockerignore excludes it from the
# build context, so a COPY of it makes the build fail. app.py falls back to
# built-in role definitions when the file is absent, and the `st` launcher
# bind-mounts the working directory (including data.json) at run time.

# swap health-check endpoint to be where GCP looks for it by default
# RUN find /usr/local/lib/python3.9/site-packages/streamlit -type f \( -iname \*.py -o -iname \*.js \) -print0 | xargs -0 sed -i 's/healthz/health-check/g'
|
||||||
|
|
10
salary/Makefile
Normal file
10
salary/Makefile
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
run:
|
||||||
|
./st run app.py
|
||||||
|
|
||||||
|
install:
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
build:
|
||||||
|
docker build -t streamlit:latest .
|
||||||
|
|
||||||
|
.PHONY: run install build
|
36
salary/README.md
Normal file
36
salary/README.md
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Salary Assessment App
|
||||||
|
|
||||||
|
Demonstration of an interactive web-application built in python.
|
||||||
|
This application takes in an employee roster (built interactively, best for small teams), with names, positions, and salaries for each employee.
|
||||||
|
It then allows someone (e.g. a manager) to simulate promotions and/or salary raises and see the overall impact on the final payroll budget (visualized as a pair of overlaying distributions which show the before/after scenario under the provided ranges).
|
||||||
|
Finally, the app "solves the problem" of assigning a new salary to each employee while staying below some pre-defined budget for payroll, using a novel Monte-Carlo method implemented directly within the app itself (rather than relying on importing another library).
|
||||||
|
|
||||||
|
> This example shows "a small amount of effort" exerted to ensure reproducibility and readability but ultimately lacks in overall user-friendliness.
|
||||||
|
|
||||||
|
Takeaways: *Good reproducibility, passable style/formatting.*
|
||||||
|
|
||||||
|
- Of particular note here is the executable `st` shell script (invoked by `make run`) which provides support for running the application using a docker image as well as simultaneously supporting native shell execution if `docker` is not in the system `$PATH`
|
||||||
|
- Note the style in which functions are written in `app.py`
|
||||||
|
- There is a mixture of functions and procedural code, wide abuse of global variables, and a lot of messy plotting code. Is it readable overall?
|
||||||
|
- The app's "state" is held in `data.json`, is saved in a human-readable format (as opposed to binary), and is small enough to be negligible
|
||||||
|
- The functions that are defined at least have readable names
|
||||||
|
- If the app grew any larger than this, one might be wise to migrate functions into a separate module and import it in `app.py`
|
||||||
|
- Note the lack of a proper `README` / we just "presume" the user knows what to do with the presence of a `Makefile`. Is this acceptable in your opinion?
|
||||||
|
- Very minimal `Makefile` is "sort of self-documenting" but many people don't know to look there. It could use documentation so that `make help` at least produces some sort of a helpful guide
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
(if using local `python` instead of `docker`):
|
||||||
|
```
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
then
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make run
|
||||||
|
```
|
||||||
|
|
||||||
|
and visit `localhost:8501` (or `<tld>/<user>/proxy/8501/` in Jupyter if you have `jupyter-server-proxy` installed to access the app via proxy)
|
||||||
|
|
269
salary/app.py
Normal file
269
salary/app.py
Normal file
@ -0,0 +1,269 @@
|
|||||||
|
import dataclasses
|
||||||
|
import json
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import streamlit as st
|
||||||
|
from scipy.stats import distributions as dist
|
||||||
|
from scipy.stats import gaussian_kde as gkde
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
# st.set_page_config(layout="wide")
|
||||||
|
titles = ["Architect", "Engineer", "Sr. Engineer"]
|
||||||
|
num_samples = int(1e4)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
class Employee:
    """Immutable roster entry, identified by ``name`` alone.

    Equality, ordering and hashing all use only ``name`` so that a set of
    employees holds at most one entry per name, whatever the other fields
    contain.
    """

    name: str = "John Doe"
    title: str = "unknown"
    region: int = 0
    salary: float = 0.0  # 0 means "unspecified"; random_sampling() then simulates it
    score: float = 0.5

    def __eq__(self, other):
        """Return True when *other* is an Employee with the same name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name == other.name

    def __lt__(self, other):
        """Order employees alphabetically by name (used by ``sorted``)."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name < other.name

    def __hash__(self):
        # Must agree with __eq__. Without this the dataclass machinery
        # generates a hash over every field, so two "equal" employees could
        # land in different set buckets.
        return hash(self.name)
|
||||||
|
|
||||||
|
|
||||||
|
default_employees = [
|
||||||
|
Employee("Alice", "Architect"),
|
||||||
|
Employee("Bob", "Architect"),
|
||||||
|
Employee("Cher", "Engineer"),
|
||||||
|
Employee("David", "Sr. Engineer"),
|
||||||
|
Employee("Eirene", "Engineer"),
|
||||||
|
Employee("Fiona", "Sr. Engineer"),
|
||||||
|
Employee("Gavin", "Engineer"),
|
||||||
|
]
|
||||||
|
# for i in range(5000):
|
||||||
|
# default_employees.append(Employee(f"Gavin {i}", np.random.choice(titles)))
|
||||||
|
|
||||||
|
|
||||||
|
# Role definitions (title, salary range, raise range) come from data.json
# when it exists next to the app; otherwise the defaults below are used.
try:
    # The context manager closes the file even if parsing fails.
    with open("data.json", "r") as _role_file:
        data = json.load(_role_file)
except FileNotFoundError:
    data = [
        {"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]},
    ]
|
||||||
|
|
||||||
|
titles = [d["title"] for d in data]
|
||||||
|
salaries = {d["title"]: d["salary"] for d in data}
|
||||||
|
increases = {d["title"]: d["raise"] for d in data}
|
||||||
|
# increases = {title: (10, 20) for title in titles}
|
||||||
|
# salaries = {title: (50000, 100000) for title in titles}
|
||||||
|
|
||||||
|
st.title("Payroll Calculator")
|
||||||
|
budget = st.sidebar.number_input(
|
||||||
|
"Maximum Payroll (dollars per year)", value=550000, step=5000
|
||||||
|
)
|
||||||
|
|
||||||
|
if "employees" not in st.session_state:
|
||||||
|
st.session_state["employees"] = set(default_employees)
|
||||||
|
|
||||||
|
|
||||||
|
def add_employee(employee_name, title="unknown", region=0, salary=0, score=0):
    """Insert an employee into the session roster, replacing any same-named entry.

    Nothing happens when the session has no "employees" entry yet or when
    ``employee_name`` is empty.
    """
    if "employees" not in st.session_state or not employee_name:
        return

    # Drop a previous entry with this name first so the call acts as an update.
    remove_employee(employee_name)
    new_entry = Employee(employee_name, title, region, salary, score)
    st.session_state["employees"].add(new_entry)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_employee(employee_name):
    """Remove the first roster entry whose name matches ``employee_name``.

    Nothing happens when the session has no "employees" entry, when
    ``employee_name`` is empty, or when no entry matches.
    """
    if "employees" not in st.session_state or not employee_name:
        return

    roster = st.session_state["employees"]
    # Find the match before mutating, so the set is never changed mid-iteration.
    match = next((e for e in roster if e.name == employee_name), None)
    if match is not None:
        roster.remove(match)
|
||||||
|
|
||||||
|
|
||||||
|
if "increases" not in st.session_state:
|
||||||
|
st.session_state["increases"] = increases
|
||||||
|
|
||||||
|
|
||||||
|
if "salary_ranges" not in st.session_state:
|
||||||
|
st.session_state["salary_ranges"] = salaries
|
||||||
|
|
||||||
|
with st.expander("Roles"):
|
||||||
|
title = st.selectbox("Position", options=titles)
|
||||||
|
_inc = st.select_slider(
|
||||||
|
f"Percentage Increase for {title}",
|
||||||
|
value=st.session_state.increases[title],
|
||||||
|
options=np.arange(0, 100),
|
||||||
|
)
|
||||||
|
_sal = st.select_slider(
|
||||||
|
f"Salary Range for {title}",
|
||||||
|
value=st.session_state.salary_ranges[title],
|
||||||
|
options=np.arange(50000, 250001, 5000),
|
||||||
|
)
|
||||||
|
if st.button("Set"):
|
||||||
|
st.session_state.increases[title] = [int(i) for i in _inc]
|
||||||
|
st.session_state.salary_ranges[title] = [int(i) for i in _sal]
|
||||||
|
st.markdown("Updated role definition.")
|
||||||
|
|
||||||
|
|
||||||
|
with st.sidebar.expander("THE RED BOX"):
|
||||||
|
a = st.slider("Upper", value=3.0, min_value=1.0, max_value=3.0, step=0.25)
|
||||||
|
b = st.slider("Lower", value=1.0, min_value=1.0, max_value=3.0, step=0.25)
|
||||||
|
c = st.slider("%MAX", value=0.90, min_value=0.5, max_value=1.0, step=0.05)
|
||||||
|
|
||||||
|
|
||||||
|
with st.expander("Employees"):
|
||||||
|
st.markdown("You can consider promotions here as well.")
|
||||||
|
employee_title = st.selectbox("Employee position", options=titles)
|
||||||
|
employee_name = st.text_input("Employee name")
|
||||||
|
col1, col2, col3 = st.columns(3)
|
||||||
|
with col1:
|
||||||
|
salary = st.number_input("salary (optional)", value=0)
|
||||||
|
with col3:
|
||||||
|
region = st.number_input("region", value=0)
|
||||||
|
with col2:
|
||||||
|
performance = st.slider("performance", value=0.5)
|
||||||
|
|
||||||
|
add_new_employee = st.button("Add or update employee")
|
||||||
|
if add_new_employee:
|
||||||
|
add_employee(employee_name, employee_title, region, salary, performance)
|
||||||
|
|
||||||
|
rem_employee = st.button("Remove employee")
|
||||||
|
if rem_employee:
|
||||||
|
remove_employee(employee_name)
|
||||||
|
|
||||||
|
st.sidebar.markdown("### Employee Roster")
|
||||||
|
if st.session_state.get("employees"):
|
||||||
|
st.sidebar.write([e.__dict__ for e in sorted(st.session_state["employees"])])
|
||||||
|
|
||||||
|
|
||||||
|
employees = st.session_state.employees
|
||||||
|
# employees = default_employees
|
||||||
|
|
||||||
|
increases = st.session_state.increases
|
||||||
|
salary_ranges = st.session_state.salary_ranges
|
||||||
|
forecast = st.button("Forecast")
|
||||||
|
samples = {}
|
||||||
|
import multiprocessing as mp
|
||||||
|
import numpy as np
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
# st.write(employees == set(default_employees))
|
||||||
|
|
||||||
|
ss = np.random.SeedSequence()
|
||||||
|
|
||||||
|
|
||||||
|
def random_sampling(A):
    """Draw Monte-Carlo salary scenarios for a single employee.

    Parameters
    ----------
    A : tuple
        ``(rng, employee, num_samples, salary_ranges, increases)`` packed
        into one argument so the function can be used with ``map``.
        ``rng`` is a ``numpy.random.Generator``; ``salary_ranges`` and
        ``increases`` map a title to a ``(min, max)`` pair (increases are in
        percent).

    Returns
    -------
    tuple
        ``(employee.name, sample)`` where ``sample`` holds three arrays of
        length ``num_samples``: ``"inc"`` (multiplicative raise factor),
        ``"old"`` (current salary) and ``"new"`` (raised salary, capped at
        the top of the title's salary range).
    """
    rng, employee, num_samples, salary_ranges, increases = A
    n_draws = int(num_samples)

    # TODO: revisit by zip code / region. USE salary_ranges[region][title]
    mnS, mxS = salary_ranges[employee.title]
    if employee.salary == 0:  # simulate salary if unspecified
        current_salary = rng.random(n_draws) * (mxS - mnS) + mnS
    else:
        current_salary = np.ones(n_draws) * employee.salary

    mnI, mxI = increases[employee.title]
    # Uniform raise between mnI% and mxI%, expressed as a factor (e.g. 1.15).
    random_increase = (100 + rng.random(n_draws) * (mxI - mnI) + mnI) / 100

    # TODO: revisit how score is used.
    # now: up to 10% linear increase based on performance, must be over 0.5
    if employee.score > 0.5:
        # The bonus is a factor of 1.0-1.1 on top of the raise. Multiplying by
        # the bare fraction (<= 0.1) would shrink the salary instead.
        random_increase *= 1 + (employee.score - 0.5) / 5

    sample = {
        "inc": random_increase,
        "old": current_salary,
        "new": np.minimum(current_salary * random_increase, mxS),
    }
    return (employee.name, sample)
|
||||||
|
|
||||||
|
|
||||||
|
# Forecast: simulate every employee's old/new salary, compare the resulting
# payroll distributions against the budget band, and list the simulated
# scenarios that are accepted by rejection sampling.
if forecast and not employees:
    # With no employees the per-draw payroll sums below cannot be formed.
    st.sidebar.write("No employees to forecast.")
elif forecast:
    # One independent random stream per employee. Sampling runs in-process,
    # so no worker pool is started (a pool created here would be re-created
    # on every click and never shut down).
    child_seeds = ss.spawn(len(employees))
    st.sidebar.write("Exploring Possibilities")
    salary_ranges = st.session_state.salary_ranges
    increases = st.session_state.increases
    st.write(salary_ranges, increases)
    samples_raw = map(
        random_sampling,
        [
            (np.random.default_rng(s), e, num_samples, salary_ranges, increases)
            for s, e in zip(child_seeds, employees)
        ],
    )
    samples = {s[0]: s[1] for s in samples_raw}

    st.sidebar.write("Predicting Budgets")
    # Rows are simulated scenarios; columns follow the insertion order of `samples`.
    employee_names = list(samples)
    old_salaries = np.array([samples[s]["old"] for s in employee_names]).T
    new_salaries = np.array([samples[s]["new"] for s in employee_names]).T
    old_payroll = old_salaries.sum(axis=1)
    new_payroll = new_salaries.sum(axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=old_payroll, histnorm="probability density", name="before")
    )
    fig.add_trace(
        go.Histogram(
            x=new_payroll,
            histnorm="probability density",
            name="after",
            marker_color="yellow",
        )
    )
    # Red band from c * budget up to the budget itself.
    fig.add_vrect(
        x0=c * budget,
        x1=budget,
        line_color="red",
        line_width=5,
        annotation_text="budget",
        annotation_position="left",
    )
    fig.update_layout(
        title="Salary Forecast",
        xaxis_title="Required Amount ($)",
        yaxis_title="",
        font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
    )

    st.sidebar.write("Performing Analysis")
    # Density of the simulated payrolls, estimated from a random subsample.
    kde = gkde(np.random.choice(new_payroll, num_samples // 5))
    predicted_density = kde.pdf(new_payroll)

    # Target density: a beta(a, b) distribution stretched over the red band.
    observed_density = dist.beta(a=a, b=b, loc=c * budget, scale=(1 - c) * budget).pdf(
        new_payroll
    )
    # Rejection sampling: keep scenario r with probability ratio[r] / max(ratio).
    ratio = observed_density / predicted_density
    ratio = ratio / ratio.max()
    accepted_inds = [r for r in range(num_samples) if np.random.rand() < ratio[r]]
    new_salaries_updated = new_payroll[accepted_inds]
    fig.add_trace(
        go.Histogram(
            x=new_salaries_updated, histnorm="probability density", name="options"
        )
    )
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="right",
            y=1,
            x=1,
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(f"Summary of {len(accepted_inds)} feasible new salaries (ranked)")
    # Label each column with the employee it was actually sampled for, and only
    # then sort the columns alphabetically for display. Labelling with the
    # sorted names directly would attach salaries to the wrong employees.
    df = pd.DataFrame(new_salaries[accepted_inds, :], columns=employee_names)
    df = df[sorted(employee_names)]
    df["total"] = new_payroll[accepted_inds]
    df = df.astype(int)
    st.write(df.sort_values("total").reset_index(drop=True))
|
5
salary/data.json
Normal file
5
salary/data.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
[
|
||||||
|
{"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
|
||||||
|
{"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
|
||||||
|
{"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]}
|
||||||
|
]
|
6
salary/requirements.txt
Normal file
6
salary/requirements.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
streamlit
|
||||||
|
plotly-express
|
||||||
|
pandas
|
||||||
|
numpy
|
||||||
|
scipy
|
||||||
|
Equation
|
15
salary/st
Executable file
15
salary/st
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Launcher that stands in for the `streamlit` command: runs it inside the
# `streamlit:latest` docker image when docker is available, otherwise runs
# the locally installed `streamlit`. All arguments are forwarded.
echo -e "INFO:\tThis executable is a replacement for invoking \`streamlit\`; it will attempt to first launch a docker image \`streamlit:latest\` and if it cannot find \`docker\` then it will attempt to invoke \`streamlit\` directly (you will need to run \`pip install -r requirements.txt\` for it to work)\n\n"

IMAGE_NAME=streamlit:latest
COMMAND="streamlit"
# Left unquoted at the call sites on purpose: it must split into separate flags.
OPTS="--browser.serverAddress 0.0.0.0 --server.enableCORS False --server.enableXsrfProtection False"

if ! command -v docker &> /dev/null
then
    echo -e "WARNING:\tdocker could not be found, attempting running locally...\n"
    # "$@" keeps each caller argument intact even if it contains spaces.
    $COMMAND "$@" $OPTS
else
    echo -e "INFO:\t mounting \`pwd\` into container at mountpoint (and working directory) \`/tmp\` so that latest version of app & state are reflected.\n"
    docker run --name streamlit --rm -d -p 8501:8501 -v "$(pwd)":/tmp -w /tmp "$IMAGE_NAME" "$COMMAND" "$@" $OPTS
fi
|
Loading…
Reference in New Issue
Block a user