make more reproducible and better documented
This commit is contained in:
parent
ae89e4d318
commit
146519b5f0
2
salary/.dockerignore
Normal file
2
salary/.dockerignore
Normal file
@ -0,0 +1,2 @@
|
||||
data.json
|
||||
README.md
|
13
salary/Dockerfile
Normal file
13
salary/Dockerfile
Normal file
@ -0,0 +1,13 @@
|
||||
# Base image pinned to an exact Python release so builds are repeatable.
FROM python:3.9.3

WORKDIR /app

# Install dependencies before copying the app so this layer stays cached
# when only app.py changes.
COPY requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

COPY app.py ./app.py

# data.json is deliberately NOT copied: it is listed in .dockerignore, so it is
# excluded from the build context and a COPY of it would fail the build.
# app.py falls back to built-in role definitions when the file is absent, and
# the ./st launcher bind-mounts the working directory (including data.json).

# swap health-check endpoint to be where GCP looks for it by default
# RUN find /usr/local/lib/python3.9/site-packages/streamlit -type f \( -iname \*.py -o -iname \*.js \) -print0 | xargs -0 sed -i 's/healthz/health-check/g'
|
||||
|
10
salary/Makefile
Normal file
10
salary/Makefile
Normal file
@ -0,0 +1,10 @@
|
||||
# Launch the app through the ./st wrapper script.
run:
	./st run app.py

# Install the Python dependencies listed in requirements.txt.
install:
	pip install -r requirements.txt

# Build the docker image tagged streamlit:latest (the tag ./st launches).
build:
	docker build -t streamlit:latest .

# These targets are commands, not files to be produced.
.PHONY: run install build
|
36
salary/README.md
Normal file
36
salary/README.md
Normal file
@ -0,0 +1,36 @@
|
||||
# Salary Assessment App
|
||||
|
||||
Demonstration of an interactive web application built in Python.
|
||||
This application takes in an employee roster (built interactively, best for small teams), with names, positions, and salaries for each employee.
|
||||
It then allows someone (e.g. a manager) to simulate promotions and/or salary raises and see the overall impact on the final payroll budget (visualized as a pair of overlaying distributions which show the before/after scenario under the provided ranges).
|
||||
Finally, the app "solves the problem" of assigning a new salary to each employee while staying below some pre-defined budget for payroll, using a novel Monte-Carlo method implemented directly within the app itself (rather than relying on importing another library).
|
||||
|
||||
> This example shows "a small amount of effort" exerted to ensure reproducibility and readability but ultimately lacks in overall user-friendliness.
|
||||
|
||||
Takeaways: *Good reproducibility, passable style/formatting.*
|
||||
|
||||
- Of particular note here is the executable `st` shell script (invoked by `make run`), which runs the application using a docker image when `docker` is available and falls back to native shell execution if `docker` is not in the system `$PATH`
|
||||
- Note the style in which functions are written in `app.py`
|
||||
- There is a mixture of functions and procedural code, wide abuse of global variables, and a lot of messy plotting code. Is it readable overall?
|
||||
- The app's "state" is held in `data.json`, is saved in a human-readable format (as opposed to binary), and is small enough to be negligible
|
||||
- The functions that are defined at least have readable names
|
||||
- If the app grew any larger than this, one might be wise to migrate functions into a separate module and import it in `app.py`
|
||||
- Note the lack of a proper `README` / we just "presume" the user knows what to do with the presence of a `Makefile`. Is this acceptable in your opinion?
|
||||
- Very minimal `Makefile` is "sort of self-documenting" but many people don't know to look there. It could use documentation so that `make help` at least produces some sort of a helpful guide
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
(if using local `python` instead of `docker`):
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
then
|
||||
|
||||
```bash
|
||||
make run
|
||||
```
|
||||
|
||||
and visit `localhost:8501` (or `<tld>/<user>/proxy/8501/` in Jupyter if you have `jupyter-server-proxy` installed to access the app via proxy)
|
||||
|
269
salary/app.py
Normal file
269
salary/app.py
Normal file
@ -0,0 +1,269 @@
|
||||
import dataclasses
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
from scipy.stats import distributions as dist
|
||||
from scipy.stats import gaussian_kde as gkde
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
# st.set_page_config(layout="wide")
# Initial list of job titles; reassigned further down from the loaded role
# definitions in `data`.
titles = ["Architect", "Engineer", "Sr. Engineer"]
# Number of Monte-Carlo draws generated per employee during a forecast.
num_samples = int(1e4)
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class Employee:
    """Immutable roster entry that is identified solely by its ``name``.

    Equality, ordering and hashing all use ``name`` only, so a ``set`` of
    employees holds at most one record per name and ``sorted()`` orders the
    roster alphabetically.
    """

    name: str = "John Doe"
    title: str = "unknown"
    region: int = 0
    # A salary of 0 is treated as "unspecified" by the forecast sampler.
    salary: float = 0.0
    # Performance score; the sampler only rewards values above 0.5.
    score: float = 0.5

    def __eq__(self, other):
        """Return True when ``other`` is an Employee with the same name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name == other.name

    def __lt__(self, other):
        """Order employees alphabetically by name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name < other.name

    def __hash__(self):
        """Hash on ``name`` only, keeping the hash consistent with ``__eq__``.

        Without this, the dataclass machinery generates a hash over every
        field, so two records that compare equal could hash differently.
        """
        return hash(self.name)
|
||||
|
||||
|
||||
# Starter roster used to seed the session the first time the app runs.
# Only name and title are given, so region, salary and score keep the
# Employee defaults.
default_employees = [
    Employee("Alice", "Architect"),
    Employee("Bob", "Architect"),
    Employee("Cher", "Engineer"),
    Employee("David", "Sr. Engineer"),
    Employee("Eirene", "Engineer"),
    Employee("Fiona", "Sr. Engineer"),
    Employee("Gavin", "Engineer"),
]
# for i in range(5000):
#     default_employees.append(Employee(f"Gavin {i}", np.random.choice(titles)))
|
||||
|
||||
|
||||
# Role definitions (title, salary range, raise range) are read from data.json
# when it exists; otherwise the built-in defaults below are used.
try:
    # The context manager closes the file handle even if JSON parsing fails.
    with open("data.json", "r") as data_file:
        data = json.load(data_file)
except FileNotFoundError:
    data = [
        {"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]},
    ]

# Per-title lookups derived from the role definitions.
titles = [d["title"] for d in data]
salaries = {d["title"]: d["salary"] for d in data}
increases = {d["title"]: d["raise"] for d in data}
|
||||
# increases = {title: (10, 20) for title in titles}
|
||||
# salaries = {title: (50000, 100000) for title in titles}
|
||||
|
||||
st.title("Payroll Calculator")
# Payroll ceiling entered in the sidebar; the forecast uses it to place the
# budget band and the target distribution.
budget = st.sidebar.number_input(
    "Maximum Payroll (dollars per year)", value=550000, step=5000
)

# Seed the roster with the default employees only when the session has none yet.
if "employees" not in st.session_state:
    st.session_state["employees"] = set(default_employees)
|
||||
|
||||
|
||||
def add_employee(employee_name, title="unknown", region=0, salary=0, score=0):
    """Add an employee to the session roster, replacing any same-named entry.

    Does nothing when the session has no roster yet or when
    ``employee_name`` is empty.
    """
    if "employees" not in st.session_state or not employee_name:
        return
    # Drop an existing record under this name so the new one takes its place.
    remove_employee(employee_name)
    new_record = Employee(employee_name, title, region, salary, score)
    st.session_state["employees"].add(new_record)
|
||||
|
||||
|
||||
def remove_employee(employee_name):
    """Remove the first roster entry named ``employee_name``, if there is one.

    Does nothing when the session has no roster yet, when ``employee_name``
    is empty, or when no entry carries that name.
    """
    if "employees" not in st.session_state or not employee_name:
        return
    roster = st.session_state["employees"]
    # Find the entry first, then remove it, so the set is never changed
    # while it is being iterated.
    found = next((emp for emp in roster if emp.name == employee_name), None)
    if found is not None:
        roster.remove(found)
|
||||
|
||||
|
||||
# Seed the per-title raise ranges only when the session has none yet.
if "increases" not in st.session_state:
    st.session_state["increases"] = increases


# Seed the per-title salary ranges only when the session has none yet.
if "salary_ranges" not in st.session_state:
    st.session_state["salary_ranges"] = salaries
|
||||
|
||||
# "Roles" panel: pick a title and adjust its raise-percentage and salary ranges.
with st.expander("Roles"):
    title = st.selectbox("Position", options=titles)
    # Each slider starts from the range currently stored for the chosen title.
    _inc = st.select_slider(
        f"Percentage Increase for {title}",
        value=st.session_state.increases[title],
        options=np.arange(0, 100),
    )
    _sal = st.select_slider(
        f"Salary Range for {title}",
        value=st.session_state.salary_ranges[title],
        options=np.arange(50000, 250001, 5000),
    )
    # The slider selections are written back to the session only when "Set"
    # is pressed; each value is converted to a plain int first.
    if st.button("Set"):
        st.session_state.increases[title] = [int(i) for i in _inc]
        st.session_state.salary_ranges[title] = [int(i) for i in _sal]
        st.markdown("Updated role definition.")
|
||||
|
||||
|
||||
# Sidebar controls for the target payroll distribution used by the forecast:
# `a` and `b` are passed as the beta distribution's shape parameters, and `c`
# sets the lower edge of the budget band as a fraction of `budget`.
with st.sidebar.expander("THE RED BOX"):
    a = st.slider("Upper", value=3.0, min_value=1.0, max_value=3.0, step=0.25)
    b = st.slider("Lower", value=1.0, min_value=1.0, max_value=3.0, step=0.25)
    c = st.slider("%MAX", value=0.90, min_value=0.5, max_value=1.0, step=0.05)
|
||||
|
||||
|
||||
# "Employees" panel: add, update or remove roster entries by name.
with st.expander("Employees"):
    st.markdown("You can consider promotions here as well.")
    employee_title = st.selectbox("Employee position", options=titles)
    employee_name = st.text_input("Employee name")
    col1, col2, col3 = st.columns(3)
    with col1:
        # Left at 0, the salary is simulated from the title's range in the forecast.
        salary = st.number_input("salary (optional)", value=0)
    with col3:
        region = st.number_input("region", value=0)
    with col2:
        performance = st.slider("performance", value=0.5)

    # add_employee replaces any existing entry with the same name.
    add_new_employee = st.button("Add or update employee")
    if add_new_employee:
        add_employee(employee_name, employee_title, region, salary, performance)

    rem_employee = st.button("Remove employee")
    if rem_employee:
        remove_employee(employee_name)

    # Show the current roster in the sidebar, sorted by employee name.
    st.sidebar.markdown("### Employee Roster")
    if st.session_state.get("employees"):
        st.sidebar.write([e.__dict__ for e in sorted(st.session_state["employees"])])
|
||||
|
||||
|
||||
# Pull the current roster and role settings out of the session for the forecast.
employees = st.session_state.employees
# employees = default_employees

increases = st.session_state.increases
salary_ranges = st.session_state.salary_ranges
# True only on the rerun triggered by pressing the button.
forecast = st.button("Forecast")
samples = {}
import multiprocessing as mp
import numpy as np
from copy import deepcopy

# st.write(employees == set(default_employees))

# Root seed sequence; the forecast spawns one child seed per employee from it.
ss = np.random.SeedSequence()
|
||||
|
||||
|
||||
def random_sampling(A):
    """Draw Monte-Carlo samples of one employee's current and post-raise salary.

    Args:
        A: tuple ``(rng, employee, num_samples, salary_ranges, increases)``.
            ``rng`` supplies uniform draws through ``rng.random``;
            ``employee`` provides ``name``, ``title``, ``salary`` and
            ``score``; ``salary_ranges`` and ``increases`` map a title to a
            ``(min, max)`` pair (dollars and percent respectively).

    Returns:
        ``(employee.name, sample)`` where ``sample`` holds three arrays of
        length ``num_samples``: ``"inc"`` (multiplicative raise factor),
        ``"old"`` (current salary) and ``"new"`` (salary after the raise,
        capped at the title's maximum).
    """
    rng, employee, num_samples, salary_ranges, increases = A
    # Coerce once so a float count such as 1e4 works for every array below.
    n_draws = int(num_samples)

    # TODO: revisit by zip code / region. USE salary_ranges[region][title]
    min_salary, max_salary = salary_ranges[employee.title]
    if employee.salary == 0:  # simulate salary if unspecified
        current_salary = rng.random(n_draws) * (max_salary - min_salary) + min_salary
    else:
        current_salary = np.ones(n_draws) * employee.salary

    min_raise, max_raise = increases[employee.title]
    # Uniform percentage raise converted to a factor, e.g. 15% -> 1.15.
    random_increase = (
        100 + rng.random(n_draws) * (max_raise - min_raise) + min_raise
    ) / 100
    # TODO: revisit how score is used.
    # now: up to 10% linear increase based on performance, must be over 0.5
    if employee.score > 0.5:
        # The bonus is a factor in (1.0, 1.1]; scaling by the bare fraction
        # would shrink the salary instead of raising it.
        random_increase *= 1 + (employee.score - 0.5) / 5

    sample = {
        "inc": random_increase,
        "old": current_salary,
        "new": np.minimum(current_salary * random_increase, max_salary),
    }
    return (employee.name, sample)
|
||||
|
||||
|
||||
# Forecast: sample every employee's salary, compare payroll before and after
# raises, then keep the scenarios that fit the budget band.
if forecast:
    # Sampling runs serially through map(), so no worker pool is created.
    # Each employee gets an independent generator from its own child seed.
    child_seeds = ss.spawn(len(employees))
    st.sidebar.write("Exploring Possibilities")
    salary_ranges = st.session_state.salary_ranges
    increases = st.session_state.increases
    st.write(salary_ranges, increases)
    samples_raw = map(
        random_sampling,
        [
            (np.random.default_rng(s), e, num_samples, salary_ranges, increases)
            for s, e in zip(child_seeds, employees)
        ],
    )
    samples = {s[0]: s[1] for s in samples_raw}
    st.sidebar.write("Predicting Budgets")
    # Fix a single column order (alphabetical by name) and use it for both
    # the sample matrices and the table labels, so each salary column is
    # shown under the employee it belongs to.
    employee_names = sorted(samples)
    old_salaries = np.array([samples[name]["old"] for name in employee_names]).T
    new_salaries = np.array([samples[name]["new"] for name in employee_names]).T
    # One payroll total per simulated scenario (row).
    old_payroll = old_salaries.sum(axis=1)
    new_payroll = new_salaries.sum(axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=old_payroll, histnorm="probability density", name="before")
    )
    fig.add_trace(
        go.Histogram(
            x=new_payroll,
            histnorm="probability density",
            name="after",
            marker_color="yellow",
        )
    )
    # Red band spanning c * budget up to the full budget.
    fig.add_vrect(
        x0=c * budget,
        x1=budget,
        line_color="red",
        line_width=5,
        annotation_text="budget",
        annotation_position="left",
    )
    fig.update_layout(
        title="Salary Forecast",
        xaxis_title="Required Amount ($)",
        yaxis_title="",
        font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
    )

    st.sidebar.write("Performing Analysis")
    # Density of the simulated payrolls, estimated from a random fifth of them.
    kde = gkde(np.random.choice(new_payroll, num_samples // 5))
    predicted_density = kde.pdf(new_payroll)

    # Target density: a beta distribution stretched over the budget band.
    observed_density = dist.beta(a=a, b=b, loc=c * budget, scale=(1 - c) * budget).pdf(
        new_payroll
    )
    # Rejection sampling: accept scenario r with probability ratio[r], where
    # the target/simulated density ratio is scaled so its maximum is 1.
    ratio = observed_density / predicted_density
    ratio = ratio / max(ratio)
    accepted_inds = [r for r in range(num_samples) if np.random.rand() < ratio[r]]
    new_salaries_updated = new_payroll[accepted_inds]
    fig.add_trace(
        go.Histogram(
            x=new_salaries_updated, histnorm="probability density", name="options"
        )
    )
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="right",
            y=1,
            x=1,
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(f"Summary of {len(accepted_inds)} feasible new salaries (ranked)")
    df = pd.DataFrame(new_salaries[accepted_inds, :], columns=employee_names)
    df["total"] = new_payroll[accepted_inds]
    df = df.astype(int)
    st.write(df.sort_values("total").reset_index(drop=True))
|
5
salary/data.json
Normal file
5
salary/data.json
Normal file
@ -0,0 +1,5 @@
|
||||
[
|
||||
{"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
|
||||
{"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
|
||||
{"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]}
|
||||
]
|
6
salary/requirements.txt
Normal file
6
salary/requirements.txt
Normal file
@ -0,0 +1,6 @@
|
||||
streamlit
|
||||
plotly-express
|
||||
pandas
|
||||
numpy
|
||||
scipy
|
||||
Equation
|
15
salary/st
Executable file
15
salary/st
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
# Wrapper around `streamlit`: runs it inside the docker image when docker is
# available, otherwise invokes the locally installed `streamlit` directly.
echo -e "INFO:\tThis executable is a replacement for invoking \`streamlit\`; it will attempt to first launch a docker image \`streamlit:latest\` and if it cannot find \`docker\` then it will attempt to invoke \`streamlit\` directly (you will need to run \`pip install -r requirements.txt\` for it to work)\n\n"

IMAGE_NAME=streamlit:latest
COMMAND="streamlit"
# OPTS is expanded unquoted on purpose so that each flag becomes its own word.
OPTS="--browser.serverAddress 0.0.0.0 --server.enableCORS False --server.enableXsrfProtection False"

if ! command -v docker &> /dev/null
then
    echo -e "WARNING:\tdocker could not be found, attempting running locally...\n"
    # "$@" is quoted so caller arguments containing spaces are forwarded intact.
    $COMMAND "$@" $OPTS
else
    echo -e "INFO:\t mounting \`pwd\` into container at mountpoint (and working directory) \`/tmp\` so that latest version of app & state are reflected.\n"
    docker run --name streamlit --rm -d -p 8501:8501 -v "$(pwd)":/tmp -w /tmp "$IMAGE_NAME" "$COMMAND" "$@" $OPTS
fi
|
Loading…
Reference in New Issue
Block a user