Michael Pilosov
3 years ago
8 changed files with 356 additions and 0 deletions
@ -0,0 +1,2 @@ |
|||||
|
data.json |
||||
|
README.md |
@ -0,0 +1,13 @@ |
|||||
|
FROM python:3.9.3 |
||||
|
|
||||
|
WORKDIR /app |
||||
|
|
||||
|
COPY app.py ./app.py |
||||
|
COPY data.json ./data.json |
||||
|
COPY requirements.txt ./requirements.txt |
||||
|
|
||||
|
RUN pip3 install -r requirements.txt |
||||
|
|
||||
|
# swap health-check endpoint to be where GCP looks for it by default |
||||
|
# RUN find /usr/local/lib/python3.9/site-packages/streamlit -type f \( -iname \*.py -o -iname \*.js \) -print0 | xargs -0 sed -i 's/healthz/health-check/g' |
||||
|
|
@ -0,0 +1,10 @@ |
|||||
|
run: |
||||
|
./st run app.py |
||||
|
|
||||
|
install: |
||||
|
pip install -r requirements.txt |
||||
|
|
||||
|
build: |
||||
|
docker build -t streamlit:latest . |
||||
|
|
||||
|
.PHONY: run install build |
@ -0,0 +1,36 @@ |
|||||
|
# Salary Assessment App |
||||
|
|
||||
|
Demonstration of an interactive web-application built in python. |
||||
|
This application takes in an employee roster (built interactively, best for small teams), with names, positions, and salaries for each employee. |
||||
|
It then allows someone (e.g. a manager) to simulate promotions and/or salary raises and see the overall impact on the final payroll budget (visualized as a pair of overlaying distributions which show the before/after scenario under the provided ranges). |
||||
|
Finally, the app "solves the problem" of assigning a new salary to each employee while staying below some pre-defined budget for payroll, using a novel Monte-Carlo method implemented directly within the app itself (rather than relying on importing another library). |
||||
|
|
||||
|
> This example shows "a small amount of effort" exerted to ensure reproducibility and readability but ultimately lacks in overall user-friendliness. |
||||
|
|
||||
|
Takeaways: *Good reproducibility, passable style/formatting.* |
||||
|
|
||||
|
- Of particular note here is the executable `st` shell script (invoked by `make run`), which runs the application in a docker image and falls back to native shell execution if `docker` is not in the system `$PATH`.
||||
|
- Note the style in which functions are written in `app.py` |
||||
|
- There is a mixture of functions and procedural code, wide abuse of global variables, and a lot of messy plotting code. Is it readable overall? |
||||
|
- The app's "state" is held in `data.json`, is saved in a human-readable format (as opposed to binary), and is small enough to be negligible |
||||
|
- The functions that are defined at least have readable names |
||||
|
- If the app grew any larger than this, one might be wise to migrate functions into a separate module and import it in `app.py` |
||||
|
- Note the lack of a proper `README` / we just "presume" the user knows what to do with the presence of a `Makefile`. Is this acceptable in your opinion? |
||||
|
- Very minimal `Makefile` is "sort of self-documenting" but many people don't know to look there. It could use documentation so that `make help` at least produces some sort of a helpful guide |
||||
|
|
||||
|
|
||||
|
## Usage |
||||
|
|
||||
|
(if using local `python` instead of `docker`): |
||||
|
``` |
||||
|
make install |
||||
|
``` |
||||
|
|
||||
|
then |
||||
|
|
||||
|
```bash |
||||
|
make run |
||||
|
``` |
||||
|
|
||||
|
and visit `localhost:8501` (or `<tld>/<user>/proxy/8501/` in Jupyter if you have `jupyter-server-proxy` installed to access the app via proxy) |
||||
|
|
@ -0,0 +1,269 @@ |
|||||
|
import dataclasses |
||||
|
import json |
||||
|
|
||||
|
import numpy as np |
||||
|
import pandas as pd |
||||
|
import streamlit as st |
||||
|
from scipy.stats import distributions as dist |
||||
|
from scipy.stats import gaussian_kde as gkde |
||||
|
import plotly.express as px |
||||
|
import plotly.graph_objects as go |
||||
|
|
||||
|
# st.set_page_config(layout="wide") |
||||
|
titles = ["Architect", "Engineer", "Sr. Engineer"] |
||||
|
num_samples = int(1e4) |
||||
|
|
||||
|
|
||||
|
@dataclasses.dataclass(frozen=True)
class Employee:
    """Immutable roster entry that is identified solely by ``name``.

    Equality, hashing and ordering all look at ``name`` only, so a ``set`` of
    employees can hold at most one entry per name no matter how the other
    fields differ.
    """

    name: str = "John Doe"
    title: str = "unknown"
    region: int = 0
    salary: float = 0.0
    score: float = 0.5

    def __eq__(self, other):
        """Return True when ``other`` is an Employee with the same name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Must agree with __eq__. Without this explicit definition the frozen
        # dataclass generates a hash over *all* fields, so two "equal"
        # employees could hash differently and both end up in the same set.
        return hash(self.name)

    def __lt__(self, other):
        """Order employees alphabetically by name (used by ``sorted``)."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name < other.name
||||
|
|
||||
|
|
||||
|
default_employees = [ |
||||
|
Employee("Alice", "Architect"), |
||||
|
Employee("Bob", "Architect"), |
||||
|
Employee("Cher", "Engineer"), |
||||
|
Employee("David", "Sr. Engineer"), |
||||
|
Employee("Eirene", "Engineer"), |
||||
|
Employee("Fiona", "Sr. Engineer"), |
||||
|
Employee("Gavin", "Engineer"), |
||||
|
] |
||||
|
# for i in range(5000): |
||||
|
# default_employees.append(Employee(f"Gavin {i}", np.random.choice(titles))) |
||||
|
|
||||
|
|
||||
|
# Load the per-title salary/raise ranges from disk, falling back to built-in
# defaults when no data.json is present in the working directory.
try:
    # The context manager guarantees the handle is closed even if parsing fails.
    with open("data.json", "r", encoding="utf-8") as _fh:
        data = json.load(_fh)
except FileNotFoundError:
    data = [
        {"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]},
    ]
||||
|
|
||||
|
titles = [d["title"] for d in data] |
||||
|
salaries = {d["title"]: d["salary"] for d in data} |
||||
|
increases = {d["title"]: d["raise"] for d in data} |
||||
|
# increases = {title: (10, 20) for title in titles} |
||||
|
# salaries = {title: (50000, 100000) for title in titles} |
||||
|
|
||||
|
st.title("Payroll Calculator") |
||||
|
budget = st.sidebar.number_input( |
||||
|
"Maximum Payroll (dollars per year)", value=550000, step=5000 |
||||
|
) |
||||
|
|
||||
|
if "employees" not in st.session_state: |
||||
|
st.session_state["employees"] = set(default_employees) |
||||
|
|
||||
|
|
||||
|
def add_employee(employee_name, title="unknown", region=0, salary=0, score=0):
    """Insert, or replace, the roster entry for ``employee_name``.

    Does nothing when the name is empty or the session has no roster yet.
    """
    if not employee_name or "employees" not in st.session_state:
        return
    # Drop any existing record with this name first so the new values win.
    remove_employee(employee_name)
    record = Employee(employee_name, title, region, salary, score)
    st.session_state["employees"].add(record)
||||
|
|
||||
|
|
||||
|
def remove_employee(employee_name):
    """Remove the first roster entry whose name equals ``employee_name``.

    Does nothing when the name is empty, the session has no roster yet, or no
    entry matches.
    """
    if not employee_name or "employees" not in st.session_state:
        return
    roster = st.session_state["employees"]
    # Locate the match before mutating so the set is never changed mid-scan.
    match = next(
        (member for member in roster if member.name == employee_name), None
    )
    if match is not None:
        roster.remove(match)
||||
|
|
||||
|
|
||||
|
if "increases" not in st.session_state: |
||||
|
st.session_state["increases"] = increases |
||||
|
|
||||
|
|
||||
|
if "salary_ranges" not in st.session_state: |
||||
|
st.session_state["salary_ranges"] = salaries |
||||
|
|
||||
|
with st.expander("Roles"): |
||||
|
title = st.selectbox("Position", options=titles) |
||||
|
_inc = st.select_slider( |
||||
|
f"Percentage Increase for {title}", |
||||
|
value=st.session_state.increases[title], |
||||
|
options=np.arange(0, 100), |
||||
|
) |
||||
|
_sal = st.select_slider( |
||||
|
f"Salary Range for {title}", |
||||
|
value=st.session_state.salary_ranges[title], |
||||
|
options=np.arange(50000, 250001, 5000), |
||||
|
) |
||||
|
if st.button("Set"): |
||||
|
st.session_state.increases[title] = [int(i) for i in _inc] |
||||
|
st.session_state.salary_ranges[title] = [int(i) for i in _sal] |
||||
|
st.markdown("Updated role definition.") |
||||
|
|
||||
|
|
||||
|
with st.sidebar.expander("THE RED BOX"): |
||||
|
a = st.slider("Upper", value=3.0, min_value=1.0, max_value=3.0, step=0.25) |
||||
|
b = st.slider("Lower", value=1.0, min_value=1.0, max_value=3.0, step=0.25) |
||||
|
c = st.slider("%MAX", value=0.90, min_value=0.5, max_value=1.0, step=0.05) |
||||
|
|
||||
|
|
||||
|
with st.expander("Employees"): |
||||
|
st.markdown("You can consider promotions here as well.") |
||||
|
employee_title = st.selectbox("Employee position", options=titles) |
||||
|
employee_name = st.text_input("Employee name") |
||||
|
col1, col2, col3 = st.columns(3) |
||||
|
with col1: |
||||
|
salary = st.number_input("salary (optional)", value=0) |
||||
|
with col3: |
||||
|
region = st.number_input("region", value=0) |
||||
|
with col2: |
||||
|
performance = st.slider("performance", value=0.5) |
||||
|
|
||||
|
add_new_employee = st.button("Add or update employee") |
||||
|
if add_new_employee: |
||||
|
add_employee(employee_name, employee_title, region, salary, performance) |
||||
|
|
||||
|
rem_employee = st.button("Remove employee") |
||||
|
if rem_employee: |
||||
|
remove_employee(employee_name) |
||||
|
|
||||
|
st.sidebar.markdown("### Employee Roster") |
||||
|
if st.session_state.get("employees"): |
||||
|
st.sidebar.write([e.__dict__ for e in sorted(st.session_state["employees"])]) |
||||
|
|
||||
|
|
||||
|
employees = st.session_state.employees |
||||
|
# employees = default_employees |
||||
|
|
||||
|
increases = st.session_state.increases |
||||
|
salary_ranges = st.session_state.salary_ranges |
||||
|
forecast = st.button("Forecast") |
||||
|
samples = {} |
||||
|
import multiprocessing as mp |
||||
|
import numpy as np |
||||
|
from copy import deepcopy |
||||
|
|
||||
|
# st.write(employees == set(default_employees)) |
||||
|
|
||||
|
ss = np.random.SeedSequence() |
||||
|
|
||||
|
|
||||
|
def random_sampling(A):
    """Draw Monte-Carlo salary samples for a single employee.

    Parameters
    ----------
    A : tuple
        ``(rng, employee, num_samples, salary_ranges, increases)``:
        ``rng`` provides ``random(n)`` (e.g. a ``numpy.random.Generator``);
        ``employee`` exposes ``name``, ``title``, ``salary`` and ``score``;
        ``salary_ranges`` and ``increases`` map a title to a ``(low, high)``
        pair. Raise bounds are percentages (they are divided by 100 below).

    Returns
    -------
    tuple
        ``(employee.name, sample)`` where ``sample`` holds three arrays of
        length ``num_samples``: ``"inc"`` (multiplicative raise factor),
        ``"old"`` (current salary) and ``"new"`` (salary after the raise,
        capped at the upper end of the title's salary range).
    """
    rng, employee, num_samples, salary_ranges, increases = A
    n = int(num_samples)

    # TODO: revisit by zip code / region. USE salary_ranges[region][title]
    mnS, mxS = salary_ranges[employee.title]
    if employee.salary == 0:  # simulate salary if unspecified
        current_salary = rng.random(n) * (mxS - mnS) + mnS
    else:
        current_salary = np.ones(n) * employee.salary

    mnI, mxI = increases[employee.title]
    # Uniform raise between mnI% and mxI%, expressed as a factor (1.10 == +10%).
    random_increase = (100 + rng.random(n) * (mxI - mnI) + mnI) / 100

    # TODO: revisit how score is used.
    # now: up to 10% linear increase based on performance, must be over 0.5
    if employee.score > 0.5:
        # The bonus must scale the factor *up* (1.0 .. 1.1). Multiplying by the
        # bare fraction (score - 0.5) / 5 would shrink the salary to a tenth.
        random_increase *= 1 + (employee.score - 0.5) / 5

    sample = {
        "inc": random_increase,
        "old": current_salary,
        "new": np.minimum(current_salary * random_increase, mxS),
    }
    return (employee.name, sample)
||||
|
|
||||
|
|
||||
|
if forecast and not employees:
    # With no employees the sample matrix is empty and the per-row payroll sum
    # below would raise, so stop early with a visible hint instead.
    st.warning("Add at least one employee before running a forecast.")
elif forecast:
    # NOTE: sampling runs serially through map(). A multiprocessing pool used
    # to be created here on every run but was never used or closed, leaking
    # worker processes, so it has been removed.
    child_seeds = ss.spawn(len(employees))
    st.sidebar.write("Exploring Possibilities")
    salary_ranges = st.session_state.salary_ranges
    increases = st.session_state.increases
    st.write(salary_ranges, increases)
    samples_raw = map(
        random_sampling,
        [
            (np.random.default_rng(s), e, num_samples, salary_ranges, increases)
            for s, e in zip(child_seeds, employees)
        ],
    )
    samples = {s[0]: s[1] for s in samples_raw}

    st.sidebar.write("Predicting Budgets")
    # Column order of the salary matrices follows this list; it is reused for
    # the DataFrame labels further down so names and values stay aligned.
    employee_names = list(samples)
    old_salaries = np.array([samples[s]["old"] for s in employee_names]).T
    new_salaries = np.array([samples[s]["new"] for s in employee_names]).T
    # One total payroll figure per simulated scenario (row).
    old_payroll = old_salaries.sum(axis=1)
    new_payroll = new_salaries.sum(axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=old_payroll, histnorm="probability density", name="before")
    )
    fig.add_trace(
        go.Histogram(
            x=new_payroll,
            histnorm="probability density",
            name="after",
            marker_color="yellow",
        )
    )
    # Red box: the acceptable payroll window [c * budget, budget].
    fig.add_vrect(
        x0=c * budget,
        x1=budget,
        line_color="red",
        line_width=5,
        annotation_text="budget",
        annotation_position="left",
    )
    fig.update_layout(
        title="Salary Forecast",
        xaxis_title="Required Amount ($)",
        yaxis_title="",
        font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
    )

    st.sidebar.write("Performing Analysis")
    # Estimate the density of simulated payrolls from a random subsample.
    kde = gkde(np.random.choice(new_payroll, num_samples // 5))
    predicted_density = kde.pdf(new_payroll)

    # Target density: a beta distribution stretched over the budget window.
    observed_density = dist.beta(
        a=a, b=b, loc=c * budget, scale=(1 - c) * budget
    ).pdf(new_payroll)
    ratio = observed_density / predicted_density
    ratio = ratio / max(ratio)
    # Keep scenario r with probability ratio[r].
    accepted_inds = [r for r in range(num_samples) if np.random.rand() < ratio[r]]
    new_salaries_updated = new_payroll[accepted_inds]
    fig.add_trace(
        go.Histogram(
            x=new_salaries_updated, histnorm="probability density", name="options"
        )
    )
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="right",
            y=1,
            x=1,
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(f"Summary of {len(accepted_inds)} feasible new salaries (ranked)")
    # Label columns in the order the matrix was built, THEN sort for display.
    # Labelling directly with sorted names would attach salaries to the wrong
    # employees whenever the roster is not already in alphabetical order.
    df = pd.DataFrame(new_salaries[accepted_inds, :], columns=employee_names)
    df = df[sorted(employee_names)]
    df["total"] = new_payroll[accepted_inds]
    df = df.astype(int)
    st.write(df.sort_values("total").reset_index(drop=True))
@ -0,0 +1,5 @@ |
|||||
|
[ |
||||
|
{"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]}, |
||||
|
{"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]}, |
||||
|
{"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]} |
||||
|
] |
@ -0,0 +1,6 @@ |
|||||
|
streamlit |
||||
|
plotly-express |
||||
|
pandas |
||||
|
numpy |
||||
|
scipy |
||||
|
Equation |
@ -0,0 +1,15 @@ |
|||||
|
#!/bin/bash
# Launcher that stands in for the `streamlit` CLI: runs the app inside the
# `streamlit:latest` docker image when docker is available, otherwise falls
# back to the locally installed `streamlit`. All arguments are forwarded.
echo -e "INFO:\tThis executable is a replacement for invoking \`streamlit\`; it will attempt to first launch a docker image \`streamlit:latest\` and if it cannot find \`docker\` then it will attempt to invoke \`streamlit\` directly (you will need to run \`pip install -r requirements.txt\` for it to work)\n\n"

IMAGE_NAME=streamlit:latest
COMMAND="streamlit"
# Kept as an array so each option stays a separate word without relying on
# unquoted word-splitting.
OPTS=(--browser.serverAddress 0.0.0.0 --server.enableCORS False --server.enableXsrfProtection False)

if ! command -v docker &> /dev/null
then
    echo -e "WARNING:\tdocker could not be found, attempting running locally...\n"
    # "$@" (quoted) forwards arguments verbatim, even if they contain spaces.
    "$COMMAND" "$@" "${OPTS[@]}"
else
    echo -e "INFO:\t mounting \`pwd\` into container at mountpoint (and working directory) \`/tmp\` so that latest version of app & state are reflected.\n"
    docker run --name streamlit --rm -d -p 8501:8501 -v "$(pwd)":/tmp -w /tmp "$IMAGE_NAME" "$COMMAND" "$@" "${OPTS[@]}"
fi
Loading…
Reference in new issue