270 lines
8.6 KiB
Python
270 lines
8.6 KiB
Python
|
import dataclasses
|
||
|
import json
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
import streamlit as st
|
||
|
from scipy.stats import distributions as dist
|
||
|
from scipy.stats import gaussian_kde as gkde
|
||
|
import plotly.express as px
|
||
|
import plotly.graph_objects as go
|
||
|
|
||
|
# st.set_page_config(layout="wide")
|
||
|
# Default role names; rebound further down from the loaded role definitions.
titles = [
    "Architect",
    "Engineer",
    "Sr. Engineer",
]
# Number of random draws generated per employee in a forecast.
num_samples = 10_000
|
||
|
|
||
|
|
||
|
@dataclasses.dataclass(frozen=True)
class Employee:
    """Immutable employee record identified by its name.

    Equality, hashing and ordering all use ``name`` only, so a set holds at
    most one record per name and ``sorted`` orders records alphabetically.
    ``__hash__`` is defined explicitly because the dataclass machinery would
    otherwise generate one from *all* fields, which disagrees with the
    name-only ``__eq__`` and lets equal records land in a set twice.
    """

    name: str = "John Doe"
    title: str = "unknown"
    region: int = 0
    # A salary of 0 is treated as "unspecified" by the forecast sampler.
    salary: float = 0.0
    score: float = 0.5

    def __eq__(self, other):
        """Return True when ``other`` is an Employee with the same name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        """Hash on ``name`` so that equal records share a hash."""
        return hash(self.name)

    def __lt__(self, other):
        """Order employees alphabetically by name."""
        if not isinstance(other, Employee):
            return NotImplemented
        return self.name < other.name
|
||
|
|
||
|
|
||
|
# Roster used to seed a new session: one Employee per (name, title) pair,
# with every other field left at its default.
default_employees = [
    Employee(person, role)
    for person, role in (
        ("Alice", "Architect"),
        ("Bob", "Architect"),
        ("Cher", "Engineer"),
        ("David", "Sr. Engineer"),
        ("Eirene", "Engineer"),
        ("Fiona", "Sr. Engineer"),
        ("Gavin", "Engineer"),
    )
]
|
||
|
# for i in range(5000):
|
||
|
# default_employees.append(Employee(f"Gavin {i}", np.random.choice(titles)))
|
||
|
|
||
|
|
||
|
# Load role definitions from data.json when it exists, otherwise fall back to
# the built-in defaults. Each entry carries a title, a [min, max] salary range
# and a [min, max] percentage raise range.
try:
    # The context manager closes the file handle even if parsing fails.
    with open("data.json", "r", encoding="utf-8") as fh:
        data = json.load(fh)
except FileNotFoundError:
    data = [
        {"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]},
    ]

# Per-title lookups derived from the loaded entries.
titles = [d["title"] for d in data]
salaries = {d["title"]: d["salary"] for d in data}
increases = {d["title"]: d["raise"] for d in data}
|
||
|
# increases = {title: (10, 20) for title in titles}
|
||
|
# salaries = {title: (50000, 100000) for title in titles}
|
||
|
|
||
|
# Page heading plus the payroll ceiling, which is entered in the sidebar.
st.title("Payroll Calculator")
budget = st.sidebar.number_input(
    "Maximum Payroll (dollars per year)",
    step=5000,
    value=550000,
)

# Seed the session's roster once; later script reruns reuse the stored set.
if not ("employees" in st.session_state):
    st.session_state["employees"] = set(default_employees)
|
||
|
|
||
|
|
||
|
def add_employee(employee_name, title="unknown", region=0, salary=0, score=0):
    """Insert or replace the roster entry for ``employee_name``.

    Does nothing when the name is empty or the session has no roster yet.
    Otherwise any existing entry with that name is removed first and a new
    ``Employee`` built from the arguments is added to the session roster.
    """
    if not employee_name or "employees" not in st.session_state:
        return
    remove_employee(employee_name)
    record = Employee(employee_name, title, region, salary, score)
    st.session_state["employees"].add(record)
|
||
|
|
||
|
|
||
|
def remove_employee(employee_name):
    """Remove the first roster entry whose name equals ``employee_name``.

    No-op when the name is empty, the session has no roster, or no entry
    matches.
    """
    if not employee_name or "employees" not in st.session_state:
        return
    roster = st.session_state["employees"]
    match = next(
        (person for person in roster if person.name == employee_name),
        None,
    )
    if match is not None:
        roster.remove(match)
|
||
|
|
||
|
|
||
|
# Seed the session's role settings once from the loaded role data; values
# already stored in the session are left alone.
for _state_key, _initial in (
    ("increases", increases),
    ("salary_ranges", salaries),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial
|
||
|
|
||
|
# Role editor: choose a position, adjust its raise and salary ranges, and
# store them in session state when "Set" is pressed.
# NOTE(review): the source lost its indentation; the button and its body are
# assumed to sit inside this expander.
with st.expander("Roles"):
    title = st.selectbox("Position", options=titles)
    _inc = st.select_slider(
        f"Percentage Increase for {title}",
        options=np.arange(100),
        value=st.session_state["increases"][title],
    )
    _sal = st.select_slider(
        f"Salary Range for {title}",
        options=np.arange(50000, 250001, 5000),
        value=st.session_state["salary_ranges"][title],
    )
    if st.button("Set"):
        # Slider values come back as NumPy integers; store plain ints.
        st.session_state["increases"][title] = list(map(int, _inc))
        st.session_state["salary_ranges"][title] = list(map(int, _sal))
        st.markdown("Updated role definition.")
|
||
|
|
||
|
|
||
|
# Sidebar controls for the target window used by the forecast: ``a`` and ``b``
# are passed to the beta distribution as its shape parameters, and ``c`` is
# the fraction of the budget at which that window starts.
with st.sidebar.expander("THE RED BOX"):
    a = st.slider("Upper", min_value=1.0, max_value=3.0, step=0.25, value=3.0)
    b = st.slider("Lower", min_value=1.0, max_value=3.0, step=0.25, value=1.0)
    c = st.slider("%MAX", min_value=0.5, max_value=1.0, step=0.05, value=0.90)
|
||
|
|
||
|
|
||
|
# Roster editor: collect an employee's details and add/update or remove the
# entry with that name.
# NOTE(review): the source lost its indentation; both buttons are assumed to
# sit inside this expander next to the inputs they act on.
with st.expander("Employees"):
    st.markdown("You can consider promotions here as well.")
    employee_title = st.selectbox("Employee position", options=titles)
    employee_name = st.text_input("Employee name")

    # Widgets are created in the original order (col1, col3, col2) by calling
    # them directly on the column containers.
    col1, col2, col3 = st.columns(3)
    salary = col1.number_input("salary (optional)", value=0)
    region = col3.number_input("region", value=0)
    performance = col2.slider("performance", value=0.5)

    add_new_employee = st.button("Add or update employee")
    if add_new_employee:
        add_employee(
            employee_name,
            employee_title,
            region,
            salary,
            performance,
        )

    rem_employee = st.button("Remove employee")
    if rem_employee:
        remove_employee(employee_name)
|
||
|
|
||
|
# Show the current roster in the sidebar, sorted by employee name.
st.sidebar.markdown("### Employee Roster")
_current_roster = st.session_state.get("employees")
if _current_roster:
    st.sidebar.write([vars(member) for member in sorted(_current_roster)])
|
||
|
|
||
|
|
||
|
# Bind the session's roster and role settings to module-level names used by
# the forecast below.
employees = st.session_state["employees"]
increases = st.session_state["increases"]
salary_ranges = st.session_state["salary_ranges"]

# True only on the script run triggered by pressing the button.
forecast = st.button("Forecast")
samples = dict()
import multiprocessing as mp
import numpy as np
from copy import deepcopy

# Root seed sequence; child seeds are spawned from it at forecast time.
ss = np.random.SeedSequence()
|
||
|
|
||
|
|
||
|
def random_sampling(A):
    """Draw salary and raise samples for one employee.

    Args:
        A: A 5-tuple ``(rng, employee, num_samples, salary_ranges, increases)``.
            ``rng`` provides ``random(n)`` (e.g. a NumPy ``Generator``);
            ``employee`` exposes ``name``, ``title``, ``salary`` and
            ``score``; ``salary_ranges`` and ``increases`` map a title to a
            ``(min, max)`` pair, the latter expressed in percent.

    Returns:
        ``(employee.name, sample)`` where ``sample`` holds three arrays of
        length ``num_samples``: ``"inc"`` (multiplicative raise factor),
        ``"old"`` (current salary) and ``"new"`` (raised salary, capped at
        the title's maximum salary).
    """
    rng, employee, num_samples, salary_ranges, increases = A
    n_draws = int(num_samples)

    # TODO: revisit by zip code / region. USE salary_ranges[region][title]
    min_salary, max_salary = salary_ranges[employee.title]
    if employee.salary == 0:  # simulate salary if unspecified
        current_salary = rng.random(n_draws) * (max_salary - min_salary) + min_salary
    else:
        current_salary = np.ones(n_draws) * employee.salary

    # Uniform raise between the title's min and max percentage, as a factor.
    min_raise, max_raise = increases[employee.title]
    random_increase = (
        100 + rng.random(n_draws) * (max_raise - min_raise) + min_raise
    ) / 100

    # TODO: revisit how score is used.
    # Scores above 0.5 earn a linear bonus of up to 10% (reached at 1.0) on
    # top of the raise factor. The "1 +" makes this a bonus; multiplying by
    # the bare fraction would shrink the salary to a tenth or less.
    if employee.score > 0.5:
        random_increase *= 1 + (employee.score - 0.5) / 5

    sample = {
        "inc": random_increase,
        "old": current_salary,
        "new": np.minimum(current_salary * random_increase, max_salary),
    }
    return (employee.name, sample)
|
||
|
|
||
|
|
||
|
# Run the payroll forecast when the button was pressed: sample salaries per
# employee, plot payroll before/after raises, then keep the simulated outcomes
# that fit the target window below the budget and list them.
if forecast and not employees:
    # With nobody on the roster there is nothing to sample or plot.
    st.warning("Add at least one employee before running a forecast.")
elif forecast:
    # Sampling runs serially in this process, so no worker pool is created;
    # an unused, never-closed pool would leak its workers on every click.
    # Walk the roster in name order so the sample columns, and therefore the
    # summary table's column labels, line up with the data.
    roster = sorted(employees)
    child_seeds = ss.spawn(len(roster))
    st.sidebar.write("Exploring Possibilities")
    salary_ranges = st.session_state.salary_ranges
    increases = st.session_state.increases
    st.write(salary_ranges, increases)
    samples = dict(
        random_sampling(
            (np.random.default_rng(seed), member, num_samples, salary_ranges, increases)
        )
        for seed, member in zip(child_seeds, roster)
    )
    names = list(samples)

    st.sidebar.write("Predicting Budgets")
    # After transposing, rows are draws and columns are employees in
    # ``names`` order; summing across columns gives the payroll per draw.
    old_salaries = np.array([samples[name]["old"] for name in names]).T
    new_salaries = np.array([samples[name]["new"] for name in names]).T
    old_payroll = old_salaries.sum(axis=1)
    new_payroll = new_salaries.sum(axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=old_payroll, histnorm="probability density", name="before")
    )
    fig.add_trace(
        go.Histogram(
            x=new_payroll,
            histnorm="probability density",
            name="after",
            marker_color="yellow",
        )
    )
    # Red box marking the target window from c * budget up to the budget.
    fig.add_vrect(
        x0=c * budget,
        x1=budget,
        line_color="red",
        line_width=5,
        annotation_text="budget",
        annotation_position="left",
    )
    fig.update_layout(
        title="Salary Forecast",
        xaxis_title="Required Amount ($)",
        yaxis_title="",
        font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
    )

    st.sidebar.write("Performing Analysis")
    # Rejection sampling: estimate the density of the simulated payrolls with
    # a KDE fitted on a random subsample, compare it with the beta-shaped
    # target over the window, and accept each draw with probability
    # proportional to target / estimate.
    kde = gkde(np.random.choice(new_payroll, num_samples // 5))
    predicted_density = kde.pdf(new_payroll)

    observed_density = dist.beta(
        a=a, b=b, loc=c * budget, scale=(1 - c) * budget
    ).pdf(new_payroll)
    ratio = observed_density / predicted_density
    ratio = ratio / ratio.max()
    accepted_inds = np.flatnonzero(np.random.rand(ratio.size) < ratio)
    new_salaries_updated = new_payroll[accepted_inds]
    fig.add_trace(
        go.Histogram(
            x=new_salaries_updated, histnorm="probability density", name="options"
        )
    )
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="right",
            y=1,
            x=1,
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(f"Summary of {len(accepted_inds)} feasible new salaries (ranked)")
    # Columns use the same ``names`` order the salary matrix was built in.
    df = pd.DataFrame(new_salaries[accepted_inds, :], columns=names)
    df["total"] = new_payroll[accepted_inds]
    df = df.astype(int)
    st.write(df.sort_values("total").reset_index(drop=True))
|