"""Streamlit payroll calculator.

Simulates current and post-raise salaries for a roster of employees by Monte
Carlo sampling, then uses rejection sampling to keep only the payroll outcomes
that fall inside a user-tunable target region ("THE RED BOX") just under the
maximum payroll budget.
"""
import dataclasses
import json
import multiprocessing as mp
from copy import deepcopy

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from scipy.stats import distributions as dist
from scipy.stats import gaussian_kde as gkde

# Number of Monte Carlo draws per employee.
num_samples = int(1e4)


@dataclasses.dataclass(frozen=True)
class Employee:
    """One roster entry; identity (equality, hashing, ordering) is the name only."""

    name: str = "John Doe"
    title: str = "unknown"
    region: int = 0
    salary: float = 0.0  # 0 means "unspecified": the salary is simulated instead
    score: float = 0.5  # performance; values above 0.5 earn an extra raise

    # The comparisons below are deliberately duck-typed on ``.name`` instead of
    # using isinstance(): Streamlit re-executes this script on every
    # interaction, which re-creates the class while instances of the previous
    # class object are still stored in ``st.session_state``.
    def __eq__(self, other):
        try:
            return self.name == other.name
        except AttributeError:
            return NotImplemented

    def __lt__(self, other):
        try:
            return self.name < other.name
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Must agree with __eq__; the dataclass-generated hash would cover all
        # fields and let equal-by-name employees coexist in the roster set.
        return hash(self.name)


default_employees = [
    Employee("Alice", "Architect"),
    Employee("Bob", "Architect"),
    Employee("Cher", "Engineer"),
    Employee("David", "Sr. Engineer"),
    Employee("Eirene", "Engineer"),
    Employee("Fiona", "Sr. Engineer"),
    Employee("Gavin", "Engineer"),
]

# Role definitions come from data.json when present; otherwise fall back to
# built-in defaults. Each entry holds a title, a [min, max] salary range and a
# [min, max] percentage raise.
try:
    with open("data.json", "r", encoding="utf-8") as data_file:
        data = json.load(data_file)
except FileNotFoundError:
    data = [
        {"title": "Architect", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Engineer", "salary": [50000, 100000], "raise": [10, 20]},
        {"title": "Sr. Engineer", "salary": [50000, 100000], "raise": [10, 20]},
    ]

titles = [d["title"] for d in data]
salaries = {d["title"]: d["salary"] for d in data}
increases = {d["title"]: d["raise"] for d in data}

st.title("Payroll Calculator")
budget = st.sidebar.number_input(
    "Maximum Payroll (dollars per year)", value=550000, step=5000
)

if "employees" not in st.session_state:
    st.session_state["employees"] = set(default_employees)


def add_employee(employee_name, title="unknown", region=0, salary=0, score=0):
    """Add an employee to the session roster, replacing any entry with the same name.

    Does nothing when ``employee_name`` is empty or the roster is missing from
    the session state.
    """
    if "employees" in st.session_state and employee_name:
        remove_employee(employee_name)
        st.session_state["employees"].add(
            Employee(employee_name, title, region, salary, score)
        )


def remove_employee(employee_name):
    """Remove the employee called ``employee_name`` from the session roster, if present."""
    if "employees" not in st.session_state or not employee_name:
        return
    roster = st.session_state["employees"]
    # Find first, mutate afterwards, so the set is never changed mid-iteration.
    match = next((e for e in roster if e.name == employee_name), None)
    if match is not None:
        roster.remove(match)


if "increases" not in st.session_state:
    st.session_state["increases"] = increases

if "salary_ranges" not in st.session_state:
    st.session_state["salary_ranges"] = salaries

with st.expander("Roles"):
    title = st.selectbox("Position", options=titles)
    _inc = st.select_slider(
        f"Percentage Increase for {title}",
        value=st.session_state.increases[title],
        options=np.arange(0, 100),
    )
    _sal = st.select_slider(
        f"Salary Range for {title}",
        value=st.session_state.salary_ranges[title],
        options=np.arange(50000, 250001, 5000),
    )
    if st.button("Set"):
        # Slider values are NumPy integers; store plain ints in the session.
        st.session_state.increases[title] = [int(i) for i in _inc]
        st.session_state.salary_ranges[title] = [int(i) for i in _sal]
        st.markdown("Updated role definition.")

# Parameters of the target density used by the rejection sampler below: a
# beta(a, b) distribution stretched over [c * budget, budget].
with st.sidebar.expander("THE RED BOX"):
    a = st.slider("Upper", value=3.0, min_value=1.0, max_value=3.0, step=0.25)
    b = st.slider("Lower", value=1.0, min_value=1.0, max_value=3.0, step=0.25)
    c = st.slider("%MAX", value=0.90, min_value=0.5, max_value=1.0, step=0.05)

with st.expander("Employees"):
    st.markdown("You can consider promotions here as well.")
    employee_title = st.selectbox("Employee position", options=titles)
    employee_name = st.text_input("Employee name")
    col1, col2, col3 = st.columns(3)
    with col1:
        salary = st.number_input("salary (optional)", value=0)
    with col3:
        region = st.number_input("region", value=0)
    with col2:
        performance = st.slider("performance", value=0.5)

    add_new_employee = st.button("Add or update employee")
    if add_new_employee:
        add_employee(employee_name, employee_title, region, salary, performance)
    rem_employee = st.button("Remove employee")
    if rem_employee:
        remove_employee(employee_name)

st.sidebar.markdown("### Employee Roster")
if st.session_state.get("employees"):
    st.sidebar.write([e.__dict__ for e in sorted(st.session_state["employees"])])

employees = st.session_state.employees
increases = st.session_state.increases
salary_ranges = st.session_state.salary_ranges

forecast = st.button("Forecast")
samples = {}

# Root seed; one independent child stream is spawned per employee.
ss = np.random.SeedSequence()


def random_sampling(A):
    """Draw Monte Carlo salary samples for a single employee.

    Args:
        A: tuple ``(rng, employee, num_samples, salary_ranges, increases)``
            where ``rng`` is a NumPy ``Generator``, ``salary_ranges`` maps a
            title to ``(min, max)`` salary and ``increases`` maps a title to
            ``(min, max)`` percentage raise.

    Returns:
        ``(employee.name, sample)`` where ``sample`` maps ``"inc"`` (raise
        multiplier), ``"old"`` (current salary) and ``"new"`` (salary after
        the raise, capped at the role maximum) to arrays of ``num_samples``
        values.

    Raises:
        KeyError: if the employee's title has no salary range or raise range.
    """
    rng, employee, num_samples, salary_ranges, increases = A
    n_draws = int(num_samples)

    # TODO: revisit by zip code / region. USE salary_ranges[region][title]
    min_salary, max_salary = salary_ranges[employee.title]
    if employee.salary == 0:
        # Simulate the salary uniformly over the role's range if unspecified.
        current_salary = rng.random(n_draws) * (max_salary - min_salary) + min_salary
    else:
        current_salary = np.full(n_draws, float(employee.salary))

    min_inc, max_inc = increases[employee.title]
    # Uniform percentage raise in [min_inc, max_inc], expressed as a multiplier.
    random_increase = (100 + rng.random(n_draws) * (max_inc - min_inc) + min_inc) / 100

    # TODO: revisit how score is used.
    # now: up to 10% linear increase based on performance, must be over 0.5
    if employee.score > 0.5:
        # The bonus scales the multiplier by (1 + bonus). Multiplying by the
        # bare bonus (<= 0.1) would slash the salary instead of raising it.
        random_increase *= 1 + (employee.score - 0.5) / 5

    sample = {
        "inc": random_increase,
        "old": current_salary,
        "new": np.minimum(current_salary * random_increase, max_salary),
    }
    return (employee.name, sample)


if forecast and not employees:
    st.warning("Add at least one employee before forecasting.")
elif forecast:
    # Sample in sorted-name order so that the columns of the salary matrices
    # line up with the column labels of the summary table below.
    roster = sorted(employees)
    child_seeds = ss.spawn(len(roster))
    st.sidebar.write("Exploring Possibilities")
    st.write(salary_ranges, increases)
    # The sampling is vectorised and cheap, so it runs in-process; no worker
    # pool is created.
    samples = dict(
        random_sampling(
            (np.random.default_rng(seed), emp, num_samples, salary_ranges, increases)
        )
        for seed, emp in zip(child_seeds, roster)
    )

    st.sidebar.write("Predicting Budgets")
    # Shape (num_samples, n_employees): one simulated scenario per row.
    old_salaries = np.array([s["old"] for s in samples.values()]).T
    new_salaries = np.array([s["new"] for s in samples.values()]).T
    old_payroll = old_salaries.sum(axis=1)
    new_payroll = new_salaries.sum(axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=old_payroll, histnorm="probability density", name="before")
    )
    fig.add_trace(
        go.Histogram(
            x=new_payroll,
            histnorm="probability density",
            name="after",
            marker_color="yellow",
        )
    )
    fig.add_vrect(
        x0=c * budget,
        x1=budget,
        line_color="red",
        line_width=5,
        annotation_text="budget",
        annotation_position="left",
    )
    fig.update_layout(
        title="Salary Forecast",
        xaxis_title="Required Amount ($)",
        yaxis_title="",
        font=dict(family="Courier New, monospace", size=18, color="#7f7f7f"),
    )

    st.sidebar.write("Performing Analysis")
    # Rejection sampling: keep each scenario with probability proportional to
    # target density / simulated density, the latter estimated by a KDE fitted
    # on a subsample of the simulated payrolls.
    kde = gkde(np.random.choice(new_payroll, num_samples // 5))
    predicted_density = kde.pdf(new_payroll)
    observed_density = dist.beta(
        a=a, b=b, loc=c * budget, scale=(1 - c) * budget
    ).pdf(new_payroll)
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = observed_density / predicted_density
    # 0/0 or x/0 occurs when the red box misses the simulated payrolls (or has
    # zero width); such scenarios are simply never accepted.
    ratio = np.nan_to_num(ratio, nan=0.0, posinf=0.0)
    peak = ratio.max()
    if peak > 0:
        accepted_inds = np.flatnonzero(np.random.rand(ratio.size) < ratio / peak)
    else:
        accepted_inds = np.array([], dtype=int)
    new_salaries_updated = new_payroll[accepted_inds]

    fig.add_trace(
        go.Histogram(
            x=new_salaries_updated, histnorm="probability density", name="options"
        )
    )
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            xanchor="right",
            y=1,
            x=1,
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(f"Summary of {len(accepted_inds)} feasible new salaries (ranked)")
    # ``samples`` preserves insertion order, which is the column order of
    # ``new_salaries``; label the columns with exactly that order.
    df = pd.DataFrame(new_salaries[accepted_inds, :], columns=list(samples))
    df["total"] = new_payroll[accepted_inds]
    df = df.astype(int)
    st.write(df.sort_values("total").reset_index(drop=True))