import os
import subprocess
import sys
from itertools import product
from random import sample, seed

import numpy as np  # noqa: F401
from lightning_sdk import Machine, Studio  # noqa: F401

# Seed Python's `random` module so the randomly sampled experiments are
# consistent from one run of this script to the next.
seed(19920921)

# Upper bound on how many sampled hyperparameter configurations are launched.
NUM_JOBS = 100

# reference to the current studio
# if you run outside of Lightning, you can pass the Studio name
# studio = Studio()

# use the jobs plugin
# studio.install_plugin("jobs")
# job_plugin = studio.installed_plugins["jobs"]

# Hyperparameter sweep.
#
# Define the ranges or sets of values for each hyperparameter. A list with a
# single entry holds that hyperparameter fixed; the sweep effectively varies
# alpha, seed, width, depth and optimizer.
# alpha_values = list(np.round(np.linspace(2, 4, 21), 4))
# learning_rate_values = list(np.round(np.logspace(-5, -3, 21), 5))
learning_rate_values = [1e-3]
# learning_rate_values = [5e-4]

alpha_values = [0, 0.25, 0.5, 0.75, 1]  # alpha = 0 is unsupervised. alpha = 1 is supervised.
widths = [2**k for k in range(4, 13)]  # powers of two: 16, 32, ..., 4096
depths = [1, 2, 4, 8, 16]
# widths, depths = [128, 256], [4, 8]

batch_size_values = [256]
max_epochs_values = [10]
seeds = list(range(21, 1992))  # 21, 22, ..., 1991

# Class names that get appended to "torch.optim." when the command is built.
optimizers = [
    "Adagrad",
    "Adam",
    "SGD",
    "AdamW",
    "LBFGS",
    "RAdam",
    "RMSprop",
    "Adadelta",
]

# Generate all possible combinations of hyperparameters.
#
# Each entry is a tuple (alpha, lr, bs, me, s, w, d, opt). product() varies its
# last argument fastest, which is the same ordering a nested comprehension with
# one for-clause per list (in this order) would produce. The full grid is
# materialised as a list because random.sample() needs a sequence.
all_params = list(
    product(
        alpha_values,
        learning_rate_values,
        batch_size_values,
        max_epochs_values,
        seeds,
        widths,
        depths,
        optimizers,
    )
)

# perform random search with a limit: draw at most NUM_JOBS distinct
# configurations (sampling without replacement) from the full grid. The min()
# guards against asking for more samples than the grid contains.
search_params = sample(all_params, min(NUM_JOBS, len(all_params)))

# The config dump of every job is redirected into out/ by the shell, and the
# redirection fails if the directory is missing, so create it up front.
os.makedirs("out", exist_ok=True)

# Launch the sampled configurations one after another, blocking on each.
for idx, params in enumerate(search_params):
    a, lr, bs, me, s, w, d, opt = params
    # cmd = f"cd ~/colors && python main.py --alpha {a} --lr {lr} --bs {bs} --max_epochs {me} --seed {s} --width {w}"

    # The backslash at the end of each line is a line continuation inside the
    # (non-raw) string literal, so the shell receives one long command line.
    cmd = f"""
    python newmain.py fit \
    --seed_everything {s} \
    --data.batch_size {bs} \
    --data.train_size 10000 \
    --data.val_size 10000 \
    --model.alpha {a} \
    --model.width {w} \
    --model.depth {d} \
    --model.bias true \
    --model.transform tanh \
    --trainer.min_epochs 10 \
    --trainer.max_epochs {me} \
    --trainer.log_every_n_steps 3 \
    --trainer.check_val_every_n_epoch 1 \
    --trainer.limit_val_batches 50 \
    --trainer.callbacks callbacks.SaveImageCallback \
    --trainer.callbacks.init_args.final_dir out \
    --trainer.callbacks.init_args.save_interval 0 \
    --optimizer torch.optim.{opt} \
    --optimizer.init_args.lr {lr} \
    --lr_scheduler lightning.pytorch.cli.ReduceLROnPlateau \
    --lr_scheduler.init_args.monitor hp_metric \
    --lr_scheduler.init_args.factor 0.05 \
    --lr_scheduler.init_args.patience 5 \
    --lr_scheduler.init_args.cooldown 10 \
    --lr_scheduler.init_args.verbose true
    """

    # Same invocation with --print_config appended and stdout redirected to a
    # per-job file (presumably dumps the resolved config; confirm against
    # newmain.py's CLI).
    test_cmd = f"{cmd.strip()} --print_config > out/config_v{idx:04d}.txt"

    # job_name = f"color2_{bs}_{a}_{lr:2.2e}"
    # job_plugin.run(cmd, machine=Machine.T4, name=job_name)
    print(f"Running {params}: {cmd}")

    # Chain both steps: the actual run only starts if the config dump succeeded.
    cmd = f"{test_cmd.strip()} && {cmd}"
    try:
        # Run the command and wait for it to complete
        # subprocess.run(test_cmd, shell=True, check=True)
        # check=True raises CalledProcessError on a non-zero exit status; it is
        # not caught here, so a failing job stops the whole sweep.
        subprocess.run(cmd, shell=True, check=True)
    except KeyboardInterrupt:
        print("Interrupted by user")
        sys.exit(1)
    # except subprocess.CalledProcessError:
    #     pass