diff --git a/main.py b/main.py
index 93f1767..df1420e 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,9 @@
 import argparse
+import random
 
+import numpy as np
 import pytorch_lightning as pl
+import torch
 from pytorch_lightning.callbacks import EarlyStopping
 
 from callbacks import SaveImageCallback
@@ -29,6 +32,7 @@ def parse_args():
     parser.add_argument(
         "-L", "--log_every_n_steps", type=int, default=5, help="Logging frequency"
     )
+    parser.add_argument("--seed", default=21, type=int, help="Random seed for reproducibility")
     parser.add_argument(
         "-w",
         "--num_workers",
@@ -42,8 +46,21 @@ def parse_args():
     return args
 
 
+def seed_everything(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)  # seeds all GPUs; no-op on a single-GPU machine
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
 if __name__ == "__main__":
     args = parse_args()
+
+    seed_everything(args.seed)
+
     early_stop_callback = EarlyStopping(
         monitor="hp_metric",  # Metric to monitor
         min_delta=1e-5,  # Minimum change in the monitored quantity to qualify as an improvement
@@ -53,7 +70,7 @@ if __name__ == "__main__":
     )
 
     save_img_callback = SaveImageCallback(
-        save_interval=1,
+        save_interval=0,
         final_dir="out",
     )
 
@@ -77,6 +94,7 @@ if __name__ == "__main__":
 
     # Initialize trainer with parsed arguments
     trainer = pl.Trainer(
+        deterministic=True,
         callbacks=[early_stop_callback, save_img_callback],
         max_epochs=args.max_epochs,
         log_every_n_steps=args.log_every_n_steps,
diff --git a/makefile b/makefile
index cff89bb..e290d11 100644
--- a/makefile
+++ b/makefile
@@ -13,4 +13,8 @@ animate:
 	ffmpeg -i lightning_logs/version_258/e%04d.png \
 	-c:v libx264 \
 	-vf "fps=12,format=yuv420p,pad=ceil(iw/2)*2:ceil(ih/2)*2" \
-	~/animated.mp4
\ No newline at end of file
+	~/animated.mp4
+
+clean:
+	rm -rf lightning_logs/*
+	rm -f out/*.png
diff --git a/search.py b/search.py
index 881a017..1b6eaa7 100644
--- a/search.py
+++ b/search.py
@@ -18,19 +18,21 @@ job_plugin = studio.installed_plugins["jobs"]
 
 # do a sweep over learning rates
 # Define the ranges or sets of values for each hyperparameter
-alpha_values = list(np.round(np.linspace(2, 6, 41), 4))
+alpha_values = list(np.round(np.linspace(2, 4, 21), 4))
 # learning_rate_values = list(np.round(np.logspace(-5, -3, 41), 5))
 learning_rate_values = [5e-4]
-batch_size_values = [64, 128]
+batch_size_values = [128]
 max_epochs_values = [500]
+seeds = list(range(21, 1992))
 
 # Generate all possible combinations of hyperparameters
 all_params = [
-    (alpha, lr, bs, me)
+    (alpha, lr, bs, me, s)
     for alpha in alpha_values
     for lr in learning_rate_values
     for bs in batch_size_values
     for me in max_epochs_values
+    for s in seeds
 ]
 
 
@@ -38,8 +40,8 @@ all_params = [
 search_params = sample(all_params, min(NUM_JOBS, len(all_params)))
 
 for idx, params in enumerate(search_params):
-    a, lr, bs, me = params
-    cmd = f"cd ~/colors && python main.py --alpha {a} --lr {lr} --bs {bs} --max_epochs {me}"
+    a, lr, bs, me, s = params
+    cmd = f"cd ~/colors && python main.py --alpha {a} --lr {lr} --bs {bs} --max_epochs {me} --seed {s}"
     # job_name = f"color2_{bs}_{a}_{lr:2.2e}"
     # job_plugin.run(cmd, machine=Machine.T4, name=job_name)
     print(f"Running {params}: {cmd}")
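
Note: the hand-rolled seed_everything added in main.py mirrors a helper that PyTorch Lightning already ships. A minimal sketch of the built-in alternative, assuming pytorch_lightning >= 1.3 (the workers flag additionally seeds DataLoader worker processes, which the manual version does not cover):

    import pytorch_lightning as pl

    # seeds Python's random, NumPy, and torch (CPU and CUDA) in one call;
    # workers=True propagates the seed to DataLoader worker processes
    pl.seed_everything(21, workers=True)
    trainer = pl.Trainer(deterministic=True, max_epochs=500)

One caveat: in recent Lightning versions, Trainer(deterministic=True) enables torch.use_deterministic_algorithms, which raises an error on ops without a deterministic implementation, whereas the manual cudnn flags above do not.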