import subprocess
import sys
from random import sample, seed

import numpy as np  # noqa: F401
from lightning_sdk import Machine, Studio  # noqa: F401

# Fix the RNG so the randomly sampled set of experiments is reproducible.
seed(19920921)

NUM_JOBS = 100

# Reference to the current Studio.
# If you run outside of Lightning, you can pass the Studio name.
# studio = Studio()

# Use the jobs plugin.
# studio.install_plugin("jobs")
# job_plugin = studio.installed_plugins["jobs"]

# Define the range or set of values for each hyperparameter in the sweep.
# alpha_values = list(np.round(np.linspace(2, 4, 21), 4))
# learning_rate_values = list(np.round(np.logspace(-5, -3, 21), 5))
learning_rate_values = [1e-3]
# learning_rate_values = [5e-4]
# alpha_values = [0, 0.25, 0.5, 0.75, 1]
# alpha = 0 is unsupervised; alpha = 1 is supervised.
alpha_values = [0, 0.1]
widths = [2**k for k in range(4, 13)]
depths = [1, 2, 4, 8, 16]
# widths, depths = [512], [4]
batch_size_values = [256]
max_epochs_values = [100]
seeds = list(range(21, 1992))
optimizers = [
    # "Adagrad",
    "Adam",
    # "SGD",
    # "AdamW",
    # "LBFGS",
    # "RAdam",
    # "RMSprop",
    # "Adadelta",
]

# Generate all possible combinations of hyperparameters.
all_params = [
    (alpha, lr, bs, me, s, w, d, opt)
    for alpha in alpha_values
    for lr in learning_rate_values
    for bs in batch_size_values
    for me in max_epochs_values
    for s in seeds
    for w in widths
    for d in depths
    for opt in optimizers
]

# Perform a random search, capped at NUM_JOBS runs.
search_params = sample(all_params, min(NUM_JOBS, len(all_params)))

# Stashed flags for optional early stopping:
# --trainer.callbacks+ lightning.pytorch.callbacks.EarlyStopping \
# --trainer.callbacks.init_args.monitor hp_metric \
for idx, params in enumerate(search_params):
    a, lr, bs, me, s, w, d, opt = params
    # cmd = f"cd ~/colors && python main.py --alpha {a} --lr {lr} --bs {bs} --max_epochs {me} --seed {s} --width {w}"
    # Note: the trailing "# --lr_scheduler ..." lines end up as shell comments
    # (the command ends at the line without a trailing backslash), so the
    # shell ignores them; remove the leading "#" to enable them.
    cmd = f"""
    python newmain.py fit \
        --seed_everything {s} \
        --data.batch_size {bs} \
        --data.train_size 0 \
        --data.val_size 10000 \
        --model.alpha {a} \
        --model.width {w} \
        --model.depth {d} \
        --model.bias true \
        --model.loop true \
        --model.transform tanh \
        --trainer.min_epochs 10 \
        --trainer.max_epochs {me} \
        --trainer.log_every_n_steps 3 \
        --trainer.check_val_every_n_epoch 1 \
        --trainer.limit_val_batches 50 \
        --trainer.callbacks callbacks.SaveImageCallback \
        --trainer.callbacks.init_args.final_dir out \
        --trainer.callbacks.init_args.save_interval 1 \
        --optimizer torch.optim.{opt} \
        --optimizer.init_args.lr {lr} \
        --trainer.callbacks+ lightning.pytorch.callbacks.LearningRateFinder
    # --lr_scheduler lightning.pytorch.cli.ReduceLROnPlateau \
    # --lr_scheduler.init_args.monitor hp_metric \
    # --lr_scheduler.init_args.factor 0.05 \
    # --lr_scheduler.init_args.patience 5 \
    # --lr_scheduler.init_args.cooldown 10 \
    # --lr_scheduler.init_args.verbose true
    """
    # job_name = f"color2_{bs}_{a}_{lr:2.2e}"
    # job_plugin.run(cmd, machine=Machine.T4, name=job_name)
    print(f"[{idx + 1}/{len(search_params)}] Running {params}: {cmd}")
    try:
        # Run the command and wait for it to complete.
        subprocess.run(cmd, shell=True, check=True)
    except KeyboardInterrupt:
        print("Interrupted by user")
        sys.exit(1)
    # Uncomment to skip failed runs and continue the sweep:
    # except subprocess.CalledProcessError:
    #     pass
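
# A minimal sketch (not wired in): submitting each run as a background
# Lightning job via the jobs plugin referenced in the comments above, instead
# of blocking on subprocess locally. This assumes the lightning_sdk API shown
# in those comments (Studio(), install_plugin("jobs"),
# installed_plugins["jobs"], job_plugin.run(..., machine=Machine.T4,
# name=...)); verify it against your SDK version. `build_cmd` is a
# hypothetical helper that returns the same f-string built in the loop above.
#
# studio = Studio()  # pass the Studio name if running outside Lightning
# studio.install_plugin("jobs")
# job_plugin = studio.installed_plugins["jobs"]
# for a, lr, bs, me, s, w, d, opt in search_params:
#     job_name = f"color2_{bs}_{a}_{lr:2.2e}"
#     job_plugin.run(
#         build_cmd(a, lr, bs, me, s, w, d, opt),
#         machine=Machine.T4,
#         name=job_name,
#     )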