|
@@ -1,8 +1,11 @@ |
|
|
|
|
|
import subprocess |
|
|
|
|
|
import sys |
|
|
from random import sample |
|
|
from random import sample |
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
from lightning_sdk import Machine, Studio |
|
|
from lightning_sdk import Machine, Studio |
|
|
|
|
|
|
|
|
NUM_JOBS = 21 |
|
|
NUM_JOBS = 64 |
|
|
|
|
|
|
|
|
# reference to the current studio |
|
|
# reference to the current studio |
|
|
# if you run outside of Lightning, you can pass the Studio name |
|
|
# if you run outside of Lightning, you can pass the Studio name |
|
@@ -15,7 +18,7 @@ job_plugin = studio.installed_plugins["jobs"] |
|
|
# do a sweep over learning rates |
|
|
# do a sweep over learning rates |
|
|
|
|
|
|
|
|
# Define the ranges or sets of values for each hyperparameter |
|
|
# Define the ranges or sets of values for each hyperparameter |
|
|
alpha_values = [0.1, 0.25, 0.5, 0.7, 0.9] |
|
|
alpha_values = list(np.round(np.linspace(0, 2, 21), 4)) |
|
|
learning_rate_values = [1e-3, 1e-4, 1e-5] |
|
|
learning_rate_values = [1e-3, 1e-4, 1e-5] |
|
|
batch_size_values = [128] |
|
|
batch_size_values = [128] |
|
|
max_epochs_values = [5000] |
|
|
max_epochs_values = [5000] |
|
@@ -31,7 +34,7 @@ all_params = [ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# perform random search with a limit |
|
|
# perform random search with a limit |
|
|
search_params = sample(all_params, NUM_JOBS) |
|
|
search_params = sample(all_params, min(NUM_JOBS, len(all_params))) |
|
|
|
|
|
|
|
|
for idx, params in enumerate(search_params): |
|
|
for idx, params in enumerate(search_params): |
|
|
a, lr, bs, me = params |
|
|
a, lr, bs, me = params |
|
@@ -39,4 +42,9 @@ for idx, params in enumerate(search_params): |
|
|
job_name = f"color2_{bs}_{a}_{lr:2.2e}" |
|
|
job_name = f"color2_{bs}_{a}_{lr:2.2e}" |
|
|
# job_plugin.run(cmd, machine=Machine.T4, name=job_name) |
|
|
# job_plugin.run(cmd, machine=Machine.T4, name=job_name) |
|
|
print(f"Running {params}: {cmd}") |
|
|
print(f"Running {params}: {cmd}") |
|
|
os.system(cmd) |
|
|
try: |
|
|
|
|
|
# Run the command and wait for it to complete |
|
|
|
|
|
subprocess.run(cmd, shell=True, check=True) |
|
|
|
|
|
except KeyboardInterrupt: |
|
|
|
|
|
print("Interrupted by user") |
|
|
|
|
|
sys.exit(1) |
|
|