diff --git a/train.py b/train.py
index 94b3eac..062d265 100644
--- a/train.py
+++ b/train.py
@@ -32,7 +32,7 @@ model = SentenceTransformer(model_name, device="cuda")
 # (fake.city(), fake.city(), np.random.rand())
 # for _ in range(num_examples)
 # ]
-data = pd.read_csv("city_distances_sample.csv")
+data = pd.read_csv("city_distances_full.csv")
 MAX_DISTANCE = 20_037.5 # global max distance
 # MAX_DISTANCE = data["distance"].max() # about 5k
 
@@ -70,7 +70,7 @@ print("TRAINING")
 training_args = {
     "output_path": "./output",
     # "evaluation_steps": steps_per_epoch, # already evaluates at the end of each epoch
-    "epochs": 5,
+    "epochs": 20,
     "warmup_steps": 500,
     "optimizer_params": {"lr": 2e-5},
     # "weight_decay": 0, # not sure if this helps but works fine without setting it.
@@ -78,7 +78,7 @@ training_args = {
     "save_best_model": True,
     "checkpoint_path": "./checkpoints_absmax_split",
     "checkpoint_save_steps": steps_per_epoch,
-    "checkpoint_save_total_limit": 20,
+    "checkpoint_save_total_limit": 100,
 }
 
 print(f"TRAINING ARGUMENTS:\n {training_args}")