From 948c337ec28c75cc68578a139bf07981c1beca74 Mon Sep 17 00:00:00 2001
From: mm
Date: Fri, 5 May 2023 00:50:57 +0000
Subject: [PATCH] batchsize

---
 train.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 062d265..583f711 100644
--- a/train.py
+++ b/train.py
@@ -55,28 +55,28 @@ train_examples, val_examples = train_test_split(
 # validation examples can be something like templated sentences
 # that maintain the same distance as the cities (same context)
 # should probably add training examples like that too if needed
-batch_size = 16
+BATCH_SIZE = 48
 num_examples = len(train_examples)
-steps_per_epoch = num_examples // batch_size
+steps_per_epoch = num_examples // BATCH_SIZE
 print(f"\nHead of training data (size: {num_examples}):")
 print(train_data[:10], "\n")
 
 # Create DataLoaders for train and validation datasets
-train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
+train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=BATCH_SIZE)
 
 print("TRAINING")
 
 # Configure the training arguments
 training_args = {
     "output_path": "./output",
     # "evaluation_steps": steps_per_epoch, # already evaluates at the end of each epoch
-    "epochs": 20,
+    "epochs": 10,
     "warmup_steps": 500,
     "optimizer_params": {"lr": 2e-5},
     # "weight_decay": 0, # not sure if this helps but works fine without setting it.
     "scheduler": "WarmupLinear",
     "save_best_model": True,
-    "checkpoint_path": "./checkpoints_absmax_split",
+    "checkpoint_path": "./checkpoints",
     "checkpoint_save_steps": steps_per_epoch,
     "checkpoint_save_total_limit": 100,
 }
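
Note on usage: the keys in training_args match the keyword parameters of
sentence-transformers' SentenceTransformer.fit(), so the script presumably
unpacks the dict into that call. A minimal sketch of that pattern, assuming
the sentence-transformers API; the base model and loss below do not appear
in this hunk and are illustrative assumptions:

# Sketch only (not part of the patch); reuses train_dataloader and
# training_args from train.py as patched above.
from sentence_transformers import SentenceTransformer, losses

model = SentenceTransformer("all-MiniLM-L6-v2")  # hypothetical base model
train_loss = losses.CosineSimilarityLoss(model)  # hypothetical loss

# Every key in training_args is a keyword parameter of fit(), so the
# dict can be unpacked directly into the call.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    **training_args,
)

On the numbers: tripling the batch size from 16 to 48 cuts steps_per_epoch
to roughly a third, so checkpoint_save_steps = steps_per_epoch still saves
one checkpoint per epoch, while the fixed warmup_steps = 500 now covers a
larger fraction of the shortened 10-epoch run.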