From 5c33b5135fb59c383978a3a305fffe283702aa79 Mon Sep 17 00:00:00 2001
From: mm <mm@clfx.cc>
Date: Fri, 5 May 2023 07:11:38 +0000
Subject: [PATCH] rename data file city_distances_full.csv to city_distances.csv

---
 Makefile         | 4 ++--
 README.md        | 2 +-
 eval.py          | 2 +-
 generate_data.py | 2 +-
 train.py         | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 4d410e0..13be5b8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,10 @@
 all: install data train eval
 
-city_distances_full.csv: check generate_data.py
+city_distances.csv: check generate_data.py
 	@echo "Generating distance data..."
 	@bash -c 'time python generate_data.py --country US --workers 8 --chunk-size 4200'
 
-data: city_distances_full.csv
+data: city_distances.csv
 
 train: check train.py
 	@echo "Training embeddings..."
diff --git a/README.md b/README.md
index f6e3f36..1c30022 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ The approach demonstrated can be extended to other metrics or features beyond ge
 ## How to Use
 
 1. Install the required dependencies by running `pip install -r requirements.txt`.
-2. Run `make city_distances.csv` to generate the dataset of city distances.
+2. Run `make data` to generate the dataset of city distances.
 3. Run `make train` to train the neural network model.
 4. Run `make eval` to evaluate the trained model and generate evaluation plots.
 
diff --git a/eval.py b/eval.py
index 2403ddf..771df64 100644
--- a/eval.py
+++ b/eval.py
@@ -66,7 +66,7 @@ if __name__ == "__main__":
     model_name = "sentence-transformers/all-MiniLM-L6-v2"
     base_model = SentenceTransformer(model_name, device="cuda")
 
-    data = pd.read_csv("city_distances_full.csv")
+    data = pd.read_csv("city_distances.csv")
     # data_sample = data.sample(1_000)
     checkpoint_dir = "checkpoints_absmax_split"  # no slash
     for checkpoint in sorted(glob.glob(f"{checkpoint_dir}/*")):
diff --git a/generate_data.py b/generate_data.py
index d255a8e..5a0e0f1 100644
--- a/generate_data.py
+++ b/generate_data.py
@@ -32,7 +32,7 @@ parser.add_argument(
     "--output-file",
     help="Specify the name of the output file (file.csv)",
     type=str,
-    default="city_distances_full.csv",
+    default="city_distances.csv",
 )
 parser.add_argument(
     "--shuffle",
diff --git a/train.py b/train.py
index 1d60115..764cec3 100644
--- a/train.py
+++ b/train.py
@@ -32,7 +32,7 @@ model = SentenceTransformer(model_name, device="cuda")
 #     (fake.city(), fake.city(), np.random.rand())
 #     for _ in range(num_examples)
 # ]
-data = pd.read_csv("city_distances_full.csv")
+data = pd.read_csv("city_distances.csv")
 MAX_DISTANCE = 20_037.5  # global max distance
 # MAX_DISTANCE = data["distance"].max()  # about 5k