rename data file

This commit is contained in:
mm 2023-05-05 07:11:38 +00:00
parent 294d4bb1cd
commit 6733677a1a
4 changed files with 5 additions and 5 deletions

View File

@@ -1,10 +1,10 @@
all: install data train eval
-city_distances_full.csv: check generate_data.py
+city_distances.csv: check generate_data.py
@echo "Generating distance data..."
@bash -c 'time python generate_data.py --country US --workers 8 --chunk-size 4200'
-data: city_distances_full.csv
+data: city_distances.csv
train: check train.py
@echo "Training embeddings..."

View File

@@ -66,7 +66,7 @@ if __name__ == "__main__":
model_name = "sentence-transformers/all-MiniLM-L6-v2"
base_model = SentenceTransformer(model_name, device="cuda")
-data = pd.read_csv("city_distances_full.csv")
+data = pd.read_csv("city_distances.csv")
# data_sample = data.sample(1_000)
checkpoint_dir = "checkpoints_absmax_split" # no slash
for checkpoint in sorted(glob.glob(f"{checkpoint_dir}/*")):

View File

@@ -32,7 +32,7 @@ parser.add_argument(
"--output-file",
help="Specify the name of the output file (file.csv)",
type=str,
-default="city_distances_full.csv",
+default="city_distances.csv",
)
parser.add_argument(
"--shuffle",

View File

@@ -32,7 +32,7 @@ model = SentenceTransformer(model_name, device="cuda")
# (fake.city(), fake.city(), np.random.rand())
# for _ in range(num_examples)
# ]
-data = pd.read_csv("city_distances_full.csv")
+data = pd.read_csv("city_distances.csv")
MAX_DISTANCE = 20_037.5 # global max distance
# MAX_DISTANCE = data["distance"].max() # about 5k