progress bar
This commit is contained in:
parent
4500c1b483
commit
b8ac59d942
2
Makefile
2
Makefile
@@ -2,7 +2,7 @@ all: install data train eval
|
||||
|
||||
city_distances_full.csv: check generate_data.py
|
||||
@echo "Generating distance data..."
|
||||
@bash -c 'time python generate_data.py -w 8 -c US -s 10000'
|
||||
@bash -c 'time python generate_data.py --country US --workers 8 --chunk-size 8000'
|
||||
|
||||
data: city_distances_full.csv
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from functools import lru_cache
|
||||
import geonamescache
|
||||
import numpy as np
|
||||
from geopy.distance import geodesic
|
||||
from tqdm import tqdm
|
||||
|
||||
MAX_DISTANCE = 20_037.5
|
||||
|
||||
@@ -115,6 +116,7 @@ def main():
|
||||
cities = list(us_cities.values())
|
||||
print(f"Num cities: {len(cities)}")
|
||||
city_combinations = list(itertools.combinations(cities, 2))
|
||||
# np.random.shuffle(city_combinations) # will this help or hurt caching? 1.03it/s
|
||||
chunk_size = args.chunk_size
|
||||
num_chunks = len(city_combinations) // chunk_size + 1
|
||||
output_file = args.output_file
|
||||
@@ -126,8 +128,13 @@ def main():
|
||||
|
||||
try:
|
||||
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers)
|
||||
for i in range(num_chunks):
|
||||
print(f"Processing chunk {i}...")
|
||||
for i in tqdm(
|
||||
range(num_chunks),
|
||||
total=num_chunks,
|
||||
desc="Processing chunks",
|
||||
ncols=100,
|
||||
bar_format="{l_bar}{bar}{r_bar}",
|
||||
):
|
||||
chunk = city_combinations[(i * chunk_size) : (i + 1) * chunk_size]
|
||||
futures = {
|
||||
executor.submit(calculate_distance, pair): pair for pair in chunk
|
||||
|
Loading…
Reference in New Issue
Block a user