|
@ -8,6 +8,7 @@ from functools import lru_cache |
|
|
import geonamescache |
|
|
import geonamescache |
|
|
import numpy as np |
|
|
import numpy as np |
|
|
from geopy.distance import geodesic |
|
|
from geopy.distance import geodesic |
|
|
|
|
|
from tqdm import tqdm |
|
|
|
|
|
|
|
|
MAX_DISTANCE = 20_037.5 |
|
|
MAX_DISTANCE = 20_037.5 |
|
|
|
|
|
|
|
@ -115,6 +116,7 @@ def main(): |
|
|
cities = list(us_cities.values()) |
|
|
cities = list(us_cities.values()) |
|
|
print(f"Num cities: {len(cities)}") |
|
|
print(f"Num cities: {len(cities)}") |
|
|
city_combinations = list(itertools.combinations(cities, 2)) |
|
|
city_combinations = list(itertools.combinations(cities, 2)) |
|
|
|
|
|
# np.random.shuffle(city_combinations) # will this help or hurt caching? 1.03it/s |
|
|
chunk_size = args.chunk_size |
|
|
chunk_size = args.chunk_size |
|
|
num_chunks = len(city_combinations) // chunk_size + 1 |
|
|
num_chunks = len(city_combinations) // chunk_size + 1 |
|
|
output_file = args.output_file |
|
|
output_file = args.output_file |
|
@ -126,8 +128,13 @@ def main(): |
|
|
|
|
|
|
|
|
try: |
|
|
try: |
|
|
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) |
|
|
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) |
|
|
for i in range(num_chunks): |
|
|
for i in tqdm( |
|
|
print(f"Processing chunk {i}...") |
|
|
range(num_chunks), |
|
|
|
|
|
total=num_chunks, |
|
|
|
|
|
desc="Processing chunks", |
|
|
|
|
|
ncols=100, |
|
|
|
|
|
bar_format="{l_bar}{bar}{r_bar}", |
|
|
|
|
|
): |
|
|
chunk = city_combinations[(i * chunk_size) : (i + 1) * chunk_size] |
|
|
chunk = city_combinations[(i * chunk_size) : (i + 1) * chunk_size] |
|
|
futures = { |
|
|
futures = { |
|
|
executor.submit(calculate_distance, pair): pair for pair in chunk |
|
|
executor.submit(calculate_distance, pair): pair for pair in chunk |
|
|