Browse Source

chunk data gen

main
mm 2 years ago
parent
commit
9083e9d6e1
  1. 37
      generate_data.py

37
generate_data.py

@ -105,34 +105,37 @@ def calculate_distance(pair):
distance = get_distance(city1["name"], city2["name"])
return city1["name"], city2["name"], distance
def main():
    """Write the distance for every unordered pair of cities to a CSV file.

    Reads the module-level ``us_cities`` mapping and the parsed command-line
    ``args`` (``args.output_file`` and ``args.workers``). Every 2-combination
    of cities is passed to ``calculate_distance`` in a process pool, and one
    ``city_from, city_to, distance`` row is written per pair. Pairs whose
    distance comes back as ``None`` are skipped.

    Work is submitted in fixed-size chunks so that only ``chunk_size`` futures
    are outstanding at a time, and the file is flushed after each chunk so
    finished rows survive an interruption. Ctrl-C stops the run without
    waiting for pending work.
    """
    cities = list(us_cities.values())
    print(f"Num cities: {len(cities)}")

    city_combinations = list(itertools.combinations(cities, 2))
    chunk_size = 800  # adjust this as needed
    output_file = args.output_file

    with open(output_file, "w", newline="") as csvfile:
        fieldnames = ["city_from", "city_to", "distance"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # Create the pool before the try block so the interrupt handler can
        # always reference it, even if Ctrl-C arrives very early.
        executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers)
        try:
            # Stepping by chunk_size never produces an empty trailing chunk,
            # including when the pair count is an exact multiple of it.
            for start in range(0, len(city_combinations), chunk_size):
                chunk = city_combinations[start : start + chunk_size]
                futures = [
                    executor.submit(calculate_distance, pair) for pair in chunk
                ]
                for future in as_completed(futures):
                    city_from, city_to, distance = future.result()
                    if distance is not None:
                        writer.writerow(
                            {
                                "city_from": city_from,
                                "city_to": city_to,
                                "distance": distance,
                            }
                        )
                csvfile.flush()  # persist each finished chunk to disk
        except KeyboardInterrupt:
            print("Interrupted. Terminating processes...")
            executor.shutdown(wait=False)
        else:
            # Every future has completed by now; release the worker processes.
            executor.shutdown()

Loading…
Cancel
Save