From 9083e9d6e184fb61d13bb12cc6acfb52b897e8ec Mon Sep 17 00:00:00 2001 From: mm Date: Fri, 5 May 2023 05:41:52 +0000 Subject: [PATCH] chunk data gen --- generate_data.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/generate_data.py b/generate_data.py index fd5924e..0f12e05 100644 --- a/generate_data.py +++ b/generate_data.py @@ -105,34 +105,37 @@ def calculate_distance(pair): distance = get_distance(city1["name"], city2["name"]) return city1["name"], city2["name"], distance - def main(): cities = list(us_cities.values()) print(f"Num cities: {len(cities)}") city_combinations = list(itertools.combinations(cities, 2)) + chunk_size = 800 # adjust this as needed + num_chunks = len(city_combinations) // chunk_size + 1 + output_file = args.output_file - with open(args.output_file, "w", newline="") as csvfile: + with open(output_file, "w", newline="") as csvfile: fieldnames = ["city_from", "city_to", "distance"] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() try: executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) - # results = executor.map(calculate_distance, city_combinations) - futures = { - executor.submit(calculate_distance, pair): pair - for pair in city_combinations - } - for future in as_completed(futures): - city_from, city_to, distance = future.result() - if distance is not None: - writer.writerow( - { - "city_from": city_from, - "city_to": city_to, - "distance": distance, - } - ) + for i in range(num_chunks): + chunk = city_combinations[i * chunk_size : (i + 1) * chunk_size] + futures = { + executor.submit(calculate_distance, pair): pair for pair in chunk + } + for future in as_completed(futures): + city_from, city_to, distance = future.result() + if distance is not None: + writer.writerow( + { + "city_from": city_from, + "city_to": city_to, + "distance": distance, + } + ) + csvfile.flush() # write to disk immediately except KeyboardInterrupt: print("Interrupted. Terminating processes...") executor.shutdown(wait=False)