chunk data gen
This commit is contained in:
		
							parent
							
								
									1e38ce04c9
								
							
						
					
					
						commit
						9083e9d6e1
					
				| @ -105,34 +105,37 @@ def calculate_distance(pair): | |||||||
|     distance = get_distance(city1["name"], city2["name"]) |     distance = get_distance(city1["name"], city2["name"]) | ||||||
|     return city1["name"], city2["name"], distance |     return city1["name"], city2["name"], distance | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| def main(): | def main(): | ||||||
|     cities = list(us_cities.values()) |     cities = list(us_cities.values()) | ||||||
|     print(f"Num cities: {len(cities)}") |     print(f"Num cities: {len(cities)}") | ||||||
|     city_combinations = list(itertools.combinations(cities, 2)) |     city_combinations = list(itertools.combinations(cities, 2)) | ||||||
|  |     chunk_size = 800  # adjust this as needed | ||||||
|  |     num_chunks = len(city_combinations) // chunk_size + 1 | ||||||
|  |     output_file = args.output_file | ||||||
| 
 | 
 | ||||||
|     with open(args.output_file, "w", newline="") as csvfile: |     with open(output_file, "w", newline="") as csvfile: | ||||||
|         fieldnames = ["city_from", "city_to", "distance"] |         fieldnames = ["city_from", "city_to", "distance"] | ||||||
|         writer = csv.DictWriter(csvfile, fieldnames=fieldnames) |         writer = csv.DictWriter(csvfile, fieldnames=fieldnames) | ||||||
|         writer.writeheader() |         writer.writeheader() | ||||||
| 
 | 
 | ||||||
|         try: |         try: | ||||||
|             executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) |             executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) | ||||||
|             # results = executor.map(calculate_distance, city_combinations) |             for i in range(num_chunks): | ||||||
|             futures = { |                 chunk = city_combinations[i * chunk_size : (i + 1) * chunk_size] | ||||||
|                 executor.submit(calculate_distance, pair): pair |                 futures = { | ||||||
|                 for pair in city_combinations |                     executor.submit(calculate_distance, pair): pair for pair in chunk | ||||||
|             } |                 } | ||||||
|             for future in as_completed(futures): |                 for future in as_completed(futures): | ||||||
|                 city_from, city_to, distance = future.result() |                     city_from, city_to, distance = future.result() | ||||||
|                 if distance is not None: |                     if distance is not None: | ||||||
|                     writer.writerow( |                         writer.writerow( | ||||||
|                         { |                             { | ||||||
|                             "city_from": city_from, |                                 "city_from": city_from, | ||||||
|                             "city_to": city_to, |                                 "city_to": city_to, | ||||||
|                             "distance": distance, |                                 "distance": distance, | ||||||
|                         } |                             } | ||||||
|                     ) |                         ) | ||||||
|  |                         csvfile.flush()  # write to disk immediately | ||||||
|         except KeyboardInterrupt: |         except KeyboardInterrupt: | ||||||
|             print("Interrupted. Terminating processes...") |             print("Interrupted. Terminating processes...") | ||||||
|             executor.shutdown(wait=False) |             executor.shutdown(wait=False) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user