chunking
This commit is contained in:
parent
c111678bb8
commit
4500c1b483
2
Makefile
2
Makefile
@ -2,7 +2,7 @@ all: install data train eval
|
|||||||
|
|
||||||
city_distances_full.csv: check generate_data.py
|
city_distances_full.csv: check generate_data.py
|
||||||
@echo "Generating distance data..."
|
@echo "Generating distance data..."
|
||||||
@bash -c 'time python generate_data.py -w 8 -c US'
|
@bash -c 'time python generate_data.py -w 8 -c US -s 10000'
|
||||||
|
|
||||||
data: city_distances_full.csv
|
data: city_distances_full.csv
|
||||||
|
|
||||||
|
@ -19,6 +19,13 @@ parser.add_argument(
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-w", "--workers", help="Specify the number of workers", type=int, default=1
|
"-w", "--workers", help="Specify the number of workers", type=int, default=1
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-s",
|
||||||
|
"--chunk-size",
|
||||||
|
help="Specify chunk size for batching calculations",
|
||||||
|
type=int,
|
||||||
|
default=1000,
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-o",
|
"-o",
|
||||||
"--output-file",
|
"--output-file",
|
||||||
@ -108,7 +115,7 @@ def main():
|
|||||||
cities = list(us_cities.values())
|
cities = list(us_cities.values())
|
||||||
print(f"Num cities: {len(cities)}")
|
print(f"Num cities: {len(cities)}")
|
||||||
city_combinations = list(itertools.combinations(cities, 2))
|
city_combinations = list(itertools.combinations(cities, 2))
|
||||||
chunk_size = 800 # adjust this as needed
|
chunk_size = args.chunk_size
|
||||||
num_chunks = len(city_combinations) // chunk_size + 1
|
num_chunks = len(city_combinations) // chunk_size + 1
|
||||||
output_file = args.output_file
|
output_file = args.output_file
|
||||||
|
|
||||||
@ -120,6 +127,7 @@ def main():
|
|||||||
try:
|
try:
|
||||||
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers)
|
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.workers)
|
||||||
for i in range(num_chunks):
|
for i in range(num_chunks):
|
||||||
|
print(f"Processing chunk {i}...")
|
||||||
chunk = city_combinations[(i * chunk_size) : (i + 1) * chunk_size]
|
chunk = city_combinations[(i * chunk_size) : (i + 1) * chunk_size]
|
||||||
futures = {
|
futures = {
|
||||||
executor.submit(calculate_distance, pair): pair for pair in chunk
|
executor.submit(calculate_distance, pair): pair for pair in chunk
|
||||||
|
Loading…
Reference in New Issue
Block a user