diff --git a/prepare_training_data.py b/prepare_training_data.py index 6cbea77..cdabc8c 100644 --- a/prepare_training_data.py +++ b/prepare_training_data.py @@ -3,6 +3,7 @@ import argparse import pandas as pd INPUT_COLUMNS = ["intersection", "text_on_sign_exact", "latitude", "longitude"] +EXCLUDED_INTERSECTIONS = {"56th-pena"} def parse_args(): @@ -57,6 +58,7 @@ def load_raw_data(path): data = data.dropna(subset=INPUT_COLUMNS) data["text_on_sign_exact"] = data["text_on_sign_exact"].astype(str).str.strip() data = data[data["text_on_sign_exact"] != ""] + data = data[~data["intersection"].isin(EXCLUDED_INTERSECTIONS)] return data