# colors/dataloader.py


								import torch

								from torch.utils.data import DataLoader, TensorDataset


								from utils import extract_colors, preprocess_data


								def create_random_dataloader(
								    N: float = 1e8, skip: bool = True, **kwargs
								) -> DataLoader:
								    """Build a DataLoader over uniformly random RGB triples.

								    Args:
								        N: Number of random colors to draw. Annotated as ``float`` (which
								            also admits ``int``) because both the default and the callers
								            use scientific notation such as ``1e6``; the value is
								            truncated with ``int()`` before use.
								        skip: Forwarded unchanged to ``preprocess_data``.
								        **kwargs: Forwarded unchanged to ``torch.utils.data.DataLoader``
								            (for example ``batch_size`` or ``shuffle``).

								    Returns:
								        A DataLoader over a ``TensorDataset`` that pairs every
								        preprocessed color with a placeholder label of ``0.0``.
								    """
								    num_samples = int(N)

								    # One row per color, three channels each, drawn uniformly from [0, 1).
								    colors = preprocess_data(
								        torch.rand((num_samples, 3), dtype=torch.float32), skip=skip
								    )

								    # The labels carry no information; they are sized after preprocessing
								    # so both tensors handed to TensorDataset have the same length.
								    placeholder_labels = torch.zeros(len(colors))

								    return DataLoader(TensorDataset(colors, placeholder_labels), **kwargs)


								def create_gray_supplement(N: int = 50, skip: bool = True):
								    """Return ``(color, name)`` pairs for an evenly spaced ramp of grays.

								    ``N`` levels are taken from ``torch.linspace(0, 1, N)``, each level is
								    copied into three equal channels, and the result is passed through
								    ``preprocess_data`` with ``skip`` forwarded. Every row is paired with
								    a name of the form ``"gray<i/N>"`` printed with four decimals.

								    NOTE(review): the name is computed from ``i / N``, whereas
								    ``linspace(0, 1, N)`` places row ``i`` at ``i / (N - 1)`` before
								    preprocessing, so the last row (level 1.0) is named ``(N - 1) / N``.
								    Left as-is to keep the existing names stable; confirm whether the
								    names are meant to equal the gray level.
								    """
								    levels = torch.linspace(0, 1, N)

								    # Column vector -> three identical columns, one per channel.
								    grays = preprocess_data(levels[:, None].repeat(1, 3), skip=skip)

								    return [(row, f"gray{idx/N:2.4f}") for idx, row in enumerate(grays)]


								def create_named_dataloader(N: int = 0, skip: bool = True, **kwargs):
								    """Build a DataLoader of ``(color, name)`` pairs from the named colors.

								    Colors and names come from ``extract_colors``. The colors are passed
								    through ``preprocess_data`` with ``skip`` forwarded, and every
								    occurrence of the text ``"xkcd:"`` is removed from each name. When
								    ``N`` is greater than zero, the pairs produced by
								    ``create_gray_supplement(N, skip=skip)`` are appended after the named
								    colors. All remaining keyword arguments go to ``DataLoader`` unchanged.
								    """
								    colors, raw_names = extract_colors()
								    colors = preprocess_data(colors, skip=skip)

								    # Names are looked up by row index, so the number of rows left after
								    # preprocessing decides how many pairs are produced.
								    samples = [
								        (color, raw_names[idx].replace("xkcd:", ""))
								        for idx, color in enumerate(colors)
								    ]

								    if N > 0:
								        samples.extend(create_gray_supplement(N, skip=skip))

								    return DataLoader(samples, **kwargs)


								if __name__ == "__main__":
								    # Smoke test: build both loaders and print one batch from each.
								    loader_options = dict(batch_size=4, shuffle=True)

								    random_loader = create_random_dataloader(N=1e6, **loader_options)
								    print(len(random_loader.dataset))

								    named_loader = create_named_dataloader(**loader_options)

								    # Batches of the random loader are (colors, placeholder labels);
								    # only the colors are shown.
								    random_colors, _ = next(iter(random_loader))
								    print(random_colors)

								    # Batches of the named loader are (colors, color names).
								    named_colors, color_names = next(iter(named_loader))
								    print(named_colors, color_names)