# File-viewer banner (not part of the program): 60 lines, 2.2 KiB, Python
import torch
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
|
|
from utils import extract_colors, preprocess_data
|
|
|
|
|
|
def create_dataloader(N: int = 100_000_000, skip: bool = True, **kwargs) -> DataLoader:
    """Build a DataLoader over uniformly random RGB triples.

    Args:
        N: Number of random colors to generate. Integral floats such as
            ``1e6`` are accepted and converted with ``int()``.
            NOTE: the default of 100 million float32 triples allocates
            roughly 1.2 GB before preprocessing.
        skip: Forwarded unchanged to ``preprocess_data``.
        **kwargs: Forwarded unchanged to ``torch.utils.data.DataLoader``
            (e.g. ``batch_size``, ``shuffle``).

    Returns:
        A DataLoader whose dataset pairs each preprocessed color row with a
        zero-valued dummy label.
    """
    # Coerce once so callers may keep passing scientific-notation floats.
    num_samples = int(N)

    rgb_tensor = torch.rand((num_samples, 3), dtype=torch.float32)
    rgb_tensor = preprocess_data(rgb_tensor, skip=skip)

    # TensorDataset needs a second tensor of matching length; the labels
    # carry no information and are all zeros.
    dummy_labels = torch.zeros(len(rgb_tensor))
    dataset = TensorDataset(rgb_tensor, dummy_labels)

    return DataLoader(dataset, **kwargs)
|
|
|
|
|
|
def create_gray_supplement(N: int = 50, skip: bool = True):
    """Build named gray samples spanning black (0.0) to white (1.0).

    Args:
        N: Number of evenly spaced gray levels to generate.
        skip: Forwarded unchanged to ``preprocess_data``.

    Returns:
        A list of ``(color, name)`` tuples, where ``color`` is one row of the
        preprocessed gray tensor and ``name`` is ``"gray"`` followed by the
        gray level formatted to four decimals (e.g. ``"gray1.0000"``).
    """
    # linspace includes both endpoints, so level i equals i / (N - 1), not
    # i / N. Labels are therefore taken from the generated values themselves
    # so that each name matches the gray level it describes.
    gray_levels = torch.linspace(0, 1, N)

    # Replicate each level across the three RGB channels.
    gray_tensor = gray_levels.unsqueeze(1).repeat(1, 3)
    gray_tensor = preprocess_data(gray_tensor, skip=skip)

    # NOTE(review): assumes preprocess_data keeps one output row per input
    # row, in order - confirm against utils.preprocess_data.
    return [
        (color, f"gray{level:.4f}")
        for color, level in zip(gray_tensor, gray_levels.tolist())
    ]
|
|
|
|
|
|
def create_named_dataloader(N: int = 0, skip: bool = True, **kwargs):
    """Build a DataLoader of ``(color, name)`` pairs from ``extract_colors``.

    Args:
        N: When greater than zero, the samples returned by
            ``create_gray_supplement(N, skip=skip)`` are appended.
        skip: Forwarded unchanged to ``preprocess_data`` and to
            ``create_gray_supplement``.
        **kwargs: Forwarded unchanged to ``torch.utils.data.DataLoader``.

    Returns:
        A DataLoader over a list of ``(color, name)`` tuples, with every
        ``"xkcd:"`` occurrence removed from the names.
    """
    colors, raw_names = extract_colors()
    colors = preprocess_data(colors, skip=skip)

    # Pair every preprocessed color with its cleaned-up name.
    named_samples = []
    for idx in range(len(colors)):
        label = raw_names[idx].replace("xkcd:", "")
        named_samples.append((colors[idx], label))

    if N > 0:
        named_samples.extend(create_gray_supplement(N, skip=skip))

    return DataLoader(named_samples, **kwargs)
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: build both loaders and print one batch from each.
    loader_options = {"batch_size": 4, "shuffle": True}

    random_loader = create_dataloader(N=1e6, **loader_options)
    print(len(random_loader.dataset))

    named_loader = create_named_dataloader(**loader_options)

    # First batch of random colors; the dummy labels are discarded.
    rgb_batch, _ = next(iter(random_loader))
    print(rgb_batch)

    # First batch of named colors together with their names.
    named_rgb_batch, name_batch = next(iter(named_loader))
    print(named_rgb_batch, name_batch)
|