Source code for datasets.seq_tinyimagenet

# Copyright 2022-present, Lorenzo Bonicelli, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Simone Calderara.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset

from backbone.ResNet18 import resnet18
from datasets.transforms.denormalization import DeNormalize
from datasets.utils.continual_dataset import (ContinualDataset,
                                              store_masked_loaders)
from utils import smart_joint
from utils.conf import base_path


[docs] class TinyImagenet(Dataset): """Defines the Tiny Imagenet dataset.""" def __init__(self, root: str, train: bool = True, transform: Optional[nn.Module] = None, target_transform: Optional[nn.Module] = None, download: bool = False) -> None: self.not_aug_transform = transforms.Compose([transforms.ToTensor()]) self.root = root self.train = train self.transform = transform self.target_transform = target_transform self.download = download if download: if os.path.isdir(root) and len(os.listdir(root)) > 0: print('Download not needed, files already on disk.') else: from onedrivedownloader import download print('Downloading dataset') ln = "https://unimore365-my.sharepoint.com/:u:/g/personal/263133_unimore_it/EVKugslStrtNpyLGbgrhjaABqRHcE3PB_r2OEaV7Jy94oQ?e=9K29aD" download(ln, filename=smart_joint(root, 'tiny-imagenet-processed.zip'), unzip=True, unzip_path=root, clean=True) self.data = [] for num in range(20): self.data.append(np.load(smart_joint( root, 'processed/x_%s_%02d.npy' % ('train' if self.train else 'val', num + 1)))) self.data = np.concatenate(np.array(self.data)) self.targets = [] for num in range(20): self.targets.append(np.load(smart_joint( root, 'processed/y_%s_%02d.npy' % ('train' if self.train else 'val', num + 1)))) self.targets = np.concatenate(np.array(self.targets)) def __len__(self): return len(self.data) def __getitem__(self, index): img, target = self.data[index], self.targets[index] # doing this so that it is consistent with all other datasets # to return a PIL Image img = Image.fromarray(np.uint8(255 * img)) original_img = img.copy() if self.transform is not None: img = self.transform(img) if self.target_transform is not None: target = self.target_transform(target) if hasattr(self, 'logits'): return img, target, original_img, self.logits[index] return img, target
[docs] class MyTinyImagenet(TinyImagenet): """Overrides the TinyImagenet dataset to change the getitem function.""" def __init__(self, root: str, train: bool = True, transform: Optional[nn.Module] = None, target_transform: Optional[nn.Module] = None, download: bool = False) -> None: super(MyTinyImagenet, self).__init__( root, train, transform, target_transform, download) def __getitem__(self, index): img, target = self.data[index], self.targets[index] # doing this so that it is consistent with all other datasets # to return a PIL Image img = Image.fromarray(np.uint8(255 * img)) original_img = img.copy() not_aug_img = self.not_aug_transform(original_img) if self.transform is not None: img = self.transform(img) if self.target_transform is not None: target = self.target_transform(target) if hasattr(self, 'logits'): return img, target, not_aug_img, self.logits[index] return img, target, not_aug_img
[docs] class SequentialTinyImagenet(ContinualDataset): """The Sequential Tiny Imagenet dataset. Args: NAME (str): name of the dataset. SETTING (str): setting of the dataset. N_CLASSES_PER_TASK (int): number of classes per task. N_TASKS (int): number of tasks. N_CLASSES (int): number of classes. SIZE (tuple): size of the images. MEAN (tuple): mean of the dataset. STD (tuple): standard deviation of the dataset. TRANSFORM (torchvision.transforms): transformations to apply to the dataset. """ NAME = 'seq-tinyimg' SETTING = 'class-il' N_CLASSES_PER_TASK = 20 N_TASKS = 10 N_CLASSES = N_CLASSES_PER_TASK * N_TASKS MEAN, STD = (0.4802, 0.4480, 0.3975), (0.2770, 0.2691, 0.2821) SIZE = (64, 64) TRANSFORM = transforms.Compose( [transforms.RandomCrop(64, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(MEAN, STD)])
[docs] def get_data_loaders(self) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]: transform = self.TRANSFORM test_transform = transforms.Compose( [transforms.ToTensor(), self.get_normalization_transform()]) train_dataset = MyTinyImagenet(base_path() + 'TINYIMG', train=True, download=True, transform=transform) test_dataset = TinyImagenet(base_path() + 'TINYIMG', train=False, download=True, transform=test_transform) train, test = store_masked_loaders(train_dataset, test_dataset, self) return train, test
[docs] @staticmethod def get_backbone(): return resnet18(SequentialTinyImagenet.N_CLASSES_PER_TASK * SequentialTinyImagenet.N_TASKS)
[docs] @staticmethod def get_loss(): return F.cross_entropy
[docs] def get_transform(self): transform = transforms.Compose( [transforms.ToPILImage(), self.TRANSFORM]) return transform
[docs] @staticmethod def get_normalization_transform(): transform = transforms.Normalize(SequentialTinyImagenet.MEAN, SequentialTinyImagenet.STD) return transform
[docs] @staticmethod def get_denormalization_transform(): transform = DeNormalize(SequentialTinyImagenet.MEAN, SequentialTinyImagenet.STD) return transform
[docs] @staticmethod def get_epochs(): return 50
[docs] @staticmethod def get_batch_size(): return 32