fc100.py 3.17 KB
Newer Older
Yaoyao Liu's avatar
Yaoyao Liu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os.path as osp
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import numpy as np
from os.path import expanduser

class DatasetLoader(Dataset):
    """Dataset over the FC100 image folders stored under ``data/FC100``.

    Each split lives in ``data/FC100/<setname>`` and contains one sub-folder
    per class; every entry inside a class folder is treated as one sample.
    Class indices are assigned by enumerating the class folders in sorted
    name order, so the folder -> label mapping is reproducible.

    Attributes:
        data: list of sample file paths.
        label: list of integer class indices, aligned with ``data``.
        num_class: number of distinct labels that have at least one sample.
        transform: torchvision pipeline applied to each image on access.
    """

    # Dataset root. Only its existence decides whether the download runs.
    _DATA_ROOT = 'data/FC100'
    _SPLITS = ('train', 'val', 'test')
    # Side length of the square crop produced by both pipelines.
    _IMAGE_SIZE = 84
    # Evaluation images are resized to this before the center crop.
    _EVAL_RESIZE = [92, 92]
    # Per-channel statistics on a 0-255 scale; divided by 255 before use
    # because Normalize runs after ToTensor.
    _CHANNEL_MEAN = [125.3, 123.0, 113.9]
    _CHANNEL_STD = [63.0, 62.1, 66.7]

    def __init__(self, setname, args=None):
        """Index the requested split and build its transform.

        Args:
            setname: one of ``'train'``, ``'val'`` or ``'test'``.
            args: accepted for interface compatibility; not used here.

        Raises:
            ValueError: if ``setname`` is not a known split.
            FileNotFoundError: if the split folder is missing, e.g. because
                the download did not complete.
        """
        # Validate first so a typo does not trigger a download.
        if setname not in self._SPLITS:
            raise ValueError('Wrong setname.')

        self._ensure_downloaded()

        split_dir = '%s/%s' % (self._DATA_ROOT, setname)
        if not os.path.isdir(split_dir):
            raise FileNotFoundError(
                'FC100 split folder not found: %s. If a previous download '
                'failed, remove %s and run again.'
                % (split_dir, self._DATA_ROOT))

        # Set data, label and class number to be accessible from outside
        self.data, self.label = self._list_samples(split_dir)
        self.num_class = len(set(self.label))

        # Transformation
        self.transform = self._build_transform(setname == 'train')

    @classmethod
    def _ensure_downloaded(cls):
        """Run the download script when the dataset root does not exist."""
        if os.path.exists(cls._DATA_ROOT):
            return
        print ('Download FC100 from Google Drive.')
        os.makedirs(cls._DATA_ROOT)
        status = os.system('sh scripts/download_fc100.sh')
        if status != 0:
            print ('FC100 download script exited with status %s.' % status)
            # Remove the still-empty root so the next run retries the
            # download instead of silently skipping it. rmdir refuses to
            # delete a non-empty folder, so partial data is never lost.
            try:
                os.rmdir(cls._DATA_ROOT)
            except OSError:
                pass

    @staticmethod
    def _list_samples(split_dir):
        """Return ``(paths, labels)`` for every file in each class folder.

        Folders and files are visited in sorted order because
        ``os.listdir`` returns entries in arbitrary order, which would
        otherwise make the class indices vary between runs or machines.
        Non-directory entries directly under ``split_dir`` are ignored.
        """
        class_names = sorted(
            name for name in os.listdir(split_dir)
            if os.path.isdir(osp.join(split_dir, name)))

        paths = []
        labels = []
        for class_idx, class_name in enumerate(class_names):
            class_dir = osp.join(split_dir, class_name)
            for file_name in sorted(os.listdir(class_dir)):
                paths.append(osp.join(class_dir, file_name))
                labels.append(class_idx)
        return paths, labels

    @classmethod
    def _build_transform(cls, is_train):
        """Build the augmenting (train) or deterministic (eval) pipeline."""
        normalize = transforms.Normalize(
            np.array([x / 255.0 for x in cls._CHANNEL_MEAN]),
            np.array([x / 255.0 for x in cls._CHANNEL_STD]))
        if is_train:
            steps = [
                transforms.RandomResizedCrop(cls._IMAGE_SIZE),
                transforms.RandomHorizontalFlip()]
        else:
            steps = [
                transforms.Resize(cls._EVAL_RESIZE),
                transforms.CenterCrop(cls._IMAGE_SIZE)]
        return transforms.Compose(steps + [transforms.ToTensor(), normalize])

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(transformed_image, label)`` for sample ``i``."""
        path, label = self.data[i], self.label[i]
        # The context manager closes the underlying file promptly;
        # convert() returns an independent, fully loaded image.
        with Image.open(path) as img:
            image = self.transform(img.convert('RGB'))
        return image, label


if __name__=='__main__':
    # Manual smoke test: index the training split, report its size and
    # fetch one sample to exercise the transform pipeline end to end.
    train_set = DatasetLoader('train')
    print ('num image in this set:', len(train_set))
    print ('num class  in this set:', len(np.unique(train_set.label)))
    # Reference counts kept from the original file:
    #   test  160 class, 206209 image
    #   val    97 class, 124261 image
    #   train 351 class, 448695 image
    # NOTE(review): these look too large for FC100 and may have been
    # copied from another dataset's loader -- confirm or update.
    first_sample = train_set[0]