I have a lot of images with .gif and .octet-stream extensions. Since ImageFolder will ignore those files, I use DatasetFolder and provide my own img_extensions and loader, as suggested in other posts on this forum. I create a dataloader and try to iterate through it. Unfortunately, it gets stuck forever.

The folder structure is the following. I have attached all images except ‘01.octet-stream’, as this forum does not support that file type.

# debug/0/01.octet-stream
# debug/0/02.jpeg
# debug/0/03.gif
# debug/1/01.octet-stream
# debug/1/02.jpeg
# debug/1/03.gif

Here is my code; you can run it directly in a notebook.

from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time
import os
import copy
from torchvision.datasets import ImageFolder
plt.ion()
data_transforms = transforms.Compose([transforms.Resize(300),
        transforms.CenterCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
img_extensions = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.gif', '.octet-stream']
def my_loader(path):
    from torchvision import get_image_backend
    from torchvision.datasets.folder import accimage_loader
    from PIL import Image
    def my_pil_loader(path):
        print("loading {}".format(path))
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')
    if get_image_backend() == 'accimage':
        try:
            print('{} uses accimage'.format(path))
            return accimage_loader(path)
        except IOError:
            print('{} accimage loading failed, using PIL'.format(path))
            return my_pil_loader(path)
    else:
        print('{} uses PIL'.format(path))
        return my_pil_loader(path)
my_loader('./debug/0/03.gif')
data_dir = './debug/'
batch_size = 32
image_datasets = datasets.DatasetFolder(data_dir, my_loader, img_extensions,
                                          data_transforms)
dataloaders = torch.utils.data.DataLoader(image_datasets, batch_size=batch_size,
                                             shuffle=True, num_workers=4)
dataset_sizes = len(image_datasets)
print(dataset_sizes)

Everything works fine so far. However, when I try to iterate through the dataloader and run the following code, the program gets stuck forever! It seems the code runs into a dead loop somewhere even before loading images, as I do not see any print output from the loader. What’s wrong with my implementation?

If I replace the DatasetFolder with ImageFolder and get rid of the customized loader and extensions, everything works fine. Very weird…

index = 0
for inputs, labels in dataloaders:
    print(index)
    print('inputs')
    print(inputs.size())
    print('labels')
    print(labels.size())
    index += 1

Is your Dataset working without the DataLoader, i.e. do you get a valid image using this code:

data = image_datasets[0]

If so, could you set num_workers=0 in your DataLoader and try again?
I would like to narrow down the possible error source first.
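
For example, a minimal sketch reusing the dataset and batch size defined in your code above:

dataloaders = torch.utils.data.DataLoader(image_datasets, batch_size=batch_size,
                                          shuffle=True, num_workers=0)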

image_datasets[0] gives

./debug/0\01.octet-stream uses PIL
loading ./debug/0\01.octet-stream
(tensor([[[ 0.2967,  0.0912,  0.1254,  ..., -0.3369, -0.2513, -0.2856],
          [ 0.2111,  0.2282,  0.1768,  ..., -0.4054, -0.3369, -0.3198],
          [ 0.1939,  0.2796,  0.2282,  ..., -0.5596, -0.4568, -0.2684],
          [-1.5870, -1.5014, -1.5699,  ..., -1.6555, -1.8097, -1.8439],
          [-1.6727, -1.5699, -1.5699,  ..., -1.9809, -1.9295, -1.7754],
          [-1.6384, -1.5185, -1.5528,  ..., -1.7069, -1.6898, -1.6042]],
         [[ 0.5728,  0.6604,  0.7304,  ..., -0.0924, -0.1099, -0.0749],
          [ 0.7304,  0.6254,  0.6954,  ..., -0.1975, -0.1450, -0.1099],
          [ 0.7304,  0.7129,  0.7479,  ..., -0.3025, -0.1800, -0.0924],
          [-1.1253, -1.1429, -1.1429,  ..., -1.6331, -1.5630, -1.5980],
          [-1.0728, -1.1779, -1.2654,  ..., -1.4755, -1.5455, -1.5980],
          [-1.1604, -1.1429, -1.1604,  ..., -1.5980, -1.6155, -1.6155]],
         [[ 0.7054,  0.6356,  0.6531,  ...,  0.0605,  0.0431,  0.0256],
          [ 0.7576,  0.7402,  0.7228,  ...,  0.0605,  0.0431, -0.0092],
          [ 0.7576,  0.7925,  0.7925,  ..., -0.0267, -0.0615, -0.0441],
          [-0.7064, -0.6715, -0.6541,  ..., -1.2990, -1.2467, -1.2467],
          [-0.7936, -0.6541, -0.6193,  ..., -1.3513, -1.3164, -1.1073],
          [-0.8284, -0.7064, -0.7064,  ..., -1.2816, -1.2990, -1.2119]]]), 0)

The issue seems to be in the dataloader iterator.

dataloader_iter = dataloaders.__iter__()
dataloader_iter.pin_memory

gives

False

It got stuck when I run
dataloader_iter.data_queue.get()

If I set the num_workers = 0, I got the following error

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-26-34fa306d18e2> in <module>()
----> 1 dataloader_iter._get_batch()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in _get_batch(self)
    307                 raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout))
    308         else:
--> 309             return self.data_queue.get()
    311     def __next__(self):
AttributeError: '_DataLoaderIter' object has no attribute 'data_queue'

Great to hear, it’s working now!
Multiprocessing is implemented a bit differently on Windows, e.g. it uses spawn instead of fork.
That means that you should guard your code with:

if __name__=='__main__':
    main()

You can read more about these differences in the Windows FAQ.

I’m not sure how Jupyter notebooks handle this on Windows machines, but I guess it might be related to the multiprocessing part.
Could you try to export your notebook as a Python script, add the guard, and try to run it again using more workers?

Hi Patrick

Yes, it indeed works when I rewrite the code with the if guard in the following way. Thanks!

My question is why the issue occurs only when I use the customized loader in DatasetFolder. If I use ImageFolder with the default loader, there is no dead loop even without the ‘if’ guard.

# coding: utf-8

# In[ ]:

from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time
import os
import copy
import sys
from torchvision.datasets import ImageFolder
plt.ion()

# In[ ]:

def my_loader(path):
    from torchvision import get_image_backend
    from torchvision.datasets.folder import accimage_loader
    from PIL import Image

    def my_pil_loader(path):
        print("loading {}".format(path))
        try:
            with open(path, 'rb') as f:
                img = Image.open(f)
                return img.convert('RGB')
        except IOError:
            print('failed to load {} using PIL'.format(path))

    if get_image_backend() == 'accimage':
        try:
            print('loading {} uses accimage'.format(path))
            return accimage_loader(path)
        except IOError:
            print('failed to load {} using accimage, falling back to PIL'.format(path))
            return my_pil_loader(path)
    else:
        print('{} uses PIL'.format(path))
        return my_pil_loader(path)

# In[ ]:

def main():
    data_transforms = transforms.Compose([transforms.Resize(300),
            transforms.CenterCrop(299),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    img_extensions = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.gif', '.octet-stream']
    data_dir = './debug/'
    batch_size = 32
    image_datasets = datasets.DatasetFolder(data_dir, my_loader, img_extensions,
                                            data_transforms)
    dataloaders = torch.utils.data.DataLoader(image_datasets, batch_size=batch_size,
                                              shuffle=True, num_workers=4)
    index = 0
    for inputs, labels in dataloaders:
        print(index)
        print('inputs')
        print(inputs.size())
        print('labels')
        print(labels.size())
        index += 1

# In[ ]:

if __name__ == '__main__':
    main()

I’m not really sure what makes the DataLoader hang, but it’s good to hear it’s working now!
Your custom loader looks more or less like the default pil_loader besides the try block.
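
For comparison, torchvision’s default PIL loader (torchvision.datasets.folder.pil_loader) looks roughly like this:

from PIL import Image

def pil_loader(path):
    # open the path as a file object and convert to RGB
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')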

However, as Windows doesn’t use fork to create new workers, it can be problematic if you don’t use a “main guard”, because all module-level code will be executed in every child process.
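
As an illustration (a hypothetical script, not from this thread): under spawn, each worker process re-imports the module, so any top-level print or DataLoader loop runs again in every worker unless it sits behind the guard.

# spawn_demo.py -- hypothetical example of spawn behaviour on Windows
import torch
from torch.utils.data import DataLoader, TensorDataset

print("this module-level line runs in the main process AND in every spawned worker")

def main():
    dataset = TensorDataset(torch.arange(8).float())
    loader = DataLoader(dataset, batch_size=2, num_workers=2)
    for (batch,) in loader:
        print(batch)

if __name__ == '__main__':
    # without this guard, the loop above would also execute at import time
    # inside each worker process, which is what can make the script hang
    main()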