This happens because of the transformation you use:
self.transform = transforms.Compose([transforms.ToTensor()])
As you can see in the documentation, torchvision.transforms.ToTensor converts a PIL Image or numpy.ndarray to tensor. So if you want to use this transformation, your data has to be of one of the above types.
This happens because of the transformation you use:
self.transform = transforms.Compose([transforms.ToTensor()])
As you can see in the documentation, torchvision.transforms.ToTensor converts a PIL Image or numpy.ndarray to tensor. So if you want to use this transformation, your data has to be of one of the above types.
tf=transforms.Compose([
transforms.ToPILImage(),
transforms.Resize((512,640)),
transforms.ToTensor()
])
it works for me.
transforms.ToPILImage(): pic should be Tensor or ndarray
Pic should be Tensor or ndarray. Got <class 'dict'>
TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
TypeError: pic should be Tensor or ndarray. Got <class 'PIL.Image.Image'>.
I'm using a pretrained VGG19 on a custom image dataset that I would like to be resized to (150, 150, 3). I've used both ToTensor and Resize transforms with transform.Compose(), and I'm running into the following error (below). I have also pasted my code below this. Does anyone know how I can fix this?
in <module>
img, label = ds[sample_idx]
in __getitem__
image = self.transform(image)
in __call__
img = t(img)
in __call__
return F.to_tensor(pic)
in to_tensor
raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
MY CODE:
# create custom image dataset
class CustomImageDataset(torch.utils.data.Dataset):
def __init__(self, annotations_path, data_path, transform=None, target_transform=None):
self.labels = pd.read_csv(annotations_path)
self.data_path = data_path
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
img_path = os.path.join(self.data_path, self.labels.iloc[idx,0])
image = torchvision.io.read_image(img_path)
label = self.labels.iloc[idx,1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label
# define transforms
transforms = torchvision.transforms.Compose([
torchvision.transforms.Resize((150,150)),
torchvision.transforms.ToTensor()
])
# initialize dataset
annotations_path = 'cats_dogs_dataset_torch/annotations/annotations.csv'
data_path = 'cats_dogs_dataset_torch/data'
ds = CustomImageDataset(
annotations_path=annotations_path,
data_path=data_path,
transform=transforms
)
# visualize dataset
labels_map = {
0: "Cat",
1: "Dog"
}
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(ds), size=(1,)).item()
img, label = ds[sample_idx]
figure.add_subplot(rows, cols, i)
plt.title(labels_map[label])
plt.axis("off")
plt.imshow(img.squeeze().permute(1,2,0))
plt.show()
# split data into train, validation, test
length = ds.__len__()
train_size = int(length * .8)
remaining_after_train = length - train_size
val_size = int(remaining_after_train * 0.5)
remaining_after_val = remaining_after_train - val_size
test_size = remaining_after_val
print(f'length: {length}')
print(f'train_size: {train_size}')
print(f'val_size: {val_size}')
print(f'test_size: {val_size}')
ds_train, ds_val, ds_test = torch.utils.data.random_split(
dataset=ds,
lengths=[train_size,val_size,test_size]
)
# wrap in dataloader
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=64)
dl_val = torch.utils.data.DataLoader(ds_val, batch_size=64)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=64)
# build model
model = torchvision.models.vgg19(pretrained=True)
# freeze the feature extractor
for param in model.features.parameters():
param.requires_grad = False
# replace the classifier with appropriate layers
model.classifier = torch.nn.Sequential(
torch.nn.Linear(25088,8192),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(8192,256),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(256,1),
torch.nn.Sigmoid()
)
print(model)
'''HYPERPARAMETERS'''
lr = 1e-3
batch_size = 64
epochs = 5
# '''OPTIMIZATION LOOP'''
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=lr)
'''TRAINING'''
epochs = 25
for epoch in range(epochs):
# train loop
for batch_idx, (x, y) in enumerate(dl_train):
train_size = len(dl_train.dataset)
# forward pass
pred = model(x)
# compute loss
loss = loss_fn(pred, y)
# backward pass
optimizer.zero_grad()
loss.backward()
# update weights
optimizer.step()
# update accuracy / loss
if batch_idx % 100 == 0:
loss, current = loss.item(), batch_idx * len(x)
print(f'loss: {loss:>7f} [{current:>5d}/{train_size:>5d}]')
# test loop
num_batches = len(dl_val)
size = len(dl_val.dataset)
test_loss, correct = 0,0
with torch.no_grad():
for x, y in dl_val:
# foward pass
pred = model(x)
# compute loss
test_loss += loss_fn(pred, y)
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f'Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')
print('Done!')
transforms.RandomHorizontalFlip() works on PIL.Images, not torch.Tensor. In your code above, you are applying transforms.ToTensor() prior to transforms.RandomHorizontalFlip(), which results in tensor.
But, as per the official pytorch documentation here,
transforms.RandomHorizontalFlip() horizontally flip the given PIL Image randomly with a given probability.
So, just change the order of your transformation in above code, like below:
train_transforms = transforms.Compose([transforms.Resize(255),
transforms.CenterCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
Just add transforms.ToPILImage() to convert into pil image and then it will work, example:
transform = transforms.Compose([
transforms.ToPILImage(),
transforms.Resize(255),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.RandomHorizontalFlip(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])