本文共 7188 字,大约阅读时间需要 23 分钟。
二分类实战
数据集是RGB三通道图像。由于下载的test数据集没有标签,我们把train中的cat.10000.jpg-cat.12499.jpg和dog.10000.jpg-dog.12499.jpg作为测试集,这样一共有20000张图片作为训练集,5000张图片作为测试集。
文章主要参考了一篇已有教程(原文链接在转载时丢失),在其基础上修改了数据处理方式,增加了一些步骤和注释,并给出了
image = np.array(image)[:, :, :3]
此步骤产生的结果:
第0个 image(200 * 200 * 3):
[[[203 164 87] [206 167 90] [210 171 94] ...
第0个 label:
0
返回的是tensor(3 * 200 * 200):
tensor([[[0.7961, 0.8078, 0.8235, ..., 0.9608, 0.9490, 0.9373], [0.7961, 0.8078, 0.8235, ..., 0.9608, 0.9529, 0.9412], [0.7961, 0.8078, 0.8235, ..., 0.9608, 0.9569, 0.9451], ..., [[0.6431, 0.6549, 0.6706, ..., 0.8000, 0.7922, 0.7843], [0.6431, 0.6549, 0.6706, ..., 0.8039, 0.7961, 0.7882], [0.6431, 0.6549, 0.6706, ..., 0.8039, 0.8000, 0.7922], ..., [[0.3412, 0.3529, 0.3686, ..., 0.4706, 0.4745, 0.4745], [0.3412, 0.3529, 0.3686, ..., 0.4784, 0.4784, 0.4784], [0.3412, 0.3529, 0.3686, ..., 0.4824, 0.4863, 0.4824], ...,
tensor([0])
可以发现:
ToTensor()
将shape为(H, W, C)的numpy.ndarray或PIL图像转为shape为(C, H, W)的tensor,并将每一个数值(输入为uint8时)从[0, 255]归一化到[0, 1]。nn.Conv2d的用法见PyTorch官方文档。
训练图片见此:
测试:5000 Finished!0.7044
可见正确率为 0.7044
# Dogs-vs-Cats binary classification with a small CNN (dataset, network, test script).
import os  # used to list the image folders

import numpy as np  # converts image data to a numpy array
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms  # common image transformation classes
from PIL import Image  # Pillow, used to read the image files
from torch.autograd import Variable  # only referenced by the commented-out training code
from torch.utils.data import DataLoader
from torch.utils.data import Dataset  # abstract base; subclasses override __len__ and __getitem__

IMAGE_H = 200  # default height of the images fed to the network
IMAGE_W = 200  # default width of the images fed to the network

# Conversion applied to every sample: turns the (H, W, C) uint8 array into a
# (C, H, W) float tensor with values scaled to [0.0, 1.0].
data_transform = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))  # would map values to [-1, 1]
])


class DogsVSCatsDataset(Dataset):
    """Dataset over ``<dir>/train/`` or ``<dir>/test/`` with labels taken from file names.

    Files are expected to be named like ``cat.0.jpg`` / ``dog.0.jpg``. A file
    whose name starts with ``cat`` gets label 0; every other file gets label 1.

    Args:
        mode: ``'train'`` or ``'test'``; selects which sub-folder is scanned.
            Any other value prints ``'No such mode!'`` and leaves the dataset empty.
        dir: Root folder of the dataset (contains ``train/`` and ``test/``).
    """

    def __init__(self, mode, dir):
        self.mode = mode
        self.list_train_image_path = []
        self.list_train_label = []
        self.data_train_size = 0
        self.list_test_image_path = []
        self.list_test_label = []
        self.data_test_size = 0
        self.transform = data_transform

        if self.mode == 'train':
            paths, labels = self._scan_folder(dir + '/train/')
            self.list_train_image_path = paths
            self.list_train_label = labels
            self.data_train_size = len(paths)
        elif self.mode == 'test':
            paths, labels = self._scan_folder(dir + '/test/')
            self.list_test_image_path = paths
            self.list_test_label = labels
            self.data_test_size = len(paths)
        else:
            print('No such mode!')

    @staticmethod
    def _scan_folder(folder):
        """Return ``(paths, labels)`` for every file in *folder* (0 = cat, 1 = dog)."""
        paths = []
        labels = []
        # Sorted so that the index -> sample mapping is the same on every run.
        for file_name in sorted(os.listdir(folder)):
            paths.append(folder + file_name)  # file_name looks like cat.0.jpg
            labels.append(0 if file_name.split('.')[0] == 'cat' else 1)
        return paths, labels

    def __getitem__(self, item):
        """Return ``(image, label)`` for index *item*.

        The image is a ``3 x IMAGE_H x IMAGE_W`` float tensor produced by
        ``self.transform``; the label is a ``LongTensor`` holding one element.

        Raises:
            IndexError: If the dataset was built with an unknown mode (it is empty).
        """
        if self.mode == 'train':
            path = self.list_train_image_path[item]
            label = self.list_train_label[item]
        elif self.mode == 'test':
            path = self.list_test_image_path[item]
            label = self.list_test_label[item]
        else:
            raise IndexError('dataset is empty: unknown mode {0!r}'.format(self.mode))

        # The context manager closes the file handle; convert() loads the pixel
        # data and guarantees three channels even for grayscale/palette/CMYK files.
        # NOTE: resize() takes ONE (width, height) tuple. Passing the two numbers
        # as separate arguments raises
        # "ValueError: Unknown resampling filter (200). Use Image.NEAREST (0), ...".
        with Image.open(path) as raw:
            image = raw.convert('RGB').resize((IMAGE_W, IMAGE_H))
        image = np.array(image)[:, :, :3]  # IMAGE_H * IMAGE_W * 3
        # Convert image and label to PyTorch form and return them.
        return self.transform(image), torch.LongTensor([label])

    def __len__(self):
        """Return the number of samples (only one of the two counters is non-zero)."""
        return self.data_test_size + self.data_train_size


# The network takes a four-dimensional tensor [N, C, H, W] as input.
class Net(nn.Module):
    """Two 3x3 convolution + 2x2 max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # First conv layer: 3 input channels, 16 output channels, 3x3 kernel, padding 1.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 16, 3, padding=1)
        # Each of the two 2x2 poolings halves H and W, so with 200x200 inputs the
        # first fully connected layer sees 50 * 50 * 16 inputs; it outputs 128.
        self.fc1 = nn.Linear((IMAGE_H // 4) * (IMAGE_W // 4) * 16, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Run the forward pass and return per-class probabilities of shape (n, 2)."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # n * 16 * 50 * 50
        # The linear layers need one flat vector per sample: n * 40000.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        # Softmax maps the 2 outputs into [0.0, 1.0] with sum 1,
        # e.g. tensor([[0.4544, 0.5456]]).
        # NOTE(review): nn.CrossEntropyLoss already applies log-softmax, so
        # training through this output applies softmax twice. It is kept so that
        # the output range and existing checkpoints keep their meaning.
        return F.softmax(x, dim=1)


dataset_dir = '... your path/dogs-vs-cats-redux-kernels-edition'


def main():
    """Evaluate the saved model on the test split and print the accuracy."""
    model = Net()

    # Train (kept disabled, exactly as in the original script)
    # datafile = DogsVSCatsDataset('train', dataset_dir)
    # # batch_size: images read per step, num_workers: number of loader workers
    # dataloader = DataLoader(datafile, batch_size=10, shuffle=True, num_workers=10)
    # print('Dataset loaded! length of train set is {0}'.format(len(datafile)))
    # if torch.cuda.is_available() == True:
    #     print('Cuda is available!')
    #     model = model.cuda()
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # learning rate
    # criterion = nn.CrossEntropyLoss()
    #
    # cnt = 0
    # for img, label in dataloader:
    #     img, label = Variable(img).cuda(), Variable(label).cuda()
    #     out = model(img)
    #     # label: torch.Size([10, 1]) -> tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
    #     loss = criterion(out, label.squeeze())
    #     loss.backward()
    #     optimizer.step()
    #     optimizer.zero_grad()
    #     cnt += 1
    #
    #     print('\rFrame:{0}, train_loss:{1}'.format(cnt * 10, loss / 10), end='')
    # torch.save(model.state_dict(), dataset_dir + '/model.pth')

    # Test
    # Use the GPU when there is one, otherwise stay on the CPU instead of crashing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    datafile = DogsVSCatsDataset('test', dataset_dir)
    dataloader = DataLoader(datafile)  # default batch_size=1, no shuffling
    print('Dataset loaded! length of test set is {0}'.format(len(datafile)))

    model.to(device)
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(dataset_dir + '/model.pth', map_location=device))
    model.eval()

    correct_num = 0
    cnt = 0
    with torch.no_grad():  # inference only: no autograd graph needed
        for img, label in dataloader:
            img, label = img.to(device), label.to(device)
            out = model(img)  # e.g. tensor([[0.8892, 0.1108]])
            cat_score, dog_score = out[0][0], out[0][1]
            truth = label[0][0]
            # An exact tie between the two scores counts as a wrong prediction.
            if (cat_score > dog_score and truth == 0) or (cat_score < dog_score and truth == 1):
                correct_num += 1
            cnt += 1
            if cnt % 20 == 0:
                print('\r%d Finished!' % cnt, end='')

    total = len(datafile)
    # Guard against an empty test folder instead of raising ZeroDivisionError.
    print(correct_num / total if total else 0.0)


if __name__ == '__main__':
    main()
通过本次实践,能够利用PyTorch搭建CNN网络,对卷积神经网络的一些概念、函数的参数有了一些了解。此仅为动手实践,所以精确度并不高,可以尝试增加验证集、多次迭代训练(多个epoch)、更改损失函数、更改梯度下降计算方式(优化器)、修改softmax函数(例如nn.CrossEntropyLoss内部已包含log-softmax,训练时网络可以直接输出未归一化的logits)等方式来提高精度。
未来进行RNN和NLP的实战
转载地址:http://wwtrn.baihongyu.com/