# Read the code alongside the docs and add comments.
import os
import sys
import glob
import shutil
import json

# Pin the process to GPU 0 before torch initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm, tqdm_notebook
import torch

# Fix the RNG seed for repeatability. benchmark=True lets cuDNN auto-tune
# the fastest convolution algorithms (deterministic=False trades exact
# reproducibility for speed).
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
# Dataset class that reads the SVHN images.
# Attributes: img_path, img_label, transform; methods: __getitem__, __len__.
class SVHNDataset(Dataset):
    """Map-style dataset of SVHN multi-digit images.

    Each sample is ``(image, label)`` where ``label`` is a length-5
    LongTensor: the digit sequence padded on the right with class 10
    (the "no digit" filler class).
    """

    def __init__(self, img_path, img_label, transform=None):
        # img_path: list of image file paths.
        # img_label: list of digit sequences (one list of ints per image).
        # transform: optional torchvision transform applied to each image.
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Return the transformed image and fixed-length label at ``index``."""
        img = Image.open(self.img_path[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # The maximum character length is fixed at 5.
        # np.int was removed in NumPy 1.24; int64 matches torch's long labels.
        lbl = np.array(self.img_label[index], dtype=np.int64)
        # Pad with class 10 ("blank") so every label tensor has length 5.
        lbl = list(lbl) + (5 - len(lbl)) * [10]
        return img, torch.from_numpy(np.array(lbl[:5]))

    def __len__(self):
        return len(self.img_path)
# Build the training and validation Datasets/DataLoaders.
def getDataSet():
    """Return ``(train_loader, val_loader)`` for the SVHN multi-digit task.

    Paths are hard-coded to the Tianchi SVHN competition layout. The
    training pipeline applies augmentation (random crop / color jitter /
    rotation); the validation pipeline only resizes and normalizes.
    """
    # --- training data ---
    # glob expands the pattern to every matching file path (use full paths,
    # not relative ../ ones).
    train_path = glob.glob(r'D:/competition/tianchi/cv/SVHN/input/mchar_train/mchar_train/*.png')
    train_path.sort()
    with open('D:/competition/tianchi/cv/SVHN/input/train.json') as f:
        train_json = json.load(f)
    # JSON format per image:
    # "000000.png": {"height": [219, 219], "label": [1, 9], "left": [246, 323],
    #                "top": [77, 81], "width": [81, 96]}
    # train_json[x] is a dict; pull out only the digit labels.
    # NOTE(review): this assumes the JSON keys iterate in the same sorted
    # order as the globbed file paths — verify against the data files.
    train_label = [train_json[x]['label'] for x in train_json]
    print(len(train_path), len(train_label))
    # DataLoader wraps the Dataset for batched (optionally multi-process)
    # iteration over (image, label) pairs.
    train_loader = torch.utils.data.DataLoader(
        SVHNDataset(train_path, train_label,
                    transforms.Compose([                     # chain the transforms below
                        transforms.Resize((64, 128)),        # scale to a fixed size
                        transforms.RandomCrop((60, 120)),    # crop at a random position
                        transforms.ColorJitter(0.3, 0.3, 0.2),  # random brightness/contrast/saturation
                        transforms.RandomRotation(5),        # small random rotation
                        # PIL image (HWC, [0,255]) -> torch.Tensor (CHW, [0.0, 1.0])
                        transforms.ToTensor(),
                        # Per-channel: out = (in - mean) / std (ImageNet statistics).
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                    ])),
        batch_size=40,       # samples per batch
        # shuffle=True,      # reshuffle every epoch (default False)
        # num_workers=10,    # worker subprocesses for loading
    )
    # --- validation data ---
    val_path = glob.glob('D:/competition/tianchi/cv/SVHN/input/mchar_val/mchar_val/*.png')
    val_path.sort()
    with open('D:/competition/tianchi/cv/SVHN/input/val.json') as f:
        val_json = json.load(f)
    val_label = [val_json[x]['label'] for x in val_json]
    print(len(val_path), len(val_label))
    val_loader = torch.utils.data.DataLoader(
        SVHNDataset(val_path, val_label,
                    transforms.Compose([
                        transforms.Resize((60, 120)),
                        # transforms.ColorJitter(0.3, 0.3, 0.2),
                        # transforms.RandomRotation(5),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                    ])),
        batch_size=40,
        shuffle=False,
        num_workers=10,
    )
    return train_loader, val_loader
# Build the loaders; the prints above should read: 30000 30000 10000 10000.
train_loader, val_loader = getDataSet()
# CNN model trained from scratch.
class SVHN_model_1(nn.Module):
    """Small CNN backbone plus six parallel 11-way classification heads.

    Input: ``(N, 3, 60, 120)`` image batch. Each head predicts one
    character position (classes 0-9, plus 10 meaning "no digit").
    NOTE(review): SVHNDataset produces only 5 label positions, so the
    sixth head (fc6) never receives a loss term in train/trainData below.
    """

    def __init__(self):
        # super() delegates to nn.Module.__init__ to register parameters.
        super(SVHN_model_1, self).__init__()
        # Feature extractor. nn.Sequential runs the modules in order; for a
        # 60x120 input the final feature map is 32 channels of 3x7.
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # One linear classifier (y = A x + b) per character position.
        self.fc1 = nn.Linear(32 * 3 * 7, 11)
        self.fc2 = nn.Linear(32 * 3 * 7, 11)
        self.fc3 = nn.Linear(32 * 3 * 7, 11)
        self.fc4 = nn.Linear(32 * 3 * 7, 11)
        self.fc5 = nn.Linear(32 * 3 * 7, 11)
        self.fc6 = nn.Linear(32 * 3 * 7, 11)

    def forward(self, img):
        """Return six ``(N, 11)`` logit tensors, one per character slot."""
        feat = self.cnn(img)
        # Flatten the feature map to (N, 32*3*7).
        feat = feat.view(feat.shape[0], -1)
        # Six independent classifiers share the same features.
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6
# model1: forward() runs the CNN and returns the six parallel head outputs.
model1 = SVHN_model_1()
# Transfer learning: reuse a model pretrained on ImageNet.
class SVHN_model_2(nn.Module):
    """ResNet-18 backbone (classifier head removed) + 5 parallel 11-way heads."""

    def __init__(self):
        # Register submodules via nn.Module's constructor.
        super(SVHN_model_2, self).__init__()
        model_conv = models.resnet18(pretrained=True)
        # Adaptive average pooling emits a 1x1 map per channel regardless of
        # input size, so the flattened feature is always 512-d.
        model_conv.avgpool = nn.AdaptiveAvgPool2d(1)
        # model.children() yields only the top-level layers (unlike
        # modules(), which recurses); drop the final fc layer.
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        self.cnn = model_conv
        self.fc1 = nn.Linear(512, 11)
        self.fc2 = nn.Linear(512, 11)
        self.fc3 = nn.Linear(512, 11)
        self.fc4 = nn.Linear(512, 11)
        self.fc5 = nn.Linear(512, 11)

    def forward(self, img):
        """Return five ``(N, 11)`` logit tensors, same scheme as model 1."""
        feat = self.cnn(img)
        # print(feat.shape)
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        return c1, c2, c3, c4, c5
model2 = SVHN_model_2()
# Quick single-epoch training loop used as a sanity check.
def trainData(train_loader, model):
    """Train ``model`` for one epoch over ``train_loader``.

    The loss is the mean CrossEntropyLoss over the first five heads; it
    also records the per-batch loss and head-0 accuracy locally.
    """
    # CrossEntropyLoss combines Softmax + log + NLLLoss in one module.
    criterion = nn.CrossEntropyLoss()
    # Adam keeps per-parameter state and updates weights from the gradients.
    optimizer = torch.optim.Adam(model.parameters(), 0.005)
    loss_plot = []
    c0_plot = []
    for epoch in range(1):
        for data in train_loader:
            c0, c1, c2, c3, c4, c5 = model(data[0])
            data[1] = data[1].long()
            # Labels have only 5 positions, so only the first 5 heads get a
            # loss term (a 6th term, criterion(c5, data[1][:, 5]), would
            # index past the label tensor and raise).
            loss = criterion(c0, data[1][:, 0]) + \
                   criterion(c1, data[1][:, 1]) + \
                   criterion(c2, data[1][:, 2]) + \
                   criterion(c3, data[1][:, 3]) + \
                   criterion(c4, data[1][:, 4])
            loss /= 5
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_plot.append(loss.item())
            # Fraction of the batch whose first character is predicted
            # correctly. (Fixed: the original mis-parenthesized this and
            # appended a boolean tensor instead of an accuracy scalar.)
            c0_plot.append((c0.argmax(1) == data[1][:, 0]).sum().item() * 1.0 / c0.shape[0])
        print(epoch)
# trainData(train_loader, model1)
# One training epoch.
def train(train_loader, model, criterion, optimizer, epoch):
    """Run a single optimization pass over ``train_loader``.

    ``epoch`` is accepted for the caller's bookkeeping and not used here.
    """
    # Switch the model to training mode (dropout/batchnorm behavior).
    model.train()
    # enumerate() pairs each batch with its index.
    for i, data in enumerate(train_loader):
        c0, c1, c2, c3, c4, c5 = model(data[0])
        data[1] = data[1].long()
        # Only 5 label positions exist; average the 5 per-head losses.
        loss = criterion(c0, data[1][:, 0]) + \
               criterion(c1, data[1][:, 1]) + \
               criterion(c2, data[1][:, 2]) + \
               criterion(c3, data[1][:, 3]) + \
               criterion(c4, data[1][:, 4])
        loss /= 5
        optimizer.zero_grad()   # clear gradients accumulated by backward()
        loss.backward()         # backpropagate through the autograd graph
        optimizer.step()        # apply the parameter update
def validate(val_loader, model, criterion):
    """Return the mean validation loss over ``val_loader``.

    Uses the same 5-head loss as train(); the model may return extra
    heads (model 1 returns six), which are ignored.
    """
    # Switch the model to evaluation mode.
    model.eval()
    val_loss = []
    # Do not record gradient information during evaluation.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            c0, c1, c2, c3, c4, c5 = model(data[0])
            # Labels only have 5 positions. The original added a 6th term,
            # criterion(c5, data[1][:, 5]), which raises IndexError on the
            # 5-wide label tensor; use the same 5-term mean as train().
            loss = criterion(c0, data[1][:, 0]) + \
                   criterion(c1, data[1][:, 1]) + \
                   criterion(c2, data[1][:, 2]) + \
                   criterion(c3, data[1][:, 3]) + \
                   criterion(c4, data[1][:, 4])
            loss /= 5
            val_loss.append(loss.item())
    return np.mean(val_loss)
def valCal(train_loader, val_loader, model):
    """Train for 3 epochs, validating after each epoch.

    Checkpoints ``model.state_dict()`` to ./model.pt whenever the
    validation loss improves on the best seen so far.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), 0.001)
    best_loss = 1000.0
    for epoch in range(3):
        print('Epoch: ', epoch)
        # Each epoch: one training pass, then one validation pass, on the
        # same model instance.
        train(train_loader, model, criterion, optimizer, epoch)
        print('train_over')
        val_loss = validate(val_loader, model, criterion)
        print('val_over')
        # Keep the checkpoint with the lowest validation loss.
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), './model.pt')

valCal(train_loader, val_loader, model1)