Sample data: COCO 2014
- Download script: get_coco_dataset.sh
#!/bin/bash
# Clone COCO API
git clone https://github.com/pdollar/coco
cd coco
mkdir images
cd images
# Download Images
wget -c https://pjreddie.com/media/files/train2014.zip
wget -c https://pjreddie.com/media/files/val2014.zip
# Unzip
unzip -q train2014.zip
unzip -q val2014.zip
cd ..
# Download COCO Metadata
wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
wget -c https://pjreddie.com/media/files/coco/5k.part
wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
wget -c https://pjreddie.com/media/files/coco/labels.tgz
tar xzf labels.tgz
unzip -q instances_train-val2014.zip
# Set Up Image Lists
paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
On Windows you can simply paste the download URLs from the script into a browser to fetch the data, which saves installing a shell interpreter, e.g.:
https://pjreddie.com/media/files/instances_train-val2014.zip
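If you would rather skip the shell entirely, the same files can also be fetched from Python. The sketch below is one way to do it (the target folder "data" and the skip-if-present check are our additions; the archives still need to be unpacked afterwards, e.g. with the zipfile and tarfile modules).
import os
import urllib.request

# URLs taken from get_coco_dataset.sh above; download each into ./data (assumed folder)
urls = [
    "https://pjreddie.com/media/files/train2014.zip",
    "https://pjreddie.com/media/files/val2014.zip",
    "https://pjreddie.com/media/files/instances_train-val2014.zip",
    "https://pjreddie.com/media/files/coco/5k.part",
    "https://pjreddie.com/media/files/coco/trainvalno5k.part",
    "https://pjreddie.com/media/files/coco/labels.tgz",
]
os.makedirs("data", exist_ok=True)
for url in urls:
    dst = os.path.join("data", os.path.basename(url))
    if not os.path.exists(dst):  # resume-friendly: skip files already on disk
        print("downloading", url)
        urllib.request.urlretrieve(url, dst)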
YOLO v3 with Darknet
https://github.com/pjreddie/darknet
Helper functions: myutils.py
import torch
from torch import nn
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def parse_model_config(path2file):
cfg_file = open(path2file, 'r')
lines = cfg_file.read().split('\n')
lines = [x for x in lines if x and not x.startswith('#')]
lines = [x.rstrip().lstrip() for x in lines]
blocks_list = []
for line in lines:
# start of a new block
if line.startswith('['):
blocks_list.append({})
blocks_list[-1]['type'] = line[1:-1].rstrip()
else:
key, value = line.split("=")
value = value.strip()
blocks_list[-1][key.rstrip()] = value.strip()
return blocks_list
def create_layers(blocks_list):
hyperparams = blocks_list[0]
channels_list = [int(hyperparams["channels"])]
module_list = nn.ModuleList()
for layer_ind, layer_dict in enumerate(blocks_list[1:]):
modules = nn.Sequential()
if layer_dict["type"] == "convolutional":
filters = int(layer_dict["filters"])
kernel_size = int(layer_dict["size"])
pad = (kernel_size - 1) // 2
bn = int(layer_dict.get("batch_normalize", 0))
conv2d = nn.Conv2d(
in_channels=channels_list[-1],
out_channels=filters,
kernel_size=kernel_size,
stride=int(layer_dict["stride"]),
padding=pad,
bias=not bn)
modules.add_module("conv_{0}".format(layer_ind), conv2d)
if bn:
bn_layer = nn.BatchNorm2d(filters,momentum=0.9, eps=1e-5)
modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)
if layer_dict["activation"] == "leaky":
activn = nn.LeakyReLU(0.1)
modules.add_module("leaky_{0}".format(layer_ind), activn)
elif layer_dict["type"] == "upsample":
stride = int(layer_dict["stride"])
upsample = nn.Upsample(scale_factor = stride)
modules.add_module("upsample_{}".format(layer_ind), upsample)
elif layer_dict["type"] == "shortcut":
backwards =int(layer_dict["from"])
filters = channels_list[1:][backwards]
modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())
elif layer_dict["type"] == "route":
layers = [int(x) for x in layer_dict["layers"].split(",")]
filters = sum([channels_list[1:][l] for l in layers])
modules.add_module("route_{}".format(layer_ind), EmptyLayer())
elif layer_dict["type"] == "yolo":
anchors = [int(a) for a in layer_dict["anchors"].split(",")]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
mask = [int(m) for m in layer_dict["mask"].split(",")]
anchors = [anchors[i] for i in mask]
num_classes = int(layer_dict["classes"])
img_size = int(hyperparams["height"])
yolo_layer = YOLOLayer(anchors, num_classes, img_size)
modules.add_module("yolo_{}".format(layer_ind), yolo_layer)
module_list.append(modules)
channels_list.append(filters)
return hyperparams, module_list
class EmptyLayer(nn.Module):
def __init__(self):
super(EmptyLayer, self).__init__()
class YOLOLayer(nn.Module):
def __init__(self, anchors, num_classes, img_dim=416):
super(YOLOLayer, self).__init__()
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
self.img_dim = img_dim
self.grid_size = 0
def forward(self, x_in):
batch_size = x_in.size(0)
grid_size = x_in.size(2)
prediction = x_in.view(batch_size, self.num_anchors,
self.num_classes + 5, grid_size, grid_size)
prediction = prediction.permute(0, 1, 3, 4, 2)
prediction = prediction.contiguous()
obj_score = torch.sigmoid(prediction[..., 4])
pred_cls = torch.sigmoid(prediction[..., 5:])
if grid_size != self.grid_size:
self.compute_grid_offsets(grid_size, cuda=x_in.is_cuda)
pred_boxes = self.transform_outputs(prediction)
output = torch.cat(
(
pred_boxes.view(batch_size, -1, 4),
obj_score.view(batch_size, -1, 1),
pred_cls.view(batch_size, -1, self.num_classes),
), -1,)
return output
def compute_grid_offsets(self, grid_size, cuda=True):
self.grid_size = grid_size
self.stride = self.img_dim / self.grid_size
self.grid_x = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1 ).type(torch.float32)
self.grid_y = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1).transpose(3, 2).type(torch.float32)
scaled_anchors = [(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]
self.scaled_anchors=torch.tensor(scaled_anchors,device=device)
self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
def transform_outputs(self,prediction):
device = prediction.device
x = torch.sigmoid(prediction[..., 0]) # Center x
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
pred_boxes = torch.zeros_like(prediction[..., :4]).to(device)
pred_boxes[..., 0] = x.data + self.grid_x
pred_boxes[..., 1] = y.data + self.grid_y
pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
return pred_boxes * self.stride
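A quick way to sanity-check these two helpers is to run them on the YOLOv3 config file used later in this post (assumed to be at ./config/yolov3.cfg):
from myutils import parse_model_config, create_layers

blocks = parse_model_config("./config/yolov3.cfg")
print(blocks[0]["type"])       # 'net' -- the hyperparameter block
print(len(blocks) - 1)         # number of layer blocks (107 for the standard yolov3.cfg)

hyperparams, module_list = create_layers(blocks)
print(module_list[0])          # conv + batch norm + leaky ReLU, i.e. one DBL block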
Package imports
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.pylab as plab
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pandas as pd
import os
import copy
import random
import collections
from sklearn.model_selection import StratifiedShuffleSplit
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split, Subset
import torchvision.transforms as transforms
from torchvision import models,utils, datasets
import torchvision.transforms.functional as TF
from torchvision.transforms.functional import to_pil_image
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchsummary import summary
from myutils import parse_model_config, create_layers
# CPU or GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# num_workers for multi-process loading in the DataLoader (0 on Windows)
workers = 0 if os.name=='nt' else 4
Data processing and exploration
# define the COCO dataset class
class CocoDataset(Dataset):
def __init__(self, path_list_file, transform=None, trans_params=None):
# build image file path
with open(path_list_file, 'r') as file:
self.path_imgs = file.readlines()
self.path_imgs = ['./data/mod' + path for path in self.path_imgs]
print(self.path_imgs[1])
# get the labels path
self.path_labels = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
for path in self.path_imgs]
print(self.path_labels[1])
self.trans_params = trans_params
self.transform = transform
def __len__(self):
return len(self.path_imgs)
def __getitem__(self, idx):
path_img = self.path_imgs[idx % len(self.path_imgs)].rstrip()
img = Image.open(path_img).convert('RGB')
path_label = self.path_labels[idx % len(self.path_imgs)].rstrip()
labels = None
if os.path.exists(path_label):
labels = np.loadtxt(path_label).reshape(-1, 5)
if self.transform:
img, labels = self.transform(img, labels, self.trans_params)
return img, labels, path_img
# training dataset (trainvalno5k)
root_data = './data/mod'
path_train_list = os.path.join(root_data, 'trainvalno5k.txt')
coco_train = CocoDataset(path_train_list)
print(len(coco_train))
# val dataset
path_val_list = os.path.join(root_data, '5k.txt')
coco_val = CocoDataset(path_val_list, transform=None, trans_params=None)
print(len(coco_val))
"""
./data/mod/images/train2014/COCO_train2014_000000000025.jpg
./data/mod/labels/train2014/COCO_train2014_000000000025.txt
117264
./data/mod/images/val2014/COCO_val2014_000000000192.jpg
./data/mod/labels/val2014/COCO_val2014_000000000192.txt
5000
"""
Inspect the data
img, labels, path2img = coco_train[1]
print("image size:", img.size, type(img))
print("labels shape:", labels.shape, type(labels))
print("labels \n", labels)
print('--'*10)
img, labels, path2img = coco_val[7]
print("image size:", img.size, type(img))
print("labels shape:", labels.shape, type(labels))
print("labels \n", labels)
"""
image size: (640, 426) <class 'PIL.Image.Image'>
labels shape: (2, 5) <class 'numpy.ndarray'>
labels
[[23. 0.770336 0.489695 0.335891 0.697559]
[23. 0.185977 0.901608 0.206297 0.129554]]
--------------------
image size: (640, 427) <class 'PIL.Image.Image'>
labels shape: (3, 5) <class 'numpy.ndarray'>
labels
[[20. 0.539742 0.521429 0.758641 0.957143]
[20. 0.403469 0.470714 0.641656 0.695948]
[20. 0.853039 0.493279 0.293922 0.982061]]
"""
path2cocoNames="./data/mod/coco.names"
fp = open(path2cocoNames, "r")
coco_names = fp.read().split("\n")[:-1]
print("number of classese:", len(coco_names))
print(coco_names)
"""
number of classes: 80
['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
"""
Display images
# rescale normalized bounding boxes to the original image size
def rescale_bbox(bb, W, H):
x,y,w,h = bb
return [x*W, y*H, w*W, h*H]
# show an image with its bounding boxes
COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
def show_img_bbox(img,targets):
if torch.is_tensor(img):
img = to_pil_image(img)
if torch.is_tensor(targets):
targets = targets.numpy()[:,1:]
W, H = img.size
draw = ImageDraw.Draw(img)
for tg in targets:
id_ = int(tg[0])
bbox = tg[1:]
bbox = rescale_bbox(bbox,W,H)
xc, yc, w, h = bbox
color = [int(c) for c in COLORS[id_]]
name = coco_names[id_]
draw.rectangle(((xc-w/2, yc-h/2), (xc+w/2, yc+h/2)),outline=tuple(color),width=3)
draw.text((xc-w/2,yc-h/2),name, fill=(255,255,255,0))
plt.imshow(np.array(img))
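Each label row is in the normalized YOLO format (class_id, x_center, y_center, width, height), with all coordinates relative to the image size. As a worked example, the first box of the training sample printed above maps to pixel corner coordinates like this (the numbers are taken from that output):
# (xc, yc, w, h) = (0.770336, 0.489695, 0.335891, 0.697559) on a 640x426 image
W, H = 640, 426
xc, yc, w, h = 0.770336 * W, 0.489695 * H, 0.335891 * W, 0.697559 * H
x1, y1, x2, y2 = xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2
print("%.1f %.1f %.1f %.1f" % (x1, y1, x2, y2))   # 385.5 60.0 600.5 357.2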
- Training data
np.random.seed(2)
rnd_ind = np.random.randint(len(coco_train))
img, labels, path2img = coco_train[rnd_ind]
print(img.size, labels.shape)
plt.rcParams['figure.figsize'] = (20, 10)
show_img_bbox(img,labels)
"""
(640, 428) (2, 5)
"""
- Validation data
np.random.seed(0)
rnd_ind = np.random.randint(len(coco_val))
img, labels, path2img = coco_val[rnd_ind]
print(img.size, labels.shape)
plt.rcParams['figure.figsize'] = (20, 10)
show_img_bbox(img,labels)
"""
(640, 480) (3, 5)
"""
Data transforms
# data transforms
# helper function: pad_to_square
def pad_to_square(img, boxes, pad_value=0, normalized_labels=True):
w, h = img.size
w_factor, h_factor = (w,h) if normalized_labels else (1, 1)
dim_diff = np.abs(h - w)
pad1 = dim_diff // 2
pad2 = dim_diff - pad1
if h<=w:
left, top, right, bottom= 0, pad1, 0, pad2
else:
left, top, right, bottom= pad1, 0, pad2, 0
padding = (left, top, right, bottom)
img_padded = TF.pad(img, padding=padding, fill=pad_value)
w_padded, h_padded = img_padded.size
x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
x1 += padding[0] # left
y1 += padding[1] # top
x2 += padding[2] # right
y2 += padding[3] # bottom
boxes[:, 1] = ((x1 + x2) / 2) / w_padded
boxes[:, 2] = ((y1 + y2) / 2) / h_padded
boxes[:, 3] *= w_factor / w_padded
boxes[:, 4] *= h_factor / h_padded
return img_padded, boxes
def hflip(image, labels):
image = TF.hflip(image)
labels[:, 1] = 1.0 - labels[:, 1]
return image, labels
def transformer(image, labels, params):
if params["pad2square"] is True:
image,labels = pad_to_square(image, labels)
image = TF.resize(image,params["target_size"])
if random.random() < params["p_hflip"]:
image,labels = hflip(image,labels)
image = TF.to_tensor(image)
targets = torch.zeros((len(labels), 6))
targets[:, 1:] = torch.from_numpy(labels)
return image, targets
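As a quick check of pad_to_square, here is a sketch using a dummy image with the same 640x426 size as the training sample above: dim_diff is 214, so 107 rows of padding go on the top and 107 on the bottom, the image becomes a 640x640 square, and the normalized y-center and height of each box are rescaled to the padded size.
import numpy as np
from PIL import Image

dummy_img = Image.new("RGB", (640, 426))
dummy_labels = np.array([[23.0, 0.770336, 0.489695, 0.335891, 0.697559]])
img_sq, labels_sq = pad_to_square(dummy_img, dummy_labels)   # uses the helper defined above
print(img_sq.size)    # (640, 640)
print(labels_sq)      # x-center/width unchanged, y-center/height rescaled to the padded image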
trans_params_train = {
"target_size" : (416, 416),
"pad2square": True,
"p_hflip" : 1.0,
"normalized_labels": True,
}
coco_train = CocoDataset(path_train_list,
transform=transformer,
trans_params=trans_params_train)
trans_params_val = {
"target_size" : (416, 416),
"pad2square": True,
"p_hflip" : 0.0,
"normalized_labels": True,
}
coco_val = CocoDataset(path_val_list,
transform=transformer,
trans_params=trans_params_val)
Images after the transforms
np.random.seed(2)
rnd_ind=np.random.randint(len(coco_train))
img, targets, path2img = coco_train[rnd_ind]
print("image shape:", img.shape)
print("labels shape:", targets.shape)
plt.rcParams['figure.figsize'] = (20, 10)
COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
show_img_bbox(img,targets)
np.random.seed(0)
rnd_ind=np.random.randint(len(coco_val))
img, targets, path2img = coco_val[rnd_ind]
print("image shape:", img.shape)
print("labels shape:", targets.shape)
plt.rcParams['figure.figsize'] = (20, 10)
COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
show_img_bbox(img,targets)
Define the dataloaders
batch_size = 4
def collate_fn(batch):
imgs, targets, paths = list(zip(*batch))
# Remove empty boxes
targets = [boxes for boxes in targets if boxes is not None]
# set the sample index
for b_i, boxes in enumerate(targets):
boxes[:, 0] = b_i
targets = torch.cat(targets, 0)
imgs = torch.stack([img for img in imgs])
return imgs, targets, paths
train_dl = DataLoader(
coco_train,
batch_size=batch_size,
shuffle=True,
num_workers=0,
pin_memory=True,
collate_fn=collate_fn,
)
val_dl = DataLoader(
coco_val,
batch_size=batch_size,
shuffle=False,
num_workers=0,
pin_memory=True,
collate_fn=collate_fn,
)
# check that the data loads correctly
torch.manual_seed(0)
for imgs_batch,tg_batch,path_batch in train_dl:
break
print(imgs_batch.shape)
print(tg_batch.shape,tg_batch.dtype)
for imgs_batch,tg_batch,path_batch in val_dl:
break
print(imgs_batch.shape)
print(tg_batch.shape,tg_batch.dtype)
"""
torch.Size([4, 3, 416, 416])
torch.Size([30, 6]) torch.float32
torch.Size([4, 3, 416, 416])
torch.Size([57, 6]) torch.float32
"""
Build the model
The YOLO v3 figures and architecture description below are reproduced from: https://blog.csdn.net/leviopku/article/details/82660381
DBL: shown in the lower-left of the figure above, and called Darknetconv2d_BN_Leaky in the code. It is the basic building block of yolo_v3: convolution + BN + Leaky ReLU. In v3, BN and Leaky ReLU are inseparable companions of the convolution layer (except for the final convolution); together they form the minimal component.
resn: n is a number (res1, res2, ..., res8, and so on) giving how many res_units the res_block contains. This is the large building block of yolo_v3. Starting with v3, YOLO adopts the residual structure of ResNet, which allows a much deeper network (from darknet-19 in v2, which has no residual connections, to darknet-53 in v3). The res_block is illustrated in the lower-right of Figure 1; its basic component is also the DBL.
concat: tensor concatenation. An intermediate darknet feature map is concatenated with the upsampled output of a later layer. Concatenation is different from the residual add: concatenation expands the channel dimension of the tensor, while add is an element-wise sum that leaves the tensor dimensions unchanged.
The whole v3 architecture contains no pooling or fully connected layers. During the forward pass, tensors are downsampled by changing the convolution stride; for example, stride=(2, 2) halves each spatial side (shrinking the area to 1/4). As in yolo_v2, there are five such downsampling steps, so the feature map shrinks to 1/2^5 = 1/32 of the input size. For a 416x416 input the output is 13x13 (416/32 = 13).
Like v2, the yolo_v3 backbone reduces the output feature map to 1/32 of the input, which is why the input image size is usually required to be a multiple of 32.
Copyright notice: this excerpt is from an original article by CSDN blogger 木盏, released under the CC 4.0 BY-SA license; please include the original source link and this notice when reproducing it.
Original link: https://blog.csdn.net/leviopku/article/details/82660381
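To make the description above concrete, here is a small illustrative sketch (the names and layer sizes are ours, not from the original code): a DBL block is Conv2d + BatchNorm2d + LeakyReLU, and a res_unit adds its input back onto the output of a 1x1 and a 3x3 DBL block, leaving the tensor shape unchanged.
import torch
from torch import nn

def dbl(in_ch, out_ch, kernel_size, stride=1):
    # "DBL": convolution + batch norm + leaky ReLU
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size, stride, padding=kernel_size // 2, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.LeakyReLU(0.1),
    )

class ResUnit(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.block = nn.Sequential(
            dbl(channels, channels // 2, 1),   # 1x1 bottleneck
            dbl(channels // 2, channels, 3),   # 3x3 back to the original channel count
        )
    def forward(self, x):
        return x + self.block(x)               # residual add: shape is unchanged

x = torch.rand(1, 64, 52, 52)
print(ResUnit(64)(x).shape)                    # torch.Size([1, 64, 52, 52])
print(dbl(64, 128, 3, stride=2)(x).shape)      # stride 2 halves the spatial size: [1, 128, 26, 26]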
path_config = "./config/yolov3.cfg"
# Defining the Darknet Model
class Darknet(nn.Module):
def __init__(self, config_path, img_size=416):
super(Darknet, self).__init__()
self.blocks_list = parse_model_config(config_path)
self.hyperparams, self.module_list = create_layers(self.blocks_list)
self.img_size = img_size
def forward(self, x):
img_dim = x.shape[2]
layer_outputs, yolo_outputs = [], []
for block, module in zip(self.blocks_list[1:], self.module_list):
if block["type"] in ["convolutional", "upsample", "maxpool"]:
x = module(x)
elif block["type"] == "shortcut":
layer_ind = int(block["from"])
x = layer_outputs[-1] + layer_outputs[layer_ind]
elif block["type"] == "yolo":
x = module[0](x)
yolo_outputs.append(x)
elif block["type"] == "route":
x = torch.cat([layer_outputs[int(l_i)] for l_i in block["layers"].split(",")], 1)
layer_outputs.append(x)
yolo_out_cat = torch.cat(yolo_outputs, 1)
return yolo_out_cat, yolo_outputs
model = Darknet(path_config).to(device)
print(model)
"""
Darknet(
(module_list): ModuleList(
(0): Sequential(
(conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(leaky_0): LeakyReLU(negative_slope=0.1)
)
(1): Sequential(
(conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(leaky_1): LeakyReLU(negative_slope=0.1)
)
...
(105): Sequential(
(conv_105): Conv2d(256, 255, kernel_size=(1, 1), stride=(1, 1))
)
(106): Sequential(
(yolo_106): YOLOLayer()
)
)
)
"""
- Inspect the model
print(next(model.parameters()).device)
dummy_img = torch.rand(1,3,416,416).to(device)
with torch.no_grad():
dummy_out_cat, dummy_out = model.forward(dummy_img)
print(dummy_out_cat.shape)
print(dummy_out[0].shape,dummy_out[1].shape,dummy_out[2].shape)
"""
cuda:0
torch.Size([1, 10647, 85])
torch.Size([1, 507, 85]) torch.Size([1, 2028, 85]) torch.Size([1, 8112, 85])
"""
Loss function
def get_loss_batch(output,targets, params_loss, opt=None):
ignore_thres = params_loss["ignore_thres"]
scaled_anchors = params_loss["scaled_anchors"]
mse_loss = params_loss["mse_loss"]
bce_loss = params_loss["bce_loss"]
num_yolos = params_loss["num_yolos"]
num_anchors = params_loss["num_anchors"]
obj_scale = params_loss["obj_scale"]
noobj_scale = params_loss["noobj_scale"]
loss = 0.0
for yolo_ind in range(num_yolos):
yolo_out = output[yolo_ind]
batch_size, num_bbxs, _ = yolo_out.shape
# get grid size
gz_2 = num_bbxs / num_anchors
grid_size = int(np.sqrt(gz_2))
yolo_out = yolo_out.view(batch_size,num_anchors,grid_size,grid_size,-1)
pred_boxes = yolo_out[:,:,:,:,:4]
x,y,w,h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
pred_conf = yolo_out[:,:,:,:,4]
pred_cls_prob = yolo_out[:,:,:,:,5:]
yolo_targets = get_yolo_targets({
"pred_cls_prob": pred_cls_prob,
"pred_boxes":pred_boxes,
"targets": targets,
"anchors": scaled_anchors[yolo_ind],
"ignore_thres": ignore_thres,
})
obj_mask = yolo_targets["obj_mask"]
noobj_mask = yolo_targets["noobj_mask"]
tx = yolo_targets["tx"]
ty = yolo_targets["ty"]
tw = yolo_targets["tw"]
th = yolo_targets["th"]
tcls = yolo_targets["tcls"]
t_conf = yolo_targets["t_conf"]
loss_x = mse_loss(x[obj_mask], tx[obj_mask])
loss_y = mse_loss(y[obj_mask], ty[obj_mask])
loss_w = mse_loss(w[obj_mask], tw[obj_mask])
loss_h = mse_loss(h[obj_mask], th[obj_mask])
loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj
loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])
loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
if opt is not None:
opt.zero_grad()
loss.backward()
opt.step()
return loss.item()
def transform_bbox(bbox, anchors):
x = bbox[:,:,:,:,0]
y = bbox[:,:,:,:,1]
w = bbox[:,:,:,:,2]
h = bbox[:,:,:,:,3]
anchor_w = anchors[:, 0].view((1, 3, 1, 1))
anchor_h = anchors[:, 1].view((1, 3, 1, 1))
x = x - x.floor()
y = y - y.floor()
w = torch.log(w / anchor_w + 1e-16)
h = torch.log(h / anchor_h + 1e-16)
return x, y, w, h
def get_yolo_targets(params):
pred_boxes=params["pred_boxes"]
pred_cls_prob=params["pred_cls_prob"]
target=params["targets"]
anchors=params["anchors"]
ignore_thres=params["ignore_thres"]
batch_size = pred_boxes.size(0)
num_anchors = pred_boxes.size(1)
grid_size = pred_boxes.size(2)
num_cls = pred_cls_prob.size(-1)
sizeT=batch_size, num_anchors, grid_size, grid_size
obj_mask = torch.zeros(sizeT, device=device, dtype=torch.bool)
noobj_mask = torch.ones(sizeT, device=device, dtype=torch.bool)
tx = torch.zeros(sizeT, device=device, dtype=torch.float32)
ty= torch.zeros(sizeT, device=device, dtype=torch.float32)
tw= torch.zeros(sizeT, device=device, dtype=torch.float32)
th= torch.zeros(sizeT, device=device, dtype=torch.float32)
sizeT=batch_size, num_anchors, grid_size, grid_size, num_cls
tcls= torch.zeros(sizeT, device=device, dtype=torch.float32)
target_bboxes = target[:, 2:] * grid_size
t_xy = target_bboxes[:, :2]
t_wh = target_bboxes[:, 2:]
t_x, t_y = t_xy.t()
t_w, t_h = t_wh.t()
grid_i, grid_j = t_xy.long().t()
iou_with_anchors=[get_iou_WH(anchor, t_wh) for anchor in anchors]
iou_with_anchors = torch.stack(iou_with_anchors)
best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)
batch_inds, target_labels = target[:, :2].long().t()
obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 1
noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 0
for ind, iou_wa in enumerate(iou_with_anchors.t()):
noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = 0
tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()
anchor_w=anchors[best_anchor_ind][:, 0]
tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)
anchor_h=anchors[best_anchor_ind][:, 1]
th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)
tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1
output={
"obj_mask" : obj_mask,
"noobj_mask" : noobj_mask,
"tx": tx,
"ty": ty,
"tw": tw,
"th": th,
"tcls": tcls,
"t_conf": obj_mask.float(),
}
return output
def get_iou_WH(wh1, wh2):
wh2 = wh2.t()
w1, h1 = wh1[0], wh1[1]
w2, h2 = wh2[0], wh2[1]
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area
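The target encoding built by transform_bbox and get_yolo_targets is the inverse of the decoding done in YOLOLayer.transform_outputs: tx/ty are the fractional offsets of the box center inside its grid cell, and tw/th are log-ratios of the box size to the matched anchor. A tiny round-trip check, using a hypothetical box and one of the standard YOLOv3 anchors scaled by the stride of 32:
import math

gx, gy, gw, gh = 6.3, 4.7, 3.2, 5.0            # a hypothetical box, in grid-cell units
anchor_w, anchor_h = 116 / 32, 90 / 32         # anchor (116, 90) scaled to the 13x13 grid
tx, ty = gx - math.floor(gx), gy - math.floor(gy)
tw, th = math.log(gw / anchor_w), math.log(gh / anchor_h)
# decoding adds the cell offset back and exponentiates the size terms
print(6 + tx, 4 + ty, anchor_w * math.exp(tw), anchor_h * math.exp(th))   # approximately 6.3 4.7 3.2 5.0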
Model training
def get_lr(opt):
for param_group in opt.param_groups:
return param_group['lr']
def loss_epoch(model,params_loss,dataset_dl,sanity_check=False,opt=None):
running_loss=0.0
len_data=len(dataset_dl.dataset)
running_metrics= {}
for xb, yb,_ in dataset_dl:
yb=yb.to(device)
_,output=model(xb.to(device))
loss_b=get_loss_batch(output,yb, params_loss,opt)
running_loss+=loss_b
if sanity_check is True:
break
loss=running_loss/float(len_data)
return loss
def train_val(model, params):
num_epochs=params["num_epochs"]
params_loss=params["params_loss"]
opt=params["optimizer"]
train_dl=params["train_dl"]
val_dl=params["val_dl"]
sanity_check=params["sanity_check"]
lr_scheduler=params["lr_scheduler"]
path2weights=params["path2weights"]
loss_history={
"train": [],
"val": [],
}
best_model_wts = copy.deepcopy(model.state_dict())
best_loss=float('inf')
for epoch in range(num_epochs):
current_lr=get_lr(opt)
print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr))
model.train()
train_loss=loss_epoch(model,params_loss,train_dl,sanity_check,opt)
loss_history["train"].append(train_loss)
print("train loss: %.6f" %(train_loss))
model.eval()
with torch.no_grad():
val_loss=loss_epoch(model,params_loss,val_dl,sanity_check)
loss_history["val"].append(val_loss)
print("val loss: %.6f" %(val_loss))
if val_loss < best_loss:
best_loss = val_loss
best_model_wts = copy.deepcopy(model.state_dict())
torch.save(model.state_dict(), path2weights)
print("Copied best model weights!")
lr_scheduler.step(val_loss)
if current_lr != get_lr(opt):
print("Loading best model weights!")
model.load_state_dict(best_model_wts)
print("-"*10)
model.load_state_dict(best_model_wts)
return model, loss_history
- Train the model
opt = optim.Adam(model.parameters(), lr=1e-3)
lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)
path2models= "./models/mod/"
if not os.path.exists(path2models):
os.mkdir(path2models)
scaled_anchors=[model.module_list[82][0].scaled_anchors,
model.module_list[94][0].scaled_anchors,
model.module_list[106][0].scaled_anchors]
mse_loss = nn.MSELoss(reduction="sum")
bce_loss = nn.BCELoss(reduction="sum")
params_loss={
"scaled_anchors" : scaled_anchors,
"ignore_thres": 0.5,
"mse_loss": mse_loss,
"bce_loss": bce_loss,
"num_yolos": 3,
"num_anchors": 3,
"obj_scale": 1,
"noobj_scale": 100,
}
params_train={
"num_epochs": 5,
"optimizer": opt,
"params_loss": params_loss,
"train_dl": train_dl,
"val_dl": val_dl,
"sanity_check": True,
"lr_scheduler": lr_scheduler,
"path2weights": path2models+"weights.pt",
}
model,loss_hist=train_val(model,params_train)
"""
Epoch 0/4, current lr=0.001
train loss: 13.039888
val loss: 309.578725
Copied best model weights!
----------
Epoch 1/4, current lr=0.001
train loss: 11.840441
val loss: 182.791525
Copied best model weights!
----------
Epoch 2/4, current lr=0.001
train loss: 10.949079
val loss: 143.510638
Copied best model weights!
----------
Epoch 3/4, current lr=0.001
train loss: 9.800387
val loss: 173.621087
----------
Epoch 4/4, current lr=0.001
train loss: 8.864806
val loss: 160.650937
----------
"""