def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Turn raw YOLO output tensors into final boxes, scores and class ids.

    Args:
        yolo_outputs: sequence of output tensors, one per detection scale
            (three for full YOLOv3, otherwise the two-scale masks are used).
        anchors: anchor sizes, indexed with a list of indices per scale
            (presumably a NumPy array of (width, height) rows - confirm
            against the caller).
        num_classes: number of object classes.
        image_shape: shape of the original image the boxes are scaled to.
        max_boxes: maximum number of boxes kept *per class* by NMS.
        score_threshold: minimum ``box_confidence * class_prob`` for a box
            to be considered for a class.
        iou_threshold: IoU threshold handed to
            ``tf.image.non_max_suppression``.

    Returns:
        Tuple ``(boxes, scores, classes)`` concatenated over all classes in
        class-index order; ``boxes`` rows hold four coordinates as produced
        by ``yolo_boxes_and_scores``.
    """
    # One output tensor per pyramid level.
    num_layers = len(yolo_outputs)
    # Anchor indices used by each output scale, in the order of yolo_outputs.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # The network input size is taken as 32x the spatial size of output 0.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Collect candidate boxes and their per-class scores from every scale.
    all_boxes = []
    all_scores = []
    for layer_idx, layer_output in enumerate(yolo_outputs):
        # Scores are box_confidence * box_class_probs.
        layer_boxes, layer_scores = yolo_boxes_and_scores(
            layer_output, anchors[anchor_mask[layer_idx]],
            num_classes, input_shape, image_shape)
        all_boxes.append(layer_boxes)
        all_scores.append(layer_scores)
    boxes = K.concatenate(all_boxes, axis=0)  # rows of 4 coordinates
    box_scores = K.concatenate(all_scores, axis=0)  # rows of num_classes scores

    # True where a box's score for a class reaches the threshold.
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    # Non-maximum suppression is run independently for every class.
    for c in range(num_classes):
        class_mask = mask[:, c]
        # Keep only the boxes (and scores) that pass the threshold for class c.
        candidates = tf.boolean_mask(boxes, class_mask)
        candidate_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        survivors = K.gather(candidates, nms_index)
        survivor_scores = K.gather(candidate_scores, nms_index)
        # Class id tensor with one entry per surviving box.
        survivor_classes = K.ones_like(survivor_scores, 'int32') * c
        kept_boxes.append(survivors)
        kept_scores.append(survivor_scores)
        kept_classes.append(survivor_classes)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
获取yolo输出的box和对应的scores:
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Convert one scale's conv output into flat box and score tensors.

    Decodes ``feats`` with ``yolo_head``, maps the boxes onto the original
    image with ``yolo_correct_boxes`` and flattens the results.

    Returns:
        Tuple ``(boxes, box_scores)`` where ``boxes`` is reshaped to
        ``[-1, 4]`` and ``box_scores`` (objectness times class probability)
        to ``[-1, num_classes]``.
    """
    xy, wh, objectness, class_probs = yolo_head(
        feats, anchors, num_classes, input_shape)
    corrected = yolo_correct_boxes(xy, wh, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])
    # Per-class score = objectness * class probability.
    flat_scores = K.reshape(objectness * class_probs, [-1, num_classes])
    return flat_boxes, flat_scores
获取yolo的原始输出t_x,t_y,t_w,t_h,进一步得到b_x,b_y,b_w,b_h:
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode final-layer features into bounding-box parameters.

    Args:
        feats: raw output tensor of one detection scale; axes 1 and 2 are
            read as grid height and width.
        anchors: the anchors used at this scale; reshaped to
            ``[1, 1, 1, num_anchors, 2]``.
        num_classes: number of object classes.
        input_shape: network input shape (height, width).
        calc_loss: when equal to ``True`` return the tensors needed by the
            loss instead of the inference outputs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` else
        ``(box_xy, box_wh, box_confidence, box_class_probs)``. ``box_xy`` is
        divided by the grid size and ``box_wh`` by the input size.
    """
    num_anchors = len(anchors)
    # Broadcastable against (batch, height, width, num_anchors, 2).
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]
    # Row index of every cell, repeated across the width.
    row_index = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    grid_y = K.tile(row_index, [1, grid_w, 1, 1])
    # Column index of every cell, repeated across the height.
    col_index = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_x = K.tile(col_index, [grid_h, 1, 1, 1])
    # Last axis holds (x, y) cell offsets.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    feat_dtype = K.dtype(feats)

    # Adjust predictions to each spatial grid point and anchor size.
    grid_wh = K.cast(grid_shape[::-1], feat_dtype)
    input_wh = K.cast(input_shape[::-1], feat_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_wh
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Equality test (not truthiness) is kept on purpose to match the
    # original behaviour exactly.
    if calc_loss == True:  # noqa: E712
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
矫正bbx,并scale至原图对应的尺寸
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from network-input coordinates to original-image pixels.

    The image is assumed to have been resized with its aspect ratio kept and
    centred inside ``input_shape`` (presumably letterbox padding - confirm
    against the preprocessing code); the offset and scale of that placement
    are undone here.

    Args:
        box_xy: box centres as (x, y).
        box_wh: box sizes as (w, h).
        input_shape: network input shape (height, width).
        image_shape: original image shape (height, width).

    Returns:
        Tensor whose last axis is ``(y_min, x_min, y_max, x_max)`` multiplied
        by ``image_shape``.
    """
    # Work in (y, x) / (h, w) order to line up with the shape tensors.
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]
    dtype = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, dtype)
    image_shape = K.cast(image_shape, dtype)

    # Size of the image after the aspect-preserving resize.
    resized_shape = K.round(image_shape * K.min(input_shape / image_shape))
    # Fractional border on each side, and the factor that removes it.
    border = (input_shape - resized_shape) / 2. / input_shape
    rescale = input_shape / resized_shape
    centers_yx = (centers_yx - border) * rescale
    sizes_hw *= rescale

    half_hw = sizes_hw / 2.
    mins = centers_yx - half_hw
    maxes = centers_yx + half_hw
    corners = [
        mins[..., 0:1],   # y_min
        mins[..., 1:2],   # x_min
        maxes[..., 0:1],  # y_max
        maxes[..., 1:2],  # x_max
    ]
    boxes = K.concatenate(corners)

    # Scale boxes back to original image shape.
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes
tf.boolean_mask:(1)tensor的第一个维度必须与mask的第一个维度相等,否则会出错(见t6);(2)对于剩余的维度,如果mask包含对应的维度,则mask在该维度上的大小必须与tensor在该维度上的大小相等(见t4),否则会出错(见第二个t3、t5、t7)。
import tensorflow as tf
import numpy as np
def _show_boolean_mask(tensor, mask):
    """Print ``tf.boolean_mask(tensor, mask)`` or the ValueError it raises.

    Several examples below fail on purpose. Printing the error instead of
    letting it propagate allows the remaining examples to run as well.
    """
    try:
        with tf.Session() as sess:
            print(sess.run(tf.boolean_mask(tensor, mask)))
    except ValueError as err:
        print('ValueError: {}'.format(err))


# 1-D tensor with a 1-D mask of the same length.
t1 = [0,1,2,3]
mask1 = np.array([True,False,True,False])
_show_boolean_mask(t1, mask1)
#[0 2]

# 2-D tensor with a 1-D mask: whole rows are selected.
t2 = [[0,1],[2,3],[4,5]]
mask2 = np.array([True,False,True])
_show_boolean_mask(t2, mask2)
#[[0 1]
# [4 5]]

# Single-row tensor with a 1-D mask of length 1.
t3 = [[0,1,2,3]]
mask3 = np.array([True])
_show_boolean_mask(t3, mask3)
#[[0 1 2 3]]

# Same tensor, but the 2-D mask's second dimension (1) differs from 4.
t3 = [[0,1,2,3]]
mask3 = np.array([[True]])
_show_boolean_mask(t3, mask3)
#ValueError: Shapes (1, 4) and (1, 1) are incompatible

# 2-D mask with exactly the tensor's shape: individual elements are selected.
t4 = [[0,1],[2,3],[4,5]]
mask4 = np.array([[True,False],[False,True],[True,True]])
_show_boolean_mask(t4, mask4)
#[0 3 4 5]

# 2-D mask whose second dimension (3) differs from the tensor's (2).
t5 = [[0,1],[2,3],[4,5]]
mask5 = np.array([[True,False,True],[False,True,True],[True,True,False]])
_show_boolean_mask(t5, mask5)
#ValueError: Shapes (3, 2) and (3, 3) are incompatible

# 1-D mask whose length (2) differs from the tensor's first dimension (3).
t6 = [[0,1],[2,3],[4,5]]
mask6 = np.array([True,False])
_show_boolean_mask(t6, mask6)
#ValueError: Shapes (3,) and (2,) are incompatible

# 2-D mask whose second dimension (2) differs from the tensor's (3).
t7 = [[0,1,2],[2,3,4],[4,5,6]]
mask7 = np.array([[True,False],[False,True],[False,False]])
_show_boolean_mask(t7, mask7)
#ValueError: Shapes (3, 3) and (3, 2) are incompatible
利用python实现NMS:
import cv2
import argparse
import numpy as np
import pdb
def parse_parser():
    """Parse the command line and return the resulting namespace.

    The only option is ``--input_image`` (str), which defaults to
    ``'../test_data/multi.jpg'``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input_image',
        type=str,
        default='../test_data/multi.jpg',
    )
    return cli.parse_args()
def non_max_suppression(bbx_info: dict, threshold: float) -> dict:
    """Apply greedy non-maximum suppression to each class independently.

    Args:
        bbx_info: maps a class label to a list of boxes, each given as
            ``[x_min, y_min, x_max, y_max, conf]``. The dict is updated in
            place.
        threshold: a box is dropped when its IoU with an already kept,
            higher-confidence box of the same class is greater than this.

    Returns:
        The same ``bbx_info`` object, with every class's list replaced by the
        surviving boxes (lists of floats) ordered by descending confidence.
        A class with no boxes maps to an empty list.
    """
    for bbx_class, bbx in bbx_info.items():
        bbx_array = np.array(bbx)
        if bbx_array.size == 0:
            # Nothing to suppress; column slicing on the empty array below
            # would raise IndexError.
            bbx_info[bbx_class] = []
            continue

        x_min, y_min, x_max, y_max, conf = (bbx_array[:, col] for col in range(5))
        # Indices of the boxes sorted by descending confidence.
        order = np.argsort(conf)[::-1]
        area = np.maximum(x_max - x_min, 0) * np.maximum(y_max - y_min, 0)

        keep = []
        while order.size > 0:
            # The most confident remaining box always survives.
            best, rest = order[0], order[1:]
            keep.append(best)
            # Intersection of the best box with every remaining box
            # (`rest` is empty when only one box is left).
            ix_min = np.maximum(x_min[best], x_min[rest])
            iy_min = np.maximum(y_min[best], y_min[rest])
            ix_max = np.minimum(x_max[best], x_max[rest])
            iy_max = np.minimum(y_max[best], y_max[rest])
            intersection = np.maximum(ix_max - ix_min, 0) * np.maximum(iy_max - iy_min, 0)
            # IoU = intersection / union.
            iou = intersection / (area[best] + area[rest] - intersection)
            # Only boxes that do not overlap the kept box too much remain
            # as candidates for the next round.
            order = rest[iou <= threshold]

        bbx_info[bbx_class] = bbx_array[keep].tolist()
    return bbx_info
def draw_res(image_name, bbx_info, mode):
    """Draw labelled boxes on an image and save the annotated copy.

    Args:
        image_name: path of the image to read with ``cv2.imread``.
        bbx_info: maps a class label (str) to a list of
            ``[x_min, y_min, x_max, y_max, conf]`` boxes.
        mode: ``'src'`` or ``'dst'``; used as the suffix of the output name.

    The result is written to ``<stem>_<mode>.jpg`` relative to the current
    working directory, where ``<stem>`` is the part of the file name before
    its first dot.
    """
    assert mode in ('src', 'dst')
    img_BGR = cv2.imread(image_name)
    for label, boxes in bbx_info.items():
        for x_min, y_min, x_max, y_max, conf in boxes:
            # OpenCV drawing functions need integer pixel coordinates.
            top_left = (int(x_min), int(y_min))
            bottom_right = (int(x_max), int(y_max))
            cv2.rectangle(img_BGR, top_left, bottom_right, (0, 0, 255), 1)
            caption = label + ':' + str(conf)
            cv2.putText(img_BGR, caption, top_left,
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1)
    stem = image_name.rsplit('/')[-1].split('.')[0]
    cv2.imwrite('{}_{}.jpg'.format(stem, mode), img_BGR)
if __name__ == '__main__':
    # Demo: draw hard-coded detections before ('src') and after ('dst') NMS.
    args = parse_parser()
    # Alternative sample data:
    #bbx_info = {'person':[[0,17,219,347,1.0],[10,17,200,300,0.95]],'car':[[0,135,48,190,0.84],[0,125,50,188,0.80],[0,20,48,100,0.6]]}
    # Each box is [x_min, y_min, x_max, y_max, conf]. The class key is drawn
    # on the image as the label, so it is spelled 'person' (was 'preson').
    bbx_info = {
        'person': [
            [501, 118, 672, 325, 0.70],
            [417, 90, 637, 346, 0.85],
            [4, 116, 344, 439, 0.99],
            [490, 120, 672, 300, 0.67],
        ],
        'car': [[18, 0, 778, 534, 0.88]],
    }
    draw_res(args.input_image, bbx_info, mode='src')
    bbx_info = non_max_suppression(bbx_info, 0.6)
    draw_res(args.input_image, bbx_info, mode='dst')