KITTI
Car 0.00 0 -1.84 662.20 185.85 690.21 205.03 1.48 1.36 3.51 5.35 2.56 58.84 -1.75
Field 1 (string): the object class, one of 'Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc' or 'DontCare'
Note: the 'DontCare' label marks regions that were not annotated, e.g. because the object is too far from the laser scanner. To avoid counting regions that contain real objects but were left unlabeled as false positives during evaluation (which would mainly distort precision), the evaluation script automatically ignores predictions falling into 'DontCare' regions.
Field 2 (float): truncation, from 0 (not truncated) to 1 (truncated), where "truncated" means the object extends beyond the image boundary
Field 3 (integer): occlusion level, 0, 1, 2 or 3. 0: fully visible; 1: partly occluded; 2: largely occluded; 3: fully occluded (unknown)
Field 4: alpha, the observation angle of the object, in the range -pi to pi
Fields 5-8: the 2D bounding box of the object, as pixel coordinates of the top-left and bottom-right corners
Fields 9-11: the 3D object dimensions: height, width, length (in meters)
Fields 12-14: the 3D object location x, y, z (in camera coordinates, in meters)
Field 15: the 3D object orientation rotation_y, the rotation around the y-axis in camera coordinates, in the range -pi to pi
Some lines have a 16th field: the detection confidence, used only in results files. A float needed to compute the precision/recall curve; higher is better.
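As a quick illustration, a minimal parser for one such label line might look like this (parse_kitti_label is a hypothetical helper for this post, not part of the KITTI devkit):

# Hypothetical helper for illustration; not part of the KITTI devkit.
def parse_kitti_label(line):
    vals = line.split()
    return {
        'type': vals[0],
        'truncated': float(vals[1]),
        'occluded': int(vals[2]),
        'alpha': float(vals[3]),
        'box2d': [float(v) for v in vals[4:8]],        # xmin, ymin, xmax, ymax
        'dimensions': [float(v) for v in vals[8:11]],  # h, w, l (meters)
        'location': [float(v) for v in vals[11:14]],   # x, y, z in camera coord
        'rotation_y': float(vals[14]),
        'score': float(vals[15]) if len(vals) > 15 else None,  # results files only
    }

obj = parse_kitti_label('Car 0.00 0 -1.84 662.20 185.85 690.21 205.03 '
                        '1.48 1.36 3.51 5.35 2.56 58.84 -1.75')
print(obj['type'], obj['dimensions'])  # Car [1.48, 1.36, 3.51]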
2D/3D bounding boxes in images
show_image_with_boxes(img, objects, calib)
def show_image_with_boxes(img, objects, calib, show3d=True):
    ''' Show image with 2D and (optionally) 3D bounding boxes '''
    img1 = np.copy(img)  # for 2d bbox
    img2 = np.copy(img)  # for 3d bbox
    for obj in objects:
        if obj.type == 'DontCare': continue
        cv2.rectangle(img1, (int(obj.xmin), int(obj.ymin)),
                      (int(obj.xmax), int(obj.ymax)), (0, 255, 0), 2)
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
        if box3d_pts_2d is not None:  # box may be behind the camera
            img2 = utils.draw_projected_box3d(img2, box3d_pts_2d)
    Image.fromarray(img1).show()
    if show3d:
        Image.fromarray(img2).show()
- The 2D box is drawn directly from xmin, ymin, xmax, ymax:
  cv2.rectangle(img1, (int(obj.xmin), int(obj.ymin)),
                (int(obj.xmax), int(obj.ymax)), (0, 255, 0), 2)
- The 3D box must first be projected into the image plane via the projection matrix:
  box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
def roty(t):
    ''' Rotation about the y-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[c, 0, s],
                     [0, 1, 0],
                     [-s, 0, c]])
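A quick sanity check of the convention (illustrative only): a +90 degree rotation about y maps the +x axis onto -z.

import numpy as np
R = roty(np.pi / 2)                             # c = 0, s = 1
print(np.round(R @ np.array([1.0, 0.0, 0.0])))  # -> [ 0.  0. -1.]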
def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.
    Usage: pts_2d = projectToImage(pts_3d, P)
      input:  pts_3d: nx3 matrix
              P:      3x4 projection matrix
      output: pts_2d: nx2 matrix
      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
      => normalize projected_pts_2d(2xn)
      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
      => normalize projected_pts_2d(nx2)
    '''
    n = pts_3d.shape[0]
    pts_3d_extend = np.hstack((pts_3d, np.ones((n, 1))))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P))  # nx3
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]
    return pts_2d[:, 0:2]
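A minimal usage sketch with a made-up 3x4 pinhole matrix (fu = fv = 700, cu = 600, cv = 180 are invented values; real ones come from the KITTI calib files). Projecting the sample Car's location from the label above lands near its 2D box; the 3D location in KITTI labels is the bottom center of the object, so it projects near the bottom edge of the box:

P_toy = np.array([[700.,   0., 600., 0.],
                  [  0., 700., 180., 0.],
                  [  0.,   0.,   1., 0.]])  # invented intrinsics, for illustration
pts = np.array([[5.35, 2.56, 58.84]])       # the sample Car's location above
print(project_to_image(pts, P_toy))         # ~[[663.6, 210.5]] pixel (u, v)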
def compute_box_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        bounding box into the image plane.
        Returns:
            corners_2d: (8,2) array in left image coord.
            corners_3d: (8,3) array in rect camera coord.
    '''
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)
    # 3d bounding box dimensions
    l = obj.l
    w = obj.w
    h = obj.h
    # 3d bounding box corners; obj.t is the bottom center, so corners 0-3 lie on
    # the bottom face (y=0) and 4-7 on the top face (y=-h; camera y points down)
    x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    # rotate and translate 3d bounding box
    corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners]))
    corners_3d[0, :] = corners_3d[0, :] + obj.t[0]
    corners_3d[1, :] = corners_3d[1, :] + obj.t[1]
    corners_3d[2, :] = corners_3d[2, :] + obj.t[2]
    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2, :] < 0.1):
        corners_2d = None
        return corners_2d, np.transpose(corners_3d)
    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)
def draw_projected_box3d(image, qs, color=(255, 255, 255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: (8,2) array of projected vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
    '''
    qs = qs.astype(np.int32)
    for k in range(0, 4):
        # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
        i, j = k, (k + 1) % 4
        # edges of the first face (corners 0-3); use LINE_AA for opencv3
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)
        i, j = k + 4, (k + 1) % 4 + 4
        # edges of the second face (corners 4-7)
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)
        i, j = k, k + 4
        # edges connecting the two faces
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)
    return image
LiDAR points and 3D boxes in Velodyne coordinates
show_lidar_with_boxes(pc_velo, objects, calib, True, img_width, img_height)
# this step involves the calibration / projection matrices
class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf

        TODO(rqi): do matrix multiplication only once for each projection.
    '''
    # (matrix-loading and projection methods omitted in this excerpt)
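To make the transform chain in that docstring concrete, here is a minimal numpy sketch of mapping one Velodyne point to image2 pixels. It assumes Tr_velo_to_cam (3x4), R0_rect (3x3) and P2 (3x4) were already read from a KITTI calib file; the function name is illustrative, not part of the class above.

import numpy as np

def velo_to_image2(x_velo, Tr_velo_to_cam, R0_rect, P2):
    # illustrative sketch of: y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
    x_velo_h = np.append(x_velo, 1.0)   # homogeneous coordinates, shape (4,)
    x_ref = Tr_velo_to_cam @ x_velo_h   # x_ref = Tr_velo_to_cam * x_velo
    x_rect = R0_rect @ x_ref            # x_rect = R0_rect * x_ref
    y = P2 @ np.append(x_rect, 1.0)     # y_image2 = P^2_rect * x_rect
    return y[:2] / y[2]                 # normalize to pixel (u, v)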
def show_lidar_with_boxes(pc_velo, objects, calib,
                          img_fov=False, img_width=None, img_height=None):
    ''' Show all LiDAR points.
        Draw 3d box in LiDAR point cloud (in velo coord system) '''
    if img_fov:  # keep only points that fall inside the image after projection
        pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0,
                                         img_width, img_height)
        print(('FOV point num: ', pc_velo.shape[0]))
    for obj in objects:
        if obj.type == 'DontCare': continue
        # Draw 3d bounding box
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
        box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d)
        # Draw heading arrow
        ori3d_pts_2d, ori3d_pts_3d = utils.compute_orientation_3d(obj, calib.P)
        ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d)
        x1, y1, z1 = ori3d_pts_3d_velo[0, :]
        x2, y2, z2 = ori3d_pts_3d_velo[1, :]
        # (the remaining mayavi drawing calls are omitted in this excerpt)
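get_lidar_in_image_fov is called above (and again in extract_frustum_data below) but its body is not shown in this excerpt. Here is a minimal sketch reconstructed from how it is used here, not the verbatim implementation; the clip_distance default is an assumption:

def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
                           return_more=False, clip_distance=2.0):
    ''' Filter LiDAR points to those projecting inside the given image rect. '''
    pts_2d = calib.project_velo_to_image(pc_velo)  # nx2 pixel coords
    fov_inds = (pts_2d[:, 0] < xmax) & (pts_2d[:, 0] >= xmin) & \
               (pts_2d[:, 1] < ymax) & (pts_2d[:, 1] >= ymin)
    # also drop points too close to the sensor (at or behind the camera)
    fov_inds = fov_inds & (pc_velo[:, 0] > clip_distance)
    imgfov_pc_velo = pc_velo[fov_inds, :]
    if return_more:
        return imgfov_pc_velo, pts_2d, fov_inds
    return imgfov_pc_velo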
Extract Frustum Data
frustum data
- Transform the LiDAR point cloud from Velodyne to rect camera coordinates with project_velo_to_rect.
- pc_image_coord: the points projected from Velodyne to image coordinates with project_velo_to_image; img_fov_inds: indices of the points whose projections fall inside the image bounds.
- box_fov_inds: indices of the points that lie inside both the image bounds and the 2D bounding box.
- pc_in_box_fov = pc_rect[box_fov_inds, :] are the frustum points (see extract_frustum_data below).
frustum angle
Computed from the center of the 2D box: take the box center (u, v) and an arbitrary depth (20 m here; it only serves to pick a point along the viewing ray), forming the row vector (u, v, depth). Back-projecting this vector into rect camera coordinates gives box2d_center_rect, and the frustum angle is the negated angle of that ray from the +x axis in the camera's x-z plane:
frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2],
                                box2d_center_rect[0, 0])
def project_image_to_rect(self, uv_depth):
    ''' Input: nx3, first two channels are uv, 3rd channel
               is depth in rect camera coord.
        Output: nx3 points in rect camera coord.
    '''
    n = uv_depth.shape[0]
    x = ((uv_depth[:, 0] - self.c_u) * uv_depth[:, 2]) / self.f_u + self.b_x
    y = ((uv_depth[:, 1] - self.c_v) * uv_depth[:, 2]) / self.f_v + self.b_y
    pts_3d_rect = np.zeros((n, 3))
    pts_3d_rect[:, 0] = x
    pts_3d_rect[:, 1] = y
    pts_3d_rect[:, 2] = uv_depth[:, 2]
    return pts_3d_rect
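Plugging in the sample Car's box center column (u ≈ 676) with the arbitrary depth of 20 and made-up intrinsics (c_u = 600, f_u = 700, b_x = 0; not real calibration values) gives a feel for the numbers:

# Worked example with invented intrinsics; real values come from the calib file.
import numpy as np
u, depth = 676.0, 20.0                 # box center column and the arbitrary depth
x = (u - 600.0) * depth / 700.0        # ~2.17 m (rect camera x)
frustum_angle = -np.arctan2(depth, x)  # the z component equals the chosen depth
print(frustum_angle)                   # ~ -1.46 rad, i.e. roughly straight ahead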
def extract_frustum_data(idx_filename, split, output_filename, viz=False,
                         perturb_box2d=False, augmentX=1, type_whitelist=['Car'],
                         projection=True, output='./range_image'):
    ''' Extract point clouds and corresponding annotations in frustums
        generated from 2D bounding boxes.
        Lidar points and 3d boxes are in *rect camera* coord system
        (as that in 3d box label files)
    Input:
        idx_filename: string, each line of the file is a sample ID
        split: string, either training or testing
        output_filename: string, the name for output .pickle file
        viz: bool, whether to visualize extracted data
        perturb_box2d: bool, whether to perturb the box2d
            (used for data augmentation in train set)
        augmentX: scalar, how many augmentations to have for each 2D box.
        type_whitelist: a list of strings, object types we are interested in.
    Output:
        None (will write a .pickle file to the disk)
    '''
    dataset = kitti_object(os.path.join(ROOT_DIR, 'dataset/KITTI/object'), split)
    data_idx_list = [int(line.rstrip()) for line in open(idx_filename)]

    id_list = []             # int number
    box2d_list = []          # [xmin,ymin,xmax,ymax]
    box3d_list = []          # (8,3) array in rect camera coord
    input_list = []          # channel number = 4, xyz,intensity in rect camera coord
    label_list = []          # 1 for roi object, 0 for clutter
    type_list = []           # string e.g. Car
    heading_list = []        # ry (along y-axis in rect camera coord), radians of
                             # clockwise angle from positive x axis in velo coord.
    box3d_size_list = []     # array of l,w,h
    frustum_angle_list = []  # angle of 2d box center from pos x-axis

    pos_cnt = 0
    all_cnt = 0
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        calib = dataset.get_calibration(data_idx)  # Calibration object (3x4 matrices)
        objects = dataset.get_label_objects(data_idx)
        pc_velo = dataset.get_lidar(data_idx)
        pc_rect = np.zeros_like(pc_velo)
        pc_rect[:, 0:3] = calib.project_velo_to_rect(pc_velo[:, 0:3])
        pc_rect[:, 3] = pc_velo[:, 3]
        img = dataset.get_image(data_idx)
        img_height, img_width, img_channel = img.shape
        _, pc_image_coord, img_fov_inds = get_lidar_in_image_fov(pc_velo[:, 0:3],
            calib, 0, 0, img_width, img_height, True)

        for obj_idx in range(len(objects)):
            if objects[obj_idx].type not in type_whitelist: continue

            # 2D BOX: Get pts rect backprojected
            box2d = objects[obj_idx].box2d
            for _ in range(augmentX):  # data augmentation
                # Augment data by box2d perturbation
                if perturb_box2d:
                    xmin, ymin, xmax, ymax = random_shift_box2d(box2d)
                    print(box2d)
                    print(xmin, ymin, xmax, ymax)
                else:
                    xmin, ymin, xmax, ymax = box2d
                box_fov_inds = (pc_image_coord[:, 0] < xmax) & \
                               (pc_image_coord[:, 0] >= xmin) & \
                               (pc_image_coord[:, 1] < ymax) & \
                               (pc_image_coord[:, 1] >= ymin)
                box_fov_inds = box_fov_inds & img_fov_inds
                pc_in_box_fov = pc_rect[box_fov_inds, :]
                # Get frustum angle (according to center pixel in 2D BOX)
                box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
                uvdepth = np.zeros((1, 3))
                uvdepth[0, 0:2] = box2d_center
                uvdepth[0, 2] = 20  # some random depth
                box2d_center_rect = calib.project_image_to_rect(uvdepth)
                frustum_angle = -1 * np.arctan2(box2d_center_rect[0, 2],
                                                box2d_center_rect[0, 0])
                # 3D BOX: Get pts velo in 3d box
                obj = objects[obj_idx]
                box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
                _, inds = extract_pc_in_box3d(pc_in_box_fov, box3d_pts_3d)
                label = np.zeros((pc_in_box_fov.shape[0]))
                label[inds] = 1
                # Get 3D BOX heading
                heading_angle = obj.ry
                # Get 3D BOX size
                box3d_size = np.array([obj.l, obj.w, obj.h])
                # Reject too far away object or object without points
                if ymax - ymin < 25 or np.sum(label) == 0:
                    continue
                id_list.append(data_idx)
                box2d_list.append(np.array([xmin, ymin, xmax, ymax]))
                box3d_list.append(box3d_pts_3d)
                input_list.append(pc_in_box_fov)
                label_list.append(label)
                type_list.append(objects[obj_idx].type)
                heading_list.append(heading_angle)
                box3d_size_list.append(box3d_size)
                frustum_angle_list.append(frustum_angle)
                # collect statistics
                pos_cnt += np.sum(label)
                all_cnt += pc_in_box_fov.shape[0]

    print('Average pos ratio: %f' % (pos_cnt / float(all_cnt)))
    print('Average npoints: %f' % (float(all_cnt) / len(id_list)))

    with open(output_filename, 'wb') as fp:
        cPickle.dump(id_list, fp)
        cPickle.dump(box2d_list, fp)
        cPickle.dump(box3d_list, fp)
        cPickle.dump(input_list, fp)
        cPickle.dump(label_list, fp)
        cPickle.dump(type_list, fp)
        cPickle.dump(heading_list, fp)
        cPickle.dump(box3d_size_list, fp)
        cPickle.dump(frustum_angle_list, fp)
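The pickle stores nine parallel lists in a fixed order, so a consumer has to load them back in the same order. A minimal read-back sketch:

# Read the lists back in the exact order they were dumped.
with open(output_filename, 'rb') as fp:
    id_list = cPickle.load(fp)
    box2d_list = cPickle.load(fp)
    box3d_list = cPickle.load(fp)
    input_list = cPickle.load(fp)
    label_list = cPickle.load(fp)
    type_list = cPickle.load(fp)
    heading_list = cPickle.load(fp)
    box3d_size_list = cPickle.load(fp)
    frustum_angle_list = cPickle.load(fp)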