在使用google提供的模型时,可能会报错,那就要我们了解程序加载模型的具体细节。
训练的开始语句是
python train.py --train_dir='train_dir' --pipeline_config_path='pipeline_config_path'
核心调用的文件就是train.py。在train.py中通过
model_config, train_config, input_config = get_configs_from_pipeline_file()
获取配置信息,其中是调用protobuf进行文件解析。之后分别得到model_config,train_config,input_config(训练的格式)。
之后通过functools.partial函数对model_builder.build函数赋予默认值。通过functools.partial函数对input_reader_builder.build赋予默认值。train_config是在最后训练的时候进行传入。
生成网络模型的代码为:
def build(model_config, is_training):
    """Build a detection model from a DetectionModel config.

    Args:
      model_config: a model_pb2.DetectionModel message.
      is_training: forwarded unchanged to the architecture-specific builder.

    Returns:
      The object returned by _build_ssd_model or _build_faster_rcnn_model,
      depending on which field of the 'model' oneof is set.

    Raises:
      ValueError: if model_config is not a model_pb2.DetectionModel, or the
        'model' oneof is neither 'ssd' nor 'faster_rcnn'.
    """
    if not isinstance(model_config, model_pb2.DetectionModel):
        raise ValueError('model_config not of type model_pb2.DetectionModel.')

    # Name of whichever field is populated in the 'model' oneof.
    architecture = model_config.WhichOneof('model')
    if architecture == 'ssd':
        builder, sub_config = _build_ssd_model, model_config.ssd
    elif architecture == 'faster_rcnn':
        builder, sub_config = (_build_faster_rcnn_model,
                               model_config.faster_rcnn)
    else:
        raise ValueError(
            'Unknown meta architecture: {}'.format(architecture))
    return builder(sub_config, is_training)
之后以faster_rcnn模型为例,进入_build_faster_rcnn_model
def _build_faster_rcnn_model(frcnn_config, is_training):
    """Assemble a Faster R-CNN or R-FCN detection model from its config.

    The result is an rfcn_meta_arch.RFCNMetaArch when the configured
    second-stage box predictor turns out to be a
    box_predictor.RfcnBoxPredictor, and a
    faster_rcnn_meta_arch.FasterRCNNMetaArch otherwise.

    Args:
      frcnn_config: config object describing the model structure; its
        fields are read directly and passed to the sub-builders below.
      is_training: whether the model is being built for training; passed on
        to the sub-builders and to the meta-architecture constructor.

    Returns:
      The constructed RFCNMetaArch or FasterRCNNMetaArch instance.
    """
    class_count = frcnn_config.num_classes

    # Sub-builders run in the same relative order as before: image resizer,
    # feature extractor, anchor generator, first-stage hyperparams,
    # second-stage box predictor, post-processing, hard example miner.
    resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
    extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training)
    anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)
    rpn_arg_scope = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    # hyperparams_builder.build itself is handed over so the box predictor
    # builder can create whatever arg scopes it needs.
    stage_two_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=class_count)
    nms_fn, score_conversion_fn = post_processing_builder.build(
        frcnn_config.second_stage_post_processing)

    loc_weight = frcnn_config.second_stage_localization_loss_weight
    cls_weight = frcnn_config.second_stage_classification_loss_weight

    # Hard example mining is off unless the config carries the field.
    miner = None
    if frcnn_config.HasField('hard_example_miner'):
        miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner, cls_weight, loc_weight)

    # Constructor arguments common to both meta-architectures.
    shared_kwargs = dict(
        is_training=is_training,
        num_classes=class_count,
        image_resizer_fn=resizer_fn,
        feature_extractor=extractor,
        first_stage_only=frcnn_config.first_stage_only,
        first_stage_anchor_generator=anchor_generator,
        first_stage_atrous_rate=frcnn_config.first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope=rpn_arg_scope,
        first_stage_box_predictor_kernel_size=(
            frcnn_config.first_stage_box_predictor_kernel_size),
        first_stage_box_predictor_depth=(
            frcnn_config.first_stage_box_predictor_depth),
        first_stage_minibatch_size=frcnn_config.first_stage_minibatch_size,
        first_stage_positive_balance_fraction=(
            frcnn_config.first_stage_positive_balance_fraction),
        first_stage_nms_score_threshold=(
            frcnn_config.first_stage_nms_score_threshold),
        first_stage_nms_iou_threshold=(
            frcnn_config.first_stage_nms_iou_threshold),
        first_stage_max_proposals=frcnn_config.first_stage_max_proposals,
        first_stage_localization_loss_weight=(
            frcnn_config.first_stage_localization_loss_weight),
        first_stage_objectness_loss_weight=(
            frcnn_config.first_stage_objectness_loss_weight),
        second_stage_batch_size=frcnn_config.second_stage_batch_size,
        second_stage_balance_fraction=(
            frcnn_config.second_stage_balance_fraction),
        second_stage_non_max_suppression_fn=nms_fn,
        second_stage_score_conversion_fn=score_conversion_fn,
        second_stage_localization_loss_weight=loc_weight,
        second_stage_classification_loss_weight=cls_weight,
        hard_example_miner=miner,
    )

    # An R-FCN box predictor selects the R-FCN meta-architecture ...
    if isinstance(stage_two_predictor, box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=stage_two_predictor,
            **shared_kwargs)

    # ... anything else goes to Faster R-CNN, which additionally takes the
    # crop size and max-pool settings.
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=frcnn_config.initial_crop_size,
        maxpool_kernel_size=frcnn_config.maxpool_kernel_size,
        maxpool_stride=frcnn_config.maxpool_stride,
        second_stage_mask_rcnn_box_predictor=stage_two_predictor,
        **shared_kwargs)
之后说明每一个子模型的构建
首先是image_resizer_builder的模型构建
# 构建图片的resize
def build(image_resizer_config):
    """Build an image-resizing callable from an ImageResizer config.

    Args:
      image_resizer_config: an image_resizer_pb2.ImageResizer message.

    Returns:
      A functools.partial over preprocessor.resize_to_range (for
      'keep_aspect_ratio_resizer') or preprocessor.resize_image (for
      'fixed_shape_resizer'), with the size arguments bound from the config.

    Raises:
      ValueError: if the config has the wrong type, if min_dimension is not
        <= max_dimension, or if neither resizer option is set.
    """
    if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
        raise ValueError('image_resizer_config not of type '
                         'image_resizer_pb2.ImageResizer.')

    resizer_kind = image_resizer_config.WhichOneof('image_resizer_oneof')

    if resizer_kind == 'keep_aspect_ratio_resizer':
        # Aspect-ratio-preserving resize bounded by min/max dimension.
        ratio_cfg = image_resizer_config.keep_aspect_ratio_resizer
        if not ratio_cfg.min_dimension <= ratio_cfg.max_dimension:
            raise ValueError('min_dimension > max_dimension')
        return functools.partial(
            preprocessor.resize_to_range,
            min_dimension=ratio_cfg.min_dimension,
            max_dimension=ratio_cfg.max_dimension)

    if resizer_kind == 'fixed_shape_resizer':
        # Resize to the fixed height/width given in the config.
        fixed_cfg = image_resizer_config.fixed_shape_resizer
        return functools.partial(
            preprocessor.resize_image,
            new_height=fixed_cfg.height,
            new_width=fixed_cfg.width)

    raise ValueError('Invalid image resizer option.')
接下来看preprocessor.resize_to_range这个函数
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    align_corners=False):
    """Resize an image (and optionally its masks) to a computed target size.

    The target size comes from _compute_new_static_size when the image shape
    is fully defined, and from _compute_new_dynamic_size otherwise. Per the
    accompanying notes, the short side is scaled to min_dimension unless that
    would push the long side past max_dimension, in which case the long side
    is scaled to max_dimension (those helpers are not shown here).

    Args:
      image: a tensor whose shape has exactly three dimensions.
      masks: optional masks tensor; when given it is expanded on axis 3,
        resized with nearest-neighbor interpolation, and squeezed back.
      min_dimension: passed to the size helpers.
      max_dimension: passed to the size helpers.
      align_corners: forwarded to both resize ops.

    Returns:
      The resized image when masks is None, otherwise the list
      [resized_image, resized_masks].

    Raises:
      ValueError: if the image shape does not have three dimensions.
    """
    if len(image.get_shape()) != 3:
        raise ValueError('Image should be 3D tensor')

    with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
        # Static shapes allow the size to be computed up front; otherwise it
        # has to be derived dynamically.
        size_fn = (_compute_new_static_size
                   if image.get_shape().is_fully_defined()
                   else _compute_new_dynamic_size)
        target_size = size_fn(image, min_dimension, max_dimension)
        resized_image = tf.image.resize_images(
            image, target_size, align_corners=align_corners)

        if masks is None:
            return resized_image

        # resize_nearest_neighbor is fed the masks with an extra trailing
        # axis, which is removed again afterwards.
        expanded_masks = tf.expand_dims(masks, 3)
        expanded_masks = tf.image.resize_nearest_neighbor(
            expanded_masks, target_size, align_corners=align_corners)
        resized_masks = tf.squeeze(expanded_masks, 3)
        return [resized_image, resized_masks]
resize之后就是构建特征提取网络(feature_extractor),也就是对_build_faster_rcnn_feature_extractor函数的说明
def _build_faster_rcnn_feature_extractor(
        feature_extractor_config, is_training, reuse_weights=None):
    """Instantiate the feature extractor named in the config.

    Args:
      feature_extractor_config: config exposing `type` (the extractor name,
        e.g. faster_rcnn_resnet101) and `first_stage_features_stride`.
      is_training: passed to the extractor constructor.
      reuse_weights: passed to the extractor constructor.

    Returns:
      An instance of the class registered under the configured type in
      FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.

    Raises:
      ValueError: if the configured type is not in the class map.
    """
    extractor_name = feature_extractor_config.type
    # NOTE(review): the original notes say only strides of 8 or 16 are
    # accepted; that check is not performed in this function.
    features_stride = feature_extractor_config.first_stage_features_stride

    if extractor_name in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
        extractor_cls = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[extractor_name]
        return extractor_cls(is_training, features_stride, reuse_weights)

    raise ValueError(
        'Unknown Faster R-CNN feature_extractor: {}'.format(extractor_name))
接下来是anchor的构建
def build(anchor_generator_config):
    """Build an anchor generator from an AnchorGenerator config.

    Exactly one of two options is honoured, chosen by the
    'anchor_generator_oneof' field: 'grid_anchor_generator' or
    'ssd_anchor_generator'.

    Args:
      anchor_generator_config: an anchor_generator_pb2.AnchorGenerator
        message.

    Returns:
      A grid_anchor_generator.GridAnchorGenerator, or the result of
      multiple_grid_anchor_generator.create_ssd_anchors.

    Raises:
      ValueError: if the config has the wrong type or neither option is set.
    """
    if not isinstance(anchor_generator_config,
                      anchor_generator_pb2.AnchorGenerator):
        raise ValueError('anchor_generator_config not of type '
                         'anchor_generator_pb2.AnchorGenerator')

    generator_kind = anchor_generator_config.WhichOneof(
        'anchor_generator_oneof')

    if generator_kind == 'grid_anchor_generator':
        grid_cfg = anchor_generator_config.grid_anchor_generator
        # Scales and aspect ratios are converted to plain floats; the
        # remaining arguments are [height, width] pairs taken as-is.
        return grid_anchor_generator.GridAnchorGenerator(
            scales=list(map(float, grid_cfg.scales)),
            aspect_ratios=list(map(float, grid_cfg.aspect_ratios)),
            base_anchor_size=[grid_cfg.height, grid_cfg.width],
            anchor_stride=[grid_cfg.height_stride, grid_cfg.width_stride],
            anchor_offset=[grid_cfg.height_offset, grid_cfg.width_offset])

    if generator_kind == 'ssd_anchor_generator':
        ssd_cfg = anchor_generator_config.ssd_anchor_generator
        return multiple_grid_anchor_generator.create_ssd_anchors(
            num_layers=ssd_cfg.num_layers,
            min_scale=ssd_cfg.min_scale,
            max_scale=ssd_cfg.max_scale,
            aspect_ratios=ssd_cfg.aspect_ratios,
            reduce_boxes_in_lowest_layer=ssd_cfg.reduce_boxes_in_lowest_layer)

    raise ValueError('Empty anchor generator.')
接下来是构建hyperparams_builder.build的那个模块
def build(hyperparams_config, is_training):
    """Build a tf-slim arg_scope from a Hyperparams config.

    The scope sets the weights regularizer, weights initializer, activation
    function and (optionally) batch norm for the affected ops. When the
    config has no 'batch_norm' field, no normalizer is configured. The batch
    norm parameters are derived from the config's batch_norm message
    together with is_training.

    Args:
      hyperparams_config: a hyperparams_pb2.Hyperparams message.
      is_training: passed to _build_batch_norm_params.

    Returns:
      The arg_scope object produced by slim.arg_scope.

    Raises:
      ValueError: if hyperparams_config is not a hyperparams_pb2.Hyperparams.
    """
    if not isinstance(hyperparams_config, hyperparams_pb2.Hyperparams):
        raise ValueError('hyperparams_config not of type '
                         'hyperparams_pb.Hyperparams.')

    # Batch norm is only wired in when the config explicitly carries it.
    uses_batch_norm = hyperparams_config.HasField('batch_norm')
    normalizer_fn = slim.batch_norm if uses_batch_norm else None
    normalizer_params = (
        _build_batch_norm_params(hyperparams_config.batch_norm, is_training)
        if uses_batch_norm else None)

    # Convolution-type ops by default; fully connected when op is FC.
    targets_fc = (hyperparams_config.HasField('op') and
                  hyperparams_config.op == hyperparams_pb2.Hyperparams.FC)
    if targets_fc:
        scoped_ops = [slim.fully_connected]
    else:
        scoped_ops = [slim.conv2d, slim.separable_conv2d,
                      slim.conv2d_transpose]

    scope_kwargs = dict(
        weights_regularizer=_build_regularizer(
            hyperparams_config.regularizer),
        weights_initializer=_build_initializer(
            hyperparams_config.initializer),
        activation_fn=_build_activation_fn(hyperparams_config.activation),
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params,
    )
    with slim.arg_scope(scoped_ops, **scope_kwargs) as scope:
        return scope
接下来是bbox的预测的构建
def build(argscope_fn, box_predictor_config, is_training, num_classes):
    """Build a box predictor from a BoxPredictor config.

    Args:
      argscope_fn: callable taking a hyperparams config message and the
        is_training flag, returning an arg scope (hyperparams_builder.build
        is the function passed by the caller shown in this document).
      box_predictor_config: a box_predictor_pb2.BoxPredictor message.
      is_training: passed to argscope_fn and to the predictor constructor.
      num_classes: passed to the predictor constructor.

    Returns:
      A box_predictor.ConvolutionalBoxPredictor, MaskRCNNBoxPredictor or
      RfcnBoxPredictor, according to the 'box_predictor_oneof' field.

    Raises:
      ValueError: if the config has the wrong type or names an unknown
        predictor.
    """
    if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
        raise ValueError('box_predictor_config not of type '
                         'box_predictor_pb2.BoxPredictor.')

    predictor_kind = box_predictor_config.WhichOneof('box_predictor_oneof')

    if predictor_kind == 'convolutional_box_predictor':
        conv_cfg = box_predictor_config.convolutional_box_predictor
        conv_scope = argscope_fn(conv_cfg.conv_hyperparams, is_training)
        return box_predictor.ConvolutionalBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams=conv_scope,
            min_depth=conv_cfg.min_depth,
            max_depth=conv_cfg.max_depth,
            num_layers_before_predictor=conv_cfg.num_layers_before_predictor,
            use_dropout=conv_cfg.use_dropout,
            dropout_keep_prob=conv_cfg.dropout_keep_probability,
            kernel_size=conv_cfg.kernel_size,
            box_code_size=conv_cfg.box_code_size,
            apply_sigmoid_to_scores=conv_cfg.apply_sigmoid_to_scores)

    if predictor_kind == 'mask_rcnn_box_predictor':
        mask_cfg = box_predictor_config.mask_rcnn_box_predictor
        fc_scope = argscope_fn(mask_cfg.fc_hyperparams, is_training)
        # Conv hyperparams are optional for this predictor.
        if mask_cfg.HasField('conv_hyperparams'):
            conv_scope = argscope_fn(mask_cfg.conv_hyperparams, is_training)
        else:
            conv_scope = None
        return box_predictor.MaskRCNNBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            fc_hyperparams=fc_scope,
            use_dropout=mask_cfg.use_dropout,
            dropout_keep_prob=mask_cfg.dropout_keep_probability,
            box_code_size=mask_cfg.box_code_size,
            conv_hyperparams=conv_scope,
            predict_instance_masks=mask_cfg.predict_instance_masks,
            mask_prediction_conv_depth=mask_cfg.mask_prediction_conv_depth,
            predict_keypoints=mask_cfg.predict_keypoints)

    if predictor_kind == 'rfcn_box_predictor':
        # Second-stage predictor used by the R-FCN architecture.
        rfcn_cfg = box_predictor_config.rfcn_box_predictor
        conv_scope = argscope_fn(rfcn_cfg.conv_hyperparams, is_training)
        return box_predictor.RfcnBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams=conv_scope,
            crop_size=[rfcn_cfg.crop_height, rfcn_cfg.crop_width],
            num_spatial_bins=[rfcn_cfg.num_spatial_bins_height,
                              rfcn_cfg.num_spatial_bins_width],
            depth=rfcn_cfg.depth,
            box_code_size=rfcn_cfg.box_code_size)

    raise ValueError('Unknown box predictor: {}'.format(predictor_kind))
上面的函数中有hyperparams_builder.build,那么就看看这个
def build(hyperparams_config, is_training):
    """Return a tf-slim arg_scope configured from a Hyperparams message.

    Args:
      hyperparams_config: a hyperparams_pb2.Hyperparams message.
      is_training: passed to _build_batch_norm_params when the config has a
        'batch_norm' field.

    Returns:
      The arg_scope object produced by slim.arg_scope.

    Raises:
      ValueError: if hyperparams_config is not a hyperparams_pb2.Hyperparams.
    """
    if not isinstance(hyperparams_config, hyperparams_pb2.Hyperparams):
        raise ValueError('hyperparams_config not of type '
                         'hyperparams_pb.Hyperparams.')

    # No normalizer at all unless the config asks for batch norm.
    if hyperparams_config.HasField('batch_norm'):
        normalizer_fn = slim.batch_norm
        normalizer_params = _build_batch_norm_params(
            hyperparams_config.batch_norm, is_training)
    else:
        normalizer_fn = None
        normalizer_params = None

    # The scope covers conv-type ops unless op is explicitly set to FC.
    wants_fc = (hyperparams_config.HasField('op') and
                hyperparams_config.op == hyperparams_pb2.Hyperparams.FC)
    scoped_ops = ([slim.fully_connected] if wants_fc else
                  [slim.conv2d, slim.separable_conv2d,
                   slim.conv2d_transpose])

    regularizer = _build_regularizer(hyperparams_config.regularizer)
    initializer = _build_initializer(hyperparams_config.initializer)
    activation = _build_activation_fn(hyperparams_config.activation)

    with slim.arg_scope(
            scoped_ops,
            weights_regularizer=regularizer,
            weights_initializer=initializer,
            activation_fn=activation,
            normalizer_fn=normalizer_fn,
            normalizer_params=normalizer_params) as scope:
        return scope
已经获取了box以及预测的类别,之后就是要进行一些后处理,可以看看后处理的构建post_processing_builder.build(frcnn_config.second_stage_post_processing)的具体内容。
def build(post_processing_config):
    """Build the post-processing callables from a PostProcessing config.

    Args:
      post_processing_config: a post_processing_pb2.PostProcessing message.

    Returns:
      A (non_max_suppressor_fn, score_converter_fn) tuple, built from the
      config's batch_non_max_suppression and score_converter fields
      respectively.

    Raises:
      ValueError: if the config is not a post_processing_pb2.PostProcessing.
    """
    if not isinstance(post_processing_config,
                      post_processing_pb2.PostProcessing):
        raise ValueError('post_processing_config not of type '
                         'post_processing_pb2.Postprocessing.')

    # Tuple elements are evaluated left to right: NMS first, then the score
    # converter.
    return (
        _build_non_max_suppressor(
            post_processing_config.batch_non_max_suppression),
        _build_score_converter(post_processing_config.score_converter),
    )
nms的构建,继续看post_processing.batch_multiclass_non_max_suppression这个函数
def _build_non_max_suppressor(nms_config):
    """Build the non-max-suppression callable from its config.

    Args:
      nms_config: config exposing iou_threshold, score_threshold,
        max_detections_per_class and max_total_detections.

    Returns:
      A functools.partial over
      post_processing.batch_multiclass_non_max_suppression with the
      thresholds and size limits bound from the config.

    Raises:
      ValueError: if iou_threshold lies outside [0, 1.0], or
        max_detections_per_class exceeds max_total_detections.
    """
    iou_limit = nms_config.iou_threshold
    if iou_limit > 1.0 or iou_limit < 0:
        raise ValueError('iou_threshold not in [0, 1.0].')

    per_class_cap = nms_config.max_detections_per_class
    total_cap = nms_config.max_total_detections
    if per_class_cap > total_cap:
        raise ValueError('max_detections_per_class should be no greater than '
                         'max_total_detections.')

    return functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=nms_config.score_threshold,
        iou_thresh=iou_limit,
        max_size_per_class=per_class_cap,
        max_total_size=total_cap)
不用说就是post_processing.batch_multiclass_non_max_suppression
#太长了,不复制了。和multiclass_non_max_suppression很相似,具体自己看
接下来是针对loss的build_hard_example_miner
def build_hard_example_miner(config,
                             classification_weight,
                             localization_weight):
    """Construct a losses.HardExampleMiner from its config.

    Args:
      config: config exposing loss_type, max_negatives_per_positive,
        num_hard_examples, iou_threshold and min_negatives_per_image.
      classification_weight: passed as cls_loss_weight.
      localization_weight: passed as loc_loss_weight.

    Returns:
      The constructed losses.HardExampleMiner.
    """
    # Config enum value -> string accepted by HardExampleMiner. A value
    # outside this table leaves loss_type as None.
    loss_type_names = {
        losses_pb2.HardExampleMiner.BOTH: 'both',
        losses_pb2.HardExampleMiner.CLASSIFICATION: 'cls',
        losses_pb2.HardExampleMiner.LOCALIZATION: 'loc',
    }
    loss_type = loss_type_names.get(config.loss_type)

    # Non-positive limits are passed on as None rather than as numbers.
    negatives_per_positive = (
        config.max_negatives_per_positive
        if config.max_negatives_per_positive > 0 else None)
    hard_example_count = (
        config.num_hard_examples if config.num_hard_examples > 0 else None)

    return losses.HardExampleMiner(
        num_hard_examples=hard_example_count,
        iou_threshold=config.iou_threshold,
        loss_type=loss_type,
        cls_loss_weight=classification_weight,
        loc_loss_weight=localization_weight,
        max_negatives_per_positive=negatives_per_positive,
        min_negatives_per_image=config.min_negatives_per_image)
函数最后也就是最重要的rfcn_meta_arch.RFCNMetaArch,其实就是RFCNMetaArch的初始化。就是构建一个faster r-cnn的模型之后将第二阶段进行替换。
class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
    """R-FCN Meta-architecture definition.

    Initialised as a FasterRCNNMetaArch whose crop/max-pool settings and own
    second-stage box predictor are left as None; the R-FCN box predictor is
    stored on the instance instead.
    """

    def __init__(self,
                 is_training,
                 num_classes,
                 image_resizer_fn,
                 feature_extractor,
                 first_stage_only,
                 first_stage_anchor_generator,
                 first_stage_atrous_rate,
                 first_stage_box_predictor_arg_scope,
                 first_stage_box_predictor_kernel_size,
                 first_stage_box_predictor_depth,
                 first_stage_minibatch_size,
                 first_stage_positive_balance_fraction,
                 first_stage_nms_score_threshold,
                 first_stage_nms_iou_threshold,
                 first_stage_max_proposals,
                 first_stage_localization_loss_weight,
                 first_stage_objectness_loss_weight,
                 second_stage_rfcn_box_predictor,
                 second_stage_batch_size,
                 second_stage_balance_fraction,
                 second_stage_non_max_suppression_fn,
                 second_stage_score_conversion_fn,
                 second_stage_localization_loss_weight,
                 second_stage_classification_loss_weight,
                 hard_example_miner,
                 parallel_iterations=16):
        """Initialise the parent meta-architecture and keep the R-FCN predictor.

        All arguments except second_stage_rfcn_box_predictor are forwarded
        positionally to FasterRCNNMetaArch.__init__;
        second_stage_rfcn_box_predictor is stored as
        self._rfcn_box_predictor.
        """
        # Everything up to and including the first-stage loss weights.
        leading_args = (
            is_training,
            num_classes,
            image_resizer_fn,
            feature_extractor,
            first_stage_only,
            first_stage_anchor_generator,
            first_stage_atrous_rate,
            first_stage_box_predictor_arg_scope,
            first_stage_box_predictor_kernel_size,
            first_stage_box_predictor_depth,
            first_stage_minibatch_size,
            first_stage_positive_balance_fraction,
            first_stage_nms_score_threshold,
            first_stage_nms_iou_threshold,
            first_stage_max_proposals,
            first_stage_localization_loss_weight,
            first_stage_objectness_loss_weight,
        )
        # Parent parameters R-FCN does not use, in order: initial_crop_size,
        # maxpool_kernel_size, maxpool_stride, and the parent's own
        # (fully connected) second-stage box predictor.
        unused_by_rfcn = (None, None, None, None)
        trailing_args = (
            second_stage_batch_size,
            second_stage_balance_fraction,
            second_stage_non_max_suppression_fn,
            second_stage_score_conversion_fn,
            second_stage_localization_loss_weight,
            second_stage_classification_loss_weight,
            hard_example_miner,
            parallel_iterations,
        )
        super(RFCNMetaArch, self).__init__(
            *(leading_args + unused_by_rfcn + trailing_args))
        self._rfcn_box_predictor = second_stage_rfcn_box_predictor