To demonstrate the effect of BatchNormalization, I wrote a small program that regresses a quadratic function. The loss curves show that the network with BatchNormalization layers does converge faster. The program also uses Keras's functional (Model) API. For the theory behind BatchNormalization, see: https://arxiv.org/pdf/1502.03167v3.pdf
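As a quick reminder of what the layer computes (a minimal NumPy sketch, not part of the script below): for every mini-batch, BatchNormalization normalizes each feature to zero mean and unit variance and then applies a learnable scale (gamma) and shift (beta), as described in the paper above.

# Minimal NumPy sketch of the BatchNormalization training-time transform.
# gamma/beta stand in for the layer's learnable scale and shift parameters;
# epsilon avoids division by zero. The values here are illustrative only.
import numpy as np

def batch_norm_forward(x, gamma=1.0, beta=0.0, epsilon=1e-3):
    mean = x.mean(axis=0)                        # per-feature mini-batch mean
    var = x.var(axis=0)                          # per-feature mini-batch variance
    x_hat = (x - mean) / np.sqrt(var + epsilon)  # normalize to zero mean, unit variance
    return gamma * x_hat + beta                  # learnable scale and shift

# Example: a batch of 4 samples with 3 features; each column comes out with mean ~0
batch = np.random.randn(4, 3) * 10 + 5
print(batch_norm_forward(batch).mean(axis=0))

The complete regression script follows.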
#coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense, Activation, BatchNormalization
from keras.utils import plot_model

# Generate noisy samples of the quadratic function y = x^2 + 5
x_data = np.linspace(-5, 5, 3000)
np.random.shuffle(x_data)
noise = np.random.normal(0, 1, x_data.shape)
y_data = x_data**2 + 5 + noise

# Split into 2500 training samples and 500 test samples
x_train = x_data[:2500]
y_train = y_data[:2500]
x_test = x_data[2500:]
y_test = y_data[2500:]

# Visualize the training data
plt.figure()
plt.scatter(x_train, y_train, marker='.')
plt.show()

# Build the network with the functional API:
# two Dense -> BatchNormalization -> sigmoid blocks, then two Dense layers
inputs = Input(shape=(1,))
x = Dense(3)(inputs)
x = BatchNormalization(axis=-1)(x)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
x = BatchNormalization(axis=-1)(x)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
outputs = Dense(1)(x)
model = Model(inputs=inputs, outputs=outputs)
plot_model(model, to_file='model.png', show_shapes=True)
model.compile(optimizer='sgd', loss='mse')

print('Train------------')
sum_cost = []
for step in range(1001):
    cost = model.train_on_batch(x_train, y_train)
    sum_cost.append(cost)
    if step % 100 == 0:
        print('cost=', cost)

# Plot the training loss curve
plt.figure()
plt.plot(range(1001), sum_cost)
plt.show()

# Evaluate on the test set and predict with the trained model
loss = model.test_on_batch(x_test, y_test)
print('test loss=', loss)
y_pred = model.predict(x_test)

# Training data in blue, model predictions on the test inputs in red
plt.figure()
plt.scatter(x_train, y_train, c='b', marker='.')
plt.scatter(x_test, y_pred, c='r', marker='.')
plt.show()
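For the "without BatchNormalization" comparison below, the same script was rerun with the two BatchNormalization layers removed. A sketch of that baseline model (my reconstruction; everything else is assumed unchanged) looks like:

# Baseline without BatchNormalization, used for the comparison plots below.
# Same layer sizes and optimizer as above; only the BatchNormalization layers are dropped.
from keras.models import Model
from keras.layers import Input, Dense, Activation

inputs_nb = Input(shape=(1,))
x = Dense(3)(inputs_nb)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
outputs_nb = Dense(1)(x)
model_nb = Model(inputs=inputs_nb, outputs=outputs_nb)
model_nb.compile(optimizer='sgd', loss='mse')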
[Figure: loss curve with BatchNormalization]

[Figure: loss curve without BatchNormalization]

The fitting result is as follows: