# -*- coding: utf-8 -*-
"""
Created on Sun Sep 30 22:57:44 2018
@author: ltx
"""
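# Both L2 regularization and randomly dropping nodes (dropout) help prevent a model from overfitting.
# When the dataset is very small, regularization is used to keep the trained model from overfitting.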
import numpy as np
import matplotlib.pyplot as plt
import reg_utils #正则化
# Load the 2-D train/test split; is_plot=True is forwarded to the loader.
train_x, train_y, test_x, test_y = reg_utils.load_2D_dataset(is_plot=True)

# Short aliases for the training split.
X, Y = train_x, train_y

# Column count of X (presumably the number of training examples -- confirm
# against reg_utils.load_2D_dataset).
m = X.shape[1]
# -------------- Regularized model (avoids overfitting) start ----------------------
# Uses L2 regularization: the cost function is modified by adding an L2 regularization cost.
def compute_cost_reg(a3, Y, parameters, lambd):
    """Return the base cost plus an L2 penalty on the three weight matrices.

    The result is ``reg_utils.compute_cost(a3, Y)`` plus
    ``(1 / m) * (lambd / 2) * (sum(W1**2) + sum(W2**2) + sum(W3**2))``.

    Args:
        a3: Network output, passed through unchanged to
            ``reg_utils.compute_cost``.
        Y: Labels, passed through to ``reg_utils.compute_cost``. ``m`` is
            read from ``Y.shape[1]`` (assumes one column per example --
            TODO confirm against the data loader).
        parameters: Mapping that contains the weight matrices under the
            keys ``"W1"``, ``"W2"`` and ``"W3"``.
        lambd: L2 regularization strength.

    Returns:
        The sum of the unregularized cost and the L2 penalty.
    """
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    # Derive the example count locally instead of relying on a module-level
    # global, so the function also works for batches of a different size.
    m = Y.shape[1]

    cross_entropy_cost = reg_utils.compute_cost(a3, Y)

    squared_weight_sum = (
        np.sum(np.square(W1))
        + np.sum(np.square(W2))
        + np.sum(np.square(W3))
    )
    # Explicit multiplications: writing the factors as adjacent parenthesised
    # groups would be parsed as a call on a float and raise TypeError.
    l2_cost = (1 / m) * squared_weight_sum * (lambd / 2)

    return cross_entropy_cost + l2_cost
def backward_propagation_reg(X, Y, cache, lambd):
    """Compute gradients for the three-layer network with an L2 penalty.

    Each weight gradient gets the extra term ``lambd * W / m``; bias
    gradients are not regularized.

    Args:
        X: Input matrix; ``X.shape[1]`` is used as the example count ``m``.
        Y: Labels, subtracted element-wise from ``A3``.
        cache: Tuple ``(Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)``
            from the forward pass.
        lambd: L2 regularization strength.

    Returns:
        Dict with keys ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``, ``"dW3"``
        and ``"db3"``.
    """
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Use the batch actually passed in rather than a module-level global, so
    # the gradients are scaled correctly for any input size.
    m = X.shape[1]

    # Output layer. NOTE(review): dZ3 = A3 - Y presumably corresponds to a
    # sigmoid output with cross-entropy loss -- confirm against
    # reg_utils.forward_propagation / reg_utils.compute_cost.
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T) + ((lambd * W3) / m)
    db3 = (1 / m) * np.sum(dZ3, keepdims=True, axis=1)

    # Second hidden layer: the gradient only passes where the cached
    # activation is positive.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = (1 / m) * np.dot(dZ2, A1.T) + ((lambd * W2) / m)
    db2 = (1 / m) * np.sum(dZ2, keepdims=True, axis=1)

    # First hidden layer, same masking rule.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = (1 / m) * np.dot(dZ1, X.T) + ((lambd * W1) / m)
    db1 = (1 / m) * np.sum(dZ1, keepdims=True, axis=1)

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2,
        "dW3": dW3,
        "db3": db3,
    }
    return grads
# -------------- Regularized model (avoids overfitting) end ----------------------
# ------------- Randomly dropping nodes (dropout) to prevent overfitting: start --------------
def forward_DelNode(X, parameters, keep_prob):
    """Run the forward pass with inverted dropout on both hidden layers.

    Each hidden activation is multiplied by a random boolean mask (an entry
    is kept when a uniform draw is below ``keep_prob``) and then divided by
    ``keep_prob``. The output layer is not masked.

    Args:
        X: Input matrix fed to the first layer.
        parameters: Mapping with keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``,
            ``"W3"`` and ``"b3"``.
        keep_prob: Threshold for keeping a unit; also the rescaling divisor.

    Returns:
        ``(A3, cache)`` where ``cache`` is the tuple
        ``(Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)``.
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    # NOTE(review): the global NumPy RNG is reseeded on every call, so the
    # same masks are drawn each time the shapes are unchanged, and any
    # other user of np.random is reset too. Kept as-is to preserve behavior.
    np.random.seed(1)

    # Hidden layer 1: linear -> relu -> mask -> rescale.
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = (A1 * D1) / keep_prob

    # Hidden layer 2: same treatment with a freshly drawn mask.
    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = (A2 * D2) / keep_prob

    # Output layer: no dropout.
    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    # The masks are cached so the backward pass can reuse them.
    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache
# The matching backward pass must also drop nodes at random (dropout applied on top of the unregularized model).
def backDelNode(X, Y, cache, keep_prob):
    """Compute gradients for the three-layer network trained with dropout.

    The masks ``D1`` and ``D2`` stored in ``cache`` are applied to the
    corresponding activation gradients, which are then divided by
    ``keep_prob``. No L2 term is added.

    Args:
        X: Input matrix; ``X.shape[1]`` is used as the example count ``m``.
        Y: Labels, subtracted element-wise from ``A3``.
        cache: Tuple ``(Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3,
            b3)`` from the dropout forward pass.
        keep_prob: The keep probability used in the forward pass.

    Returns:
        Dict with keys ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``, ``"dW3"``
        and ``"db3"``.
    """
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Use the batch actually passed in rather than a module-level global, so
    # the gradients are scaled correctly for any input size.
    m = X.shape[1]

    # Output layer (not subject to dropout).
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, keepdims=True, axis=1)

    # Second hidden layer: apply the cached mask and the same rescaling.
    dA2 = np.dot(W3.T, dZ3)
    dA2 = dA2 * D2
    dA2 = dA2 / keep_prob
    # ReLU derivative: the gradient is passed only where the cached
    # activation is positive and zeroed elsewhere. This gates the gradient
    # by sign; it does not rescale it.
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, keepdims=True, axis=1)

    # First hidden layer: same steps with the first mask.
    dA1 = np.dot(W2.T, dZ2)
    dA1 = dA1 * D1
    dA1 = dA1 / keep_prob
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, keepdims=True, axis=1)

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2,
        "dW3": dW3,
        "db3": db3,
    }
    return grads
# ------------- Randomly dropping nodes (dropout) to prevent overfitting: end --------------
def model(X, Y, learning_rate=0.3, num_iterations=20000, print_cost=True,
          is_plot=True, lambd=0, keep_prob=1):
    """Train the three-layer network, optionally with L2 or dropout.

    The regularization mode is chosen from the arguments:

    * ``lambd != 0``: L2 regularization. This takes precedence, so
      ``keep_prob`` is ignored when both are set.
    * ``lambd == 0`` and ``keep_prob != 1``: dropout.
    * otherwise: no regularization.

    Args:
        X: Training inputs; ``X.shape[0]`` sets the input layer size.
        Y: Training labels.
        learning_rate: Step size passed to ``reg_utils.update_parameters``.
        num_iterations: Number of gradient-descent iterations.
        print_cost: When true, print the cost every 1000 iterations.
        is_plot: When true, plot the recorded costs after training.
        lambd: L2 regularization strength (0 disables it).
        keep_prob: Dropout keep probability (1 disables it).

    Returns:
        The parameters returned by the last ``reg_utils.update_parameters``
        call (or the initial parameters when ``num_iterations`` is 0).
    """
    costs = []
    layerdims = [X.shape[0], 20, 3, 1]
    parameters = reg_utils.initialize_parameters(layerdims)

    # The mode never changes during training, so decide it once.
    use_l2 = lambd != 0
    use_dropout = (not use_l2) and keep_prob != 1

    for i in range(num_iterations):
        # Forward pass and cost for the selected mode.
        if use_l2:
            a3, cache = reg_utils.forward_propagation(X, parameters)
            cost = compute_cost_reg(a3, Y, parameters, lambd)
        elif use_dropout:
            a3, cache = forward_DelNode(X, parameters, keep_prob)
            cost = reg_utils.compute_cost(a3, Y)
        else:
            a3, cache = reg_utils.forward_propagation(X, parameters)
            cost = reg_utils.compute_cost(a3, Y)

        # Record the cost every 1000 iterations regardless of print_cost, so
        # the plot still has data when printing is turned off.
        if i % 1000 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost=" + str(cost))

        # Backward pass matching the forward pass above.
        if use_l2:
            grads = backward_propagation_reg(X, Y, cache, lambd)
        elif use_dropout:
            grads = backDelNode(X, Y, cache, keep_prob)
        else:
            grads = reg_utils.backward_propagation(X, Y, cache)

        parameters = reg_utils.update_parameters(parameters, grads, learning_rate)

    if is_plot:
        plt.plot(costs)
        plt.xlabel('iterations (x1,000)')
        plt.ylabel('cost')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    return parameters
# Train with dropout only: lambd=0 disables L2, keep_prob=0.86 enables dropout.
parameters = model(
    X,
    Y,
    learning_rate=0.3,
    num_iterations=30000,
    print_cost=True,
    is_plot=True,
    lambd=0,
    keep_prob=0.86,
)

# Evaluate on the training split.
print("训练集精度:")
prediction = reg_utils.predict(X, Y, parameters)

# Evaluate on the held-out test split; `prediction` is overwritten.
print("测试集精度:")
prediction = reg_utils.predict(test_x, test_y, parameters)
# ------------- Plot the decision boundary of the predictions ---------------------------
# The model above is trained with keep_prob=0.86 (dropout), so the figure is
# labelled accordingly.
plt.title("Model with dropout")
axes = plt.gca()
# Fixed axis limits for the decision-boundary figure.
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
# The plotting helper passes x to the lambda; it is transposed before being
# handed to predict_dec.
reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(parameters, x.T), train_x, train_y)
# -------------------- Experimental results --------------------------------------