The hidden layer uses a sigmoid activation and the output layer uses softmax. In the backward pass, the gradients of the loss and the softmax are computed together (the fused softmax cross-entropy gradient).
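The two can be fused because, for the mean cross-entropy loss L = -(1/N) Σ y · log p with p = softmax(z), the derivative with respect to the logits z collapses to ∂L/∂z = (p - y)/N, which is exactly the `delta_g` term in the backward pass below.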
import numpy as np

def train(X, target):
    # Network sizes: 5 input features, 20 hidden units, 3 output classes.
    # W1 and W2 are the two layers' weights with the bias folded in as an
    # extra first row; lr is the learning rate.
    inputchannels, hiddenchannels, outputchannels = 5, 20, 3
    W1 = np.random.randn(inputchannels + 1, hiddenchannels)
    W2 = np.random.randn(hiddenchannels + 1, outputchannels)
    X_hat = np.insert(X, 0, values=1, axis=1)  # prepend a bias column of ones
    batch_size, _ = X.shape  # rows are samples
    lr = 0.1
    # Training loop
    for i in range(1000):
        # forward
        X1 = X_hat @ W1                                        # 8x6 @ 6x20 -> 8x20
        activate_X1 = 1 / (1 + np.exp(-X1))                    # sigmoid
        X2_hat = np.insert(activate_X1, 0, values=1, axis=1)   # bias column
        X2 = X2_hat @ W2                                       # 8x21 @ 21x3 -> 8x3
        # softmax over classes; shifting by the row max keeps exp() from overflowing
        exp_X2 = np.exp(X2 - np.max(X2, axis=1, keepdims=True))
        activate_X2 = exp_X2 / np.sum(exp_X2, axis=1, keepdims=True)
        loss = -np.sum(target * np.log(activate_X2)) / batch_size  # mean cross-entropy
        # backward: softmax and cross-entropy are differentiated together,
        # so the gradient at the logits is simply (softmax output - target)
        delta_g = (activate_X2 - target) / batch_size
        delta_W2 = X2_hat.T @ delta_g
        delta_X2_hat = delta_g @ W2.T
        # drop the bias column, then apply the sigmoid derivative
        delta_X1 = delta_X2_hat[:, 1:] * (activate_X1 * (1 - activate_X1))
        delta_W1 = X_hat.T @ delta_X1
        # gradient-descent update
        W1 = W1 - delta_W1 * lr
        W2 = W2 - delta_W2 * lr
        # log the first few iterations, then every 100th
        if (i + 1) % 100 == 0 or (i % 5 == 0 and i // 5 < 3):
            print(f"Iter: {i}, Loss: {loss:.3f}")
    return W1, W2
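
A quick finite-difference sanity check of that fused gradient, not part of the original; the helper name check_fused_grad and the sample values are made up for illustration:

def check_fused_grad(eps=1e-6):
    # Hypothetical single-sample logits and one-hot target
    z = np.array([0.3, -1.2, 0.8])
    y = np.array([0.0, 1.0, 0.0])

    def ce_loss(z):
        p = np.exp(z - np.max(z))
        p = p / p.sum()
        return -np.sum(y * np.log(p))

    # Analytic fused gradient: softmax(z) - y
    p = np.exp(z - np.max(z))
    p = p / p.sum()
    analytic = p - y
    # Central finite differences, one logit at a time
    numeric = np.array([
        (ce_loss(z + eps * e) - ce_loss(z - eps * e)) / (2 * eps)
        for e in np.eye(3)
    ])
    print(np.max(np.abs(analytic - numeric)))  # should be on the order of 1e-9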
def test(X_test, W1, W2):
    # Forward pass only, with the same architecture as train
    X_hat = np.insert(X_test, 0, values=1, axis=1)  # bias column
    X1 = X_hat @ W1
    activate_X1 = 1 / (1 + np.exp(-X1))             # sigmoid
    X2_hat = np.insert(activate_X1, 0, values=1, axis=1)
    X2 = X2_hat @ W2
    # class probabilities via a numerically stable softmax
    exp_X2 = np.exp(X2 - np.max(X2, axis=1, keepdims=True))
    probability = exp_X2 / np.sum(exp_X2, axis=1, keepdims=True)
    return probability
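
A minimal usage sketch on synthetic data, assuming the shapes implied by the in-code comments (a batch of 8 samples, 5 features, 3 classes); with random inputs the network can only memorize the batch:

if __name__ == "__main__":
    np.random.seed(0)
    X = np.random.randn(8, 5)                 # 8 samples, 5 features
    labels = np.random.randint(0, 3, size=8)  # a random class per sample
    target = np.eye(3)[labels]                # one-hot targets, shape 8x3
    W1, W2 = train(X, target)
    probability = test(X, W1, W2)
    print("predicted:", probability.argmax(axis=1))
    print("actual:   ", labels)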