A simple neural network has no convolution; it consists of just three layers: an input layer, a hidden layer, and an output layer.
Data enters at the input layer, undergoes a weighted transformation in the hidden layer, and comes out at the output layer. At the output we can apply softmax regression, which produces a probability for each class.
Here x1, x2, x3 are the input values; after the transformation we obtain y1, y2, y3, the probabilities that the input belongs to each class. As a simple formula:

y = \mathrm{softmax}(Wx + b), \qquad \mathrm{softmax}(z)_j = \frac{e^{z_j}}{\sum_k e^{z_k}}
During training we compare the predicted result against the true result using cross-entropy, which yields a residual (loss):

H_{y'}(y) = -\sum_i y'_i \log(y_i)

Here y is the predicted probability distribution and y' is the true (one-hot) label. The smaller this residual, the better. Using gradient descent, we keep adjusting the values of W and b so that the residual shrinks until it converges to a minimum. At that point training is complete, and we have a model: the optimized values of W and b.
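Concretely, each gradient-descent step moves the parameters a small distance against the gradient of the loss, with learning rate \eta (0.01 in the code below):

W \leftarrow W - \eta\,\frac{\partial H}{\partial W}, \qquad b \leftarrow b - \eta\,\frac{\partial H}{\partial b}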
A training example for a simple (non-convolutional) neural network:
# -*- coding: utf-8 -*-
"""
@author: gongjia copy
"""
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # download and load the MNIST data

x = tf.placeholder(tf.float32, [None, 784])               # placeholder for the input images
y_actual = tf.placeholder(tf.float32, shape=[None, 10])   # placeholder for the input labels
W = tf.Variable(tf.zeros([784, 10]))                      # initialize the weights W
b = tf.Variable(tf.zeros([10]))                           # initialize the biases b
y_predict = tf.nn.softmax(tf.matmul(x, W) + b)            # weighted transform + softmax regression -> predicted probabilities

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_actual * tf.log(y_predict), reduction_indices=1))  # cross-entropy loss
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)  # gradient descent to minimize the residual

correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_actual, 1))  # test phase: per-example correctness
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))                 # mean accuracy over a batch

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):                                 # training phase: 1000 iterations
        batch_xs, batch_ys = mnist.train.next_batch(100)  # train in batches of 100 examples
        sess.run(train_step, feed_dict={x: batch_xs, y_actual: batch_ys})  # run one training step
        if i % 100 == 0:                                  # test once every 100 training steps
            print("accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images, y_actual: mnist.test.labels}))
The model is tested once for every 100 training steps, and the test accuracy climbs as training proceeds. After training, the average accuracy over the 10,000 test images is about 91%. That is not very high, and certainly lower than what a CNN can reach.
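For comparison, here is a CNN for the same MNIST task: two convolution + pooling stages, a fully connected layer with dropout, and a softmax output layer.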
# -*- coding: utf-8 -*-
"""
@author: gongjia copy
"""
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # download and load the MNIST data

x = tf.placeholder(tf.float32, [None, 784])               # placeholder for the input images
y_actual = tf.placeholder(tf.float32, shape=[None, 10])   # placeholder for the input labels

# helper: initialize a weight tensor W
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# helper: initialize a bias tensor b
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# helper: build a convolutional layer (stride 1, SAME padding preserves the spatial size)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# helper: build a 2x2 max-pooling layer (halves each spatial dimension)
def max_pool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# build the network
x_image = tf.reshape(x, [-1, 28, 28, 1])                  # reshape the flat input for the conv layers
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # first convolutional layer
h_pool1 = max_pool(h_conv1)                               # first pooling layer

W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)  # second convolutional layer
h_pool2 = max_pool(h_conv2)                               # second pooling layer
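# Shape check: 28x28x1 input -> conv1 (SAME) 28x28x32 -> pool1 14x14x32
#              -> conv2 (SAME) 14x14x64 -> pool2 7x7x64,
# which is why the fully connected layer below expects 7*7*64 = 3136 inputs.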
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])      # flatten the feature maps into a vector
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)  # first fully connected layer

keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)              # dropout layer

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_predict = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)  # softmax layer

cross_entropy = -tf.reduce_sum(y_actual * tf.log(y_predict))  # cross-entropy, summed over the whole batch (not averaged; see the note below)
train_step = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)  # gradient descent
correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_actual, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))  # accuracy computation

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:                                      # validate once every 100 training steps
        train_acc = accuracy.eval(feed_dict={x: batch[0], y_actual: batch[1], keep_prob: 1.0})
        print('step', i, 'training accuracy', train_acc)
    train_step.run(feed_dict={x: batch[0], y_actual: batch[1], keep_prob: 0.5})

test_acc = accuracy.eval(feed_dict={x: mnist.test.images, y_actual: mnist.test.labels, keep_prob: 1.0})
print("test accuracy", test_acc)
('step', 18800, 'training accuracy', 0.079999998)
('step', 18900, 'training accuracy', 0.059999999)
('step', 19000, 'training accuracy', 0.1)
('step', 19100, 'training accuracy', 0.059999999)
('step', 19200, 'training accuracy', 0.12)
('step', 19300, 'training accuracy', 0.14)
('step', 19400, 'training accuracy', 0.079999998)
('step', 19500, 'training accuracy', 0.039999999)
('step', 19600, 'training accuracy', 0.16)
('step', 19700, 'training accuracy', 0.1)
('step', 19800, 'training accuracy', 0.079999998)
('step', 19900, 'training accuracy', 0.1)
('test accuracy', 0.097999997)
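The log shows the network never learned: training accuracy hovers around chance level (about 10% for 10 classes), and the final test accuracy is 9.8%. The likely culprits are the loss and the optimizer. The cross-entropy here is summed over the whole batch instead of averaged, so its gradients are roughly 50x larger than in the first example, and tf.log(y_predict) can evaluate log(0) and produce NaNs, after which plain gradient descent stops making progress. A minimal fix sketch, following the official TensorFlow MNIST tutorial (compute the loss from the raw logits with the numerically stable built-in, and train with Adam); only the lines that replace their counterparts above are shown:

logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2             # keep the last layer as raw logits
y_predict = tf.nn.softmax(logits)                         # apply softmax only when reporting probabilities
# numerically stable, batch-averaged cross-entropy computed straight from the logits
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_actual, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)  # Adam, as in the official tutorial

With these changes the same architecture is reported by the official tutorial to reach about 99.2% test accuracy, well above the 91% of the simple network.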