1. 首先准备一下原始数据
# *-* coding: utf-8 *-*
import pickle
import numpy as np
# Boundary that separates the two classes.
def cos_curve(x):
    """Return the height of the class boundary at horizontal position ``x``.

    The boundary is ``0.25 * sin(2*pi*x + pi/2) + 0.5``, i.e. a cosine wave
    with period 1 that oscillates between 0.25 and 0.75.

    Args:
        x: Scalar or NumPy array of x-coordinates.

    Returns:
        The boundary value(s); same shape as ``x``.
    """
    return 0.25 * np.sin(2 * x * np.pi + 0.5 * np.pi) + 0.5
# `samples` holds the 2-D point coordinates, `labels` the class of each point.
np.random.seed(123)  # fixed seed so the generated data set is reproducible
samples = []
labels = []
# Number of candidate points drawn from the unit square; the ones that fall
# too close to the boundary are discarded below, so fewer are kept.
sample_density = 50
for _ in range(sample_density):
    x1, x2 = np.random.random(2)
    # Height of the class boundary at this x1.
    bound = cos_curve(x1)
    # Skip points within 0.1 of the boundary so the two classes are clearly
    # separated in the plots.
    if bound - 0.1 < x2 <= bound + 0.1:
        continue
    samples.append((x1, x2))
    # Label: 1 for points above the boundary, 0 for points below it.
    labels.append(1 if x2 > bound else 0)
# Persist the samples and labels for the later steps.
with open('data.pkl', 'wb') as f:
    pickle.dump((samples, labels), f)
# Visualise the generated data set.
import matplotlib.pyplot as plt

# Draw every sample as a hollow marker: red circles for label 1, blue
# triangles for label 0.
for (px, py), label in zip(samples, labels):
    plt.plot(
        px,
        py,
        'o' if label else '^',
        mec='r' if label else 'b',
        mfc='none',
        markersize=10,
    )
# Overlay the class boundary as a dashed black curve.
x1 = np.linspace(0, 1)
plt.plot(x1, cos_curve(x1), 'k--')
plt.show()
2. 用HDF5准备坐标数据和对应标签。
# *-* coding: utf-8 *-*
import pickle
import numpy as np
import h5py
# Load the samples and labels saved by the data-generation step.
with open('data.pkl', 'rb') as f:
    samples, labels = pickle.load(f)
sample_size = len(labels)
# Arrange the arrays as (N, 2) coordinates and (N, 1) labels.
# Both are stored as float32: the labels would otherwise be written as int64,
# and Caffe's HDF5 examples feed float data (older loaders reject integer
# datasets outright).
samples = np.array(samples, dtype=np.float32).reshape((sample_size, 2))
labels = np.array(labels, dtype=np.float32).reshape((sample_size, 1))
# Write the HDF5 file; the dataset names are the blob names the network
# definition refers to.
h5_filename = 'data.h5'
with h5py.File(h5_filename, 'w') as h:
    h.create_dataset('data', data=samples)
    h.create_dataset('label', data=labels)
# Write the list file that names the HDF5 file(s) to read.
with open('data_h5.txt', 'w') as f:
    f.write(h5_filename)
这部分结束之后,生成data.h5和data_h5.txt。
3.做训练网络以及train.prototxt
# Training network for the two-layer MLP.
# Unlike the deploy/test definition, a training net must read the samples and
# labels itself and must end in a loss layer for the solver to minimise.
name: "SimpleMLP"
# Reads the "data" and "label" datasets from the HDF5 files listed in
# data_h5.txt.
layer {
  name: "data"
  type: "HDF5Data"
  top: "data"
  top: "label"
  hdf5_data_param {
    source: "data_h5.txt"
    # NOTE(review): mini-batch size is a free choice here; adjust it to the
    # number of samples actually stored in data.h5.
    batch_size: 10
  }
}
# Hidden layer: 2 inputs -> 2 units.
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "data"
  top: "fc1"
  inner_product_param {
    num_output: 2
    # The default filler is constant zero, which would leave both hidden
    # units identical throughout training; start from random weights instead.
    weight_filler {
      type: "uniform"
    }
  }
}
layer {
  name: "sigmoid1"
  type: "Sigmoid"
  bottom: "fc1"
  top: "sigmoid1"
}
# Output layer: 2 hidden units -> 2 class scores.
layer {
  name: "fc2"
  type: "InnerProduct"
  bottom: "sigmoid1"
  top: "fc2"
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "uniform"
    }
  }
}
# Softmax + multinomial logistic loss against the ground-truth labels.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc2"
  bottom: "label"
  top: "loss"
}
就这样完成了一个两层神经网络的设计。
接下来将这个网络进行可视化,利用caffe提供的draw_net.py实现,需要安装graphviz和pydot。
绘图过程中遇到的问题,在我的另一篇文章《用draw_net.py绘制网络图时的小问题》中给出了解决方法。
如果你使用PyCharm编辑器,需要在运行之前设置脚本参数:第一个参数为train.prototxt,第二个参数为生成图像的保存路径,第三个参数--rankdir为各层的排列方向,默认为LR(横向排列),BT为自下而上的竖直排列。
这样运行之后,就生成了一张网络图:
3.进行训练
# *-* coding: utf-8 *-*
import sys

import numpy as np

# Make pycaffe importable. This has to run BEFORE `import caffe`, otherwise
# the import fails when Caffe is not already on the Python path.
sys.path.append('D:/new_caffe/caffe/python')
import caffe

# Create an SGD solver from its definition file.
solver = caffe.SGDSolver('solver.prototxt')
# Run the optimisation as configured in solver.prototxt.
solver.solve()
# Grab the network held by the solver.
net = solver.net
# Feed one input point, e.g. the centre of the unit square. The values must
# be written into the blob's `.data` array; rebinding `net.blobs['data']`
# would only replace the Python dict entry and never reach the network.
net.blobs['data'].data[...] = np.array([[0.5, 0.5]])
# Run a forward pass.
output = net.forward()
# Show the result (call form works on both Python 2 and Python 3).
print(output)
训练过程:
并且生成了相应的权重文件:
4.写test.prototxt
# Deploy/inference definition of the two-layer MLP.
name: "SimpleMLP"

# A single 2-D point is fed in at a time: blob "data" has shape 1 x 2.
input: "data"
input_shape {
  dim: 1
  dim: 2
}

# Hidden layer: 2 inputs -> 2 units, followed by a sigmoid.
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "data"
  top: "fc1"
  inner_product_param {
    num_output: 2
  }
}
layer {
  name: "sigmoid1"
  type: "Sigmoid"
  bottom: "fc1"
  top: "sigmoid1"
}

# Output layer: 2 hidden units -> 2 class scores.
layer {
  name: "fc2"
  type: "InnerProduct"
  bottom: "sigmoid1"
  top: "fc2"
  inner_product_param {
    num_output: 2
  }
}

# Turn the scores into class probabilities, exposed as blob "prob".
layer {
  name: "softmax"
  type: "Softmax"
  bottom: "fc2"
  top: "prob"
}
用于使用训练好的模型进行test。
5.来test模型,并且生成可视化的结果
# *-* coding:utf-8 *-*
import sys
import pickle
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
sys.path.append('D:/new_caffe/caffe/python')
import caffe
def _class1_prob(model, point):
    """Return the probability the network assigns to class 1 for one point.

    Args:
        model: Network with a 1x2 input blob named 'data' and an output
            blob named 'prob'.
        point: Array-like holding the two coordinates of the point.

    Returns:
        The second entry of the first row of the 'prob' output.
    """
    model.blobs['data'].data[...] = np.asarray(point).reshape((1, 2))
    return model.forward()['prob'][0][1]


# Build a network for inference: the structure comes from test.prototxt and
# the weights from the trained simple_mlp_iter_2000.caffemodel.
net = caffe.Net('test.prototxt', "simple_mlp_iter_2000.caffemodel", caffe.TEST)
# Load the 2-D samples and their labels.
with open('data.pkl', 'rb') as f:
    samples, labels = pickle.load(f)
samples = np.array(samples)
labels = np.array(labels)

# Step 1: draw the predicted class-1 probability as a surface over a regular
# grid covering the unit square.
X = np.arange(0, 1.05, 0.05)
Y = np.arange(0, 1.05, 0.05)
X, Y = np.meshgrid(X, Y)
# One (x, y) row per grid node, in row-major order so the reshape below
# restores the grid layout.
grids = np.column_stack((X.ravel(), Y.ravel()))
grid_probs = np.array([_class1_prob(net, grid) for grid in grids]).reshape(X.shape)
fig = plt.figure('Sample Surface')
# `fig.gca(projection='3d')` no longer accepts keyword arguments in recent
# Matplotlib releases; add_subplot creates the same 3-D axes everywhere.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, grid_probs, alpha=0.15, color='k', rstride=2, cstride=2, lw=0.5)

# Step 2: place every sample at the height of its predicted probability,
# using the same colours/markers per class as the 2-D plot.
samples0 = samples[labels == 0]
samples0_probs = [_class1_prob(net, sample) for sample in samples0]
samples1 = samples[labels == 1]
samples1_probs = [_class1_prob(net, sample) for sample in samples1]
ax.scatter(samples0[:, 0], samples0[:, 1], samples0_probs, c='b', marker='^', s=50)
ax.scatter(samples1[:, 0], samples1[:, 1], samples1_probs, c='r', marker='o', s=50)
plt.show()
test过程:
生成结果: