缘起
最近跟着老师在学习神经网络,为了更加深刻地理解这个黑盒,我打算自己用C/C++将其实现一遍。今天忙活了好一会儿,终于实现了一个BP神经网络,后期还会陆续实现CNN(卷积神经网络)等其他网络,也会发上来和大家一起分享的~
因为最近比较忙,所以这里直接放代码了,关于一些原理以及自己的一点见解会在有空的时候整理出来的~
代码
main.cpp
#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
// Test-set feature rows; createTestSet() appends one row per parsed line of test.txt.
vector<vector<double>>dataTest;
// Test-set labels (the third tab-separated field of each test.txt line).
vector<double>dataTestY;
// Training-set feature rows; createTrainSet() appends one row per parsed line of train.txt.
vector<vector<double>>trainDataX;
// Training-set labels (the third tab-separated field of each train.txt line).
vector<double>trainDataY;
// Program entry point.
//
// Loads the training and test sets into the global vectors, rescales both
// feature matrices with guiYiHua(), then trains a network with 2 inputs,
// 44 hidden units and 2 outputs. NeuralNetwork::train() prints the test-set
// accuracy periodically while it runs.
//
// Returns 0 on success, 1 when no training samples could be loaded.
int main() {
    createTrainSet();
    createTestSet();

    // Training picks a random sample index modulo the sample count, so an
    // empty training set (e.g. train.txt missing) must be rejected up front.
    if(trainDataX.empty()){
        cerr<<"No training samples were loaded; nothing to train on."<<endl;
        return 1;
    }

    // NOTE(review): each set is scaled with its own min/max. Scaling the test
    // set with the training set's statistics is the usual practice - confirm
    // whether the independent scaling here is intentional.
    guiYiHua(dataTest);
    guiYiHua(trainDataX);

    NeuralNetwork nn(2,44,2);
    nn.train(trainDataX,trainDataY);
    return 0;
}
BPUtils.h
#ifndef BP_UTILS
#define BP_UTILS
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <ctime>
#include <string.h>
#include <cstdio>
#include <fstream>
// random(x): pseudo-random integer in [0, x) drawn from rand().
// The argument is parenthesized so that expressions such as random(a+b)
// expand to rand()%(a+b) rather than rand()%a+b.
#define random(x) (rand()%(x))
using namespace std;
// Capacity (in chars) for fixed-size token buffers; retained for code that
// still references it.
#define MAXSIZE 99
// Global data sets shared across the program (declared here, defined in main.cpp).
// Test set: feature rows and their labels.
extern vector<vector<double>>dataTest;
extern vector<double>dataTestY;
// Training set: feature rows and their labels.
extern vector<vector<double>>trainDataX;
extern vector<double>trainDataY;
// Splits `str` into tokens separated by any of the characters in `delim`.
//
// Semantics match strtok(): every character of `delim` is a separator, runs
// of consecutive separators are collapsed, and empty tokens are never
// produced. An empty `str` yields an empty vector; an empty `delim` yields the
// whole string as a single token.
//
// The implementation works directly on std::string, so it needs no temporary
// heap buffers (the previous strtok-based version leaked two `new char[]`
// allocations per call) and does not touch strtok's hidden global state.
// Declared inline because the definition lives in a header.
inline vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    string::size_type tokenStart = str.find_first_not_of(delim);
    while (tokenStart != string::npos) {
        const string::size_type tokenEnd = str.find_first_of(delim, tokenStart);
        if (tokenEnd == string::npos) {
            // Last token runs to the end of the string.
            res.push_back(str.substr(tokenStart));
            break;
        }
        res.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        // Skip the whole run of separators before the next token.
        tokenStart = str.find_first_not_of(delim, tokenEnd);
    }
    return res;
}
// Returns the largest value stored anywhere in the 2-D `dataSet`.
//
// The running maximum is seeded from the first element encountered, so the
// result is correct for any value range (a fixed -999 seed gave the wrong
// answer when every value was below -999). When `dataSet` holds no values at
// all the legacy sentinel -999 is returned.
// The matrix is taken by const reference to avoid copying it on every call.
// Declared inline because the definition lives in a header.
inline double getMax(const vector<vector<double>>& dataSet){
    bool haveValue=false;
    double largest=-999; // legacy result for an empty data set
    for(const vector<double>& row : dataSet){
        for(const double value : row){
            if(!haveValue || value>largest){
                largest=value;
                haveValue=true;
            }
        }
    }
    return largest;
}
// Returns the smallest value stored anywhere in the 2-D `dataSet`.
//
// The running minimum is seeded from the first element encountered, so the
// result is correct for any value range (a fixed 999 seed gave the wrong
// answer when every value was above 999). When `dataSet` holds no values at
// all the legacy sentinel 999 is returned.
// The matrix is taken by const reference to avoid copying it on every call.
// Declared inline because the definition lives in a header.
inline double getMin(const vector<vector<double>>& dataSet){
    bool haveValue=false;
    double smallest=999; // legacy result for an empty data set
    for(const vector<double>& row : dataSet){
        for(const double value : row){
            if(!haveValue || value<smallest){
                smallest=value;
                haveValue=true;
            }
        }
    }
    return smallest;
}
// Min-max normalization ("gui yi hua"), applied in place:
//     x = (x - min) / (max - min)
// where min and max are taken over ALL values of `dataSet` (not per column),
// so the result lies in [0, 1].
//
// The extremes are found in a single pass seeded from the first value, which
// keeps the result correct for any value range. Two degenerate cases are
// handled explicitly:
//   * no values at all  -> nothing to do;
//   * max == min        -> every value becomes 0 instead of NaN (0/0).
// Declared inline because the definition lives in a header.
inline void guiYiHua(vector<vector<double>>&dataSet){
    bool haveValue=false;
    double lowest=0;
    double highest=0;
    for(const vector<double>& row : dataSet){
        for(const double value : row){
            if(!haveValue){
                lowest=highest=value;
                haveValue=true;
            }else if(value<lowest){
                lowest=value;
            }else if(value>highest){
                highest=value;
            }
        }
    }
    if(!haveValue){
        return;
    }

    const double range=highest-lowest;
    for(vector<double>& row : dataSet){
        for(double& value : row){
            value=(range>0) ? (value-lowest)/range : 0.0;
        }
    }
}
// Loads the TRAINING set from "train.txt" into the globals trainDataX /
// trainDataY.
//
// Each line is split on tab characters. The third field (index 2) is the
// label and is appended to trainDataY; every other field is a feature and is
// appended, in order, to that line's row in trainDataX. Fields are converted
// with atof(), so non-numeric text becomes 0.
//
// Lines with fewer than three fields (including blank lines) are skipped so
// that trainDataX and trainDataY always stay the same length. If the file
// cannot be opened a message is written to cerr and the globals are left
// untouched.
// Declared inline because the definition lives in a header.
inline void createTrainSet(){
    // Read-only stream: a plain fstream opens for read+write and would fail
    // on a read-only file.
    ifstream f("train.txt");
    if(!f){
        cerr<<"createTrainSet: cannot open train.txt"<<endl;
        return;
    }
    string line;
    while(getline(f,line)){
        const vector<string> fields=split(line,"\t");
        if(fields.size()<3){
            continue; // no label column: skip to keep X and Y aligned
        }
        vector<double> features;
        for(size_t i=0;i<fields.size();i++){
            // Convert straight from the string; no fixed-size buffer to overflow.
            const double value=atof(fields[i].c_str());
            if(i!=2){
                features.push_back(value);
            }else{
                trainDataY.push_back(value);
            }
        }
        trainDataX.push_back(features);
    }
}
// Loads the TEST set from "test.txt" into the globals dataTest / dataTestY.
//
// Each line is split on tab characters. The third field (index 2) is the
// label and is appended to dataTestY; every other field is a feature and is
// appended, in order, to that line's row in dataTest. Fields are converted
// with atof(), so non-numeric text becomes 0.
//
// Lines with fewer than three fields (including blank lines) are skipped so
// that dataTest and dataTestY always stay the same length. If the file cannot
// be opened a message is written to cerr and the globals are left untouched.
// Declared inline because the definition lives in a header.
inline void createTestSet(){
    // Read-only stream: a plain fstream opens for read+write and would fail
    // on a read-only file.
    ifstream f("test.txt");
    if(!f){
        cerr<<"createTestSet: cannot open test.txt"<<endl;
        return;
    }
    string line;
    while(getline(f,line)){
        const vector<string> fields=split(line,"\t");
        if(fields.size()<3){
            continue; // no label column: skip to keep X and Y aligned
        }
        vector<double> features;
        for(size_t i=0;i<fields.size();i++){
            // Convert straight from the string; no fixed-size buffer to overflow.
            const double value=atof(fields[i].c_str());
            if(i!=2){
                features.push_back(value);
            }else{
                dataTestY.push_back(value);
            }
        }
        dataTest.push_back(features);
    }
}
// Logistic sigmoid activation: 1 / (1 + e^(-x)), a value between 0 and 1.
// For very large |x| the result saturates to exactly 1 or 0 in floating point.
// Declared inline because the definition lives in a header; a non-inline
// definition would break the link as soon as two source files include it.
inline double sigmoid(double x){
    return 1.0/(1.0+exp(-x));
}
// Derivative of the sigmoid, expressed in terms of the sigmoid's OUTPUT.
// `x` must already be a sigmoid value y = sigmoid(z); the function returns
// y * (1 - y), which equals d(sigmoid)/dz at that z. Passing a raw
// pre-activation here does NOT give the derivative.
// Declared inline because the definition lives in a header.
inline double dsigmoid(double x){
    return x*(1.0-x);
}
// A small fully connected back-propagation (BP) network with one sigmoid
// hidden layer and a sigmoid output layer, trained one random sample at a
// time.
//
// Data flow:
//   input (inputLayers values + constant bias 1) --VArr--> hidden --WArr--> output
// The hidden-to-output stage has no bias term.
//
// Labels are class indices: label c means output unit c should be 1 and all
// other output units 0. Predictions are scored the same way, by comparing the
// index of the largest output with the label.
class NeuralNetwork{
public:
    // Number of input units (the bias input is added internally).
    int inputLayers;
    // Number of hidden units.
    int hidenLayers;
    // Number of output units.
    int outputLayers;
    // Input-to-hidden weights: (inputLayers + 1) rows x hidenLayers columns.
    // The extra, last row holds the weights of the constant bias input.
    vector<vector<double>>VArr;
    // Hidden-to-output weights: hidenLayers rows x outputLayers columns.
    vector<vector<double>>WArr;

private:
    // Forward pass for one sample.
    // `features` must hold exactly inputLayers values (no bias element).
    // On return `hidden` holds the hidenLayers hidden activations and
    // `output` the outputLayers output activations.
    void forward(const vector<double>& features,
                 vector<double>& hidden,
                 vector<double>& output) const{
        hidden.assign(static_cast<size_t>(hidenLayers),0.0);
        for(int j=0;j<hidenLayers;++j){
            double sum=0;
            for(int i=0;i<inputLayers;++i){
                sum+=features[i]*VArr[i][j];
            }
            sum+=VArr[inputLayers][j]; // bias input is the constant 1
            hidden[j]=sigmoid(sum);
        }

        output.assign(static_cast<size_t>(outputLayers),0.0);
        for(int k=0;k<outputLayers;++k){
            double sum=0;
            for(int j=0;j<hidenLayers;++j){
                sum+=hidden[j]*WArr[j][k];
            }
            output[k]=sigmoid(sum);
        }
    }

    // Returns the output-layer activations for one sample; the index of the
    // largest one is taken as the predicted class.
    vector<double> predict(const vector<double>& test) const{
        vector<double> hidden;
        vector<double> output;
        forward(test,hidden,output);
        return output;
    }

    // Index of the first largest element of `vec`, or -1 when `vec` is empty.
    int getMaxIndex(const vector<double>& vec) const{
        int index=-1;
        for(size_t i=0;i<vec.size();++i){
            if(index<0 || vec[i]>vec[index]){
                index=static_cast<int>(i);
            }
        }
        return index;
    }

    // Scores the current weights on the global test set (dataTest/dataTestY)
    // and prints one progress line. Only rows that have a matching label are
    // evaluated; a row whose width differs from inputLayers counts as a miss.
    // With no test data the reported accuracy is 0 rather than NaN.
    void reportAccuracy(int epoch) const{
        const size_t total=(dataTest.size()<dataTestY.size())
                               ? dataTest.size()
                               : dataTestY.size();
        int correct=0;
        for(size_t r=0;r<total;++r){
            if(dataTest[r].size()!=static_cast<size_t>(inputLayers)){
                continue;
            }
            if(getMaxIndex(predict(dataTest[r]))==dataTestY[r]){
                ++correct;
            }
        }
        const double accuracy=(total>0) ? static_cast<double>(correct)/total : 0.0;
        cout<<"epoch: "<<epoch<<", "<<"accuracy: "<<accuracy<<endl;
    }

public:
    // Builds the network and fills both weight matrices with values drawn
    // from rand(), scaled to [-1, 1].
    // NOTE: rand() is used here before train() calls srand(), so the initial
    // weights are the same on every run unless the caller seeds rand() first.
    NeuralNetwork(int _inputLayers,int _hidenLayers,int _outputLayers)
        : inputLayers(_inputLayers),
          hidenLayers(_hidenLayers),
          outputLayers(_outputLayers){
        // V: one row per input unit plus one row for the bias.
        for(int i=0;i<inputLayers+1;i++){
            vector<double>row;
            for(int j=0;j<hidenLayers;j++){
                row.push_back(static_cast<double>(rand())/RAND_MAX*2-1);
            }
            VArr.push_back(row);
        }
        // W: one row per hidden unit.
        for(int i=0;i<hidenLayers;i++){
            vector<double>row;
            for(int j=0;j<outputLayers;j++){
                row.push_back(static_cast<double>(rand())/RAND_MAX*2-1);
            }
            WArr.push_back(row);
        }
    }

    // Trains the network with stochastic gradient descent.
    //
    //   dataX  - one row per sample, each with exactly inputLayers features
    //            (do not append a bias column; it is handled internally)
    //   dataY  - class index of each sample, at least one entry per row of dataX
    //   lr     - learning rate
    //   epochs - number of single-sample update steps
    //
    // Every step picks one random sample, runs a forward pass with the
    // CURRENT weights, and applies the delta rule to WArr and VArr. Every
    // 10000 steps (including step 0) the accuracy on the global test set is
    // printed.
    //
    // Invalid input (no samples, fewer labels than samples, or a row of the
    // wrong width) is reported on cerr and training is not started.
    void train(const vector<vector<double>>& dataX,const vector<double>& dataY,double lr=0.03,int epochs=1000000){
        if(dataX.empty()){
            cerr<<"NeuralNetwork::train: training set is empty"<<endl;
            return;
        }
        if(dataY.size()<dataX.size()){
            cerr<<"NeuralNetwork::train: fewer labels than training samples"<<endl;
            return;
        }
        for(size_t r=0;r<dataX.size();++r){
            if(dataX[r].size()!=static_cast<size_t>(inputLayers)){
                cerr<<"NeuralNetwork::train: sample "<<r<<" does not have "
                    <<inputLayers<<" features"<<endl;
                return;
            }
        }

        srand(static_cast<unsigned int>(time(0)));

        vector<double> hidden;
        vector<double> output;
        vector<double> outputDelta(static_cast<size_t>(outputLayers),0.0);
        vector<double> hiddenDelta(static_cast<size_t>(hidenLayers),0.0);

        for(int n=0;n<epochs;n++){
            // Pick one random training sample for this update.
            const size_t pick=static_cast<size_t>(rand())%dataX.size();
            const vector<double>& sample=dataX[pick];

            forward(sample,hidden,output);

            // Output-layer delta: (target - output) * sigmoid'(output).
            // The target is one-hot: 1 for the unit whose index equals the
            // label, 0 for every other unit.
            for(int k=0;k<outputLayers;++k){
                const double target=(k==dataY[pick]) ? 1.0 : 0.0;
                outputDelta[k]=(target-output[k])*dsigmoid(output[k]);
            }

            // Hidden-layer delta: error propagated back through W (using the
            // weights from BEFORE this step's update) times sigmoid'(hidden).
            for(int j=0;j<hidenLayers;++j){
                double propagated=0;
                for(int k=0;k<outputLayers;++k){
                    propagated+=outputDelta[k]*WArr[j][k];
                }
                hiddenDelta[j]=propagated*dsigmoid(hidden[j]);
            }

            // W += lr * hidden^T . outputDelta
            for(int j=0;j<hidenLayers;++j){
                for(int k=0;k<outputLayers;++k){
                    WArr[j][k]+=lr*hidden[j]*outputDelta[k];
                }
            }

            // V += lr * input^T . hiddenDelta (the bias row sees input 1)
            for(int j=0;j<hidenLayers;++j){
                for(int i=0;i<inputLayers;++i){
                    VArr[i][j]+=lr*sample[i]*hiddenDelta[j];
                }
                VArr[inputLayers][j]+=lr*hiddenDelta[j];
            }

            // Report test-set accuracy every 10000 update steps.
            if(n%10000==0){
                reportAccuracy(n);
            }
        }
    }
};
#endif