最近跟着老师在学习神经网络,为了更加深入地理解这个黑盒,我打算自己用C/C++将其实现一遍。今天忙活了好一会儿,终于实现了一个BP神经网络,后期还会陆续实现CNN神经网络之类的,也会发上来和大家一起分享的~
由于最近比较忙,这里先直接放代码,关于一些原理以及自己的一点看法,会在有空的时候整理出来~
main.cpp
#include <iostream> #include <vector> #include "BPUtils.h" using namespace std; /* run this program using the console pauser or add your own getch, system("pause") or input loop */ vector<vector<double>>dataTest; vector<double>dataTestY; vector<vector<double>>trainDataX; vector<double>trainDataY; int main() { // double m1[3][1]={{1},{2},{3}}; // double m2[1][4]={1,2,3,4}; // double m3[3][4]; // dott(&m1[0][0],&m2[0][0],&m3[0][0],3,1,4); // for(int i=0;i<3;i++){ // for(int j=0;j<4;j++){ // cout<<m3[i][j]<<" "; // } // cout<<endl; // } createTrainSet(); createTestSet(); guiYiHua(dataTest); guiYiHua(trainDataX); NeuralNetwork nn(2,44,2); nn.train(trainDataX,trainDataY); // for(int i=0;i<trainDataX.size();i++){ // for(int j=0;j<trainDataX[i].size();j++){ // cout<<trainDataX[i][j]<<" "; // } // cout<<endl; // } // for(int i=0;i<trainDataX.size();i++){ // cout<<trainDataY[i]<<" "; // } // // cout<<endl<<"---------------------------------------------------------"<<endl; // // for(int i=0;i<dataTest.size();i++){ // for(int j=0;j<dataTest[i].size();j++){ // cout<<dataTest[i][j]<<" "; // } // cout<<endl; // } // for(int i=0;i<dataTestY.size();i++){ // cout<<dataTestY[i]<<" "; // } // NeuralNetwork nn(2,4,3); // vector<vector<double>>dataX; // vector<double>dataY; // for(int i=0;i<4;i++){ // vector<double>vec; // for(int j=0;j<2;j++){ // vec.push_back(i+j); // } // dataX.push_back(vec); // } // for(int i=0;i<4;i++){ // for(int j=0;j<2;j++){ // cout<<dataX[i][j]<<" "; // } // cout<<endl; // } // for(int i=0;i<4;i++){ // dataY.push_back(i); // } // nn.train(dataX,dataY); return 0; }
BPUtils.h
#ifndef BP_UTILS
#define BP_UTILS

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <ctime>
#include <string.h>
#include <cstdio>
#include <fstream>
#include <stdexcept>
#include <string>

// Kept for source compatibility with code that includes this header.
// The argument is parenthesised so expressions such as random(a + b) expand
// correctly.
#define random(x) (rand()%(x))
using namespace std;
#define MAXSIZE 99

// Global data sets. They are only declared here; exactly one translation unit
// (main.cpp) must define them.
extern vector<vector<double>>dataTest;    // test-set feature rows
extern vector<double>dataTestY;           // test-set labels
extern vector<vector<double>>trainDataX;  // training-set feature rows
extern vector<double>trainDataY;          // training-set labels

/// Splits `str` into tokens separated by any character contained in `delim`.
///
/// Behaves like a strtok() loop: runs of consecutive delimiters are collapsed
/// and empty tokens are never produced. Unlike strtok() it allocates no
/// temporary buffers and keeps no hidden global state.
///
/// @param str   text to split; an empty string yields an empty result
/// @param delim set of delimiter characters
/// @return the tokens in order of appearance
inline vector<string> split(const string& str, const string& delim) {
    vector<string> tokens;
    string::size_type begin = str.find_first_not_of(delim);
    while (begin != string::npos) {
        const string::size_type end = str.find_first_of(delim, begin);
        if (end == string::npos) {
            tokens.push_back(str.substr(begin));
            break;
        }
        tokens.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(delim, end);
    }
    return tokens;
}

/// Returns the largest element of a (possibly ragged) 2-D data set.
///
/// @return the maximum value, or -999 (the historical sentinel) when the data
///         set contains no elements at all.
inline double getMax(const vector<vector<double>>& dataSet) {
    bool seen = false;
    double best = -999;
    for (const vector<double>& row : dataSet) {
        for (const double value : row) {
            // The first element always seeds the result, so data that lies
            // entirely below the sentinel is still handled correctly.
            if (!seen || best < value) {
                best = value;
                seen = true;
            }
        }
    }
    return best;
}

/// Returns the smallest element of a (possibly ragged) 2-D data set.
///
/// @return the minimum value, or 999 (the historical sentinel) when the data
///         set contains no elements at all.
inline double getMin(const vector<vector<double>>& dataSet) {
    bool seen = false;
    double best = 999;
    for (const vector<double>& row : dataSet) {
        for (const double value : row) {
            if (!seen || best > value) {
                best = value;
                seen = true;
            }
        }
    }
    return best;
}

/// Min-max normalisation in place: x = (x - min) / (max - min).
///
/// The minimum and maximum are taken over the whole data set, not per column.
/// When every element has the same value the range is zero; all elements are
/// then set to 0 instead of dividing by zero.
inline void guiYiHua(vector<vector<double>>& dataSet) {
    const double highest = getMax(dataSet);
    const double lowest = getMin(dataSet);
    const double range = highest - lowest;
    for (vector<double>& row : dataSet) {
        for (double& value : row) {
            value = (range == 0.0) ? 0.0 : (value - lowest) / range;
        }
    }
}

/// Reads a tab-separated data file into feature rows and labels.
///
/// In every line the third field (index 2) is the label and all other fields
/// are features. Fields are converted with atof(), so unparsable text becomes
/// 0. Lines that have no label field (including blank lines) are skipped so
/// that `features` and `labels` always stay the same length.
///
/// @param path     file to read; a warning is printed if it cannot be opened
/// @param features receives one row of feature values per accepted line
/// @param labels   receives one label per accepted line
inline void bpLoadDataSet(const char* path,
                          vector<vector<double>>& features,
                          vector<double>& labels) {
    const size_t labelColumn = 2;

    ifstream input(path);
    if (!input) {
        cerr << "BPUtils: cannot open " << path << endl;
        return;
    }

    string line;
    while (getline(input, line)) {
        const vector<string> fields = split(line, "\t");
        if (fields.size() <= labelColumn) {
            continue;
        }
        vector<double> row;
        row.reserve(fields.size() - 1);
        for (size_t i = 0; i < fields.size(); ++i) {
            const double value = atof(fields[i].c_str());
            if (i == labelColumn) {
                labels.push_back(value);
            } else {
                row.push_back(value);
            }
        }
        features.push_back(row);
    }
}

/// Loads the training set from "train.txt" into trainDataX / trainDataY.
inline void createTrainSet() {
    bpLoadDataSet("train.txt", trainDataX, trainDataY);
}

/// Loads the test set from "test.txt" into dataTest / dataTestY.
inline void createTestSet() {
    bpLoadDataSet("test.txt", dataTest, dataTestY);
}

/// Sigmoid activation function: 1 / (1 + e^-x).
inline double sigmoid(double x) {
    return 1/(1+exp(-x));
}

/// Derivative of the sigmoid, expressed in terms of the sigmoid's OUTPUT.
///
/// @param x a value that is already sigmoid(z); the result is x * (1 - x)
inline double dsigmoid(double x) {
    return x*(1-x);
}

/// A back-propagation network with one hidden layer and sigmoid activations
/// on both the hidden and the output layer.
class NeuralNetwork{
public:
    // Number of units in the input layer.
    int inputLayers;
    // Number of units in the hidden layer.
    int hidenLayers;
    // Number of units in the output layer.
    int outputLayers;
    // Input-to-hidden weights: (inputLayers + 1) rows, the extra last row
    // being the bias, by hidenLayers columns.
    vector<vector<double>>VArr;
    // Hidden-to-output weights: hidenLayers rows by outputLayers columns.
    vector<vector<double>>WArr;

private:
    // Number of epochs between two accuracy reports.
    static const int kReportInterval = 10000;

    // Forward pass for one sample that already carries the trailing bias
    // value. Fills `hidden` and `output` with the sigmoid activations.
    void forward(const vector<double>& biased,
                 vector<double>& hidden,
                 vector<double>& output) const {
        hidden.assign(hidenLayers, 0.0);
        output.assign(outputLayers, 0.0);
        for (int j = 0; j < hidenLayers; ++j) {
            double sum = 0;
            for (int i = 0; i < inputLayers + 1; ++i) {
                sum += biased[i] * VArr[i][j];
            }
            hidden[j] = sigmoid(sum);
        }
        for (int k = 0; k < outputLayers; ++k) {
            double sum = 0;
            for (int j = 0; j < hidenLayers; ++j) {
                sum += hidden[j] * WArr[j][k];
            }
            output[k] = sigmoid(sum);
        }
    }

    // Runs one sample WITHOUT bias through the network and returns the
    // output-layer activations (one value per output unit).
    // Throws std::invalid_argument when the sample width is not inputLayers.
    vector<double> predict(vector<double> test) const {
        if (test.size() != static_cast<size_t>(inputLayers)) {
            throw invalid_argument("NeuralNetwork::predict: sample width does not match inputLayers");
        }
        test.push_back(1);  // bias input
        vector<double> hidden;
        vector<double> output;
        forward(test, hidden, output);
        return output;
    }

    // Index of the largest element (first one wins on ties), -1 if empty.
    int getMaxIndex(const vector<double>& vec) const {
        if (vec.empty()) {
            return -1;
        }
        size_t best = 0;
        for (size_t i = 1; i < vec.size(); ++i) {
            if (vec[best] < vec[i]) {
                best = i;
            }
        }
        return static_cast<int>(best);
    }

    // Fraction of the global test set (dataTest / dataTestY) whose arg-max
    // output index equals the stored label. Returns 0 when there is nothing
    // to evaluate.
    double evaluateOnTestSet() const {
        // Never index the labels past their end if the two vectors differ.
        const size_t count = dataTest.size() < dataTestY.size() ? dataTest.size()
                                                                : dataTestY.size();
        if (count == 0) {
            return 0.0;
        }
        size_t correct = 0;
        for (size_t k = 0; k < count; ++k) {
            const int predicted = getMaxIndex(predict(dataTest[k]));
            // Labels are stored as doubles and are expected to hold exact
            // class indices (0, 1, ...).
            if (predicted == dataTestY[k]) {
                ++correct;
            }
        }
        return static_cast<double>(correct) / dataTestY.size();
    }

public:
    /// Creates the network and fills both weight matrices with values drawn
    /// from rand(), scaled to the range [-1, 1].
    ///
    /// @param _inputLayers  number of input units (features per sample)
    /// @param _hidenLayers  number of hidden units
    /// @param _outputLayers number of output units
    NeuralNetwork(int _inputLayers,int _hidenLayers,int _outputLayers){
        inputLayers=_inputLayers;
        hidenLayers=_hidenLayers;
        outputLayers=_outputLayers;
        // V: one row per input unit plus one bias row.
        for (int i = 0; i < inputLayers + 1; ++i) {
            vector<double> row;
            for (int j = 0; j < hidenLayers; ++j) {
                row.push_back(static_cast<double>(rand())/RAND_MAX*2-1);
            }
            VArr.push_back(row);
        }
        // W: one row per hidden unit.
        for (int i = 0; i < hidenLayers; ++i) {
            vector<double> row;
            for (int j = 0; j < outputLayers; ++j) {
                row.push_back(static_cast<double>(rand())/RAND_MAX*2-1);
            }
            WArr.push_back(row);
        }
    }

    /// Trains the network with stochastic gradient descent: each epoch picks
    /// one random sample, runs a forward pass and updates both weight
    /// matrices. Every 10000 epochs the accuracy on the global test set
    /// (dataTest / dataTestY) is printed to stdout.
    ///
    /// The forward pass always uses the current weights. Earlier revisions
    /// kept a copy of VArr made before the loop, so updates to the
    /// input-to-hidden weights were never seen during training.
    ///
    /// NOTE(review): every output unit is trained towards the raw label value
    /// dataY[i], while the accuracy check treats the label as a class index
    /// (arg-max of the outputs). With more than one output unit a one-hot
    /// target may be what is intended - confirm before relying on the
    /// reported accuracy.
    ///
    /// @param dataX  training samples, inputLayers values each (no bias)
    /// @param dataY  one label per sample
    /// @param lr     learning rate
    /// @param epochs number of single-sample updates to perform
    /// @throws std::invalid_argument if dataX is empty, if dataX and dataY
    ///         differ in length, or if a sample does not have inputLayers
    ///         values
    void train(vector<vector<double>>dataX,const vector<double>&dataY,double lr=0.03,int epochs=1000000){
        if (dataX.empty()) {
            throw invalid_argument("NeuralNetwork::train: empty training set");
        }
        if (dataX.size() != dataY.size()) {
            throw invalid_argument("NeuralNetwork::train: dataX and dataY differ in length");
        }
        // Append the constant bias input to every sample.
        for (vector<double>& row : dataX) {
            if (row.size() != static_cast<size_t>(inputLayers)) {
                throw invalid_argument("NeuralNetwork::train: sample width does not match inputLayers");
            }
            row.push_back(1);
        }

        vector<double> hidden;
        vector<double> output;
        vector<double> outputDelta(outputLayers > 0 ? outputLayers : 0);
        vector<double> hiddenDelta(hidenLayers > 0 ? hidenLayers : 0);

        srand(static_cast<unsigned int>(time(0)));

        for (int n = 0; n < epochs; ++n) {
            // Pick one random sample for this update.
            const size_t pick = static_cast<size_t>(rand()) % dataX.size();
            const vector<double>& sample = dataX[pick];

            forward(sample, hidden, output);

            // Output-layer delta: error times the sigmoid derivative.
            for (int k = 0; k < outputLayers; ++k) {
                const double error = dataY[pick] - output[k];
                outputDelta[k] = error * dsigmoid(output[k]);
            }

            // Hidden-layer delta: output deltas propagated back through W.
            // This must happen before W is modified below.
            for (int j = 0; j < hidenLayers; ++j) {
                double sum = 0;
                for (int k = 0; k < outputLayers; ++k) {
                    sum += outputDelta[k] * WArr[j][k];
                }
                hiddenDelta[j] = sum * dsigmoid(hidden[j]);
            }

            // Weight updates scaled by the learning rate.
            for (int j = 0; j < hidenLayers; ++j) {
                for (int k = 0; k < outputLayers; ++k) {
                    WArr[j][k] += lr * (hidden[j] * outputDelta[k]);
                }
            }
            for (int i = 0; i < inputLayers + 1; ++i) {
                for (int j = 0; j < hidenLayers; ++j) {
                    VArr[i][j] += lr * (sample[i] * hiddenDelta[j]);
                }
            }

            // Periodically report the accuracy on the test set.
            if (n % kReportInterval == 0) {
                const double accuracy = evaluateOnTestSet();
                cout<<"epoch: "<<n<<", "<<"accuracy: "<<accuracy<<endl;
            }
        }
    }
};

#endif