系统学习图像算法Day.37——C++应用——复现机器学习中的“逻辑回归”算法

≯℡__Kan透↙ 提交于 2020-01-09 12:33:27

逻辑回归用于解决这样的问题:当样本标记只有两类(如 0、1)时,希望预测样本取 1 的概率,而不是单纯地预测其为 0 或 1。
在学习逻辑回归之后,自己用 C++ 复现了该算法(注意:下面的代码中样本标记 y 取 +1/-1,而不是 0/1)。

#include <iostream>
#include <cmath>
#include <fstream>
#include <vector>

using namespace std;
// Number of input features per sample. Feature and weight arrays are DIMS+1
// long because slot 0 holds an extra constant term.
const int DIMS = 20;

// Data files loaded by main(): file1 fills `training`, file2 fills `test`.
// NOTE(review): string literals are const; ISO C++11 no longer allows binding
// them to a non-const char*. These should become `const char*`, which also
// requires file_to_vector() to accept a `const char*` filename.
char *file1 = "logical_regression_train.dat";
char *file2 = "logical_regression_test.dat";

// One sample: a feature vector plus its label.
struct training_unit    // per-sample record
{
    double x[DIMS+1];   // x[0] is set to the constant 1 by file_to_vector(); x[1..DIMS] are the features
    int y;              // label; compared against sign()'s +1/-1 result in err01()
};

vector <training_unit> training;   // samples loaded from file1 (used to fit the weights)
vector <training_unit> test;       // samples loaded from file2 (used to report the errors)

// Sign function: returns 1 for a strictly positive argument and -1 for
// everything else (zero included).
int sign( double signx )
{
    return (signx > 0) ? 1 : -1;
}

// Logistic function: 1 / (1 + e^(-x)).
double sigmoid( double x )
{
    const double denominator = 1 + exp(-x);
    return 1.0 / denominator;
}

// Copies the first Num elements of b into a, front to back.
// Does nothing when Num is zero or negative.
void arr_b2a( double *a, double *b, int Num )
{
    double *dst = a;
    const double *src = b;
    for (int remaining = Num; remaining > 0; --remaining)
    {
        *dst++ = *src++;
    }
}

// Dot product of the first Num elements of a and b, accumulated in index order.
// Returns 0.0 when Num is zero or negative.
double vector_multiply( const double *a, const double *b, int Num )
{
    double dot = 0.0;
    int k = 0;
    while (k < Num)
    {
        dot += a[k] * b[k];
        ++k;
    }
    return dot;
}

// Mean cross-entropy (logistic) error of weights W over the first N_data samples:
//
//     (1 / N_data) * sum_i ln(1 + exp(-y_i * (W . x_i)))
//
// training : samples to evaluate (taken by reference, so the data set is not copied)
// W        : weight vector with DIMS+1 entries
// N_data   : number of samples to use; must not exceed training.size()
//
// Returns 0.0 when N_data <= 0 instead of dividing by zero.
double errlog ( const vector <training_unit>& training, double *W, int N_data )
{
    if (N_data <= 0) return 0.0;

    double total = 0.0;
    for( int i=0; i<N_data; i++ )
    {
        const double z = -1 * vector_multiply(training[i].x, W, DIMS+1) * static_cast<double>(training[i].y);
        // ln(1 + e^z). For large positive z, e^z overflows to infinity, so use the
        // identity ln(1 + e^z) = z + ln(1 + e^(-z)) on that side.
        if (z > 0)
            total += z + log( 1 + exp(-z) );
        else
            total += log( 1 + exp(z) );
    }
    return total / N_data;
}

// 0/1 classification error: the fraction of the first N_data samples whose
// label differs from sign(W . x).
//
// training : samples to evaluate (taken by reference, so the data set is not copied)
// W        : weight vector with DIMS+1 entries
// N_data   : number of samples to use; must not exceed training.size()
//
// Returns 0.0 when N_data <= 0 instead of dividing by zero.
double err01(const vector <training_unit>& training, double *W, int N_data)
{
    if (N_data <= 0) return 0.0;

    int misclassified = 0;
    for(int i = 0; i < N_data; i++)
    {
        if(sign(vector_multiply(training[i].x,W,DIMS+1)) != training[i].y)  ++misclassified;
    }
    return static_cast<double>(misclassified) / N_data;
}

// Gradient of the mean cross-entropy error at W:
//
//     grad = (1 / N_data) * sum_i sigmoid(-y_i * (W . x_i)) * (-y_i * x_i)
//
// training : samples to use (taken by reference, so the data set is not copied)
// W        : current weight vector with DIMS+1 entries
// N_data   : number of samples to use; must not exceed training.size()
// grad     : output buffer with DIMS+1 entries; overwritten with the gradient
//
// When N_data <= 0 the output is all zeros.
void grad_Ein( const vector <training_unit>& training, double *W, int N_data, double *grad)
{
    // Always start from zero: the caller reuses one buffer across iterations,
    // and accumulating on top of the previous gradient would corrupt the result.
    for(int j=0; j<DIMS+1; j++)
    {
        grad[j] = 0.0;
    }
    if (N_data <= 0) return;

    for(int i=0; i<N_data; i++)
    {
        const double label = static_cast<double>(training[i].y);
        const double weight = sigmoid( -1 * vector_multiply(training[i].x, W, DIMS+1) * label );
        for(int j=0; j<DIMS+1; j++)
        {
            grad[j] += weight * (-1.0 * label * training[i].x[j]);
        }
    }
    for(int j=0; j<DIMS+1; j++)
    {
        grad[j] = grad[j]/N_data;
    }
}

// One gradient-descent step, applied in place: W <- W - iter_step * grad_Ein_Wt.
//
// W           : weight vector with DIMS+1 entries; updated by this call
// iter_step   : learning rate
// grad_Ein_Wt : gradient at the current W, DIMS+1 entries
void iter_W(double *W, double iter_step, double *grad_Ein_Wt)
{
    // Write straight into W so the caller sees the step. Updating a local
    // scratch array would throw the step away and leave W unchanged.
    for(int i=0; i<DIMS+1; i++)
    {
        W[i] -= (iter_step * grad_Ein_Wt[i]);
    }
}
 
void file_to_vector( char *filename, vector <training_unit>& vectorname )  //将数据文件中的数据读取到vector中去
{
    ifstream inFile;
    inFile.open(filename);
    if (!inFile.is_open())
    {
        cerr <<"Could not open"<<filename<<endl;
        exit(EXIT_FAILURE);
    }
    training_unit x21y1_model;
    while (!inFile.eof())      //把x、y数据读入vector容器
    {
        x21y1_model.x[0] = 1; 
        for(int i=1;i<DIMS+1;i++)
        {
            inFile >> x21y1_model.x[i] ;
            inFile >> x21y1_model.y;
        }
     vectorname.push_back(x21y1_model); 
     }
 inFile.close();
}

// Runs fixed-step gradient descent for logistic regression and prints the
// cross-entropy error and the 0/1 error measured on the test set.
//
// training  : samples used to compute the gradient (taken by reference, not copied)
// test      : samples used for the reported errors (taken by reference, not copied)
// iter_step : learning rate passed to iter_W
// W_init    : initial weights, DIMS+1 entries; not modified
// iter      : number of descent steps; with 0 the errors are reported for W_init
void logical_regression( const vector <training_unit>& training, const vector <training_unit>& test, double iter_step, double *W_init, int iter )
{
    const int N_data1 = static_cast<int>(training.size());
    const int N_data2 = static_cast<int>(test.size());

    // Seed the working weights before the loop so they are defined even when
    // no iteration runs.
    double W[DIMS+1];
    arr_b2a(W, W_init, DIMS+1 );

    double grad[DIMS+1];
    for ( int iter_i=0; iter_i<iter; iter_i++ )
    {
        // Hand grad_Ein a zeroed buffer on every step so nothing carries over
        // from the previous iteration, whether or not grad_Ein clears it.
        for ( int j=0; j<DIMS+1; j++ )
        {
            grad[j] = 0.0;
        }
        grad_Ein( training, W, N_data1, grad );
        iter_W( W, iter_step, grad );
    }

    const double err1 = errlog( test, W, N_data2 );
    const double err2 = err01( test, W, N_data2 );
    cout << "The errorlog is " << err1 <<endl;
    cout << "The error01 is " << err2 <<endl;
}

// Entry point: loads both data files, runs the regression with every initial
// weight set to 1, then waits for a key press before exiting.
int main()
{
    // Hyper-parameters of the descent.
    double iter_step = 0.001;
    int iter = 4000;

    // Initial weight vector: all ones.
    double W_init[DIMS+1];
    int k = 0;
    while (k <= DIMS)
    {
        W_init[k] = 1;
        ++k;
    }

    file_to_vector( file1, training );
    file_to_vector( file2, test );

    logical_regression( training, test, iter_step, W_init, iter );

    cin.get();   // keep the console window open until the user presses Enter
    return 0;
}
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!