ID3算法 C++实现

思想:

用 DFS 递归建树:每一层都在上一层筛选出的子表基础上,按当前属性的每个取值再筛选出新的子表;对每个取值先判断该分支是否已经是叶子结点(子表中的标签全部相同),如果不是,就在尚未使用的属性中选取信息期望最小(即信息增益最大)的属性继续向下划分。

(代码有点辣鸡qwq)

#include <bits/stdc++.h>

using namespace std;
// Pair alias; not referenced anywhere in the visible code.
typedef pair<int,int>P;

// Large sentinel used as the starting value when searching for a minimum.
const int INF=0x3f3f3f3f;
// Neither N nor mod is referenced in the visible code.
const int N=25,mod=32767;

// One unit of indentation printed per tree level when the tree is rendered.
string sep="     ";
// Column-wise tables: index 1..4 hold the attribute columns and index 5 holds
// the label column; row r of the data set is element r of every column.
// Index 0 is never filled. `now` receives the rows read by main; `tmp` and
// `vec` are scratch tables for filtered subsets.
vector<string>now[6],tmp[6],vec[6];
// type[k] lists the values column k may take; it is filled by init().
vector<string>type[6];
// vis[j]==1 while attribute j is already used on the current root-to-node path.
int vis[6];
// Display name of each attribute column; index 0 is a placeholder.
string name[6]={" ","weather","temperature","humidity","windy"};

// Binary entropy (in bits) of a two-class split holding `a` items of one class
// and `b` items of the other.
// Returns 0 when either count contributes nothing to the total (a pure or
// empty split), which also avoids evaluating log(0).
double cal(double a,double b){
    const double total=a+b;
    if(total==a||total==b){
        return 0;
    }
    const double shareA=a/total;
    const double shareB=b/total;
    // log2(x) is written as log(x)/log(2).
    const double bitsA=log(shareA)/log(2);
    const double bitsB=log(shareB)/log(2);
    return -shareA*bitsA-shareB*bitsB;
}

// Expected information (weighted entropy of the label column) that remains
// after splitting the table `now` on attribute column `k`.
//   now : column-wise table; now[k] is the attribute column, now[5] the labels.
//   k   : index of the attribute column to evaluate.
// Each value listed in type[k] contributes (rows with that value / all rows)
// times the entropy of the "no" vs. not-"no" labels among those rows.
// Returns 0 for an empty table instead of dividing by zero; the resulting NaN
// would make every `c<mi` comparison in the callers false.
double Info(vector<string>now[6],int k){
    const size_t rows=now[k].size();
    if(rows==0)return 0;

    double res=0;
    for(size_t i=0;i<type[k].size();i++){
        const string& value=type[k][i];// attribute value being tallied
        double matched=0,negatives=0;
        for(size_t j=0;j<rows;j++){
            if(now[k][j]==value){
                matched++;
                if(now[5][j]=="no")negatives++;
            }
        }
        res+=(matched/rows)*cal(matched-negatives,negatives);
    }
    return res;
}

// Appends the allowed values of every column to the global `type` table:
// columns 1..4 are the attributes, column 5 is the label.
void init(){
    // Row k lists the values for column k; unused slots are null and end the row.
    static const char* const kValues[6][4]={
        {0},
        {"sunny","overcast","rainy"},
        {"hot","mild","cool"},
        {"high","normal"},
        {"weak","strong"},
        {"yes","no"}
    };
    for(int col=1;col<=5;col++){
        for(int v=0;v<4&&kValues[col][v]!=0;v++){
            type[col].push_back(kValues[col][v]);
        }
    }
}

// Rebuilds `tmp` as the subset of `now` whose column `k` equals `s`.
//   now : source table (columns 1..5), read only.
//   tmp : destination table; columns 1..5 are cleared first, column 0 is left alone.
//   k   : column to filter on.
//   s   : value a row must have in column k to be kept.
// Row order is preserved.
void build(vector<string>now[6],vector<string>tmp[6],int k,string s){
    for(int col=1;col<=5;col++){
        tmp[col].clear();
    }
    for(int row=0;row<now[k].size();row++){
        if(now[k][row]!=s){
            continue;// row does not belong to this branch
        }
        for(int col=1;col<=5;col++){
            tmp[col].push_back(now[col][row]);
        }
    }
}


// Checks whether every row of `tmp` whose column `u` equals `s` carries the
// same label (column 5).
// Returns  1 when no such row has a label other than "yes" (this includes the
//            case where no row matches at all),
//          0 when at least one row matches and none of them is labelled "yes",
//         -1 when the matching rows are mixed.
int judge(vector<string>tmp[6],int u,string s){
    bool sawYes=false;
    bool sawOther=false;
    for(int row=0;row<tmp[u].size();row++){
        if(tmp[u][row]!=s){
            continue;
        }
        if(tmp[5][row]=="yes"){
            sawYes=true;
        }else{
            sawOther=true;
        }
    }

    if(!sawOther){
        return 1;
    }
    return sawYes?-1:0;
}

// Returns 1 when "yes" labels are at least as frequent as all other labels in
// rows[5] (a tie or an empty table counts as "yes"), otherwise 0.
static int majorityLabel(vector<string>rows[6]){
    size_t yes=0;
    for(size_t r=0;r<rows[5].size();r++){
        if(rows[5][r]=="yes")yes++;
    }
    return yes*2>=rows[5].size()?1:0;
}

// Prints the decision subtree rooted at attribute `u` and recurses depth-first.
//   u   : attribute column this node splits on.
//   dep : indentation level of the node (number of `sep` units printed first).
//   now : table of the rows that reach this node (read only here).
//   tmp : scratch table; overwritten with the filtered subset of each branch.
// For every value of attribute u the matching rows are filtered into `tmp`.
// A branch whose rows all share one label is printed as a leaf; otherwise the
// unused attribute with the smallest expected information (largest gain) is
// chosen and the subtree is printed two levels deeper.
void ID3(int u,int dep,vector<string>now[6],vector<string>tmp[6]){
    for(int i=0;i<dep;i++)cout<<sep;
    cout<<name[u]<<endl;

    for(size_t i=0;i<type[u].size();i++){
        const string& value=type[u][i];
        build(now,tmp,u,value);// rows of this branch only
        for(int d=0;d<=dep;d++)cout<<sep;
        cout<<value;

        // 1 = all "yes", 0 = no "yes" at all, -1 = mixed labels.
        // A branch without any rows has nothing to vote with, so it takes the
        // majority label of the parent table.
        int flag=tmp[5].empty()?majorityLabel(now):judge(tmp,u,value);

        int best=-1;
        if(flag==-1){
            double mi=INF;
            for(int j=1;j<5;j++){
                if(vis[j]==1)continue;// already used on this path
                double c=Info(tmp,j);
                if(c<mi){// smallest expected information == largest gain
                    mi=c;
                    best=j;
                }
            }
            // Every attribute is already used but the labels still disagree:
            // close the branch with the majority label instead of indexing
            // vis[] with an unset attribute.
            if(best==-1)flag=majorityLabel(tmp);
        }

        if(flag!=-1){
            if(flag==1)cout<<"  yes"<<endl;
            else cout<<"  no"<<endl;
            continue;
        }
        cout<<endl;

        vis[best]=1;
        // The child filters `tmp` into its own scratch table. Sharing one
        // global scratch table across levels would make the child's source and
        // destination the same array from the third level on, and build()
        // would then clear the rows it is about to read.
        vector<string>child[6];
        ID3(best,dep+2,tmp,child);
        vis[best]=0;
    }
}

int main(){
    int t;
    cin>>t;
    init();
    string s;
    for(int i=1;i<=t;i++){
        for(int j=1;j<=5;j++){
            cin>>s;
            now[j].push_back(s);
            tmp[j].push_back(s);
        }
    }

    int ans;
    double mi=INF;
    for(int i=1;i<5;i++){
        double c=Info(tmp,i);
        if(c<mi){//求信息期望最小,那么信息增益就最大
            mi=c;
            ans=i;
        }
    }
    vis[ans]=1;
    cout<<"The decision tree is :"<<endl;
    ID3(ans,0,now,tmp);
}
/*
14
sunny hot high weak no
sunny hot high strong no
overcast hot high weak yes
rainy mild high weak yes
rainy cool normal weak yes
rainy cool normal strong no
overcast cool normal strong yes
sunny mild high weak no
sunny cool normal weak yes
rainy mild normal weak yes
sunny mild normal strong yes
overcast mild high strong yes
overcast hot normal weak yes
rainy mild high strong no
*/

运行结果: