机器学习算法实现:[1]候选消除学习算法
1、数据结构定义:#define A_CHAR_MAX 16#define A_VALUE_MAX 16#define A_NUM_MAX 32#define SAMPLES_MAX 256#define ALL -1#define NUL -2#define YES 1#define NO 0#define NUKOWN -1// 属性struct Attribute{char name[A_CHAR_MAX]; // 属性名称int num; // 属性值个数char att[A_VALUE_MAX][A_CHAR_MAX]; // 属性值};// 假设struct Hypothesis{int num; // 属性个数Attribute an[A_NUM_MAX]; // 属性集合};// 假设值struct HypoValue{int value[A_NUM_MAX];};// 样本struct Sample{HypoValue ev; // 假设int result; // 正例/反例};Hypothesis g_Hypo; // 假设集合Sample g_sa[SAMPLES_MAX]; // 样本空间int g_sn; // 样本数list<HypoValue*> g_G; // 一般边界Glist<HypoValue*> g_S; // 特殊边界S
2、读文件和进行样例对G边界进行特殊化,S边界进行一般化:
3、打印输出函数如下:void printdata(list<HypoValue*> &g_S,list<HypoValue*> &g_G,int i){ int j,k;list<HypoValue*>::iterator it;if(i==-3)printf("最终的特殊边界S集合有%d个极小一般化成员为:\n",g_S.size());else printf("第%d样例使特殊边界S一般化后的集合有%d个极小一般化成员为:\n",i+1,g_S.size());for (it=g_S.begin(); it!=g_S.end(); it++){ HypoValue s = **it; printf("<"); for (j=0; j<g_Hypo.num-1; j++) { if (s.value[j]==ALL) printf("?, "); else if (s.value[j]==NUL) printf("O, "); else printf("%s, ", g_Hypo.an[j].att[s.value[j]-1]); } if (s.value[j]==ALL) printf("?>\n"); else if (s.value[j]==NUL) printf("O>\n"); else printf("%s>\n", g_Hypo.an[j].att[s.value[j]-1]);}//printf("特殊边界G:%d\n",g_G.size());// 输出Gif(i==-3)printf("最终的一般边界G集合有%d个极小特殊化成员为:\n",g_G.size());elseprintf("第%d样例使特殊边界G特殊化后的集合有%d个极小特殊化成员为:\n",i+1,g_G.size());for (it=g_G.begin(); it!=g_G.end(); it++){ HypoValue s = **it; printf("<"); for (k=0; k<g_Hypo.num-1; k++) { if (s.value[k]==ALL) printf("?, "); else if (s.value[k]==NUL) printf("O, "); else printf("%s, ", g_Hypo.an[k].att[s.value[k]-1]); } if (s.value[k]==ALL) printf("?>\n"); else if (s.value[k]==NUL) printf("O>\n"); else printf("%s>\n", g_Hypo.an[k].att[s.value[k]-1]);}
4、运行输出的每一次样例一般化S边界与特殊化G边界的结果集合,和最终的一般化S边界与特殊化G边界的结果集合:第1样例使特殊边界S一般化后的集合有1个极小一般化成员为:<sunny, warm, normal,strong, warm, same>第1样例使特殊边界G特殊化后的集合有1个极小特殊化成员为:<?, ?, ?, ?, ?, ?>第2样例使特殊边界S一般化后的集合有1个极小一般化成员为:<sunny, warm, ?,strong, warm, same>第2样例使特殊边界G特殊化后的集合有1个极小特殊化成员为:<?, ?, ?, ?, ?, ?>第3样例使特殊边界S一般化后的集合有1个极小一般化成员为:<sunny, warm, ?,strong, warm, same>第3样例使特殊边界G特殊化后的集合有3个极小特殊化成员为:<?, ?, ?, ?, ?,same><?, warm, ?, ?, ?,?><sunny, ?, ?, ?, ?,?>第4样例使特殊边界S一般化后的集合有1个极小一般化成员为:<sunny, warm, ?,strong, ?, ?>第4样例使特殊边界G特殊化后的集合有2个极小特殊化成员为:<?, warm, ?, ?, ?,?><sunny, ?, ?, ?, ?,?>最终的特殊边界S集合有1个极小一般化成员为:<sunny, warm, ?,strong, ?, ?>最终的一般边界G集合有2个极小特殊化成员为:<?, warm, ?, ?, ?,?><sunny, ?, ?, ?, ?,?>请按任意键继续. . .
5、hypothesis.txt与samples.txt文件中的内容如下:hypothesis.txt6sky 3 sunny rainy cloudyairtemp 2 warm coldhumidity 2 normal highwind 2 strong weakwater 2 warm coolforecast 2 same change// 从文件中读取假设集合/*/ 文件格式[集合个数n][属性名称1] [属性值个数] [属性值1] [属性值2] [属性值3] ……[属性名称2] [属性值个数] [属性值1] [属性值2] [属性值3] …………[属性名称n] [属性值个数] [属性值1] [属性值2] [属性值3] ……/*/samples.txt41 1 1 1 1 1 11 1 2 1 1 1 12 2 2 1 1 2 01 1 2 1 2 2 1// 从文件中读取样本/*/ 文件格式[样本个数m][样本1属性1的值的序号] [样本1属性2的值的序号] …… [样本1属性n的值的序号] [1(正例)或者0(反例)][样本2属性1的值的序号] [样本2属性2的值的序号] …… [样本2属性n的值的序号] [1(正例)或者0(反例)]……[样本m属性1的值的序号] [样本m属性2的值的序号] …… [样本m属性n的值的序号] [1(正例)或者0(反例)]/*/
6、附完整代码如下:#include <坡纠课柩iostream>#include <string>#i荏鱿胫协nclude <list>using namespace std;#define A_CHAR_MAX 16#define A_VALUE_MAX 16#define A_NUM_MAX 32#define SAMPLES_MAX 256#define ALL -1#define NUL -2#define YES 1#define NO 0#define NUKOWN -1// 属性struct Attribute{char name[A_CHAR_MAX]; // 属性名称int num; // 属性值个数char att[A_VALUE_MAX][A_CHAR_MAX]; // 属性值};// 假设struct Hypothesis{int num; // 属性个数Attribute an[A_NUM_MAX]; // 属性集合};// 假设值struct HypoValue{int value[A_NUM_MAX];};// 样本struct Sample{HypoValue ev; // 假设int result; // 正例/反例};Hypothesis g_Hypo; // 假设集合Sample g_sa[SAMPLES_MAX]; // 样本空间int g_sn; // 样本数list<HypoValue*> g_G; // 一般边界Glist<HypoValue*> g_S; // 特殊边界Sbool ReadHypothesis(const char* filename){FILE* file;if (!(file=fopen(filename , "r"))) return false;int i,j,k;fscanf(file, "%d\n", &g_Hypo.num);for (i=0; i<g_Hypo.num ; i++){ fscanf(file, "%s%d\n", g_Hypo.an[i].name, &k); g_Hypo.an[i].num = k; for (j=0; j<k; j++) { fscanf(file, "%s", g_Hypo.an[i].att[j]); }/*1wangxiaobo@163.com*/ fscanf(file, "\n");}fclose(file);return true;}bool ReadSamples(const char* filename){FILE* file;if (!(file=fopen(filename , "r"))) return false;int i,j;fscanf(file, "%d\n", &g_sn);for (i=0; i<g_sn ; i++){/*1wangxiaobo@163.com*/ for (j=0; j<g_Hypo.num; j++) { fscanf(file, "%d", &g_sa[i].ev.value[j]); } fscanf(file, "%d\n",&g_sa[i].result);}fclose(file);return true;}// 初始化G和Svoid InitGandS(){HypoValue* evg = new HypoValue();HypoValue* evs = new HypoValue();for (int i=0; i<g_Hypo.num; i++){ evg->value[i] = ALL; // 一般边界G evs->value[i] = NUL; // 特殊边界S}g_G.push_back(evg);g_S.push_back(evs);}// 正例ps与假设h是否一致bool PositiveisConsistent(HypoValue h, HypoValue ps){for (int i=0; i<g_Hypo.num; i++){ if (h.value[i]==ALL) continue; else if (h.value[i]!=ps.value[i]) return false;}return true;}/*1wangxiaobo@163.com*/ // 反例ns与假设h是否一致bool NagativeisConsistent(HypoValue h, HypoValue ns){for (int i=0; i<g_Hypo.num; i++){ if (h.value[i]!=ALL && h.value[i]!=ns.value[i]) return true;/*1wangxiaobo@163.com*/ }return false;}// G的某个成员是否比h更一般bool GMemberMoreGeneral(HypoValue h){int i;list<HypoValue*>::iterator it;HypoValue mem;for (it=g_G.begin(); it!=g_G.end(); it++){ mem = **it; for (i=0; i<g_Hypo.num; i++) { if (mem.value[i]==ALL && h.value[i]==ALL) continue; else if (mem.value[i]==h.value[i]) continue; else if (mem.value[i]!=ALL && h.value[i]==ALL) break;/*1wangxiaobo@163.com*/ else if (mem.value[i]==ALL && h.value[i]!=ALL) continue; else break; } if (i==g_Hypo.num) return true;}return false;}// 把s的所有的极小泛化式h加入到S中,并且满足h与d一致,而且G的某个成员比h更一般void MiniGeneral(HypoValue s, HypoValue d){HypoValue* h = new HypoValue();for (int i=0; /*1wangxiaobo@163.com*/ i<g_Hypo.num; i++){ if (s.value[i]==NUL) h->value[i] = d.value[i]; else if (s.value[i]==ALL) h->value[i] = ALL; else if (s.value[i]!=d.value[i]) h->value[i] = ALL; else h->value[i] = d.value[i];}if (GMemberMoreGeneral(*h))// G的某个成员是否比h更一般 g_S.push_front(h);else delete h;}// 从S中移去所有这样的假设:它比S中另一个假设更一般void RemoveMoreGeneralFromS(){bool r1,r2;int i;HypoValue e1, e2;list<HypoValue*>::iterator it;list<HypoValue*>::iterator next;list<HypoValue*>::iterator temp;for (it=g_S.begin(); it!=g_S.end();){ e1 = * *it; next = it; r1 = r2 = false; for (next++; next!=g_S.end();) { e2 = * *next; r1 = r2 = false; for (i=0; /*1wangxiaobo@163.com*/ i<g_Hypo.num; i++) { if (e1.value[i]==ALL && e2.value[i]==ALL) continue; else if (e1.value[i]==e2.value[i]) continue; else if (e1.value[i]==ALL && e2.value[i]!=ALL) { r1 = true; if (r2) break; } else if (e1.value[i]!=ALL && e2.value[i]==ALL) { r2 = true; if (r1) break; } else { r1 = r2 = true; break; } } if (r1==true && r2==false) break; if (r1==false) { temp = next; next++; g_S.remove(*temp); } else next++; } if (r1==true /*1wangxiaobo@163.com*/ && r2==false) { temp = it; it++; g_S.remove(*temp); } else it++;}}// S的某个成员是否比h更特殊bool SMemberMoreSpecial(HypoValue h){int i;list<HypoValue*>::iterator it;HypoValue mem;for (it=g_S.begin(); it!=g_S.end(); it++){ mem = **it; for (i=0; i<g_Hypo.num; i++) { if (mem.value[i]==ALL && h.value[i]==ALL) continue; else if (mem.value[i]==h.value[i]) continue; else if (mem.value[i]!=ALL && h.value[i]==ALL) continue; else if (mem.value[i]==ALL && h.value[i]!=ALL) break; else break; } if (i==g_Hypo.num) return true;}return false;}// 把g的所有的极小特殊化式h加入到G中,其中h满足h与d一致,而且S的某个成员比h更特殊void MiniSpecial(HypoValue g, HypoValue d){int i,j;for (i=0; i<g_Hypo.num; i++){ for (j=1; j<=g_Hypo.an[i].num; j++) { if (j!=d.value[i]) { HypoValue* h = new HypoValue(g); h->value[i] = j; if (SMemberMoreSpecial(*h))/*1wangxiaobo@163.com*/ g_G.push_front(h); else delete h; } }}}// 从G中移去所有这样的假设:它比G中另一个假设更特殊void RemoveMoreSpecialFromG(){bool r1,r2;int i;HypoValue e1, e2;list<HypoValue*>::iterator it;list<HypoValue*>::iterator next;list<HypoValue*>::iterator temp;for (it=g_G.begin(); it!=g_G.end();){ e1 = * *it; next = it; r1 = r2 = false; for (next++; next!=g_G.end();) { e2 = * *next; r1 = r2 = false; for (i=0; i<g_Hypo.num; i++) { if (e1.value[i]==ALL && e2.value[i]==ALL) continue; else if (e1.value[i]==e2.value[i])/*1wangxiaobo@163.com*/ continue; else if (e1.value[i]!=ALL && e2.value[i]==ALL) { r1 = true; if (r2) break; } else if (e1.value[i]==ALL && e2.value[i]!=ALL) { r2 = true; if (r1) break; } else { r1 = r2 = true; break; } } if (r1==true && r2==false) break; if (r1==false)/*1wangxiaobo@163.com*/ { temp = next; next++; g_G.remove(*temp); } else next++; } if (r1==true && r2==false) { temp = it; it++; g_G.remove(*temp); } else it++;}}void printdata(list<HypoValue*> &g_S,list<HypoValue*> &g_G,int i){/*1wangxiaobo@163.com*/ int j,k;list<HypoValue*>::iterator it;if(i==-3)printf("最终的特殊边界S集合有%d个极小一般化成员为:\n",g_S.size());else printf("第%d样例使特殊边界S一般化后的集合有%d个极小一般化成员为:\n",i+1,g_S.size());for (it=g_S.begin(); it!=g_S.end(); it++){ HypoValue s = **it; printf("<"); for (j=0; j<g_Hypo.num-1; j++) { if (s.value[j]==ALL) printf("?, "); else if (s.value[j]==NUL) printf("O, "); else printf("%s, ", g_Hypo.an[j].att[s.value[j]-1]); }/*1wangxiaobo@163.com*/ if (s.value[j]==ALL) printf("?>\n"); else if (s.value[j]==NUL) printf("O>\n"); else printf("%s>\n", g_Hypo.an[j].att[s.value[j]-1]);}//printf("特殊边界G:%d\n",g_G.size());// 输出Gif(i==-3)printf("最终的一般边界G集合有%d个极小特殊化成员为:\n",g_G.size());elseprintf("第%d样例使特殊边界G特殊化后的集合有%d个极小特殊化成员为:\n",i+1,g_G.size());for (it=g_G.begin(); it!=g_G.end(); it++){ HypoValue s = **it;/*1wangxiaobo@163.com*/ printf("<"); for (k=0; k<g_Hypo.num-1; k++) { if (s.value[k]==ALL) printf("?, "); else if (s.value[k]==NUL) printf("O, "); else printf("%s, ", g_Hypo.an[k].att[s.value[k]-1]); } if (s.value[k]==ALL) printf("?>\n"); else if (s.value[k]==NUL) printf("O>\n"); else printf("%s>\n", g_Hypo.an[k].att[s.value[k]-1]);} }// 主函数int main(int arc, char** argv)/*1wangxiaobo@163.com*/ { system("title 晓博List-Then-Eliminate试验程序");printf("Designed by wangxiaobo email:1wangxiaobo@163.com");// 读取假设和样本if (!ReadHypothesis("hypothesis.txt")){ printf("read Hypothesis file error"); return 0;}if (!ReadSamples("samples.txt")){ printf("read samples file error"); return 0;}// 初始化G和SInitGandS();list<HypoValue*>::iterator it;/*1wangxiaobo@163.com*/ list<HypoValue*>::iterator temp;int i,j,k;for (i=0; i<g_sn; i++){ if (g_sa[i].result==YES) // 正例时 { // 从G中移去所有与d不一致的假设 for (it=g_G.begin(); it!=g_G.end(); it++) { if (!PositiveisConsistent(**it, g_sa[i].ev)) { temp = it; it++; g_G.remove(*temp);/*1wangxiaobo@163.com*/ } } // 对S中每个与d不一致的假设s for (it=g_S.begin(); it!=g_S.end();) { if (!PositiveisConsistent(**it, g_sa[i].ev)) { MiniGeneral(**it, g_sa[i].ev); temp = it; it++; g_S.remove(*temp); // 从S中移去s RemoveMoreGeneralFromS(); }/*1wangxiaobo@163.com*/ else it++; } } else // 反例时 { // 从S中移去所有与d不一致的假设 for (it=g_S.begin(); it!=g_S.end(); it++) { if (!NagativeisConsistent(**it, g_sa[i].ev)) { temp = it; it++; g_S.remove(*temp); } } // 对G中每个与d不一致的假设g for (it=g_G.begin(); it!=g_G.end();) { if (!NagativeisConsistent(**it, g_sa[i].ev))/*1wangxiaobo@163.com*/ { MiniSpecial(**it, g_sa[i].ev); temp = it;/*1wangxiaobo@163.com*/ it++; g_G.remove(*temp); /*1wangxiaobo@163.com*/ // 从G中移去g RemoveMoreSpecialFromG();/*1wangxiaobo@163.com*/ } else it++; } } //.........................................................................//printf("特殊边界S:%d\n",g_S.size());printdata(g_S,g_G,i);//....................................................................}// 最终输出printdata(g_S,g_G,-3);//用于选择输出dos屏幕// 释放空间g_S.erase(g_S.begin(), g_S.end());/*1wangxiaobo@163.com*/ g_G.erase(g_G.begin(), g_G.end());system("pause");return 0;}