这次介绍一个我现在正在做的半监督算法。由于我只是提出一个框架,半监督算法只是一个工具,但是为了找这个工具也让我花了很多功夫。现在介绍的暂时不是Weka的一部分,但开发者是Waikato大学(也就是开发Weka的那个大学)。下载地址是 http://www.cs.waikato.ac.nz/~fracpete/projects/collective-classification/ 。我下载的是 Classifiers compatible now with Weka 3.5.8。别的几种能不能行我也不知道;然后我也按作者说的把Weka 3.5.8下载下来了(不知道和之前的版本是否兼容)。使用非常简单:先到Weka目录下,把weka-src.jar解压,再把下载的src.tar.gz在Weka目录下解压,如果有替换就替换,如果没有替换,证明你的操作有错。
然后就可以试一下了:把所有Weka源文件拷贝到你新的工程中去。下面我给出了一个测试的例子,不要太相信我的办法,我还没细看他的代码(分成训练和测试样本的方法有问题,懒得改了,谁改完了,请发给我)。注意一下,他训练是同时用训练集和测试集。如果不明白,下载他的一篇论文 Using Weighted Nearest Neighbor to Benefit from Unlabeled Data。非常简单,不用害怕。
补充:Tri-train是属于Multi-View的半监督算法,可以从周志华的网站上下载到,论文和代码都不是很难懂,网址是:http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/annex/TriTrain.htm ,这里不做解释。如果还有别的基于Weka的半监督代码,请告诉我。
package semiTest; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import weka.classifiers.Evaluation; import weka.classifiers.collective.functions.LLGC; import weka.classifiers.collective.meta.CollectiveBagging; import weka.classifiers.collective.meta.YATSI; import weka.classifiers.trees.J48; import weka.core.Instances; import weka.core.Utils; import weka.filters.Filter; import weka.filters.unsupervised.instance.Resample; public class Test { Instances m_instances = null; Instances m_testIns = null; Instances m_trainIns = null; public void getFileInstances( String fileName ) throws Exception { FileReader frData = new FileReader( fileName ); m_instances = new Instances( frData ); } public void writeToArffFile(String newFilePath,Instances ins) throws IOException { BufferedWriter writer = new BufferedWriter(new FileWriter(newFilePath)); writer.write(ins.toString()); writer.flush(); writer.close(); } public void FilterRemovePercentageTest() throws Exception { Resample removePercentage =new Resample(); String[] options = Utils.splitOptions("-Z 10 -no-replacement"); removePercentage.setOptions(options); removePercentage.setInputFormat( m_instances ); m_trainIns = Filter.useFilter( m_instances, removePercentage); writeToArffFile("TrainData.arff", m_trainIns ); options = Utils.splitOptions("-Z 90 -no-replacement"); removePercentage.setOptions(options); removePercentage.setInputFormat( m_instances ); m_testIns = Filter.useFilter( m_instances,removePercentage); writeToArffFile("TestData.arff", m_testIns ); m_trainIns.setClassIndex( m_trainIns.numAttributes() - 1 ); m_testIns.setClassIndex( m_testIns.numAttributes() - 1 ); } public void LLGCTest() throws Exception { System.out.println( " **************LLGC********** " ); LLGC llgc = new LLGC(); llgc.buildClassifier( m_trainIns, m_testIns ); Evaluation eval = new Evaluation( m_trainIns ); eval.evaluateModel( llgc, m_testIns ); System.out.println( 
eval.toSummaryString() ); } public void J48Test() throws Exception { System.out.println( " **************J48********** " ); J48 j48 = new J48(); j48.buildClassifier( m_trainIns ); Evaluation eval = new Evaluation( m_trainIns ); eval.evaluateModel( j48, m_testIns ); System.out.println( eval.toSummaryString() ); } public void YATSITest() throws Exception { System.out.println( " **************YATSI********** " ); YATSI yatsi = new YATSI(); yatsi.buildClassifier( m_trainIns, m_testIns ); Evaluation eval = new Evaluation( m_trainIns ); eval.evaluateModel( yatsi, m_testIns ); System.out.println( eval.toSummaryString() ); } public void CollectiveEMTest() throws Exception { System.out.println( " **************EM********** " ); CollectiveBagging bagging = new CollectiveBagging(); bagging.buildClassifier( m_trainIns, m_testIns ); Evaluation eval = new Evaluation( m_trainIns ); eval.evaluateModel( bagging, m_testIns ); System.out.println( eval.toSummaryString() ); } public static void main(String[] args) throws Exception { Test percentage = new Test(); percentage.getFileInstances( "F://Program Files//Weka-3-4//data//soybean.arff"); percentage.FilterRemovePercentageTest(); percentage.J48Test(); percentage.YATSITest(); //percentage.LLGCTest(); } }