public static Instances preProcessData(Instances data) throws Exception {
    /*
     * Remove useless attributes
     */
    RemoveUseless removeUseless = new RemoveUseless();
    removeUseless.setOptions(new String[] { "-M", "99" }); // threshold
    removeUseless.setInputFormat(data);
    data = Filter.useFilter(data, removeUseless);

    /*
     * Replace missing values
     */
    ReplaceMissingValues fixMissing = new ReplaceMissingValues();
    fixMissing.setInputFormat(data);
    data = Filter.useFilter(data, fixMissing);

    /*
     * Discretize numeric attributes
     */
    Discretize discretizeNumeric = new Discretize();
    discretizeNumeric.setOptions(new String[] {
            "-O",
            "-M", "-1.0",
            "-B", "4",             // no of bins
            "-R", "first-last" }); // range of attributes
    discretizeNumeric.setInputFormat(data);
    data = Filter.useFilter(data, discretizeNumeric);

    /*
     * Select only informative attributes
     */
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setOptions(new String[] { "-T", "0.001" }); // information gain threshold
    AttributeSelection attSelect = new AttributeSelection();
    attSelect.setEvaluator(eval);
    attSelect.setSearch(search);
    // apply attribute selection
    attSelect.SelectAttributes(data);
    // remove the attributes not selected in the last run
    data = attSelect.reduceDimensionality(data);

    return data;
}
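/**
 * A minimal usage sketch for preProcessData (not part of the original code).
 * It assumes an ARFF file at the hypothetical path "data/raw.arff" whose last
 * attribute is a nominal class, and uses Weka's ConverterUtils to load and
 * save the data around the filter chain above.
 */
public static void preProcessExample() throws Exception {
    Instances raw = weka.core.converters.ConverterUtils.DataSource.read("data/raw.arff"); // assumed path
    raw.setClassIndex(raw.numAttributes() - 1); // InfoGain-based selection needs the class set

    Instances cleaned = preProcessData(raw);
    System.out.println("Attributes kept after preprocessing: " + cleaned.numAttributes());

    weka.core.converters.ConverterUtils.DataSink.write("data/preprocessed.arff", cleaned); // assumed path
}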
public static void getBestPerfFrom(String path) {
    try {
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();
        // set the best configuration to the cluster
        Map<Attribute, Double> attsmap = new HashMap<Attribute, Double>();
        for (int i = 0; i < best.numAttributes() - 1; i++) {
            attsmap.put(best.attribute(i), best.value(i));
        }
        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : " + bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public Filter createFilter(Instances data) throws Exception {
    Set<Integer> indexes = new HashSet<Integer>();
    for (int i = 0, cnt = this.size(); i < cnt; i++) {
        indexes.add(this.get(i).index());
    } // FOR

    SortedSet<Integer> to_remove = new TreeSet<Integer>();
    for (int i = 0, cnt = data.numAttributes(); i < cnt; i++) {
        if (indexes.contains(i) == false) {
            to_remove.add(i + 1);
        }
    } // FOR

    Remove filter = new Remove();
    // Set the options before the input format so that the removal range takes effect
    String options[] = { "-R", StringUtil.join(",", to_remove) };
    filter.setOptions(options);
    filter.setInputFormat(data);
    return (filter);
}
protected Map<Integer, MarkovGraphsContainer> constructMarkovModels(MarkovAttributeSet aset, Instances data) throws Exception {
    // Create an ExecutionState for this run
    ExecutionState state = (ExecutionState) this.state_pool.borrowObject();
    state.init(this.createClusterer(aset, data));

    // Construct the MarkovGraphs for each Partition/Cluster using the Training Data Set
    this.generateMarkovGraphs(state, data);

    // Generate the MarkovModels for the different partitions+clusters
    this.generateMarkovCostModels(state);

    Map<Integer, MarkovGraphsContainer> ret = new HashMap<Integer, MarkovGraphsContainer>();
    for (int p = 0; p < state.markovs_per_partition.length; p++) {
        ret.put(p, state.markovs_per_partition[p]);
    } // FOR
    return (ret);
}
/**
 * testCreateMarkovAttributeSetFilter
 */
@Test
public void testCreateMarkovAttributeSetFilter() throws Exception {
    // Test that we can create a filter from a MarkovAttributeSet
    MarkovAttributeSet aset = new MarkovAttributeSet(data, FeatureUtil.getFeatureKeyPrefix(ParamArrayLengthFeature.class));
    assertEquals(CatalogUtil.getArrayProcParameters(catalog_proc).size(), aset.size());

    Filter filter = aset.createFilter(data);
    Instances newData = Filter.useFilter(data, filter);
    for (int i = 0, cnt = newData.numInstances(); i < cnt; i++) {
        Instance processed = newData.instance(i);
        // System.err.println(processed);
        assertEquals(aset.size(), processed.numAttributes());
    } // FOR
    assertEquals(data.numInstances(), newData.numInstances());
    // System.err.println("MarkovAttributeSet: " + aset);
}
/**
 * <p>Get the distribution of inTrace and outTrace instances in the dataset at <b>path</b>.</p>
 * @param path path of the arff file of one project
 * @throws Exception
 */
public static void getDist(String path) throws Exception {
    Instances ins = DataSource.read(path);
    int numAttr = ins.numAttributes();
    ins.setClassIndex(numAttr - 1);

    int numIns = ins.numInstances();
    int intrace = 0;
    int outtrace = 0;
    for (int i = 0; i < numIns; i++) {
        if (ins.get(i).stringValue(ins.attribute(ins.classIndex())).equals("InTrace")) {
            intrace++;
        } else {
            outtrace++;
        }
    }
    System.out.printf("[ %-30s ] inTrace:%4d, outTrace:%4d.\n", path, intrace, outtrace);
}
/***
 * <p>Run 10-fold cross validation on the single arff file in <b>path</b>.</p>
 * <p>Use C4.5 and <b>SMOTE</b> to classify the dataset.</p>
 * @param path dataset path
 * @param index row of the results array in which to store the evaluation metrics
 * @throws Exception
 */
public static void getEvalResultbySMOTE(String path, int index) throws Exception {
    Instances ins = DataSource.read(path);
    int numAttr = ins.numAttributes();
    ins.setClassIndex(numAttr - 1);

    SMOTE smote = new SMOTE();
    smote.setInputFormat(ins);

    /* classifier settings */
    J48 j48 = new J48();
    // j48.setConfidenceFactor(0.4f);
    j48.buildClassifier(ins);

    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(j48);
    fc.setFilter(smote);

    Evaluation eval = new Evaluation(ins);
    eval.crossValidateModel(fc, ins, 10, new Random(1));

    // System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(0), eval.recall(0), eval.fMeasure(0));
    // System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(1), eval.recall(1), eval.fMeasure(1));
    // System.out.printf(" %4.3f \n\n", (1 - eval.errorRate()));
    results[index][0] = eval.precision(0);
    results[index][1] = eval.recall(0);
    results[index][2] = eval.fMeasure(0);
    results[index][3] = eval.precision(1);
    results[index][4] = eval.recall(1);
    results[index][5] = eval.fMeasure(1);
    results[index][6] = 1 - eval.errorRate();
}
public ModelClassifier() {
    name = new Attribute("name");
    type = new Attribute("type");

    attributes = new ArrayList();
    classVal = new ArrayList();
    classVal.add("Monday");
    classVal.add("Tuesday");
    classVal.add("Wednesday");
    classVal.add("Thursday");
    classVal.add("Friday");
    classVal.add("Saturday");
    classVal.add("Sunday");

    attributes.add(name);
    attributes.add(type);
    attributes.add(new Attribute("class", classVal));

    dataRaw = new Instances("TestInstances", attributes, 0);
    dataRaw.setClassIndex(dataRaw.numAttributes() - 1);
}
protected double[][] predictDataDistribution(Instances unlabeled) throws Exception {
    // set class attribute
    unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

    // distribution for instance
    double[][] dist = new double[unlabeled.numInstances()][unlabeled.numClasses()];

    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        // System.out.println("debug: " + this.getClass().getName() + ": classifier: " + m_Classifier.toString());
        LibSVM libsvm = (LibSVM) m_Classifier;
        libsvm.setProbabilityEstimates(true);
        double[] instanceDist = libsvm.distributionForInstance(unlabeled.instance(i));
        dist[i] = instanceDist;
    }

    return dist;
}
public double[] predictInstanceDistribution(Reader reader) throws Exception {
    // assume that the file contains only 1 instance
    // load instances
    Instances data = new Instances(reader);

    // remove reportID attribute
    String[] options = weka.core.Utils.splitOptions("-R 1"); // removes the first attribute (should be the document id?)
    String filterName = "weka.filters.unsupervised.attribute.Remove";
    Filter filter = (Filter) Class.forName(filterName).newInstance();
    if (filter instanceof OptionHandler) {
        ((OptionHandler) filter).setOptions(options);
    }
    filter.setInputFormat(data);
    // make the instances
    Instances unlabeled = Filter.useFilter(data, filter);

    double[][] dist = this.predictDataDistribution(unlabeled);
    return dist[0];
}
public void trainModelFromFile(String fnTrainData) throws Exception {
    // load instances
    Instances data = new Instances(new BufferedReader(new FileReader(fnTrainData)));

    // preprocess instances
    String[] options = weka.core.Utils.splitOptions("-R 1");
    String filterName = "weka.filters.unsupervised.attribute.Remove";
    Filter filter = (Filter) Class.forName(filterName).newInstance();
    if (filter instanceof OptionHandler) {
        ((OptionHandler) filter).setOptions(options);
    }
    filter.setInputFormat(data);
    // make the instances
    Instances unlabeled = Filter.useFilter(data, filter);

    // train model
    this.trainModel(unlabeled);
}
public static void main(String[] args) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    /*Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);*/

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
        attVals.add("val" + (i + 1));

    //atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    //atts.add(new Attribute("att3", attVals));

    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    LHSSampler sampler = new LHSSampler();
    Instances data = sampler.sampleMultiDimContinuous(atts, 1, false);

    System.out.println(data);
}
protected static void verifyCecum() throws Exception {
    // train model from cecum.arff
    String fn_train = Util.getOSPath(new String[]{Storage_Controller.getTrainingFileFolder(),
            "0..cecum.arff"});
    SVMPredictor svm = new SVMPredictor();
    svm.trainModelFromFile(fn_train);

    List<String> reportIDList = XMLUtil
            .getReportIDFromXMLList(Util.getOSPath(new String[] {
                    Storage_Controller.getDocumentListFolder(),
                    "devIDList.xml"}));
    Report_Controller reportController = new Report_Controller();
    Instances testSet = reportController.getWekaTestSet(reportIDList);
    double[][] predTable = svm.predict(testSet);
    for (int i = 0; i < testSet.numInstances(); i++) {
        System.out.print(testSet.instance(i).stringValue(0) + ",");
        System.out.println(predTable[i][0] + "," + predTable[i][1]);
    }
}
/**
 * Calculate the support value of a given rule on the dataset
 *
 * @param dataset the dataset
 * @param bodySide left-side or BODY part of the rule
 * @return support value for the rule on the given dataset
 */
public double calculateSupport(Instances dataset, List<Term> bodySide) {
    Iterator<Instance> datasetIterator = dataset.iterator();
    int supportCount = 0;
    while (datasetIterator.hasNext()) {
        Instance anInstance = datasetIterator.next();
        if (instanceCoveredByTermsList(anInstance, bodySide)) {
            supportCount++;
        }
    }
    return !dataset.isEmpty() ? (double) supportCount / (double) dataset.size() : 0.0d;
}
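/**
 * An illustrative companion sketch (not part of the original class): rule
 * confidence derived from two calls to calculateSupport above. It assumes the
 * same Term type and coverage semantics; concatenating the body and head term
 * lists stands in for the usual "body AND head" coverage test.
 */
public double calculateConfidence(Instances dataset, List<Term> bodySide, List<Term> headSide) {
    List<Term> bodyAndHead = new ArrayList<Term>(bodySide);
    bodyAndHead.addAll(headSide);

    double supportBody = calculateSupport(dataset, bodySide);
    double supportBoth = calculateSupport(dataset, bodyAndHead);

    // confidence = support(body AND head) / support(body); guard against an uncovered body
    return supportBody > 0.0 ? supportBoth / supportBody : 0.0;
}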
public static void main(String[] args) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    /*Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);*/

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
        attVals.add("val" + (i + 1));

    //atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    //atts.add(new Attribute("att3", attVals));

    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    Instances data = LHSInitializer.getMultiDimContinuous(atts, 1, false);

    System.out.println(data);
}
public static void runJ48(Instances trainSet, Instances testSet) {
    System.out.println("##################### J48 #####################");
    Classifier model = null;
    Train train = new Train(trainSet);

    /*
     * TRAIN
     */
    try {
        model = train.getJ48Model();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /*
     * TEST
     */
    Test test = new Test(trainSet, testSet);
    test.testModel(model);

    System.out.println("##################### END OF J48 #####################");
    System.out.print("\n\n\n");
}
public static void runSMO(Instances trainSet, Instances testSet) {
    System.out.println("##################### SMO (SVM) #####################");
    Classifier model = null;
    Train train = new Train(trainSet);

    /*
     * TRAIN
     */
    try {
        model = train.getSMO();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /*
     * TEST
     */
    Test test = new Test(trainSet, testSet);
    test.testModel(model);

    System.out.println("##################### END OF SMO (SVM) #####################");
    System.out.print("\n\n\n");
}
public static DMatrix instancesToDMatrix(Instances instances) throws XGBoostError {
    long[] rowHeaders = new long[instances.size() + 1];
    rowHeaders[0] = 0;
    List<Float> dataList = new ArrayList<>();
    List<Integer> colList = new ArrayList<>();
    float[] labels = new float[instances.size()];

    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        rowHeaders[i] = dataList.size();
        processInstance(instance, dataList, colList);
        labels[i] = (float) instance.classValue();
    }
    rowHeaders[rowHeaders.length - 1] = dataList.size();
    int colNum = instances.numAttributes() - 1;
    DMatrix dMatrix = createDMatrix(rowHeaders, dataList, colList, colNum);

    dMatrix.setLabel(labels);
    return dMatrix;
}
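/**
 * A minimal training sketch (not part of the original code) showing how the
 * DMatrix produced above could be fed to xgboost4j. The "train.arff" path and
 * the parameter values are illustrative assumptions; it relies on the standard
 * xgboost4j entry points XGBoost.train and Booster.predict, plus java.util
 * Map/HashMap imports.
 */
public static void trainBoosterExample() throws Exception {
    Instances train = new Instances(new java.io.BufferedReader(new java.io.FileReader("train.arff"))); // assumed path
    train.setClassIndex(train.numAttributes() - 1);
    DMatrix trainMat = instancesToDMatrix(train);

    Map<String, Object> params = new HashMap<>();
    params.put("objective", "binary:logistic"); // assumes a binary class attribute
    params.put("eta", 0.1);
    params.put("max_depth", 6);

    Map<String, DMatrix> watches = new HashMap<>();
    watches.put("train", trainMat);

    Booster booster = XGBoost.train(trainMat, params, 50, watches, null, null);
    float[][] preds = booster.predict(trainMat);
    System.out.println("First prediction: " + preds[0][0]);
}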
private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;
    while (enu.hasMoreElements()) {
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y - model.classifyInstance(ins), 2) - Math.pow(y - modelPi.classifyInstance(ins), 2);
    }

    return retval;
}
/**
 * Return the data set loaded from the Arff file at @param path
 */
public static Instances loadDataFromArffFile(String path) throws IOException {
    ArffLoader loader = new ArffLoader();
    loader.setSource(new File(path));
    Instances data = loader.getDataSet();
    System.out.println("\nHeader of dataset:\n");
    System.out.println(new Instances(data, 0));
    return data;
}
/**
 * Build a clusterer over the given training data, restricted to the attributes
 * in the given MarkovAttributeSet.
 * @param aset attributes to cluster on
 * @param trainingData training instances
 * @throws Exception
 */
protected AbstractClusterer createClusterer(MarkovAttributeSet aset, Instances trainingData) throws Exception {
    if (trace.val)
        LOG.trace(String.format("Clustering %d %s instances with %d attributes",
                  trainingData.numInstances(), CatalogUtil.getDisplayName(catalog_proc), aset.size()));

    // Create the filter we need so that we only include the attributes in the given MarkovAttributeSet
    Filter filter = aset.createFilter(trainingData);

    // Using our training set to build the clusterer
    int seed = this.rand.nextInt();
    // SimpleKMeans inner_clusterer = new SimpleKMeans();
    EM inner_clusterer = new EM();
    String options[] = {
        "-N", Integer.toString(1000), // num_partitions),
        "-S", Integer.toString(seed),
        "-I", Integer.toString(100),
    };
    inner_clusterer.setOptions(options);

    FilteredClusterer filtered_clusterer = new FilteredClusterer();
    filtered_clusterer.setFilter(filter);
    filtered_clusterer.setClusterer(inner_clusterer);

    AbstractClusterer clusterer = filtered_clusterer;
    clusterer.buildClusterer(trainingData);

    return (clusterer);
}
public static void main(String[] args) throws Exception {
    String databasePath = "data/features.arff";

    // Load the data in arff format
    Instances data = new Instances(new BufferedReader(new FileReader(databasePath)));

    // Set the last attribute as the class
    data.setClassIndex(data.numAttributes() - 1);

    // Build a basic decision tree model
    String[] options = new String[]{};
    J48 model = new J48();
    model.setOptions(options);
    model.buildClassifier(data);

    // Output decision tree
    System.out.println("Decision tree model:\n" + model);

    // Output source code implementing the decision tree
    System.out.println("Source code:\n" + model.toSource("ActivityRecognitionEngine"));

    // Check accuracy of model using 10-fold cross-validation
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(model, data, 10, new Random(1), new String[]{});
    System.out.println("Model performance:\n" + eval.toSummaryString());

    String[] activities = new String[]{"Walk", "Walk", "Walk", "Run", "Walk", "Run",
            "Run", "Sit", "Sit", "Sit"};
    DiscreteLowPass dlpFilter = new DiscreteLowPass(3);
    for (String str : activities) {
        System.out.println(str + " -> " + dlpFilter.filter(str));
    }
}
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    switch (scaleDownChoice) {
        case 0:
            return scaleDownMindists(previousSet, center);
        default:
            return scaleDownNeighbordists(previousSet, center);
    }
}
public BookDecisionTree(String fileName) {
    try {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        trainingData = new Instances(reader);
        trainingData.setClassIndex(trainingData.numAttributes() - 1);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
/**
 * Save @param data to the CSV file at @param path
 */
public static void saveDataToCsvFile(String path, Instances data) throws IOException {
    System.out.println("\nSaving to file " + path + "...");
    CSVSaver saver = new CSVSaver();
    saver.setInstances(data);
    saver.setFile(new File(path));
    saver.writeBatch();
}
private static M5P buildModel(Instances modelInstances, int numOfInstanceInLeaf) throws Exception {
    M5P retval = new M5P();
    retval.setSaveInstances(true);
    retval.setOptions(Utils.splitOptions("-N -L -M " + numOfInstanceInLeaf));
    retval.buildClassifier(modelInstances);
    return retval;
}
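/**
 * A minimal usage sketch for buildModel (not part of the original code). It
 * assumes "data/regression.arff" is a dataset whose last attribute is a
 * numeric class, builds an M5P model tree with at least 4 instances per leaf,
 * and reports its cross-validated error.
 */
private static void buildModelExample() throws Exception {
    Instances data = weka.core.converters.ConverterUtils.DataSource.read("data/regression.arff"); // assumed path
    data.setClassIndex(data.numAttributes() - 1);

    M5P tree = buildModel(data, 4);
    System.out.println(tree);

    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(tree, data, 10, new java.util.Random(1));
    System.out.println(eval.toSummaryString());
}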
public static void main(String[] vargs) throws Exception {
    ArgumentsParser args = ArgumentsParser.load(vargs);
    args.require(
        ArgumentsParser.PARAM_CATALOG,
        ArgumentsParser.PARAM_WORKLOAD,
        ArgumentsParser.PARAM_MAPPINGS
    );

    FeatureExtractor extractor = new FeatureExtractor(args.catalogContext);
    Map<Procedure, FeatureSet> fsets = extractor.calculate(args.workload);

    // List<String> targets = args.getOptParams();
    for (Entry<Procedure, FeatureSet> e : fsets.entrySet()) {
        String proc_name = e.getKey().getName();
        // if (targets.contains(proc_name) == false) continue;

        // File path = new File(proc_name + ".fset");
        // e.getValue().save(path.getAbsolutePath());
        // LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", e.getValue().getTransactionCount(), path.getAbsolutePath()));

        File path = new File(proc_name + ".arff");
        Instances data = e.getValue().export(proc_name, false);
        FileUtil.writeStringToFile(path, data.toString());
        LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", data.numInstances(), path.getAbsolutePath()));
    }
}
public static Instances convertToArff(List<Document> dataSet, List<String> vocabulary, String fileName) {
    int dataSetSize = dataSet.size();

    /* Create features */
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int i = 0; i < vocabulary.size(); i++) {
        attributes.add(new Attribute("word_" + i));
    }
    Attribute classAttribute = new Attribute("Class");
    attributes.add(classAttribute);

    /* Add examples */
    System.out.println("Building instances...");
    Instances trainingDataSet = new Instances(fileName, attributes, 0);
    for (int k = 0; k < dataSetSize; k++) {
        Document document = dataSet.get(k);
        Instance example = new DenseInstance(attributes.size());
        for (int i = 0; i < vocabulary.size(); i++) {
            String word = vocabulary.get(i);
            example.setValue(i, Collections.frequency(document.getTerms(), word));
        }
        example.setValue(classAttribute, document.getDocumentClass());
        trainingDataSet.add(example);

        int progress = (int) ((k * 100.0) / dataSetSize);
        System.out.printf("\rPercent completed: %3d%%", progress);
    }
    trainingDataSet.setClass(classAttribute);
    System.out.println();

    System.out.println("Writing to file ...");
    try {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(trainingDataSet);
        saver.setFile(new File(fileName));
        saver.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }

    return trainingDataSet;
}
public Main() {
    try {
        BufferedReader datafile;
        datafile = readDataFile("camping.txt");
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        smo.buildClassifier(trainingData); // train on the training split only, not the full dataset

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance
        Instance instance = new DenseInstance(3);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);
        instance.setDataset(data);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
public static void main(String[] args) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>();

    Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    Properties p3 = new Properties();
    p3.setProperty("range", "[1,30]");
    ProtectedProperties prop3 = new ProtectedProperties(p3);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
        attVals.add("val" + (i + 1));

    atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    atts.add(new Attribute("att3", prop3));
    //atts.add(new Attribute("att4", attVals));

    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    DDSSampler sampler = new DDSSampler(3);

    sampler.setCurrentRound(0);
    Instances data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);

    sampler.setCurrentRound(1);
    data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);

    sampler.setCurrentRound(2);
    data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);
}
/***
 * <p>Merge the datasets in the path array and save the combined dataset in dirpath.</p>
 * @param path String array of arff file paths
 * @param dirpath directory in which to save the merged dataset
 * @throws Exception
 */
public static void getIns(String[] path, String dirpath) throws Exception {
    /* Create an empty dataset total */
    Instances total = new Instances("total3500", getStandAttrs(), 1);
    total.setClassIndex(total.numAttributes() - 1);

    int len = path.length;
    Instances[] temp = new Instances[len];
    for (int i = 0; i < path.length; i++) {
        temp[i] = DataSource.read(path[i]);
        temp[i].setClassIndex(temp[i].numAttributes() - 1);
        total.addAll(temp[i]);
        System.out.println("adding " + path[i] + " " + temp[i].numInstances());
        // System.out.println("data" + total.numInstances() + "\n");
    }

    String totalName = dirpath + "total3500" + String.valueOf(System.currentTimeMillis()) + ".arff";
    DataSink.write(totalName, total);
    System.out.println("Writing the data into [" + totalName + "] successfully.\n");
}
public static ArrayList<String> preprocessInstances(Instances retval) {
    double[][] cMatrix;
    ArrayList<String> result = new ArrayList<String>();
    ArrayList<String> deleteAttNames = new ArrayList<String>();
    PrincipalComponents pc = new PrincipalComponents();
    HashMap<Integer, ArrayList<Integer>> filter = new HashMap<Integer, ArrayList<Integer>>();
    try {
        pc.buildEvaluator(retval);
        cMatrix = pc.getCorrelationMatrix();
        for (int i = 0; i < cMatrix.length; i++) {
            ArrayList<Integer> record = new ArrayList<Integer>();
            for (int j = i + 1; j < cMatrix.length; j++)
                if (cMatrix[i][j] >= correlationFactorThreshold || cMatrix[i][j] <= -correlationFactorThreshold) {
                    record.add(j);
                }
            if (record.size() != 0) {
                filter.put(i, record);
            }
        }
        Iterator<Map.Entry<Integer, ArrayList<Integer>>> iter = filter.entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<Integer, ArrayList<Integer>> entry = iter.next();
            ArrayList<Integer> arr = entry.getValue();
            for (int i = 0; i < arr.size(); i++)
                if (arr.get(i) != cMatrix.length - 1 && !deleteAttNames.contains(retval.attribute(arr.get(i)).name())) {
                    deleteAttNames.add(retval.attribute(arr.get(i)).name());
                }
            if (arr.contains(cMatrix.length - 1)) {
                result.add(retval.attribute(Integer.parseInt(entry.getKey().toString())).name());
            }
        }
        for (int i = 0; i < deleteAttNames.size(); i++) {
            retval.deleteAttributeAt(retval.attribute(deleteAttNames.get(i)).index());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);

    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // now we output the training set with the class value updated as the predicted value
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
/***
 * <p>Run 10-fold cross validation on the single arff file in <b>path</b>.</p>
 * <p>Use C4.5 and <b>Cost-sensitive learning</b> to classify the dataset.</p>
 * @param path dataset path
 * @param index row of the results array in which to store the evaluation metrics
 * @throws Exception
 */
public static void getEvalResultbyCost(String path, int index) throws Exception {
    Instances ins = DataSource.read(path);
    int numAttr = ins.numAttributes();
    ins.setClassIndex(numAttr - 1);

    /* Classifier setting */
    J48 j48 = new J48();
    // j48.setConfidenceFactor(0.4f);
    j48.buildClassifier(ins);

    CostSensitiveClassifier csc = new CostSensitiveClassifier();
    csc.setClassifier(j48);
    csc.setCostMatrix(new CostMatrix(new BufferedReader(new FileReader("files/costm"))));

    Evaluation eval = new Evaluation(ins);
    eval.crossValidateModel(csc, ins, 10, new Random(1));

    // System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(0), eval.recall(0), eval.fMeasure(0));
    // System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(1), eval.recall(1), eval.fMeasure(1));
    // System.out.printf(" %4.3f \n\n", (1 - eval.errorRate()));
    results[index][0] = eval.precision(0);
    results[index][1] = eval.recall(0);
    results[index][2] = eval.fMeasure(0);
    results[index][3] = eval.precision(1);
    results[index][4] = eval.recall(1);
    results[index][5] = eval.fMeasure(1);
    results[index][6] = 1 - eval.errorRate();
}
public static void runSVMRegression() throws Exception {
    BufferedReader br = null;
    int numFolds = 10;
    br = new BufferedReader(new FileReader("rawData.arff"));
    Instances trainData = new Instances(br);
    trainData.setClassIndex(trainData.numAttributes() - 1);
    br.close();

    WekaPackageManager.loadPackages(false, true, false);
    AbstractClassifier classifier = (AbstractClassifier) Class.forName(
            "weka.classifiers.functions.supportVector").newInstance();
    String options = ("-S 3 -V 10 -T 0");
    String[] optionsArray = options.split(" ");
    classifier.setOptions(optionsArray);
    classifier.buildClassifier(trainData);

    Evaluation evaluation = new Evaluation(trainData);
    /******************* CROSS VALIDATION *************************/
    evaluation.crossValidateModel(classifier, trainData, numFolds, new Random(1));
    /***************************************************************/

    evaluateResults(evaluation);
}
private static Instances getSiblings(M5P modelTree, Instance ins) {
    RuleNode node = modelTree.getM5RootNode();

    while (!node.isLeaf()) {
        if (ins.value(node.splitAtt()) <= node.splitVal()) {
            node = node.leftNode();
        } else {
            node = node.rightNode();
        }
    }

    return node.zyqGetTrainingSet();
}
public Instances createInstances(List<String> orderedFeatureNames) {
    if (orderedFeatureNames == null)
        orderedFeatureNames = new ArrayList<String>(this.getFeatures());

    Instances data = this.createEmptyDataset(orderedFeatureNames);
    for (I key : this.featureValues.keySet())
        data.add(this.createInstance(orderedFeatureNames, key));
    return data;
}