/*** * <p>To Merge the datasets in path array and save the total dataset in dirpath. * </p> * @param path String array of arff file * @throws Exception */ public static void getIns(String[] path, String dirpath) throws Exception{ /** Create a empty dataset total*/ Instances total = new Instances("total3500", getStandAttrs(), 1); total.setClassIndex(total.numAttributes() - 1); int len = path.length; Instances[] temp = new Instances[len]; for(int i=0; i<path.length; i++){ temp[i] = DataSource.read(path[i]); temp[i].setClassIndex(temp[i].numAttributes() - 1); total.addAll(temp[i]); System.out.println("adding " + path[i] + " " + temp[i].numInstances()); // System.out.println("data" + total.numInstances() + "\n"); } String totalName = dirpath+"total3500" + String.valueOf(System.currentTimeMillis()) + ".arff"; DataSink.write(totalName, total); System.out.println("Writing the data into [" + totalName + "] successfully.\n"); }
/**
 * <p>Generates a random sample of {@code num} instances from an ARFF file,
 * keeping the InTrace/OutTrace class proportion of the original dataset, and
 * writes it to {@code files/generated/}.
 * </p>
 * <p>The dataset is shuffled with a {@link Random} seeded by {@code rand}, so
 * the same seed always selects the same instances. The output file name is
 * {@code filterName(path) + rand + ".arff"}. Instances whose class label is
 * neither {@code InTrace} nor {@code OutTrace} are ignored. A non-positive
 * {@code num} yields an empty dataset.
 * </p>
 *
 * @param path original ARFF file to be sampled
 * @param rand random seed used to shuffle the dataset (also part of the output name)
 * @param num  total number of instances to select
 * @throws IllegalArgumentException if {@code num} requires more instances of a
 *         class than the original dataset contains
 * @throws Exception if the source file cannot be read or the sample cannot be written
 */
public static void generateARFF(String path, int rand, int num) throws Exception {
    // Read the original dataset; the class is the last attribute.
    Instances data = DataSource.read(path);
    data.setClassIndex(data.numAttributes() - 1);

    // Shuffle so that taking the first k instances of each class is a random draw.
    data.randomize(new Random(rand));

    // dataIn collects the InTrace instances.
    Instances dataIn = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
    dataIn.setClassIndex(dataIn.numAttributes() - 1);

    // dataOut collects the OutTrace instances.
    Instances dataOut = new Instances("dataOut", InsMerge.getStandAttrs(), 1);
    dataOut.setClassIndex(dataOut.numAttributes() - 1);

    // Single pass: route each instance by class label, preserving shuffled order.
    for (int i = 0; i < data.numInstances(); i++) {
        String label = data.get(i).stringValue(data.get(i).classAttribute());
        if ("OutTrace".equals(label)) {
            dataOut.add(data.get(i));
        } else if ("InTrace".equals(label)) {
            dataIn.add(data.get(i));
        }
    }

    // In/Out ratio of the original dataset.
    int inTrace = dataIn.numInstances();
    int outTrace = dataOut.numInstances();
    int labelled = inTrace + outTrace;
    // Guard the 0/0 case explicitly; it yields an InTrace share of zero.
    double ratioI = labelled == 0 ? 0.0 : inTrace * 1.0 / labelled;

    // Expected number of instances to take from each class.
    int intrace = (int) (num * ratioI);
    int outtrace = num - intrace;

    // Fail with a descriptive message instead of an index error mid-copy.
    if (intrace > inTrace || outtrace > outTrace) {
        throw new IllegalArgumentException(
                "Cannot sample " + num + " instances from " + path
                + ": need " + intrace + " InTrace and " + outtrace
                + " OutTrace but only " + inTrace + " and " + outTrace
                + " are available");
    }

    // NOTE(review): the relation name "dataIn" looks like a copy-paste leftover;
    // it is kept unchanged because it is written into the output file.
    Instances train = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
    train.setClassIndex(train.numAttributes() - 1);

    // Take the first `intrace` shuffled InTrace instances.
    for (int i = 0; i < intrace; i++) {
        train.add(dataIn.get(i));
    }
    // Take the first `outtrace` shuffled OutTrace instances.
    for (int j = 0; j < outtrace; j++) {
        train.add(dataOut.get(j));
    }

    // Save under files/generated/; filterName is defined elsewhere and
    // presumably reduces the path to a base name (confirm against its definition).
    String filename = "files/generated/" + filterName(path) + rand + ".arff";
    DataSink.write(filename, train);
}