/** * When the score changes, rewrite the file. * This is really rare in practice, so don't bother optimizing it. */ private static void dump_from_scratch(Collection<String> names, Timestamp start_time) throws IOException { saved_schema_version = names.size(); FastVector attributes = new FastVector(); // Answer score names for (String name: names) attributes.addElement(new Attribute(name)); Instances data = new Instances("Watsonsim captured question stream", attributes, 0); // Save the results to a file saver = new ArffSaver(); saver.setStructure(data); saver.setRetrieval(Saver.INCREMENTAL); saver.setFile(new File("data/weka-log." + start_time + ".arff")); for (Score row : dataset) saver.writeIncremental(new Instance(1.0, row.getEach(names))); }
/**
 * Builds training data from every {@code .txt} file (case-insensitive match on
 * {@code ".+\\.txt"}) found under {@code corpusRoot}, using
 * {@code DirectoryFileFilter.INSTANCE} as the directory filter, and streams all
 * resulting instances into a single ARFF file.
 *
 * <p>The ARFF header is taken from the data set built for the first file; instances
 * from later files are written under that same header.
 * NOTE(review): this presumes every file yields a data set with the same attributes —
 * confirm against {@code buildTrainingDataFromFile}.
 *
 * <p>The output writer is always closed before returning, including when an
 * exception aborts the export part-way through.
 *
 * @param dataSetName name passed through to {@code buildTrainingDataFromFile}
 * @param corpusRoot  directory searched for text files
 * @param fvGenerator feature-vector generator passed through to {@code buildTrainingDataFromFile}
 * @param dest        ARFF file to write
 * @throws IOException if reading the corpus or writing the ARFF file fails
 */
public static void buildTrainingDataFromCorpus(String dataSetName, File corpusRoot, FVGenerator fvGenerator, File dest) throws IOException {
    Collection<File> children = FileUtils.listFiles(corpusRoot,
            new RegexFileFilter(".+\\.txt", IOCase.INSENSITIVE), DirectoryFileFilter.INSTANCE);

    ArffSaver saver = new ArffSaver();
    saver.setFile(dest);
    saver.setRetrieval(Saver.INCREMENTAL);

    try {
        boolean first = true;
        for (File textFile : children) {
            Instances dataSet = buildTrainingDataFromFile(dataSetName, textFile, fvGenerator);

            // The first data set supplies the header for the whole output file.
            if (first) {
                saver.setStructure(dataSet);
                first = false;
            }

            for (int i = 0; i < dataSet.numInstances(); ++i) {
                saver.writeIncremental(dataSet.instance(i));
            }
        }
        saver.getWriter().flush();
    } finally {
        // Release the file handle; previously the writer was only flushed, never closed.
        saver.getWriter().close();
    }
}
/**
 * Concatenates several ARFF files into one.
 *
 * <p>Each path in {@code dataSetPaths} is loaded in turn and its instances are
 * appended to {@code destPath}. The header of the output comes from the first
 * input, with its relation name replaced by {@code relationName}.
 * NOTE(review): later inputs are presumed to share the first input's attributes —
 * nothing here checks that.
 *
 * <p>Every input reader is closed as soon as its data has been loaded, and the
 * output writer is always closed before returning, including on failure.
 *
 * @param relationName relation name to put in the output header
 * @param destPath     path of the ARFF file to write
 * @param dataSetPaths paths of the ARFF files to merge, in output order
 * @throws IOException if an input cannot be read or the output cannot be written
 */
public static void mergeAndWrite(String relationName, String destPath, String... dataSetPaths) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setFile(new File(destPath));
    saver.setRetrieval(Saver.INCREMENTAL);

    try {
        boolean first = true;
        for (String p : dataSetPaths) {
            Instances dataSet;
            BufferedReader in = new BufferedReader(new FileReader(p));
            try {
                // The single-argument ArffReader constructor loads the whole file,
                // so the reader is no longer needed once getData() has returned.
                dataSet = new ArffReader(in).getData();
            } finally {
                in.close();
            }

            // The first data set supplies the header for the merged file.
            if (first) {
                dataSet.setRelationName(relationName);
                saver.setStructure(dataSet);
                first = false;
            }

            for (int i = 0; i < dataSet.numInstances(); ++i) {
                saver.writeIncremental(dataSet.instance(i));
            }
        }
        saver.getWriter().flush();
    } finally {
        // Release the output file handle; previously the writer was only flushed.
        saver.getWriter().close();
    }
}
/**
 * Command-line entry point: builds training data from one or more source files
 * and writes all of it to a single ARFF file.
 *
 * <p>Arguments: {@code dataSetName destARFF sourceFile1 [sourceFile2 ...]}. With
 * fewer than three arguments a usage line is printed and the process exits with
 * status 1. The header of the output is taken from the data set built for the
 * first source file. The instance count of each source file is printed to
 * standard output.
 *
 * @param args command-line arguments as described above
 * @throws Exception if building a data set or writing the output fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("USAGE: program dataSetName destARFF sourceFile1 [sourceFile2 [sourceFile3 [...]]]");
        System.exit(1);
    }

    FVGenerator generator = new GuidedFVGenerator();
    String relation = args[0];
    File outputFile = new File(args[1]);

    ArffSaver arffOut = new ArffSaver();
    arffOut.setFile(outputFile);
    arffOut.setRetrieval(Saver.INCREMENTAL);

    // Everything after the first two arguments is a source file.
    boolean headerPending = true;
    int argIndex = 2;
    while (argIndex < args.length) {
        File source = new File(args[argIndex]);
        Instances built = Actions.buildTrainingDataFromFile(relation, source, generator);

        // Only the first data set defines the output structure.
        if (headerPending) {
            arffOut.setStructure(built);
            headerPending = false;
        }

        int num = built.numInstances();
        System.out.println("Num instances: "+num);
        for (int row = 0; row < num; row++) {
            arffOut.writeIncremental(built.instance(row));
        }

        argIndex++;
    }

    arffOut.getWriter().flush();
}