/**
 * Initialize and run the ARFF header creation job (if necessary).
 *
 * @return true if the job was successful
 * @throws DistributedWekaException if a problem occurs
 * @throws IOException if a problem occurs
 */
protected boolean initializeAndRunArffJob() throws DistributedWekaException,
  IOException {

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  // Run the ARFF header job first
  logMessage("Executing ARFF Job....");
  statusMessage("Executing ARFF Job...");

  m_arffHeaderJob.setEnvironment(m_env);
  m_arffHeaderJob.setLog(getLog());
  m_arffHeaderJob.setStatusMessagePrefix(m_statusMessagePrefix);
  if (!m_arffHeaderJob.runJob()) {
    statusMessage("Unable to continue - creating the ARFF header failed!");
    logMessage("Unable to continue - creating the ARFF header failed!");
    return false;
  }

  return true;
}
/**
 * Initialize and run the ARFF header creation job (if necessary).
 *
 * @return true if the job was successful
 * @throws DistributedWekaException if a problem occurs
 * @throws IOException if a problem occurs
 */
protected boolean initializeAndRunArffJob() throws DistributedWekaException,
  IOException {

  if (m_arffHeaderJob.getFinalHeader() != null) {
    // nothing to do - the ARFF header has already been created
    return true;
  }

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  // Run the ARFF header job first
  logMessage("Checking to see if ARFF job is needed....");
  statusMessage("Checking to see if ARFF job is needed...");

  m_arffHeaderJob.setEnvironment(m_env);
  m_arffHeaderJob.setLog(getLog());
  m_arffHeaderJob.setStatusMessagePrefix(m_statusMessagePrefix);
  if (!m_arffHeaderJob.runJob()) {
    statusMessage("Unable to continue - creating the ARFF header failed!");
    logMessage("Unable to continue - creating the ARFF header failed!");
    return false;
  }

  return true;
}
/**
 * Adds the core weka and distributed weka jar files to the classpath for map
 * and reduce tasks
 *
 * @param conf the Configuration object to populate
 * @throws IOException if a problem occurs
 */
protected void addWekaLibrariesToClasspath(Configuration conf)
  throws IOException {

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  statusMessage("Adding Weka libraries to the distributed cache and classpath "
    + "for the job");
  List<String> cacheFiles = new ArrayList<String>();
  cacheFiles.add(new File(m_pathToWekaJar).getName());
  cacheFiles.add(new File(DISTRIBUTED_WEKA_BASE_JAR).getName());
  cacheFiles.add(new File(DISTRIBUTED_WEKA_HADOOP_JAR).getName());
  cacheFiles.add(new File(OPEN_CSV_JAR).getName());
  cacheFiles.add(new File(JFREECHART_JAR).getName());
  cacheFiles.add(new File(JCOMMON_JAR).getName());
  cacheFiles.add(new File(COLT_JAR).getName());
  cacheFiles.add(new File(LA4J_JAR).getName());

  HDFSUtils.addWekaInstalledFilesToClasspath(m_mrConfig.getHDFSConfig(),
    conf, cacheFiles, m_env);
}
/**
 * Initializes and executes the ARFF header job if necessary
 *
 * @return true if the job succeeds
 * @throws DistributedWekaException if a problem occurs
 */
protected boolean initializeAndRunArffJob() throws DistributedWekaException {

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  m_arffHeaderJob.setEnvironment(m_env);
  if (!m_arffHeaderJob.runJob()) {
    statusMessage("Unable to continue - creating the ARFF header failed!");
    logMessage("Unable to continue - creating the ARFF header failed!");
    return false;
  }

  // configure our output subdirectory
  String outputPath = m_mrConfig.getOutputPath();
  outputPath += OUTPUT_SUBDIR;
  outputPath = environmentSubstitute(outputPath);
  m_mrConfig.setOutputPath(outputPath);

  return true;
}
/**
 * Populate the supplied Configuration object with HDFS-related settings
 *
 * @param conf the Configuration object to populate
 * @param env environment variables
 */
public void configureForHadoop(Configuration conf, Environment env) {
  // transfer over the properties to Hadoop ones
  conf.set(HDFSConfig.HADOOP_FS_DEFAULT_NAME,
    HDFSConfig.constructHostURL(this, env));

  for (Map.Entry<String, String> e : getUserSuppliedProperties().entrySet()) {
    // '*' indicates special job-related properties for use by Weka map/reduce
    // tasks. No need to copy these over here as each job in question will
    // make sure that their specific props are copied over.
    if (!e.getKey().startsWith("*")) {
      if (!DistributedJobConfig.isEmpty(e.getValue())) {
        conf.set(e.getKey(), e.getValue());
      }
    }
  }
}
/**
 * Move a file from one location to another in HDFS
 *
 * @param source the source path in HDFS
 * @param target the target path in HDFS
 * @param config the HDFSConfig with connection details
 * @param env environment variables
 * @throws IOException if a problem occurs
 */
public static void moveInHDFS(String source, String target,
  HDFSConfig config, Environment env) throws IOException {

  createTmpDistributedCacheDirIfNecessary(config);

  Path sourceP = new Path(resolvePath(source, env));
  Path targetP = new Path(resolvePath(target, env));
  Configuration conf = new Configuration();
  config.configureForHadoop(conf, env);

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(targetP)) {
    // remove the existing target first so that the rename does not fail
    fs.delete(targetP, true);
  }

  fs.rename(sourceP, targetP);
}
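/*
 * A minimal usage sketch for moveInHDFS - not part of the original source.
 * The package locations, the HDFSConfig host/port setters and all paths are
 * assumptions for illustration only.
 */
import weka.core.Environment;
import weka.distributed.hadoop.HDFSConfig;
import weka.distributed.hadoop.HDFSUtils;

public class MoveInHDFSExample {
  public static void main(String[] args) throws Exception {
    HDFSConfig config = new HDFSConfig(); // assumed default constructor
    config.setHDFSHost("localhost"); // assumed setter and NameNode host
    config.setHDFSPort("8020"); // assumed setter and NameNode port

    // relocate a job output file within HDFS; any existing target is deleted
    HDFSUtils.moveInHDFS("/user/weka/output/part-r-00000",
      "/user/weka/results/part-r-00000", config, Environment.getSystemWide());
  }
}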
/**
 * Copy a local file into HDFS
 *
 * @param localFile the path to the local file
 * @param hdfsPath the destination path in HDFS
 * @param config the HDFSConfig containing connection details
 * @param env environment variables
 * @param overwrite true if the destination should be overwritten (if it
 *          already exists)
 * @throws IOException if a problem occurs
 */
public static void copyToHDFS(String localFile, String hdfsPath,
  HDFSConfig config, Environment env, boolean overwrite) throws IOException {
  File local = new File(localFile);
  URI localURI = local.toURI();

  Path localPath = new Path(localURI);
  Path destPath = new Path(resolvePath(hdfsPath, env));

  Configuration conf = new Configuration();
  config.configureForHadoop(conf, env);

  FileSystem fs = FileSystem.get(conf);

  // only copy if the file doesn't exist or overwrite is specified
  if (!fs.exists(destPath) || overwrite) {
    if (fs.exists(destPath)) {
      fs.delete(destPath, true);
    }
    fs.copyFromLocalFile(localPath, destPath);
  }
}
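/*
 * A usage sketch for copyToHDFS - not part of the original source. It ships a
 * local ARFF file into HDFS, replacing any existing copy because overwrite is
 * true. The configuration setup mirrors the (assumed) sketch above; the file
 * paths are hypothetical.
 */
import weka.core.Environment;
import weka.distributed.hadoop.HDFSConfig;
import weka.distributed.hadoop.HDFSUtils;

public class CopyToHDFSExample {
  public static void main(String[] args) throws Exception {
    HDFSConfig config = new HDFSConfig(); // assumed default constructor
    config.setHDFSHost("localhost"); // assumed setter and NameNode host
    config.setHDFSPort("8020"); // assumed setter and NameNode port

    HDFSUtils.copyToHDFS("/home/user/iris.arff", "/user/weka/iris.arff",
      config, Environment.getSystemWide(), true);
  }
}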
/**
 * Substitute environment variables in the supplied string.
 *
 * @param orig the string to modify
 * @return the string with environment variables resolved
 */
public String environmentSubstitute(String orig) {
  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  if (m_env != null) {
    try {
      orig = m_env.substitute(orig);
    } catch (Exception ex) {
      // ignore - return the string unmodified if a variable can't be resolved
    }
  }

  return orig;
}
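/*
 * A small sketch of what the substitution does - not part of the original
 * source. Environment.getSystemWide() exposes Java system properties and
 * process environment variables for ${var}-style references; the exact
 * output depends on the local machine.
 */
import weka.core.Environment;

public class EnvironmentSubstituteExample {
  public static void main(String[] args) throws Exception {
    Environment env = Environment.getSystemWide();

    // prints something like "/home/someuser/wekaOutput" on a Linux machine
    System.out.println(env.substitute("${user.home}/wekaOutput"));
  }
}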
/**
 * Main method for testing this class
 *
 * @param args command line args (ignored)
 */
public static void main(String[] args) {
  try {
    final javax.swing.JFrame jf = new javax.swing.JFrame("EnvironmentField");
    jf.getContentPane().setLayout(new BorderLayout());
    final EnvironmentField f = new EnvironmentField("A label here");
    jf.getContentPane().add(f, BorderLayout.CENTER);
    Environment env = Environment.getSystemWide();
    f.setEnvironment(env);
    jf.addWindowListener(new java.awt.event.WindowAdapter() {
      @Override
      public void windowClosing(java.awt.event.WindowEvent e) {
        jf.dispose();
        System.exit(0);
      }
    });
    jf.pack();
    jf.setVisible(true);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Set the current directory for the embedded file chooser, resolving any
 * environment variables in the supplied path first.
 *
 * @param directory the directory (or file) to make current
 */
public void setCurrentDirectory(File directory) {
  String tmpString = directory.toString();
  if (Environment.containsEnvVariables(tmpString)) {
    try {
      tmpString = m_env.substitute(tmpString);
    } catch (Exception ex) {
      // ignore - use the path as supplied
    }
  }

  File tmp2 = new File((new File(tmpString)).getAbsolutePath());
  JFileChooser embeddedEditor = (JFileChooser) m_fileEditor.getCustomEditor();
  if (tmp2.isDirectory()) {
    embeddedEditor.setCurrentDirectory(tmp2);
    if (embeddedEditor.getFileSelectionMode() == JFileChooser.DIRECTORIES_ONLY) {
      super.setAsText(directory.toString());
    }
  } else {
    embeddedEditor.setSelectedFile(tmp2);
    if (embeddedEditor.getFileSelectionMode() == JFileChooser.FILES_ONLY) {
      super.setAsText(directory.toString());
    }
  }
}
/**
 * Constructor
 *
 * @param matchDetails the internally encoded match details string
 * @param newAttName the name of the new attribute that will be the label
 * @param consumeNonMatching true if non-matching instances should be consumed
 * @param nominalBinary true if, in the case where no user labels have been
 *          supplied, the new attribute should be a nominal binary one rather
 *          than numeric
 * @param inputStructure the incoming instances structure
 * @param statusMessagePrefix an optional status message prefix string for
 *          logging
 * @param log the log to use (may be null)
 * @param env environment variables
 * @throws Exception if a problem occurs
 */
public SubstringLabelerRules(String matchDetails, String newAttName,
  boolean consumeNonMatching, boolean nominalBinary,
  Instances inputStructure, String statusMessagePrefix, Logger log,
  Environment env) throws Exception {

  m_matchRules = matchRulesFromInternal(matchDetails, inputStructure,
    statusMessagePrefix, log, env);
  m_inputStructure = new Instances(inputStructure, 0);
  m_attName = newAttName;
  m_statusMessagePrefix = statusMessagePrefix;
  m_consumeNonMatching = consumeNonMatching;
  m_nominalBinary = nominalBinary;
  m_env = env;

  makeOutputStructure();
}
/**
 * Get a list of match rules from an internally encoded match specification
 *
 * @param matchDetails the internally encoded specification of the match rules
 * @param inputStructure the input instances structure
 * @param statusMessagePrefix an optional status message prefix for logging
 * @param log the log to use
 * @param env environment variables
 * @return a list of match rules
 */
public static List<SubstringLabelerMatchRule> matchRulesFromInternal(
  String matchDetails, Instances inputStructure, String statusMessagePrefix,
  Logger log, Environment env) {

  List<SubstringLabelerMatchRule> matchRules =
    new ArrayList<SubstringLabelerMatchRule>();

  String[] matchParts = matchDetails.split(MATCH_RULE_SEPARATOR);
  for (String p : matchParts) {
    SubstringLabelerMatchRule m = new SubstringLabelerMatchRule(p.trim());
    m.m_statusMessagePrefix =
      statusMessagePrefix == null ? "" : statusMessagePrefix;
    m.m_logger = log;
    m.init(env, inputStructure);
    matchRules.add(m);
  }

  return matchRules;
}
/**
 * Run the ARFF job (if necessary)
 *
 * @return true if the ARFF job succeeded
 * @throws DistributedWekaException if a problem occurs
 */
protected boolean initializeAndRunArffJob() throws DistributedWekaException {

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  // Run the ARFF header job first
  if (m_runArffJob) {
    m_arffHeaderJob.setEnvironment(m_env);
    m_arffHeaderJob.setLog(getLog());
    m_arffHeaderJob.setStatusMessagePrefix(m_statusMessagePrefix);
    if (!m_arffHeaderJob.runJob()) {
      statusMessage("Unable to continue - creating the ARFF header failed!");
      logMessage("Unable to continue - creating the ARFF header failed!");
      return false;
    }
  }

  // configure our output subdirectory
  String outputPath = m_mrConfig.getOutputPath();
  outputPath += OUTPUT_SUBDIR;
  outputPath = environmentSubstitute(outputPath);
  m_mrConfig.setOutputPath(outputPath);

  return true;
}
/**
 * Utility method to resolve all environment variables in a given path
 *
 * @param path the path in HDFS
 * @param env environment variables to use
 * @return the path with all environment variables resolved
 */
public static String resolvePath(String path, Environment env) {
  if (env != null) {
    try {
      path = env.substitute(path);
    } catch (Exception ex) {
      // ignore - return the path unmodified
    }
  }

  return path;
}
/**
 * Copy a set of local files into the Weka installation directory in HDFS
 *
 * @param localFiles a list of local files to copy
 * @param config the HDFSConfig containing connection details
 * @param env environment variables
 * @param overwrite true if the destination file should be overwritten (if it
 *          exists already)
 * @throws IOException if a problem occurs
 */
public static void copyFilesToWekaHDFSInstallationDirectory(
  List<String> localFiles, HDFSConfig config, Environment env,
  boolean overwrite) throws IOException {

  for (String local : localFiles) {
    String hdfsDest = local.substring(local.lastIndexOf(File.separator) + 1);
    hdfsDest = WEKA_LIBRARIES_LOCATION + hdfsDest;

    copyToHDFS(local, hdfsDest, config, env, overwrite);
  }
}
/**
 * Set the model to use
 *
 * @param model the model to use
 * @param modelHeader the header of the training data used to train the model
 * @param dataHeader the header of the incoming data
 * @throws DistributedWekaException if more than 50% of the attributes
 *           expected by the model are missing or have a type mismatch with
 *           the incoming data
 */
public void setModel(Object model, Instances modelHeader,
  Instances dataHeader) throws DistributedWekaException {

  m_missingMismatch.clear();

  if (dataHeader == null || modelHeader == null) {
    throw new DistributedWekaException(
      "Can't continue without a header for the model and incoming data");
  }

  try {
    m_isUsingStringAttributes = modelHeader.checkForStringAttributes();
    m_model = ScoringModel.createScorer(model);
    m_model.setHeader(modelHeader);

    if (m_model.isBatchPredicor()) {
      m_batchScoringData = new Instances(modelHeader, 0);
      Environment env = Environment.getSystemWide();
      String batchSize = ((BatchPredictor) model).getBatchSize();
      if (!DistributedJobConfig.isEmpty(batchSize)) {
        m_batchSize = Integer.parseInt(env.substitute(batchSize));
      } else {
        m_batchSize = 1000; // default batch size
      }
    }
  } catch (Exception ex) {
    throw new DistributedWekaException(ex);
  }

  buildAttributeMap(modelHeader, dataHeader);
}
/**
 * Load serialized models to include in the ensemble
 *
 * @param data training instances (used in a header compatibility check with
 *          each of the loaded models)
 * @throws Exception if there is a problem de-serializing a model
 */
private void loadClassifiers(Instances data) throws Exception {
  for (String path : m_classifiersToLoad) {
    if (Environment.containsEnvVariables(path)) {
      try {
        path = m_env.substitute(path);
      } catch (Exception ex) {
        // ignore - try the path as supplied
      }
    }

    File toLoad = new File(path);
    if (!toLoad.isFile()) {
      throw new Exception("\"" + path + "\" does not seem to be a valid file!");
    }

    // try-with-resources ensures the stream is closed on all exit paths
    try (ObjectInputStream is = new ObjectInputStream(
      new BufferedInputStream(new FileInputStream(toLoad)))) {
      Object c = is.readObject();
      if (!(c instanceof Classifier)) {
        throw new Exception("\"" + path + "\" does not contain a classifier!");
      }

      Object header = is.readObject();
      if (header instanceof Instances) {
        if (data != null && !data.equalHeaders((Instances) header)) {
          throw new Exception("\"" + path + "\" was trained with data that is "
            + "of a different structure than the incoming training data");
        }
      }
      if (header == null) {
        System.out.println("[Vote] warning: no header instances for \""
          + path + "\"");
      }

      addPreBuiltClassifier((Classifier) c);
    }
  }
}
/**
 * Set the path from which to load a model. Loading occurs when the first test
 * instance is received or getModelHeader() is called programmatically.
 * Environment variables can be used in the supplied path - e.g.
 * ${HOME}/myModel.model.
 *
 * @param modelPath the path to the model to load.
 * @throws Exception if a problem occurs during loading.
 */
public void setModelPath(String modelPath) throws Exception {
  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  m_modelPath = modelPath;
}
/**
 * Constructs a new ImageSaver
 */
public ImageSaver() {
  useDefaultVisual();
  setLayout(new BorderLayout());
  add(m_visual, BorderLayout.CENTER);

  m_env = Environment.getSystemWide();
}
/**
 * Constructor
 */
public Join() {
  useDefaultVisual();
  setLayout(new BorderLayout());
  add(m_visual, BorderLayout.CENTER);

  m_env = Environment.getSystemWide();
  m_stopRequested = new AtomicBoolean(false);
}
/**
 * Constructor
 *
 * @param matchDetails the internally encoded match details string
 * @param inputStructure the incoming instances structure
 * @param statusMessagePrefix an optional status message prefix string for
 *          logging
 * @param log the log to use (may be null)
 * @param env environment variables
 */
public SubstringReplacerRules(String matchDetails, Instances inputStructure,
  String statusMessagePrefix, Logger log, Environment env) {

  m_matchRules = matchRulesFromInternal(matchDetails, inputStructure,
    statusMessagePrefix, log, env);
  m_inputStructure = new Instances(inputStructure);
  m_outputStructure = new Instances(inputStructure).stringFreeStructure();
  m_env = env;
  m_statusMessagePrefix = statusMessagePrefix;
}
/**
 * Main method for testing this class.
 *
 * <pre>
 * Usage:
 *
 * FlowRunner <serialized kf file>
 * </pre>
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
  System.setProperty("apple.awt.UIElement", "true");
  weka.core.logging.Logger.log(weka.core.logging.Logger.Level.INFO,
    "Logging started");
  if (args.length < 1) {
    System.err.println("Usage:\n\nFlowRunner <serialized kf file> [-s]\n\n"
      + "\tUse -s to launch start points sequentially (default launches "
      + "in parallel).");
  } else {
    try {
      FlowRunner fr = new FlowRunner();
      FlowRunner.SimpleLogger sl = new FlowRunner.SimpleLogger();
      String fileName = args[0];

      if (args.length == 2 && args[1].equals("-s")) {
        fr.setStartSequentially(true);
      }

      // start with the system-wide vars
      Environment env = Environment.getSystemWide();

      fr.setLog(sl);
      fr.setEnvironment(env);

      fr.load(fileName);
      fr.run();
      fr.waitUntilFinished();
      System.out.println("Finished all flows.");
      System.exit(0); // exit normally after a successful run
    } catch (Exception ex) {
      ex.printStackTrace();
      System.err.println(ex.getMessage());
    }
  }
}
/**
 * Re-initialize transient state after deserialization.
 *
 * @param aStream the stream to read from
 * @throws IOException if a problem occurs
 * @throws ClassNotFoundException if a class can't be found
 */
private void readObject(ObjectInputStream aStream) throws IOException,
  ClassNotFoundException {
  aStream.defaultReadObject();

  // set a default environment to use
  m_env = Environment.getSystemWide();
}
/**
 * Initialize this attribute spec by resolving any environment variables and
 * setting up the date format (if necessary)
 *
 * @param env environment variables to use
 */
public void init(Environment env) {
  m_nameS = m_name;
  m_typeS = m_type;
  m_valueS = m_value;

  try {
    m_nameS = env.substitute(m_nameS);
    m_typeS = env.substitute(m_typeS);
    m_valueS = env.substitute(m_valueS);
  } catch (Exception ex) {
    // ignore - use the unsubstituted values
  }

  if (m_typeS.toLowerCase().startsWith("date") && m_typeS.indexOf(":") > 0) {
    String format = m_typeS.substring(m_typeS.indexOf(":") + 1);
    m_dateFormat = new SimpleDateFormat(format);

    if (!m_valueS.toLowerCase().equals("now")) {
      try {
        m_parsedDate = m_dateFormat.parse(m_valueS);
      } catch (ParseException e) {
        throw new IllegalArgumentException("Date value \"" + m_valueS
          + "\" can't be parsed with formatting string \"" + format + "\"");
      }
    }
  }
}
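/*
 * An illustration of the type convention that init() parses - not part of the
 * original source, and the example values are hypothetical. A type of the
 * form "date:<format>" carries a SimpleDateFormat pattern after the colon,
 * and the special value "now" stands for the current time.
 */
import java.text.SimpleDateFormat;
import java.util.Date;

public class DateSpecSketch {
  public static void main(String[] args) throws Exception {
    String type = "date:yyyy-MM-dd"; // as a user might supply it
    String format = type.substring(type.indexOf(":") + 1);

    // this mirrors what init() stores in m_parsedDate for a non-"now" value
    Date parsed = new SimpleDateFormat(format).parse("2014-06-01");
    System.out.println(parsed);
  }
}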
/**
 * Gets a string that describes the start action. The KnowledgeFlow uses this
 * in the popup contextual menu for the component. The string can be preceded
 * by a '$' character to indicate that the component can't be started at
 * present.
 *
 * @return a string describing the start action.
 */
@Override
public String getStartMessage() {
  boolean ok = true;
  String entry = "Start loading";

  if (m_ioThread == null) {
    if (m_Loader instanceof FileSourcedConverter) {
      String temp = ((FileSourcedConverter) m_Loader).retrieveFile().getPath();
      Environment env = (m_env == null) ? Environment.getSystemWide() : m_env;
      try {
        temp = env.substitute(temp);
      } catch (Exception ex) {
        // ignore - check the path as supplied
      }

      File tempF = new File(temp);

      // forward slashes are platform independent for resources read from the
      // classpath
      String tempFixedPathSepForResource =
        temp.replace(File.separatorChar, '/');
      if (!tempF.isFile()
        && this.getClass().getClassLoader()
          .getResource(tempFixedPathSepForResource) == null) {
        ok = false;
      }
    }

    if (!ok) {
      entry = "$" + entry;
    }
  }

  return entry;
}
/**
 * Set the environment variables to use.
 *
 * @param env the environment variables to use
 */
@Override
public void setEnvironment(Environment env) {
  m_env = env;
  try {
    // causes setSource(File) to be called and
    // forces the input stream to be reset with a new file
    // that has environment variables resolved with those
    // in the new Environment object
    reset();
  } catch (IOException ex) {
    // we won't complain about it here...
  }
}
/**
 * Constructs a new TextSaver
 */
public TextSaver() {
  useDefaultVisual();
  setLayout(new BorderLayout());
  add(m_visual, BorderLayout.CENTER);

  m_env = Environment.getSystemWide();
}
@Override
public void init(Instances structure, Environment env) {
  super.init(structure, env);

  // recursively initialize all child expression nodes
  for (ExpressionNode n : m_children) {
    n.init(structure, env);
  }
}
/**
 * Set the flow for the KnowledgeFlow to edit. Assumes that the client has
 * loaded a Vector of beans and a Vector of connections. The supplied beans
 * and connections are deep-copied via serialization before being set in the
 * layout. The beans get added to the flow at position 0.
 *
 * @param v a Vector containing a Vector of beans and a Vector of connections
 * @exception Exception if something goes wrong
 */
@SuppressWarnings("unchecked")
public void setFlow(Vector<Vector<?>> v) throws Exception {
  if (getAllowMultipleTabs()) {
    throw new Exception("[KnowledgeFlow] setFlow() - can only set a flow in "
      + "single tab only mode");
  }

  m_beanLayout.removeAll();
  BeanInstance.init();
  BeanConnection.init();

  // deep copy the supplied beans and connections via serialization
  SerializedObject so = new SerializedObject(v);
  Vector<Vector<?>> copy = (Vector<Vector<?>>) so.getObject();

  Vector<Object> beans = (Vector<Object>) copy.elementAt(0);
  Vector<BeanConnection> connections =
    (Vector<BeanConnection>) copy.elementAt(1);

  // reset environment variables
  m_flowEnvironment = new Environment();
  integrateFlow(beans, connections, true, false);
  revalidate();
  notifyIsDirty();
}
/**
 * Initialize by resolving which attribute this rule operates on. The
 * attribute can be specified as the special constants "/first" or "/last",
 * by name, or as a 0-based index.
 *
 * @param env environment variables to use
 * @param structure the structure of the incoming instances
 */
public void init(Environment env, Instances structure) {
  String attNameI = m_attributeNameOrIndex;
  try {
    attNameI = env.substitute(attNameI);
  } catch (Exception ex) {
    // ignore - use the name/index as supplied
  }

  if (attNameI.equalsIgnoreCase("/first")) {
    m_attribute = structure.attribute(0);
  } else if (attNameI.equalsIgnoreCase("/last")) {
    m_attribute = structure.attribute(structure.numAttributes() - 1);
  } else {
    // try actual attribute name
    m_attribute = structure.attribute(attNameI);

    if (m_attribute == null) {
      // try as an index
      try {
        int index = Integer.parseInt(attNameI);
        m_attribute = structure.attribute(index);
      } catch (NumberFormatException n) {
        throw new IllegalArgumentException("Unable to locate attribute "
          + attNameI + " as either a named attribute or as a valid "
          + "attribute index");
      }
    }
  }
}
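/*
 * A sketch of the resolution order that init() implements - not part of the
 * original source; the attribute names are hypothetical. The special tokens
 * "/first" and "/last" are tried first, then the value is treated as an
 * attribute name, and finally as a 0-based numeric index.
 */
import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class AttributeResolutionSketch {
  public static void main(String[] args) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("sepallength"));
    atts.add(new Attribute("petallength"));
    Instances structure = new Instances("flowers", atts, 0);

    System.out.println(structure.attribute(0).name()); // "/first" or index "0"
    System.out.println(
      structure.attribute(structure.numAttributes() - 1).name()); // "/last"
    System.out.println(structure.attribute("petallength").name()); // by name
  }
}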
/**
 * Constructs a new Sorter
 */
public Sorter() {
  useDefaultVisual();
  setLayout(new BorderLayout());
  add(m_visual, BorderLayout.CENTER);

  m_env = Environment.getSystemWide();
  m_stopRequested = new AtomicBoolean(false);
}
/**
 * Set the environment variables to use.
 *
 * @param env the environment variables to use
 */
@Override
public void setEnvironment(Environment env) {
  m_env = env;
}
/**
 * Initializes and runs the phase that creates randomly shuffled data chunks
 * from the input file(s).
 *
 * @param header the header of the training data
 * @return true if the job is successful
 * @throws DistributedWekaException if a problem occurs
 * @throws IOException if a problem occurs
 */
protected boolean initializeAndRunRandomizeDataJob(Instances header)
  throws DistributedWekaException, IOException {

  if (!getRandomlyShuffleData()) {
    return true;
  }

  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  logMessage("Checking to see if randomize data chunk job is needed...");
  statusMessage("Checking to see if randomize data chunk job is needed...");

  m_randomizeJob.setEnvironment(m_env);
  m_randomizeJob.setLog(getLog());
  m_randomizeJob.setStatusMessagePrefix(m_statusMessagePrefix);

  if (!DistributedJobConfig.isEmpty(getRandomSeed())) {
    m_randomizeJob.setRandomSeed(environmentSubstitute(getRandomSeed()));
  }
  m_randomizeJob.setNumRandomizedDataChunks(getRandomlyShuffleDataNumChunks());

  // make sure that the class attribute does not get set by default!
  m_randomizeJob.setDontDefaultToLastAttIfClassNotSpecified(true);

  if (!m_randomizeJob.runJob()) {
    statusMessage("Unable to continue - randomized data chunk job failed!");
    logMessage("Unable to continue - randomized data chunk job failed!");
    return false;
  }

  // unset any mapredMaxSplitSize here because we now have the number of maps
  // determined by the number of data chunks generated by the randomize job
  m_mrConfig.setMapredMaxSplitSize("");

  // alter the input path to point to the output directory of the randomize job
  String randomizeOutputPath = m_randomizeJob.getRandomizedChunkOutputPath();
  m_mrConfig.setInputPaths(randomizeOutputPath);

  return true;
}