/**
 * Batch scoring method. Delegates to the base learner's batch prediction
 * API when it implements BatchPredictor; otherwise it falls back to
 * calling distributionForInstance() once per instance.
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {

  if (!(getClassifier() instanceof BatchPredictor)) {
    // No batch API available: score instance by instance.
    double[][] dists = new double[insts.numInstances()][insts.numClasses()];
    for (int index = 0; index < insts.numInstances(); index++) {
      dists[index] = distributionForInstance(insts.instance(index));
    }
    return dists;
  }

  // Push the test data through the filter first; the filter must not add
  // or drop instances, otherwise predictions can't be aligned with inputs.
  Instances transformed = Filter.useFilter(insts, m_Filter);
  if (transformed.numInstances() != insts.numInstances()) {
    throw new WekaException(
      "FilteredClassifier: filter has returned more/less instances than required.");
  }

  return ((BatchPredictor) getClassifier()).distributionsForInstances(transformed);
}
/**
 * Prints the classifications to the buffer.
 *
 * @param classifier the classifier to use for printing the classifications
 * @param testset the test instances
 * @throws Exception if check fails or error occurs during printing of
 *           classifications
 */
public void printClassifications(Classifier classifier, Instances testset)
  throws Exception {

  // Only take the batch path when the learner actually benefits from it.
  boolean efficientBatch = classifier instanceof BatchPredictor
    && ((BatchPredictor) classifier).implementsMoreEfficientBatchPrediction();

  if (efficientBatch) {
    double[][] dists =
      ((BatchPredictor) classifier).distributionsForInstances(testset);
    for (int index = 0; index < testset.numInstances(); index++) {
      printClassification(dists[index], testset.instance(index), index);
    }
  } else {
    for (int index = 0; index < testset.numInstances(); index++) {
      doPrintClassification(classifier, testset.instance(index), index);
    }
  }
}
/**
 * Batch scoring method. Uses the base learner's batch prediction method
 * when it implements BatchPredictor, and otherwise obtains each
 * distribution via distributionForInstance().
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {
  if (getClassifier() instanceof BatchPredictor) {
    Instances filtered = Filter.useFilter(insts, m_Filter);
    // The filter must preserve the instance count so predictions line up.
    if (insts.numInstances() != filtered.numInstances()) {
      throw new WekaException("FilteredClassifier: filter has returned more/less instances than required.");
    }
    BatchPredictor batchLearner = (BatchPredictor) getClassifier();
    return batchLearner.distributionsForInstances(filtered);
  }

  int numInsts = insts.numInstances();
  double[][] predictions = new double[numInsts][insts.numClasses()];
  for (int i = 0; i < numInsts; i++) {
    predictions[i] = distributionForInstance(insts.instance(i));
  }
  return predictions;
}
/**
 * Prints the classifications to the buffer.
 *
 * @param classifier the classifier to use for printing the classifications
 * @param testset the data source to obtain the test instances from
 * @throws Exception if check fails or error occurs during printing of
 *           classifications
 */
public void printClassifications(Classifier classifier, DataSource testset)
  throws Exception {
  int i;
  Instances test;
  Instance inst;

  i = 0;
  testset.reset();

  // Load the whole data set only when the classifier actually provides a
  // more efficient batch implementation; otherwise stream instance by
  // instance so memory use stays bounded. (The bare "instanceof
  // BatchPredictor" test alone is too weak - many classifiers implement
  // the interface with a trivial per-instance loop.)
  if (classifier instanceof BatchPredictor
    && ((BatchPredictor) classifier).implementsMoreEfficientBatchPrediction()) {
    test = testset.getDataSet(m_Header.classIndex());
    double[][] predictions =
      ((BatchPredictor) classifier).distributionsForInstances(test);
    for (i = 0; i < test.numInstances(); i++) {
      printClassification(predictions[i], test.instance(i), i);
    }
  } else {
    test = testset.getStructure(m_Header.classIndex());
    while (testset.hasMoreElements(test)) {
      inst = testset.nextElement(test);
      doPrintClassification(classifier, inst, i);
      i++;
    }
  }
}
/**
 * Prints the classifications to the buffer.
 *
 * @param classifier the classifier to use for printing the classifications
 * @param testset the test instances
 * @throws Exception if check fails or error occurs during printing of
 *           classifications
 */
public void printClassifications(Classifier classifier, Instances testset)
  throws Exception {
  int i;

  // Use batch scoring only when the classifier reports an actually more
  // efficient batch implementation; a plain "instanceof BatchPredictor"
  // check is insufficient because many classifiers satisfy the interface
  // with a trivial per-instance loop.
  if (classifier instanceof BatchPredictor
    && ((BatchPredictor) classifier).implementsMoreEfficientBatchPrediction()) {
    double[][] predictions =
      ((BatchPredictor) classifier).distributionsForInstances(testset);
    for (i = 0; i < testset.numInstances(); i++) {
      printClassification(predictions[i], testset.instance(i), i);
    }
  } else {
    for (i = 0; i < testset.numInstances(); i++) {
      doPrintClassification(classifier, testset.instance(i), i);
    }
  }
}
/**
 * Set the model to use.
 *
 * @param model the model to use
 * @param modelHeader the header of the training data used to train the model
 * @param dataHeader the header of the incoming data
 * @throws DistributedWekaException if more than 50% of the attributes
 *           expected by the model are missing or have a type mismatch with
 *           the incoming data
 */
public void setModel(Object model, Instances modelHeader, Instances dataHeader)
  throws DistributedWekaException {

  m_missingMismatch.clear();

  if (dataHeader == null || modelHeader == null) {
    throw new DistributedWekaException(
      "Can't continue without a header for the model and incoming data");
  }
  try {
    m_isUsingStringAttributes = modelHeader.checkForStringAttributes();
    m_model = ScoringModel.createScorer(model);

    // modelHeader is guaranteed non-null here (validated above), so set it
    // unconditionally - the previous redundant null check has been removed.
    m_model.setHeader(modelHeader);

    if (m_model.isBatchPredicor()) {
      m_batchScoringData = new Instances(modelHeader, 0);
      Environment env = Environment.getSystemWide();
      // The configured batch size may contain environment variables; fall
      // back to 1000 instances per batch when nothing is configured.
      String batchSize = ((BatchPredictor) model).getBatchSize();
      if (!DistributedJobConfig.isEmpty(batchSize)) {
        m_batchSize = Integer.parseInt(env.substitute(batchSize));
      } else {
        m_batchSize = 1000;
      }
    }
  } catch (Exception ex) {
    // Wrap any failure (scorer creation, header setup, batch-size parsing)
    // in the job-level exception type, preserving the cause.
    throw new DistributedWekaException(ex);
  }

  buildAttributeMap(modelHeader, dataHeader);
}
/**
 * Set the batch size to use. Passed through to the base learner when it
 * implements BatchPredictor; ignored otherwise.
 *
 * @param size the batch size to use
 */
public void setBatchSize(String size) {
  Classifier base = getClassifier();
  if (base instanceof BatchPredictor) {
    ((BatchPredictor) base).setBatchSize(size);
  }
}
/**
 * Gets the preferred batch size from the base learner when it implements
 * BatchPredictor; otherwise reports 1 as the preferred batch size.
 *
 * @return the batch size to use
 */
public String getBatchSize() {
  Classifier base = getClassifier();
  if (!(base instanceof BatchPredictor)) {
    return "1";
  }
  return ((BatchPredictor) base).getBatchSize();
}
/**
 * Returns true if the base classifier implements BatchPredictor and is able
 * to generate batch predictions efficiently.
 *
 * @return true if the base classifier can generate batch predictions
 *         efficiently
 */
public boolean implementsMoreEfficientBatchPrediction() {
  // Both conditions must hold: the interface, and the capability flag.
  return getClassifier() instanceof BatchPredictor
    && ((BatchPredictor) getClassifier()).implementsMoreEfficientBatchPrediction();
}
/**
 * Prints the classifications to the buffer.
 *
 * @param classifier the classifier to use for printing the classifications
 * @param testset the data source to obtain the test instances from
 * @throws Exception if check fails or error occurs during printing of
 *           classifications
 */
public void printClassifications(Classifier classifier, DataSource testset)
  throws Exception {

  testset.reset();

  // Batch path is only worthwhile when the learner reports a genuinely
  // more efficient batch implementation.
  boolean efficientBatch = classifier instanceof BatchPredictor
    && ((BatchPredictor) classifier).implementsMoreEfficientBatchPrediction();

  if (efficientBatch) {
    Instances all = testset.getDataSet(m_Header.classIndex());
    double[][] dists =
      ((BatchPredictor) classifier).distributionsForInstances(all);
    for (int index = 0; index < all.numInstances(); index++) {
      printClassification(dists[index], all.instance(index), index);
    }
  } else {
    // Stream instance by instance so the full data set never needs to be
    // held in memory.
    Instances structure = testset.getStructure(m_Header.classIndex());
    int index = 0;
    while (testset.hasMoreElements(structure)) {
      Instance current = testset.nextElement(structure);
      doPrintClassification(classifier, current, index);
      index++;
    }
  }
}
/**
 * Set the batch size to use. Forwarded to the base learner when it
 * implements BatchPredictor; a no-op otherwise.
 *
 * @param size the batch size to use
 */
public void setBatchSize(String size) {
  if (!(getClassifier() instanceof BatchPredictor)) {
    return;
  }
  ((BatchPredictor) getClassifier()).setBatchSize(size);
}
/**
 * Gets the preferred batch size from the base learner when it implements
 * BatchPredictor, and 1 otherwise.
 *
 * @return the batch size to use
 */
public String getBatchSize() {
  return getClassifier() instanceof BatchPredictor
    ? ((BatchPredictor) getClassifier()).getBatchSize()
    : "1";
}
/**
 * Batch scoring method.
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {
  // Guard: batch scoring is only valid for models that support it.
  boolean supportsBatch = isBatchPredictor();
  if (!supportsBatch) {
    throw new Exception("Weka model cannot produce batch predictions!");
  }
  BatchPredictor batchModel = (BatchPredictor) m_model;
  return batchModel.distributionsForInstances(insts);
}
/**
 * Finalize this task. This is where the actual evaluation occurs in the batch
 * case - the order of the data gets randomized (and stratified if class is
 * nominal) and the test fold extracted.
 *
 * @throws Exception if a problem occurs
 */
public void finalizeTask() throws Exception {
  if (m_classifier == null) {
    throw new Exception("No classifier has been set");
  }

  // A truly incremental (updateable, not batch-trained) classifier was
  // already evaluated as the data streamed past, so there is nothing left
  // to do except possibly down-sample the stored predictions for auc/prc.
  if (m_classifier instanceof UpdateableClassifier
    && !m_batchTrainedIncremental) {
    if (m_predFrac > 0) {
      ((AggregateableEvaluationWithPriors) m_eval).prunePredictions(
        m_predFrac, m_seed);
    }

    return;
  }

  m_trainingHeader.compactify();

  // NOTE(review): randomize()/stratify() below mutate m_trainingHeader in
  // place (test is just an alias until testCV() is called) - presumably the
  // header is not reused afterwards; confirm before refactoring.
  Instances test = m_trainingHeader;
  Random r = new Random(m_seed);
  test.randomize(r);
  if (test.classAttribute().isNominal() && m_totalFolds > 1) {
    test.stratify(m_totalFolds);
  }

  // Extract the requested test fold (m_foldNumber is 1-based; testCV takes
  // a 0-based fold index).
  if (m_totalFolds > 1 && m_foldNumber >= 1) {
    test = test.testCV(m_totalFolds, m_foldNumber - 1);
  }

  m_numTestInstances = test.numInstances();

  if (m_classifier instanceof BatchPredictor) {
    // this method always stores the predictions for AUC, so we need to get
    // rid of them if we're not doing any AUC computation
    m_eval.evaluateModel(m_classifier, test);
    if (m_predFrac < 0) {
      ((AggregateableEvaluationWithPriors) m_eval).deleteStoredPredictions();
    }
  } else {
    // Per-instance evaluation; only record predictions when a fraction is
    // being kept for auc/prc computation.
    for (int i = 0; i < test.numInstances(); i++) {
      if (m_predFrac > 0) {
        m_eval.evaluateModelOnceAndRecordPrediction(m_classifier,
          test.instance(i));
      } else {
        m_eval.evaluateModelOnce(m_classifier, test.instance(i));
      }
    }
  }

  // down-sample predictions for auc/prc
  if (m_predFrac > 0) {
    ((AggregateableEvaluationWithPriors) m_eval).prunePredictions(m_predFrac,
      m_seed);
  }
}
/**
 * Return true if the underlying model is a BatchPredictor.
 *
 * <p>Note: the method name's spelling ("Predicor") is existing public API
 * and is preserved for backward compatibility.
 *
 * @return true if the underlying model is a BatchPredictor
 */
public boolean isBatchPredicor() {
  // instanceof is null-safe (null yields false), so the former explicit
  // null ternary and double getModel() call were redundant.
  return getModel() instanceof BatchPredictor;
}
/**
 * Returns predictions in the case where the base model is a BatchPredictor.
 *
 * @param insts the instances to provide predictions for
 * @return the predictions
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {
  // Caller is expected to have verified that the model supports batch
  // prediction; the cast fails otherwise.
  BatchPredictor batchModel = (BatchPredictor) getModel();
  return batchModel.distributionsForInstances(insts);
}
/**
 * Returns true if the encapsulated Weka model can produce predictions in a
 * batch.
 *
 * @return true if the encapsulated Weka model can produce predictions in a
 *         batch
 */
public boolean isBatchPredictor() {
  // Class.isInstance is the reflective equivalent of instanceof and is
  // likewise null-safe.
  return BatchPredictor.class.isInstance(m_model);
}